diff --git a/.cirrus.yml b/.cirrus.yml index f53c519447d..02c43a074a1 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -1,61 +1,6 @@ env: CIRRUS_CLONE_DEPTH: 1 -freebsd_12_task: - freebsd_instance: - image_family: freebsd-12-2 - cpu: 8 - memory: 8G - install_script: - - ASSUME_ALWAYS_YES=yes pkg bootstrap -f ; - - pkg install -y bash curl cyrus-sasl git glib gmake gnutls gsed - nettle perl5 pixman pkgconf png usbredir ninja - script: - - mkdir build - - cd build - # TODO: Enable gnutls again once FreeBSD's libtasn1 got fixed - # See: https://gitlab.com/gnutls/libtasn1/-/merge_requests/71 - - ../configure --enable-werror --disable-gnutls - || { cat config.log meson-logs/meson-log.txt; exit 1; } - - gmake -j$(sysctl -n hw.ncpu) - - gmake -j$(sysctl -n hw.ncpu) check V=1 - -macos_task: - osx_instance: - image: catalina-base - install_script: - - brew install pkg-config python gnu-sed glib pixman make sdl2 bash ninja - script: - - mkdir build - - cd build - - ../configure --python=/usr/local/bin/python3 --enable-werror - --extra-cflags='-Wno-error=deprecated-declarations' - || { cat config.log meson-logs/meson-log.txt; exit 1; } - - gmake -j$(sysctl -n hw.ncpu) - - gmake check-unit V=1 - - gmake check-block V=1 - - gmake check-qapi-schema V=1 - - gmake check-softfloat V=1 - - gmake check-qtest-x86_64 V=1 - -macos_xcode_task: - osx_instance: - # this is an alias for the latest Xcode - image: catalina-xcode - install_script: - - brew install pkg-config gnu-sed glib pixman make sdl2 bash ninja - script: - - mkdir build - - cd build - - ../configure --extra-cflags='-Wno-error=deprecated-declarations' --enable-modules - --enable-werror --cc=clang || { cat config.log meson-logs/meson-log.txt; exit 1; } - - gmake -j$(sysctl -n hw.ncpu) - - gmake check-unit V=1 - - gmake check-block V=1 - - gmake check-qapi-schema V=1 - - gmake check-softfloat V=1 - - gmake check-qtest-x86_64 V=1 - windows_msys2_task: timeout_in: 90m windows_container: @@ -67,7 +12,7 @@ windows_msys2_task: CIRRUS_SHELL: powershell MSYS: winsymlinks:nativestrict MSYSTEM: MINGW64 - MSYS2_URL: https://github.com/msys2/msys2-installer/releases/download/2021-01-05/msys2-base-x86_64-20210105.sfx.exe + MSYS2_URL: https://github.com/msys2/msys2-installer/releases/download/2021-04-19/msys2-base-x86_64-20210419.sfx.exe MSYS2_FINGERPRINT: 0 MSYS2_PACKAGES: " diffutils git grep make pkg-config sed @@ -130,7 +75,7 @@ windows_msys2_task: taskkill /F /FI "MODULES eq msys-2.0.dll" tasklist C:\tools\msys64\usr\bin\bash.exe -lc "mv -f /etc/pacman.conf.pacnew /etc/pacman.conf || true" - C:\tools\msys64\usr\bin\bash.exe -lc "pacman --noconfirm -Suu --overwrite=*" + C:\tools\msys64\usr\bin\bash.exe -lc "pacman --noconfirm -Syuu --overwrite=*" Write-Output "Core install time taken: $((Get-Date).Subtract($start_time))" $start_time = Get-Date diff --git a/.github/lockdown.yml b/.github/lockdown.yml deleted file mode 100644 index 07fc2f31eef..00000000000 --- a/.github/lockdown.yml +++ /dev/null @@ -1,34 +0,0 @@ -# Configuration for Repo Lockdown - https://github.com/dessant/repo-lockdown - -# Close issues and pull requests -close: true - -# Lock issues and pull requests -lock: true - -issues: - comment: | - Thank you for your interest in the QEMU project. - - This repository is a read-only mirror of the project's repostories hosted - at https://gitlab.com/qemu-project/qemu.git. - The project does not process issues filed on GitHub. - - The project issues are tracked on Launchpad: - https://bugs.launchpad.net/qemu - - QEMU welcomes bug report contributions. 
You can file new ones on: - https://bugs.launchpad.net/qemu/+filebug - -pulls: - comment: | - Thank you for your interest in the QEMU project. - - This repository is a read-only mirror of the project's repostories hosted - on https://gitlab.com/qemu-project/qemu.git. - The project does not process merge requests filed on GitHub. - - QEMU welcomes contributions of code (either fixing bugs or adding new - functionality). However, we get a lot of patches, and so we have some - guidelines about contributing on the project website: - https://www.qemu.org/contribute/ diff --git a/.github/workflows/lockdown.yml b/.github/workflows/lockdown.yml new file mode 100644 index 00000000000..ad8b8f7e30f --- /dev/null +++ b/.github/workflows/lockdown.yml @@ -0,0 +1,30 @@ +# Configuration for Repo Lockdown - https://github.com/dessant/repo-lockdown + +name: 'Repo Lockdown' + +on: + pull_request_target: + types: opened + +permissions: + pull-requests: write + +jobs: + action: + runs-on: ubuntu-latest + steps: + - uses: dessant/repo-lockdown@v2 + with: + pull-comment: | + Thank you for your interest in the QEMU project. + + This repository is a read-only mirror of the project's repostories hosted + on https://gitlab.com/qemu-project/qemu.git. + The project does not process merge requests filed on GitHub. + + QEMU welcomes contributions of code (either fixing bugs or adding new + functionality). However, we get a lot of patches, and so we have some + guidelines about contributing on the project website: + https://www.qemu.org/contribute/ + lock-pull: true + close-pull: true diff --git a/.gitignore b/.gitignore index 75a4be07240..eb2553026c5 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,5 @@ GTAGS *~ *.ast_raw *.depend_raw +*.swp +*.patch diff --git a/.gitlab-ci.d/buildtest-template.yml b/.gitlab-ci.d/buildtest-template.yml new file mode 100644 index 00000000000..2c7980a4f6a --- /dev/null +++ b/.gitlab-ci.d/buildtest-template.yml @@ -0,0 +1,81 @@ +.native_build_job_template: + stage: build + image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest + before_script: + - JOBS=$(expr $(nproc) + 1) + script: + - if test -n "$LD_JOBS"; + then + scripts/git-submodule.sh update meson ; + fi + - mkdir build + - cd build + - if test -n "$TARGETS"; + then + ../configure --enable-werror --disable-docs ${LD_JOBS:+--meson=git} $CONFIGURE_ARGS --target-list="$TARGETS" ; + else + ../configure --enable-werror --disable-docs ${LD_JOBS:+--meson=git} $CONFIGURE_ARGS ; + fi || { cat config.log meson-logs/meson-log.txt && exit 1; } + - if test -n "$LD_JOBS"; + then + ../meson/meson.py configure . -Dbackend_max_links="$LD_JOBS" ; + fi || exit 1; + - make -j"$JOBS" + - if test -n "$MAKE_CHECK_ARGS"; + then + make -j"$JOBS" $MAKE_CHECK_ARGS ; + fi + +.native_test_job_template: + stage: test + image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest + script: + - scripts/git-submodule.sh update + $(sed -n '/GIT_SUBMODULES=/ s/.*=// p' build/config-host.mak) + - cd build + - find . 
-type f -exec touch {} + + # Avoid recompiling by hiding ninja with NINJA=":" + - make NINJA=":" $MAKE_CHECK_ARGS + +.avocado_test_job_template: + extends: .native_test_job_template + cache: + key: "${CI_JOB_NAME}-cache" + paths: + - ${CI_PROJECT_DIR}/avocado-cache + policy: pull-push + artifacts: + name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" + when: on_failure + expire_in: 7 days + paths: + - build/tests/results/latest/results.xml + - build/tests/results/latest/test-results + reports: + junit: build/tests/results/latest/results.xml + before_script: + - mkdir -p ~/.config/avocado + - echo "[datadir.paths]" > ~/.config/avocado/avocado.conf + - echo "cache_dirs = ['${CI_PROJECT_DIR}/avocado-cache']" + >> ~/.config/avocado/avocado.conf + - echo -e '[job.output.testlogs]\nstatuses = ["FAIL", "INTERRUPT"]' + >> ~/.config/avocado/avocado.conf + - if [ -d ${CI_PROJECT_DIR}/avocado-cache ]; then + du -chs ${CI_PROJECT_DIR}/avocado-cache ; + fi + - export AVOCADO_ALLOW_UNTRUSTED_CODE=1 + after_script: + - cd build + - du -chs ${CI_PROJECT_DIR}/avocado-cache + rules: + # Only run these jobs if running on the mainstream namespace, + # or if the user set the QEMU_CI_AVOCADO_TESTING variable (either + # in its namespace setting or via git-push option, see documentation + # in /.gitlab-ci.yml of this repository). + - if: '$CI_PROJECT_NAMESPACE == "qemu-project"' + when: on_success + - if: '$QEMU_CI_AVOCADO_TESTING' + when: on_success + # Otherwise, set to manual (the jobs are created but not run). + - when: manual + allow_failure: true diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml new file mode 100644 index 00000000000..71d0f407add --- /dev/null +++ b/.gitlab-ci.d/buildtest.yml @@ -0,0 +1,649 @@ +include: + - local: '/.gitlab-ci.d/buildtest-template.yml' + +build-system-alpine: + extends: .native_build_job_template + needs: + - job: amd64-alpine-container + variables: + IMAGE: alpine + TARGETS: aarch64-softmmu alpha-softmmu cris-softmmu hppa-softmmu + microblazeel-softmmu mips64el-softmmu + MAKE_CHECK_ARGS: check-build + CONFIGURE_ARGS: --enable-docs --enable-trace-backends=log,simple,syslog + artifacts: + expire_in: 2 days + paths: + - .git-submodule-status + - build + +check-system-alpine: + extends: .native_test_job_template + needs: + - job: build-system-alpine + artifacts: true + variables: + IMAGE: alpine + MAKE_CHECK_ARGS: check + +avocado-system-alpine: + extends: .avocado_test_job_template + needs: + - job: build-system-alpine + artifacts: true + variables: + IMAGE: alpine + MAKE_CHECK_ARGS: check-avocado + +build-system-ubuntu: + extends: .native_build_job_template + needs: + job: amd64-ubuntu2004-container + variables: + IMAGE: ubuntu2004 + CONFIGURE_ARGS: --enable-docs --enable-fdt=system --enable-slirp=system + TARGETS: aarch64-softmmu alpha-softmmu cris-softmmu hppa-softmmu + microblazeel-softmmu mips64el-softmmu + MAKE_CHECK_ARGS: check-build + artifacts: + expire_in: 2 days + paths: + - build + +check-system-ubuntu: + extends: .native_test_job_template + needs: + - job: build-system-ubuntu + artifacts: true + variables: + IMAGE: ubuntu2004 + MAKE_CHECK_ARGS: check + +avocado-system-ubuntu: + extends: .avocado_test_job_template + needs: + - job: build-system-ubuntu + artifacts: true + variables: + IMAGE: ubuntu2004 + MAKE_CHECK_ARGS: check-avocado + +build-system-debian: + extends: .native_build_job_template + needs: + job: amd64-debian-container + variables: + IMAGE: debian-amd64 + TARGETS: arm-softmmu avr-softmmu i386-softmmu mipsel-softmmu + riscv64-softmmu 
sh4eb-softmmu sparc-softmmu xtensaeb-softmmu + MAKE_CHECK_ARGS: check-build + artifacts: + expire_in: 2 days + paths: + - build + +check-system-debian: + extends: .native_test_job_template + needs: + - job: build-system-debian + artifacts: true + variables: + IMAGE: debian-amd64 + MAKE_CHECK_ARGS: check + +avocado-system-debian: + extends: .avocado_test_job_template + needs: + - job: build-system-debian + artifacts: true + variables: + IMAGE: debian-amd64 + MAKE_CHECK_ARGS: check-avocado + +build-system-fedora: + extends: .native_build_job_template + needs: + job: amd64-fedora-container + variables: + IMAGE: fedora + CONFIGURE_ARGS: --disable-gcrypt --enable-nettle --enable-docs + --enable-fdt=system --enable-slirp=system --enable-capstone=system + TARGETS: tricore-softmmu microblaze-softmmu mips-softmmu + xtensa-softmmu m68k-softmmu riscv32-softmmu ppc-softmmu sparc64-softmmu + MAKE_CHECK_ARGS: check-build + artifacts: + expire_in: 2 days + paths: + - build + +check-system-fedora: + extends: .native_test_job_template + needs: + - job: build-system-fedora + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check + +avocado-system-fedora: + extends: .avocado_test_job_template + needs: + - job: build-system-fedora + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check-avocado + +build-system-centos: + extends: .native_build_job_template + needs: + job: amd64-centos8-container + variables: + IMAGE: centos8 + CONFIGURE_ARGS: --disable-nettle --enable-gcrypt --enable-fdt=system + --enable-modules --enable-trace-backends=dtrace + TARGETS: ppc64-softmmu or1k-softmmu s390x-softmmu + x86_64-softmmu rx-softmmu sh4-softmmu nios2-softmmu + MAKE_CHECK_ARGS: check-build + artifacts: + expire_in: 2 days + paths: + - build + +check-system-centos: + extends: .native_test_job_template + needs: + - job: build-system-centos + artifacts: true + variables: + IMAGE: centos8 + MAKE_CHECK_ARGS: check + +avocado-system-centos: + extends: .avocado_test_job_template + needs: + - job: build-system-centos + artifacts: true + variables: + IMAGE: centos8 + MAKE_CHECK_ARGS: check-avocado + +build-system-opensuse: + extends: .native_build_job_template + needs: + job: amd64-opensuse-leap-container + variables: + IMAGE: opensuse-leap + CONFIGURE_ARGS: --enable-fdt=system + TARGETS: s390x-softmmu x86_64-softmmu aarch64-softmmu + MAKE_CHECK_ARGS: check-build + artifacts: + expire_in: 2 days + paths: + - build + +check-system-opensuse: + extends: .native_test_job_template + needs: + - job: build-system-opensuse + artifacts: true + variables: + IMAGE: opensuse-leap + MAKE_CHECK_ARGS: check + +avocado-system-opensuse: + extends: .avocado_test_job_template + needs: + - job: build-system-opensuse + artifacts: true + variables: + IMAGE: opensuse-leap + MAKE_CHECK_ARGS: check-avocado + + +# This jobs explicitly disable TCG (--disable-tcg), KVM is detected by +# the configure script. The container doesn't contain Xen headers so +# Xen accelerator is not detected / selected. As result it build the +# i386-softmmu and x86_64-softmmu with KVM being the single accelerator +# available. +# Also use a different coroutine implementation (which is only really of +# interest to KVM users, i.e. 
with TCG disabled) +build-tcg-disabled: + extends: .native_build_job_template + needs: + job: amd64-centos8-container + variables: + IMAGE: centos8 + script: + - mkdir build + - cd build + - ../configure --disable-tcg --audio-drv-list="" --with-coroutine=ucontext + || { cat config.log meson-logs/meson-log.txt && exit 1; } + - make -j"$JOBS" + - make check-unit + - make check-qapi-schema + - cd tests/qemu-iotests/ + - ./check -raw 001 002 003 004 005 008 009 010 011 012 021 025 032 033 048 + 052 063 077 086 101 104 106 113 148 150 151 152 157 159 160 163 + 170 171 183 184 192 194 208 221 226 227 236 253 277 image-fleecing + - ./check -qcow2 028 051 056 057 058 065 068 082 085 091 095 096 102 122 + 124 132 139 142 144 145 151 152 155 157 165 194 196 200 202 + 208 209 216 218 227 234 246 247 248 250 254 255 257 258 + 260 261 262 263 264 270 272 273 277 279 image-fleecing + +build-user: + extends: .native_build_job_template + needs: + job: amd64-debian-user-cross-container + variables: + IMAGE: debian-all-test-cross + CONFIGURE_ARGS: --disable-tools --disable-system + MAKE_CHECK_ARGS: check-tcg + +build-user-static: + extends: .native_build_job_template + needs: + job: amd64-debian-user-cross-container + variables: + IMAGE: debian-all-test-cross + CONFIGURE_ARGS: --disable-tools --disable-system --static + MAKE_CHECK_ARGS: check-tcg + +# Because the hexagon cross-compiler takes so long to build we don't rely +# on the CI system to build it and hence this job has an optional dependency +# declared. The image is manually uploaded. +build-user-hexagon: + extends: .native_build_job_template + needs: + job: hexagon-cross-container + optional: true + variables: + IMAGE: debian-hexagon-cross + TARGETS: hexagon-linux-user + CONFIGURE_ARGS: --disable-tools --disable-docs --enable-debug-tcg + MAKE_CHECK_ARGS: check-tcg + +# Only build the softmmu targets we have check-tcg tests for +build-some-softmmu: + extends: .native_build_job_template + needs: + job: amd64-debian-user-cross-container + variables: + IMAGE: debian-all-test-cross + CONFIGURE_ARGS: --disable-tools --enable-debug + TARGETS: xtensa-softmmu arm-softmmu aarch64-softmmu alpha-softmmu + MAKE_CHECK_ARGS: check-tcg + +# We build tricore in a very minimal tricore only container +build-tricore-softmmu: + extends: .native_build_job_template + needs: + job: tricore-debian-cross-container + variables: + IMAGE: debian-tricore-cross + CONFIGURE_ARGS: --disable-tools --disable-fdt --enable-debug + TARGETS: tricore-softmmu + MAKE_CHECK_ARGS: check-tcg + +clang-system: + extends: .native_build_job_template + needs: + job: amd64-fedora-container + variables: + IMAGE: fedora + CONFIGURE_ARGS: --cc=clang --cxx=clang++ + --extra-cflags=-fsanitize=undefined --extra-cflags=-fno-sanitize-recover=undefined + TARGETS: alpha-softmmu arm-softmmu m68k-softmmu mips64-softmmu + ppc-softmmu s390x-softmmu + MAKE_CHECK_ARGS: check-qtest check-tcg + +clang-user: + extends: .native_build_job_template + needs: + job: amd64-debian-user-cross-container + variables: + IMAGE: debian-all-test-cross + CONFIGURE_ARGS: --cc=clang --cxx=clang++ --disable-system + --target-list-exclude=microblazeel-linux-user,aarch64_be-linux-user,i386-linux-user,m68k-linux-user,mipsn32el-linux-user,xtensaeb-linux-user + --extra-cflags=-fsanitize=undefined --extra-cflags=-fno-sanitize-recover=undefined + MAKE_CHECK_ARGS: check-unit check-tcg + +# Set LD_JOBS=1 because this requires LTO and ld consumes a large amount of memory. 
+# On gitlab runners, default value sometimes end up calling 2 lds concurrently and +# triggers an Out-Of-Memory error +# +# Since slirp callbacks are used in QEMU Timers, slirp needs to be compiled together +# with QEMU and linked as a static library to avoid false positives in CFI checks. +# This can be accomplished by using -enable-slirp=git, which avoids the use of +# a system-wide version of the library +# +# Split in three sets of build/check/avocado to limit the execution time of each +# job +build-cfi-aarch64: + extends: .native_build_job_template + needs: + - job: amd64-fedora-container + variables: + LD_JOBS: 1 + AR: llvm-ar + IMAGE: fedora + CONFIGURE_ARGS: --cc=clang --cxx=clang++ --enable-cfi --enable-cfi-debug + --enable-safe-stack --enable-slirp=git + TARGETS: aarch64-softmmu + MAKE_CHECK_ARGS: check-build + timeout: 70m + artifacts: + expire_in: 2 days + paths: + - build + rules: + # FIXME: This job is often failing, likely due to out-of-memory problems in + # the constrained containers of the shared runners. Thus this is marked as + # manual until the situation has been solved. + - when: manual + allow_failure: true + +check-cfi-aarch64: + extends: .native_test_job_template + needs: + - job: build-cfi-aarch64 + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check + +avocado-cfi-aarch64: + extends: .avocado_test_job_template + needs: + - job: build-cfi-aarch64 + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check-avocado + +build-cfi-ppc64-s390x: + extends: .native_build_job_template + needs: + - job: amd64-fedora-container + variables: + LD_JOBS: 1 + AR: llvm-ar + IMAGE: fedora + CONFIGURE_ARGS: --cc=clang --cxx=clang++ --enable-cfi --enable-cfi-debug + --enable-safe-stack --enable-slirp=git + TARGETS: ppc64-softmmu s390x-softmmu + MAKE_CHECK_ARGS: check-build + timeout: 70m + artifacts: + expire_in: 2 days + paths: + - build + rules: + # FIXME: This job is often failing, likely due to out-of-memory problems in + # the constrained containers of the shared runners. Thus this is marked as + # manual until the situation has been solved. 
+ - when: manual + allow_failure: true + +check-cfi-ppc64-s390x: + extends: .native_test_job_template + needs: + - job: build-cfi-ppc64-s390x + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check + +avocado-cfi-ppc64-s390x: + extends: .avocado_test_job_template + needs: + - job: build-cfi-ppc64-s390x + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check-avocado + +build-cfi-x86_64: + extends: .native_build_job_template + needs: + - job: amd64-fedora-container + variables: + LD_JOBS: 1 + AR: llvm-ar + IMAGE: fedora + CONFIGURE_ARGS: --cc=clang --cxx=clang++ --enable-cfi --enable-cfi-debug + --enable-safe-stack --enable-slirp=git + TARGETS: x86_64-softmmu + MAKE_CHECK_ARGS: check-build + timeout: 70m + artifacts: + expire_in: 2 days + paths: + - build + +check-cfi-x86_64: + extends: .native_test_job_template + needs: + - job: build-cfi-x86_64 + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check + +avocado-cfi-x86_64: + extends: .avocado_test_job_template + needs: + - job: build-cfi-x86_64 + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check-avocado + +tsan-build: + extends: .native_build_job_template + needs: + job: amd64-ubuntu2004-container + variables: + IMAGE: ubuntu2004 + CONFIGURE_ARGS: --enable-tsan --cc=clang-10 --cxx=clang++-10 + --enable-trace-backends=ust --enable-fdt=system --enable-slirp=system + TARGETS: x86_64-softmmu ppc64-softmmu riscv64-softmmu x86_64-linux-user + MAKE_CHECK_ARGS: bench V=1 + +# These targets are on the way out +build-deprecated: + extends: .native_build_job_template + needs: + job: amd64-debian-user-cross-container + variables: + IMAGE: debian-all-test-cross + CONFIGURE_ARGS: --disable-tools + MAKE_CHECK_ARGS: build-tcg + TARGETS: ppc64abi32-linux-user + artifacts: + expire_in: 2 days + paths: + - build + +# We split the check-tcg step as test failures are expected but we still +# want to catch the build breaking. +check-deprecated: + extends: .native_test_job_template + needs: + - job: build-deprecated + artifacts: true + variables: + IMAGE: debian-all-test-cross + MAKE_CHECK_ARGS: check-tcg + allow_failure: true + +# gprof/gcov are GCC features +build-gprof-gcov: + extends: .native_build_job_template + needs: + job: amd64-ubuntu2004-container + variables: + IMAGE: ubuntu2004 + CONFIGURE_ARGS: --enable-gprof --enable-gcov + TARGETS: aarch64-softmmu ppc64-softmmu s390x-softmmu x86_64-softmmu + artifacts: + expire_in: 1 days + paths: + - build + +check-gprof-gcov: + extends: .native_test_job_template + needs: + - job: build-gprof-gcov + artifacts: true + variables: + IMAGE: ubuntu2004 + MAKE_CHECK_ARGS: check + after_script: + - ${CI_PROJECT_DIR}/scripts/ci/coverage-summary.sh + +build-oss-fuzz: + extends: .native_build_job_template + needs: + job: amd64-fedora-container + variables: + IMAGE: fedora + script: + - mkdir build-oss-fuzz + - CC="clang" CXX="clang++" CFLAGS="-fsanitize=address" + ./scripts/oss-fuzz/build.sh + - export ASAN_OPTIONS="fast_unwind_on_malloc=0" + - for fuzzer in $(find ./build-oss-fuzz/DEST_DIR/ -executable -type f + | grep -v slirp); do + grep "LLVMFuzzerTestOneInput" ${fuzzer} > /dev/null 2>&1 || continue ; + echo Testing ${fuzzer} ... 
; + "${fuzzer}" -runs=1 -seed=1 || exit 1 ; + done + # Unrelated to fuzzer: run some tests with -fsanitize=address + - cd build-oss-fuzz && make check-qtest-i386 check-unit + +build-tci: + extends: .native_build_job_template + needs: + job: amd64-debian-user-cross-container + variables: + IMAGE: debian-all-test-cross + script: + - TARGETS="aarch64 alpha arm hppa m68k microblaze ppc64 s390x x86_64" + - mkdir build + - cd build + - ../configure --enable-tcg-interpreter + --target-list="$(for tg in $TARGETS; do echo -n ${tg}'-softmmu '; done)" || { cat config.log meson-logs/meson-log.txt && exit 1; } + - make -j"$JOBS" + - make tests/qtest/boot-serial-test tests/qtest/cdrom-test tests/qtest/pxe-test + - for tg in $TARGETS ; do + export QTEST_QEMU_BINARY="./qemu-system-${tg}" ; + ./tests/qtest/boot-serial-test || exit 1 ; + ./tests/qtest/cdrom-test || exit 1 ; + done + - QTEST_QEMU_BINARY="./qemu-system-x86_64" ./tests/qtest/pxe-test + - QTEST_QEMU_BINARY="./qemu-system-s390x" ./tests/qtest/pxe-test -m slow + - make check-tcg + +# Alternate coroutines implementations are only really of interest to KVM users +# However we can't test against KVM on Gitlab-CI so we can only run unit tests +build-coroutine-sigaltstack: + extends: .native_build_job_template + needs: + job: amd64-ubuntu2004-container + variables: + IMAGE: ubuntu2004 + CONFIGURE_ARGS: --with-coroutine=sigaltstack --disable-tcg + --enable-trace-backends=ftrace + MAKE_CHECK_ARGS: check-unit + +# Check our reduced build configurations +build-without-default-devices: + extends: .native_build_job_template + needs: + job: amd64-centos8-container + variables: + IMAGE: centos8 + CONFIGURE_ARGS: --without-default-devices --disable-user + +build-without-default-features: + extends: .native_build_job_template + needs: + job: amd64-fedora-container + variables: + IMAGE: fedora + CONFIGURE_ARGS: + --without-default-features + --disable-capstone + --disable-pie + --disable-qom-cast-debug + --disable-slirp + --disable-strip + TARGETS: avr-softmmu i386-softmmu mips64-softmmu s390x-softmmu sh4-softmmu + sparc64-softmmu hexagon-linux-user i386-linux-user s390x-linux-user + MAKE_CHECK_ARGS: check-unit check-qtest SPEED=slow + +build-libvhost-user: + stage: build + image: $CI_REGISTRY_IMAGE/qemu/fedora:latest + needs: + job: amd64-fedora-container + script: + - mkdir subprojects/libvhost-user/build + - cd subprojects/libvhost-user/build + - meson + - ninja + +# No targets are built here, just tools, docs, and unit tests. This +# also feeds into the eventual documentation deployment steps later +build-tools-and-docs-debian: + extends: .native_build_job_template + needs: + job: amd64-debian-container + variables: + IMAGE: debian-amd64 + MAKE_CHECK_ARGS: check-unit check-softfloat ctags TAGS cscope + CONFIGURE_ARGS: --disable-system --disable-user --enable-docs --enable-tools + artifacts: + expire_in: 2 days + paths: + - build + +# Prepare for GitLab pages deployment. Anything copied into the +# "public" directory will be deployed to $USER.gitlab.io/$PROJECT +# +# GitLab publishes from any branch that triggers a CI pipeline +# +# For the main repo we don't want to publish from 'staging' +# since that content may not be pushed, nor do we wish to +# publish from 'stable-NNN' branches as that content is outdated. 
+# Thus we restrict to just the default branch +# +# For contributor forks we want to publish from any repo so +# that users can see the results of their commits, regardless +# of what topic branch they're currently using +pages: + image: $CI_REGISTRY_IMAGE/qemu/debian-amd64:latest + stage: test + needs: + - job: build-tools-and-docs-debian + script: + - mkdir -p public + # HTML-ised source tree + - make gtags + - htags -anT --tree-view=filetree -m qemu_init + -t "Welcome to the QEMU sourcecode" + - mv HTML public/src + # Project documentation + - make -C build install DESTDIR=$(pwd)/temp-install + - mv temp-install/usr/local/share/doc/qemu/* public/ + artifacts: + paths: + - public + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH' + when: on_success + - if: '$CI_PROJECT_NAMESPACE == "qemu-project"' + when: never + - if: '$CI_PROJECT_NAMESPACE != "qemu-project"' + when: on_success diff --git a/.gitlab-ci.d/cirrus.yml b/.gitlab-ci.d/cirrus.yml new file mode 100644 index 00000000000..d273a9e7130 --- /dev/null +++ b/.gitlab-ci.d/cirrus.yml @@ -0,0 +1,91 @@ +# Jobs that we delegate to Cirrus CI because they require an operating +# system other than Linux. These jobs will only run if the required +# setup has been performed on the GitLab account. +# +# The Cirrus CI configuration is generated by replacing target-specific +# variables in a generic template: some of these variables are provided +# when the GitLab CI job is defined, others are taken from a shell +# snippet generated using lcitool. +# +# Note that the $PATH environment variable has to be treated with +# special care, because we can't just override it at the GitLab CI job +# definition level or we risk breaking it completely. +.cirrus_build_job: + stage: build + image: registry.gitlab.com/libvirt/libvirt-ci/cirrus-run:master + needs: [] + timeout: 80m + allow_failure: true + script: + - source .gitlab-ci.d/cirrus/$NAME.vars + - sed -e "s|[@]CI_REPOSITORY_URL@|$CI_REPOSITORY_URL|g" + -e "s|[@]CI_COMMIT_REF_NAME@|$CI_COMMIT_REF_NAME|g" + -e "s|[@]CI_COMMIT_SHA@|$CI_COMMIT_SHA|g" + -e "s|[@]CIRRUS_VM_INSTANCE_TYPE@|$CIRRUS_VM_INSTANCE_TYPE|g" + -e "s|[@]CIRRUS_VM_IMAGE_SELECTOR@|$CIRRUS_VM_IMAGE_SELECTOR|g" + -e "s|[@]CIRRUS_VM_IMAGE_NAME@|$CIRRUS_VM_IMAGE_NAME|g" + -e "s|[@]CIRRUS_VM_CPUS@|$CIRRUS_VM_CPUS|g" + -e "s|[@]CIRRUS_VM_RAM@|$CIRRUS_VM_RAM|g" + -e "s|[@]UPDATE_COMMAND@|$UPDATE_COMMAND|g" + -e "s|[@]INSTALL_COMMAND@|$INSTALL_COMMAND|g" + -e "s|[@]PATH@|$PATH_EXTRA${PATH_EXTRA:+:}\$PATH|g" + -e "s|[@]PKG_CONFIG_PATH@|$PKG_CONFIG_PATH|g" + -e "s|[@]PKGS@|$PKGS|g" + -e "s|[@]MAKE@|$MAKE|g" + -e "s|[@]PYTHON@|$PYTHON|g" + -e "s|[@]PIP3@|$PIP3|g" + -e "s|[@]PYPI_PKGS@|$PYPI_PKGS|g" + -e "s|[@]CONFIGURE_ARGS@|$CONFIGURE_ARGS|g" + -e "s|[@]TEST_TARGETS@|$TEST_TARGETS|g" + <.gitlab-ci.d/cirrus/build.yml >.gitlab-ci.d/cirrus/$NAME.yml + - cat .gitlab-ci.d/cirrus/$NAME.yml + - cirrus-run -v --show-build-log always .gitlab-ci.d/cirrus/$NAME.yml + rules: + # Allow on 'staging' branch and 'stable-X.Y-staging' branches only + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH !~ /staging/' + when: never + - if: "$CIRRUS_GITHUB_REPO && $CIRRUS_API_TOKEN" + +x64-freebsd-12-build: + extends: .cirrus_build_job + variables: + NAME: freebsd-12 + CIRRUS_VM_INSTANCE_TYPE: freebsd_instance + CIRRUS_VM_IMAGE_SELECTOR: image_family + CIRRUS_VM_IMAGE_NAME: freebsd-12-2 + CIRRUS_VM_CPUS: 8 + CIRRUS_VM_RAM: 8G + UPDATE_COMMAND: pkg update + INSTALL_COMMAND: pkg install -y + # TODO: 
Enable gnutls again once FreeBSD's libtasn1 got fixed + # See: https://gitlab.com/gnutls/libtasn1/-/merge_requests/71 + CONFIGURE_ARGS: --disable-gnutls + TEST_TARGETS: check + +x64-freebsd-13-build: + extends: .cirrus_build_job + variables: + NAME: freebsd-13 + CIRRUS_VM_INSTANCE_TYPE: freebsd_instance + CIRRUS_VM_IMAGE_SELECTOR: image_family + CIRRUS_VM_IMAGE_NAME: freebsd-13-0 + CIRRUS_VM_CPUS: 8 + CIRRUS_VM_RAM: 8G + UPDATE_COMMAND: pkg update + INSTALL_COMMAND: pkg install -y + TEST_TARGETS: check + +x64-macos-11-base-build: + extends: .cirrus_build_job + variables: + NAME: macos-11 + CIRRUS_VM_INSTANCE_TYPE: osx_instance + CIRRUS_VM_IMAGE_SELECTOR: image + CIRRUS_VM_IMAGE_NAME: big-sur-base + CIRRUS_VM_CPUS: 12 + CIRRUS_VM_RAM: 24G + UPDATE_COMMAND: brew update + INSTALL_COMMAND: brew install + PATH_EXTRA: /usr/local/opt/ccache/libexec:/usr/local/opt/gettext/bin + PKG_CONFIG_PATH: /usr/local/opt/curl/lib/pkgconfig:/usr/local/opt/ncurses/lib/pkgconfig:/usr/local/opt/readline/lib/pkgconfig + TEST_TARGETS: check-unit check-block check-qapi-schema check-softfloat check-qtest-x86_64 diff --git a/.gitlab-ci.d/cirrus/README.rst b/.gitlab-ci.d/cirrus/README.rst new file mode 100644 index 00000000000..657b0706d78 --- /dev/null +++ b/.gitlab-ci.d/cirrus/README.rst @@ -0,0 +1,54 @@ +Cirrus CI integration +===================== + +GitLab CI shared runners only provide a docker environment running on Linux. +While it is possible to provide private runners for non-Linux platforms this +is not something most contributors/maintainers will wish to do. + +To work around this limitation, we take advantage of `Cirrus CI`_'s free +offering: more specifically, we use the `cirrus-run`_ script to trigger Cirrus +CI jobs from GitLab CI jobs so that Cirrus CI job output is integrated into +the main GitLab CI pipeline dashboard. + +There is, however, some one-time setup required. If you want FreeBSD and macOS +builds to happen when you push to your GitLab repository, you need to + +* set up a GitHub repository for the project, eg. ``yourusername/qemu``. + This repository needs to exist for cirrus-run to work, but it doesn't need to + be kept up to date, so you can create it and then forget about it; + +* enable the `Cirrus CI GitHub app`_ for your GitHub account; + +* sign up for Cirrus CI. It's enough to log into the website using your GitHub + account; + +* grab an API token from the `Cirrus CI settings`_ page; + +* it may be necessary to push an empty ``.cirrus.yml`` file to your github fork + for Cirrus CI to properly recognize the project. You can check whether + Cirrus CI knows about your project by navigating to: + + ``https://cirrus-ci.com/yourusername/qemu`` + +* in the *CI/CD / Variables* section of the settings page for your GitLab + repository, create two new variables: + + * ``CIRRUS_GITHUB_REPO``, containing the name of the GitHub repository + created earlier, eg. ``yourusername/qemu``; + + * ``CIRRUS_API_TOKEN``, containing the Cirrus CI API token generated earlier. + This variable **must** be marked as *Masked*, because anyone with knowledge + of it can impersonate you as far as Cirrus CI is concerned. + + Neither of these variables should be marked as *Protected*, because in + general you'll want to be able to trigger Cirrus CI builds from non-protected + branches. + +Once this one-time setup is complete, you can just keep pushing to your GitLab +repository as usual and you'll automatically get the additional CI coverage. + + +.. 
_Cirrus CI GitHub app: https://github.com/marketplace/cirrus-ci +.. _Cirrus CI settings: https://cirrus-ci.com/settings/profile/ +.. _Cirrus CI: https://cirrus-ci.com/ +.. _cirrus-run: https://github.com/sio/cirrus-run/ diff --git a/.gitlab-ci.d/cirrus/build.yml b/.gitlab-ci.d/cirrus/build.yml new file mode 100644 index 00000000000..c555f5d36e6 --- /dev/null +++ b/.gitlab-ci.d/cirrus/build.yml @@ -0,0 +1,36 @@ +@CIRRUS_VM_INSTANCE_TYPE@: + @CIRRUS_VM_IMAGE_SELECTOR@: @CIRRUS_VM_IMAGE_NAME@ + cpu: @CIRRUS_VM_CPUS@ + memory: @CIRRUS_VM_RAM@ + +env: + CIRRUS_CLONE_DEPTH: 1 + CI_REPOSITORY_URL: "@CI_REPOSITORY_URL@" + CI_COMMIT_REF_NAME: "@CI_COMMIT_REF_NAME@" + CI_COMMIT_SHA: "@CI_COMMIT_SHA@" + PATH: "@PATH@" + PKG_CONFIG_PATH: "@PKG_CONFIG_PATH@" + PYTHON: "@PYTHON@" + MAKE: "@MAKE@" + CONFIGURE_ARGS: "@CONFIGURE_ARGS@" + TEST_TARGETS: "@TEST_TARGETS@" + +build_task: + install_script: + - @UPDATE_COMMAND@ + - @INSTALL_COMMAND@ @PKGS@ + - if test -n "@PYPI_PKGS@" ; then @PIP3@ install @PYPI_PKGS@ ; fi + clone_script: + - git clone --depth 100 "$CI_REPOSITORY_URL" . + - git fetch origin "$CI_COMMIT_REF_NAME" + - git reset --hard "$CI_COMMIT_SHA" + build_script: + - mkdir build + - cd build + - ../configure --enable-werror $CONFIGURE_ARGS + || { cat config.log meson-logs/meson-log.txt; exit 1; } + - $MAKE -j$(sysctl -n hw.ncpu) + - for TARGET in $TEST_TARGETS ; + do + $MAKE -j$(sysctl -n hw.ncpu) $TARGET V=1 ; + done diff --git a/.gitlab-ci.d/cirrus/freebsd-12.vars b/.gitlab-ci.d/cirrus/freebsd-12.vars new file mode 100644 index 00000000000..2099b213547 --- /dev/null +++ b/.gitlab-ci.d/cirrus/freebsd-12.vars @@ -0,0 +1,13 @@ +# THIS FILE WAS AUTO-GENERATED +# +# $ lcitool variables freebsd-12 qemu +# +# https://gitlab.com/libvirt/libvirt-ci/-/commit/c7e275ab27ac0dcd09da290817b9adeea1fd1eb1 + +PACKAGING_COMMAND='pkg' +CCACHE='/usr/local/bin/ccache' +MAKE='/usr/local/bin/gmake' +NINJA='/usr/local/bin/ninja' +PYTHON='/usr/local/bin/python3' +PIP3='/usr/local/bin/pip-3.8' +PKGS='alsa-lib bash bzip2 ca_root_nss capstone4 ccache cdrkit-genisoimage ctags curl cyrus-sasl dbus diffutils gettext git glib gmake gnutls gsed gtk3 libepoxy libffi libgcrypt libjpeg-turbo libnfs libspice-server libssh libtasn1 libxml2 llvm lttng-ust lzo2 meson ncurses nettle ninja opencv p5-Test-Harness perl5 pixman pkgconf png py38-numpy py38-pillow py38-pip py38-sphinx py38-sphinx_rtd_theme py38-virtualenv py38-yaml python3 rpm2cpio sdl2 sdl2_image snappy spice-protocol tesseract texinfo usbredir virglrenderer vte3 zstd' diff --git a/.gitlab-ci.d/cirrus/freebsd-13.vars b/.gitlab-ci.d/cirrus/freebsd-13.vars new file mode 100644 index 00000000000..323fe806d5e --- /dev/null +++ b/.gitlab-ci.d/cirrus/freebsd-13.vars @@ -0,0 +1,13 @@ +# THIS FILE WAS AUTO-GENERATED +# +# $ lcitool variables freebsd-13 qemu +# +# https://gitlab.com/libvirt/libvirt-ci/-/commit/c7e275ab27ac0dcd09da290817b9adeea1fd1eb1 + +PACKAGING_COMMAND='pkg' +CCACHE='/usr/local/bin/ccache' +MAKE='/usr/local/bin/gmake' +NINJA='/usr/local/bin/ninja' +PYTHON='/usr/local/bin/python3' +PIP3='/usr/local/bin/pip-3.8' +PKGS='alsa-lib bash bzip2 ca_root_nss capstone4 ccache cdrkit-genisoimage ctags curl cyrus-sasl dbus diffutils gettext git glib gmake gnutls gsed gtk3 libepoxy libffi libgcrypt libjpeg-turbo libnfs libspice-server libssh libtasn1 libxml2 llvm lttng-ust lzo2 meson ncurses nettle ninja opencv p5-Test-Harness perl5 pixman pkgconf png py38-numpy py38-pillow py38-pip py38-sphinx py38-sphinx_rtd_theme py38-virtualenv py38-yaml python3 rpm2cpio sdl2 sdl2_image 
snappy spice-protocol tesseract texinfo usbredir virglrenderer vte3 zstd' diff --git a/.gitlab-ci.d/cirrus/macos-11.vars b/.gitlab-ci.d/cirrus/macos-11.vars new file mode 100644 index 00000000000..cbec8a44a35 --- /dev/null +++ b/.gitlab-ci.d/cirrus/macos-11.vars @@ -0,0 +1,15 @@ +# THIS FILE WAS AUTO-GENERATED +# +# $ lcitool variables macos-11 qemu +# +# https://gitlab.com/libvirt/libvirt-ci/-/commit/c7e275ab27ac0dcd09da290817b9adeea1fd1eb1 + +PACKAGING_COMMAND='brew' +CCACHE='/usr/local/bin/ccache' +MAKE='/usr/local/bin/gmake' +NINJA='/usr/local/bin/ninja' +PYTHON='/usr/local/bin/python3' +PIP3='/usr/local/bin/pip3' +PKGS='bash bc bzip2 capstone ccache cpanminus ctags curl dbus diffutils gcovr gettext git glib gnu-sed gnutls gtk+3 jemalloc jpeg-turbo libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb libxml2 llvm lzo make meson ncurses nettle ninja perl pixman pkg-config python3 rpm2cpio sdl2 sdl2_image snappy sparse spice-protocol tesseract texinfo usbredir vde vte3 zlib zstd' +PYPI_PKGS='PyYAML numpy pillow sphinx sphinx-rtd-theme virtualenv' +CPAN_PKGS='Test::Harness' diff --git a/.gitlab-ci.d/container-core.yml b/.gitlab-ci.d/container-core.yml new file mode 100644 index 00000000000..e8dd1f476a2 --- /dev/null +++ b/.gitlab-ci.d/container-core.yml @@ -0,0 +1,17 @@ +include: + - local: '/.gitlab-ci.d/container-template.yml' + +amd64-centos8-container: + extends: .container_job_template + variables: + NAME: centos8 + +amd64-fedora-container: + extends: .container_job_template + variables: + NAME: fedora + +amd64-debian10-container: + extends: .container_job_template + variables: + NAME: debian10 diff --git a/.gitlab-ci.d/container-cross.yml b/.gitlab-ci.d/container-cross.yml new file mode 100644 index 00000000000..a3b5b905520 --- /dev/null +++ b/.gitlab-ci.d/container-cross.yml @@ -0,0 +1,193 @@ +alpha-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-alpha-cross + +amd64-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-amd64-cross + +amd64-debian-user-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-all-test-cross + +arm64-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-arm64-cross + +arm64-test-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian11-container'] + variables: + NAME: debian-arm64-test-cross + +armel-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-armel-cross + +armhf-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-armhf-cross + +# We never want to build hexagon in the CI system and by default we +# always want to refer to the master registry where it lives. 
+hexagon-cross-container: + image: docker:stable + stage: containers + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project"' + when: never + - when: always + variables: + NAME: debian-hexagon-cross + GIT_DEPTH: 1 + services: + - docker:dind + before_script: + - export TAG="$CI_REGISTRY_IMAGE/qemu/$NAME:latest" + - export COMMON_TAG="$CI_REGISTRY/qemu-project/qemu/qemu/$NAME:latest" + - docker info + - docker login $CI_REGISTRY -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" + script: + - echo "TAG:$TAG" + - echo "COMMON_TAG:$COMMON_TAG" + - docker pull $COMMON_TAG + - docker tag $COMMON_TAG $TAG + - docker push "$TAG" + after_script: + - docker logout + +hppa-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-hppa-cross + +m68k-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-m68k-cross + +mips64-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-mips64-cross + +mips64el-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-mips64el-cross + +mips-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-mips-cross + +mipsel-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-mipsel-cross + +powerpc-test-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian11-container'] + variables: + NAME: debian-powerpc-test-cross + +ppc64el-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-ppc64el-cross + +riscv64-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + # as we are currently based on 'sid/unstable' we may break so... 
+ allow_failure: true + variables: + NAME: debian-riscv64-cross + +s390x-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-s390x-cross + +sh4-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-sh4-cross + +sparc64-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-sparc64-cross + +tricore-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-tricore-cross + +xtensa-debian-cross-container: + extends: .container_job_template + variables: + NAME: debian-xtensa-cross + +cris-fedora-cross-container: + extends: .container_job_template + variables: + NAME: fedora-cris-cross + +i386-fedora-cross-container: + extends: .container_job_template + variables: + NAME: fedora-i386-cross + +win32-fedora-cross-container: + extends: .container_job_template + variables: + NAME: fedora-win32-cross + +win64-fedora-cross-container: + extends: .container_job_template + variables: + NAME: fedora-win64-cross diff --git a/.gitlab-ci.d/container-template.yml b/.gitlab-ci.d/container-template.yml new file mode 100644 index 00000000000..1baecd94606 --- /dev/null +++ b/.gitlab-ci.d/container-template.yml @@ -0,0 +1,21 @@ +.container_job_template: + image: docker:stable + stage: containers + services: + - docker:dind + before_script: + - export TAG="$CI_REGISTRY_IMAGE/qemu/$NAME:latest" + - export COMMON_TAG="$CI_REGISTRY/qemu-project/qemu/$NAME:latest" + - apk add python3 + - docker info + - docker login $CI_REGISTRY -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" + script: + - echo "TAG:$TAG" + - echo "COMMON_TAG:$COMMON_TAG" + - ./tests/docker/docker.py --engine docker build + -t "qemu/$NAME" -f "tests/docker/dockerfiles/$NAME.docker" + -r $CI_REGISTRY/qemu-project/qemu + - docker tag "qemu/$NAME" "$TAG" + - docker push "$TAG" + after_script: + - docker logout diff --git a/.gitlab-ci.d/containers.yml b/.gitlab-ci.d/containers.yml index 33e4046e233..cd06d3f5f49 100644 --- a/.gitlab-ci.d/containers.yml +++ b/.gitlab-ci.d/containers.yml @@ -1,251 +1,45 @@ -.container_job_template: &container_job_definition - image: docker:stable - stage: containers - services: - - docker:dind - before_script: - - export TAG="$CI_REGISTRY_IMAGE/qemu/$NAME:latest" - - export COMMON_TAG="$CI_REGISTRY/qemu-project/qemu/$NAME:latest" - - apk add python3 - - docker info - - docker login $CI_REGISTRY -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" - script: - - echo "TAG:$TAG" - - echo "COMMON_TAG:$COMMON_TAG" - - docker pull "$TAG" || docker pull "$COMMON_TAG" || true - - ./tests/docker/docker.py --engine docker build - -t "qemu/$NAME" -f "tests/docker/dockerfiles/$NAME.docker" - -r $CI_REGISTRY_IMAGE - - docker tag "qemu/$NAME" "$TAG" - - docker push "$TAG" - after_script: - - docker logout +include: + - local: '/.gitlab-ci.d/container-core.yml' + - local: '/.gitlab-ci.d/container-cross.yml' amd64-alpine-container: - <<: *container_job_definition + extends: .container_job_template variables: NAME: alpine -amd64-centos7-container: - <<: *container_job_definition - variables: - NAME: centos7 - -amd64-centos8-container: - <<: *container_job_definition - variables: - NAME: centos8 - -amd64-debian10-container: - <<: *container_job_definition - 
variables: - NAME: debian10 - amd64-debian11-container: - <<: *container_job_definition + extends: .container_job_template variables: NAME: debian11 -alpha-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-alpha-cross - -amd64-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-amd64-cross - -amd64-debian-user-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-all-test-cross - amd64-debian-container: - <<: *container_job_definition + extends: .container_job_template stage: containers-layer2 needs: ['amd64-debian10-container'] variables: NAME: debian-amd64 -arm64-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-arm64-cross - -arm64-test-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian11-container'] - variables: - NAME: debian-arm64-test-cross - -armel-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-armel-cross - -armhf-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-armhf-cross - -hppa-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-hppa-cross - -m68k-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-m68k-cross - -mips64-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-mips64-cross - -mips64el-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-mips64el-cross - -mips-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-mips-cross - -mipsel-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-mipsel-cross - -powerpc-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-powerpc-cross - -ppc64-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-ppc64-cross - -ppc64el-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-ppc64el-cross - -riscv64-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-riscv64-cross - -s390x-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-s390x-cross - -sh4-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - 
variables: - NAME: debian-sh4-cross - -sparc64-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-sparc64-cross - -tricore-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-tricore-cross - -xtensa-debian-cross-container: - <<: *container_job_definition - variables: - NAME: debian-xtensa-cross - -cris-fedora-cross-container: - <<: *container_job_definition - variables: - NAME: fedora-cris-cross - -amd64-fedora-container: - <<: *container_job_definition - variables: - NAME: fedora - -i386-fedora-cross-container: - <<: *container_job_definition - variables: - NAME: fedora-i386-cross - -win32-fedora-cross-container: - <<: *container_job_definition - variables: - NAME: fedora-win32-cross - -win64-fedora-cross-container: - <<: *container_job_definition - variables: - NAME: fedora-win64-cross - amd64-ubuntu1804-container: - <<: *container_job_definition + extends: .container_job_template variables: NAME: ubuntu1804 amd64-ubuntu2004-container: - <<: *container_job_definition + extends: .container_job_template variables: NAME: ubuntu2004 amd64-ubuntu-container: - <<: *container_job_definition + extends: .container_job_template variables: NAME: ubuntu amd64-opensuse-leap-container: - <<: *container_job_definition + extends: .container_job_template variables: NAME: opensuse-leap + +python-container: + extends: .container_job_template + variables: + NAME: python diff --git a/.gitlab-ci.d/crossbuild-template.yml b/.gitlab-ci.d/crossbuild-template.yml new file mode 100644 index 00000000000..10d22dcf6c1 --- /dev/null +++ b/.gitlab-ci.d/crossbuild-template.yml @@ -0,0 +1,47 @@ +.cross_system_build_job: + stage: build + image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest + timeout: 80m + script: + - mkdir build + - cd build + - PKG_CONFIG_PATH=$PKG_CONFIG_PATH + ../configure --enable-werror --disable-docs $QEMU_CONFIGURE_OPTS + --disable-user --target-list-exclude="arm-softmmu cris-softmmu + i386-softmmu microblaze-softmmu mips-softmmu mipsel-softmmu + mips64-softmmu ppc-softmmu riscv32-softmmu sh4-softmmu + sparc-softmmu xtensa-softmmu $CROSS_SKIP_TARGETS" + - make -j$(expr $(nproc) + 1) all check-build $MAKE_CHECK_ARGS + - if grep -q "EXESUF=.exe" config-host.mak; + then make installer; + version="$(git describe --match v[0-9]*)"; + mv -v qemu-setup*.exe qemu-setup-${version}.exe; + fi + +# Job to cross-build specific accelerators. +# +# Set the $ACCEL variable to select the specific accelerator (default to +# KVM), and set extra options (such disabling other accelerators) via the +# $EXTRA_CONFIGURE_OPTS variable. 
+.cross_accel_build_job: + stage: build + image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest + timeout: 30m + script: + - mkdir build + - cd build + - PKG_CONFIG_PATH=$PKG_CONFIG_PATH + ../configure --enable-werror --disable-docs $QEMU_CONFIGURE_OPTS + --disable-tools --enable-${ACCEL:-kvm} $EXTRA_CONFIGURE_OPTS + - make -j$(expr $(nproc) + 1) all check-build $MAKE_CHECK_ARGS + +.cross_user_build_job: + stage: build + image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest + script: + - mkdir build + - cd build + - PKG_CONFIG_PATH=$PKG_CONFIG_PATH + ../configure --enable-werror --disable-docs $QEMU_CONFIGURE_OPTS + --disable-system + - make -j$(expr $(nproc) + 1) all check-build $MAKE_CHECK_ARGS diff --git a/.gitlab-ci.d/crossbuilds.yml b/.gitlab-ci.d/crossbuilds.yml index 2d95784ed51..17d6cb3e458 100644 --- a/.gitlab-ci.d/crossbuilds.yml +++ b/.gitlab-ci.d/crossbuilds.yml @@ -1,44 +1,5 @@ -.cross_system_build_job: - stage: build - image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest - timeout: 80m - script: - - mkdir build - - cd build - - PKG_CONFIG_PATH=$PKG_CONFIG_PATH - ../configure --enable-werror --disable-docs $QEMU_CONFIGURE_OPTS - --disable-user --target-list-exclude="arm-softmmu cris-softmmu - i386-softmmu microblaze-softmmu mips-softmmu mipsel-softmmu - mips64-softmmu ppc-softmmu sh4-softmmu xtensa-softmmu" - - make -j$(expr $(nproc) + 1) all check-build $MAKE_CHECK_ARGS - -# Job to cross-build specific accelerators. -# -# Set the $ACCEL variable to select the specific accelerator (default to -# KVM), and set extra options (such disabling other accelerators) via the -# $ACCEL_CONFIGURE_OPTS variable. -.cross_accel_build_job: - stage: build - image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest - timeout: 30m - script: - - mkdir build - - cd build - - PKG_CONFIG_PATH=$PKG_CONFIG_PATH - ../configure --enable-werror --disable-docs $QEMU_CONFIGURE_OPTS - --disable-tools --enable-${ACCEL:-kvm} $ACCEL_CONFIGURE_OPTS - - make -j$(expr $(nproc) + 1) all check-build - -.cross_user_build_job: - stage: build - image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest - script: - - mkdir build - - cd build - - PKG_CONFIG_PATH=$PKG_CONFIG_PATH - ../configure --enable-werror --disable-docs $QEMU_CONFIGURE_OPTS - --disable-system - - make -j$(expr $(nproc) + 1) all check-build $MAKE_CHECK_ARGS +include: + - local: '/.gitlab-ci.d/crossbuild-template.yml' cross-armel-system: extends: .cross_system_build_job @@ -98,6 +59,15 @@ cross-i386-user: IMAGE: fedora-i386-cross MAKE_CHECK_ARGS: check +cross-i386-tci: + extends: .cross_accel_build_job + timeout: 60m + variables: + IMAGE: fedora-i386-cross + ACCEL: tcg-interpreter + EXTRA_CONFIGURE_OPTS: --target-list=i386-softmmu,i386-linux-user,aarch64-softmmu,aarch64-linux-user,ppc-softmmu,ppc-linux-user + MAKE_CHECK_ARGS: check check-tcg + cross-mips-system: extends: .cross_system_build_job needs: @@ -154,6 +124,25 @@ cross-ppc64el-user: variables: IMAGE: debian-ppc64el-cross +# The riscv64 cross-builds currently use a 'sid' container to get +# compilers and libraries. Until something more stable is found we +# allow_failure so as not to block CI. 
+cross-riscv64-system: + extends: .cross_system_build_job + allow_failure: true + needs: + job: riscv64-debian-cross-container + variables: + IMAGE: debian-riscv64-cross + +cross-riscv64-user: + extends: .cross_user_build_job + allow_failure: true + needs: + job: riscv64-debian-cross-container + variables: + IMAGE: debian-riscv64-cross + cross-s390x-system: extends: .cross_system_build_job needs: @@ -174,7 +163,15 @@ cross-s390x-kvm-only: job: s390x-debian-cross-container variables: IMAGE: debian-s390x-cross - ACCEL_CONFIGURE_OPTS: --disable-tcg + EXTRA_CONFIGURE_OPTS: --disable-tcg + +cross-mips64el-kvm-only: + extends: .cross_accel_build_job + needs: + job: mips64el-debian-cross-container + variables: + IMAGE: debian-mips64el-cross + EXTRA_CONFIGURE_OPTS: --disable-tcg --target-list=mips64el-softmmu cross-win32-system: extends: .cross_system_build_job @@ -182,6 +179,11 @@ cross-win32-system: job: win32-fedora-cross-container variables: IMAGE: fedora-win32-cross + CROSS_SKIP_TARGETS: alpha-softmmu avr-softmmu hppa-softmmu m68k-softmmu + microblazeel-softmmu mips64el-softmmu nios2-softmmu + artifacts: + paths: + - build/qemu-setup*.exe cross-win64-system: extends: .cross_system_build_job @@ -189,6 +191,11 @@ cross-win64-system: job: win64-fedora-cross-container variables: IMAGE: fedora-win64-cross + CROSS_SKIP_TARGETS: or1k-softmmu rx-softmmu sh4eb-softmmu sparc64-softmmu + tricore-softmmu xtensaeb-softmmu + artifacts: + paths: + - build/qemu-setup*.exe cross-amd64-xen-only: extends: .cross_accel_build_job @@ -197,7 +204,7 @@ cross-amd64-xen-only: variables: IMAGE: debian-amd64-cross ACCEL: xen - ACCEL_CONFIGURE_OPTS: --disable-tcg --disable-kvm + EXTRA_CONFIGURE_OPTS: --disable-tcg --disable-kvm cross-arm64-xen-only: extends: .cross_accel_build_job @@ -206,4 +213,4 @@ cross-arm64-xen-only: variables: IMAGE: debian-arm64-cross ACCEL: xen - ACCEL_CONFIGURE_OPTS: --disable-tcg --disable-kvm + EXTRA_CONFIGURE_OPTS: --disable-tcg --disable-kvm diff --git a/.gitlab-ci.d/custom-runners.yml b/.gitlab-ci.d/custom-runners.yml new file mode 100644 index 00000000000..056c374619b --- /dev/null +++ b/.gitlab-ci.d/custom-runners.yml @@ -0,0 +1,19 @@ +# The CI jobs defined here require GitLab runners installed and +# registered on machines that match their operating system names, +# versions and architectures. This is in contrast to the other CI +# jobs that are intended to run on GitLab's "shared" runners. + +# Different than the default approach on "shared" runners, based on +# containers, the custom runners have no such *requirement*, as those +# jobs should be capable of running on operating systems with no +# compatible container implementation, or no support from +# gitlab-runner. To avoid problems that gitlab-runner can cause while +# reusing the GIT repository, let's enable the clone strategy, which +# guarantees a fresh repository on each job run. 
+variables: + GIT_STRATEGY: clone + +include: + - local: '/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml' + - local: '/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml' + - local: '/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml' diff --git a/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml b/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml new file mode 100644 index 00000000000..49aa703f55c --- /dev/null +++ b/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml @@ -0,0 +1,28 @@ +centos-stream-8-x86_64: + allow_failure: true + needs: [] + stage: build + tags: + - centos_stream_8 + - x86_64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE" + artifacts: + name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" + when: on_failure + expire_in: 7 days + paths: + - build/tests/results/latest/results.xml + - build/tests/results/latest/test-results + reports: + junit: build/tests/results/latest/results.xml + before_script: + - JOBS=$(expr $(nproc) + 1) + script: + - mkdir build + - cd build + - ../scripts/ci/org.centos/stream/8/x86_64/configure + - make -j"$JOBS" + - make NINJA=":" check + - ../scripts/ci/org.centos/stream/8/x86_64/test-avocado diff --git a/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml b/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml new file mode 100644 index 00000000000..f39d874a1e1 --- /dev/null +++ b/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml @@ -0,0 +1,118 @@ +# All ubuntu-18.04 jobs should run successfully in an environment +# setup by the scripts/ci/setup/build-environment.yml task +# "Install basic packages to build QEMU on Ubuntu 18.04/20.04" + +ubuntu-18.04-s390x-all-linux-static: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$S390X_RUNNER_AVAILABLE" + script: + # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763 + # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages + - mkdir build + - cd build + - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + - make --output-sync -j`nproc` check-tcg V=1 + +ubuntu-18.04-s390x-all: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$S390X_RUNNER_AVAILABLE" + script: + - mkdir build + - cd build + - ../configure --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-18.04-s390x-alldbg: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --enable-debug --disable-libssh + - make clean + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-18.04-s390x-clang: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir 
build + - cd build + - ../configure --disable-libssh --cc=clang --cxx=clang++ --enable-sanitizers + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-18.04-s390x-tci: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --enable-tcg-interpreter + - make --output-sync -j`nproc` + +ubuntu-18.04-s390x-notcg: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --disable-tcg + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 diff --git a/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml b/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml new file mode 100644 index 00000000000..920e388bd05 --- /dev/null +++ b/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml @@ -0,0 +1,118 @@ +# All ubuntu-20.04 jobs should run successfully in an environment +# setup by the scripts/ci/setup/qemu/build-environment.yml task +# "Install basic packages to build QEMU on Ubuntu 18.04/20.04" + +ubuntu-20.04-aarch64-all-linux-static: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$AARCH64_RUNNER_AVAILABLE" + script: + # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763 + # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages + - mkdir build + - cd build + - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + - make --output-sync -j`nproc` check-tcg V=1 + +ubuntu-20.04-aarch64-all: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-20.04-aarch64-alldbg: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$AARCH64_RUNNER_AVAILABLE" + script: + - mkdir build + - cd build + - ../configure --enable-debug --disable-libssh + - make clean + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-20.04-aarch64-clang: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --cc=clang-10 --cxx=clang++-10 --enable-sanitizers + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + 
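It may help to spell out how the custom-runner jobs above and below are gated, since the rules: blocks only imply it: they run automatically just for qemu-project branches matching /^staging/, and in a fork only when a variable such as S390X_RUNNER_AVAILABLE or AARCH64_RUNNER_AVAILABLE is defined, i.e. when the fork owner has registered a runner with the matching tags and opted in; several of them are additionally when: manual with allow_failure: true so they never block a pipeline. Assuming a fork with such a runner, opting in can be as simple as defining the variable in the project's CI/CD settings, or passing it for a single run through GitLab's pipeline trigger API (project ID, branch and token below are placeholders):

    curl -X POST \
      -F token="$TRIGGER_TOKEN" \
      -F ref=my-branch \
      -F "variables[AARCH64_RUNNER_AVAILABLE]=1" \
      "https://gitlab.com/api/v4/projects/<project-id>/trigger/pipeline"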
+ubuntu-20.04-aarch64-tci: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --enable-tcg-interpreter + - make --output-sync -j`nproc` + +ubuntu-20.04-aarch64-notcg: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --disable-tcg + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 diff --git a/.gitlab-ci.d/edk2.yml b/.gitlab-ci.d/edk2.yml index ba7280605c4..13d0f8b019f 100644 --- a/.gitlab-ci.d/edk2.yml +++ b/.gitlab-ci.d/edk2.yml @@ -1,10 +1,22 @@ -docker-edk2: - stage: containers - rules: # Only run this job when the Dockerfile is modified +# All jobs needing docker-edk2 must use the same rules it uses. +.edk2_job_rules: + rules: # Only run this job when ... - changes: + # this file is modified - .gitlab-ci.d/edk2.yml + # or the Dockerfile is modified - .gitlab-ci.d/edk2/Dockerfile - when: always + # or roms/edk2/ is modified (submodule updated) + - roms/edk2/* + when: on_success + - if: '$CI_COMMIT_REF_NAME =~ /^edk2/' # or the branch/tag starts with 'edk2' + when: on_success + - if: '$CI_COMMIT_MESSAGE =~ /edk2/i' # or last commit description contains 'EDK2' + when: on_success + +docker-edk2: + extends: .edk2_job_rules + stage: containers image: docker:19.03.1 services: - docker:19.03.1-dind @@ -24,16 +36,9 @@ docker-edk2: - docker push $IMAGE_TAG build-edk2: + extends: .edk2_job_rules stage: build needs: ['docker-edk2'] - rules: # Only run this job when ... - - changes: # ... roms/edk2/ is modified (submodule updated) - - roms/edk2/* - when: always - - if: '$CI_COMMIT_REF_NAME =~ /^edk2/' # or the branch/tag starts with 'edk2' - when: always - - if: '$CI_COMMIT_MESSAGE =~ /edk2/i' # or last commit description contains 'EDK2' - when: always artifacts: paths: # 'artifacts.zip' will contains the following files: - pc-bios/edk2*bz2 @@ -45,7 +50,11 @@ build-edk2: GIT_DEPTH: 3 script: # Clone the required submodules and build EDK2 - git submodule update --init roms/edk2 - - git -C roms/edk2 submodule update --init + - git -C roms/edk2 submodule update --init -- + ArmPkg/Library/ArmSoftFloatLib/berkeley-softfloat-3 + BaseTools/Source/C/BrotliCompress/brotli + CryptoPkg/Library/OpensslLib/openssl + MdeModulePkg/Library/BrotliCustomDecompressLib/brotli - export JOBS=$(($(getconf _NPROCESSORS_ONLN) + 1)) - echo "=== Using ${JOBS} simultaneous jobs ===" - make -j${JOBS} -C roms efi 2>&1 1>edk2-stdout.log | tee -a edk2-stderr.log >&2 diff --git a/.gitlab-ci.d/opensbi.yml b/.gitlab-ci.d/opensbi.yml index f66cd1d9089..5e0a2477c5d 100644 --- a/.gitlab-ci.d/opensbi.yml +++ b/.gitlab-ci.d/opensbi.yml @@ -1,10 +1,23 @@ -docker-opensbi: - stage: containers - rules: # Only run this job when the Dockerfile is modified +# All jobs needing docker-opensbi must use the same rules it uses. +.opensbi_job_rules: + rules: # Only run this job when ... 
- changes: + # this file is modified - .gitlab-ci.d/opensbi.yml + # or the Dockerfile is modified - .gitlab-ci.d/opensbi/Dockerfile - when: always + when: on_success + - changes: # or roms/opensbi/ is modified (submodule updated) + - roms/opensbi/* + when: on_success + - if: '$CI_COMMIT_REF_NAME =~ /^opensbi/' # or the branch/tag starts with 'opensbi' + when: on_success + - if: '$CI_COMMIT_MESSAGE =~ /opensbi/i' # or last commit description contains 'OpenSBI' + when: on_success + +docker-opensbi: + extends: .opensbi_job_rules + stage: containers image: docker:19.03.1 services: - docker:19.03.1-dind @@ -24,16 +37,9 @@ docker-opensbi: - docker push $IMAGE_TAG build-opensbi: + extends: .opensbi_job_rules stage: build needs: ['docker-opensbi'] - rules: # Only run this job when ... - - changes: # ... roms/opensbi/ is modified (submodule updated) - - roms/opensbi/* - when: always - - if: '$CI_COMMIT_REF_NAME =~ /^opensbi/' # or the branch/tag starts with 'opensbi' - when: always - - if: '$CI_COMMIT_MESSAGE =~ /opensbi/i' # or last commit description contains 'OpenSBI' - when: always artifacts: paths: # 'artifacts.zip' will contains the following files: - pc-bios/opensbi-riscv32-generic-fw_dynamic.bin diff --git a/.gitlab-ci.d/qemu-project.yml b/.gitlab-ci.d/qemu-project.yml new file mode 100644 index 00000000000..b3d79bc429b --- /dev/null +++ b/.gitlab-ci.d/qemu-project.yml @@ -0,0 +1,13 @@ +# This file contains the set of jobs run by the QEMU project: +# https://gitlab.com/qemu-project/qemu/-/pipelines + +include: + - local: '/.gitlab-ci.d/stages.yml' + - local: '/.gitlab-ci.d/edk2.yml' + - local: '/.gitlab-ci.d/opensbi.yml' + - local: '/.gitlab-ci.d/containers.yml' + - local: '/.gitlab-ci.d/crossbuilds.yml' + - local: '/.gitlab-ci.d/buildtest.yml' + - local: '/.gitlab-ci.d/static_checks.yml' + - local: '/.gitlab-ci.d/custom-runners.yml' + - local: '/.gitlab-ci.d/cirrus.yml' diff --git a/.gitlab-ci.d/stages.yml b/.gitlab-ci.d/stages.yml new file mode 100644 index 00000000000..f50826018df --- /dev/null +++ b/.gitlab-ci.d/stages.yml @@ -0,0 +1,8 @@ +# Currently we have two build stages after our containers are built: +# - build (for traditional build and test or first stage build) +# - test (for test stages, using build artefacts from a build stage) +stages: + - containers + - containers-layer2 + - build + - test diff --git a/.gitlab-ci.d/static_checks.yml b/.gitlab-ci.d/static_checks.yml new file mode 100644 index 00000000000..902843f8b3c --- /dev/null +++ b/.gitlab-ci.d/static_checks.yml @@ -0,0 +1,49 @@ +check-patch: + stage: build + image: $CI_REGISTRY_IMAGE/qemu/centos8:latest + needs: + job: amd64-centos8-container + script: + - .gitlab-ci.d/check-patch.py + variables: + GIT_DEPTH: 1000 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project"' + when: never + - when: on_success + allow_failure: true + +check-dco: + stage: build + image: $CI_REGISTRY_IMAGE/qemu/centos8:latest + needs: + job: amd64-centos8-container + script: .gitlab-ci.d/check-dco.py + variables: + GIT_DEPTH: 1000 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH' + when: never + - when: on_success + +check-python-pipenv: + stage: test + image: $CI_REGISTRY_IMAGE/qemu/python:latest + script: + - make -C python check-pipenv + variables: + GIT_DEPTH: 1 + needs: + job: python-container + +check-python-tox: + stage: test + image: $CI_REGISTRY_IMAGE/qemu/python:latest + script: + - make -C python check-tox + variables: + GIT_DEPTH: 1 + QEMU_TOX_EXTRA_ARGS: 
--skip-missing-interpreters=false + needs: + job: python-container + allow_failure: true diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 52d65d6c04f..9762dda2ee3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,837 +1,24 @@ -# Currently we have two build stages after our containers are built: -# - build (for traditional build and test or first stage build) -# - test (for test stages, using build artefacts from a build stage) -stages: - - containers - - containers-layer2 - - build - - test - -include: - - local: '/.gitlab-ci.d/edk2.yml' - - local: '/.gitlab-ci.d/opensbi.yml' - - local: '/.gitlab-ci.d/containers.yml' - - local: '/.gitlab-ci.d/crossbuilds.yml' - -.native_build_job_template: &native_build_job_definition - stage: build - image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest - before_script: - - JOBS=$(expr $(nproc) + 1) - script: - - mkdir build - - cd build - - if test -n "$TARGETS"; - then - ../configure --enable-werror --disable-docs $CONFIGURE_ARGS --target-list="$TARGETS" ; - else - ../configure --enable-werror --disable-docs $CONFIGURE_ARGS ; - fi || { cat config.log meson-logs/meson-log.txt && exit 1; } - - if test -n "$LD_JOBS"; - then - meson configure . -Dbackend_max_links="$LD_JOBS" ; - fi || exit 1; - - make -j"$JOBS" - - if test -n "$MAKE_CHECK_ARGS"; - then - make -j"$JOBS" $MAKE_CHECK_ARGS ; - fi - -.native_test_job_template: &native_test_job_definition - stage: test - image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest - script: - - scripts/git-submodule.sh update - $(sed -n '/GIT_SUBMODULES=/ s/.*=// p' build/config-host.mak) - - cd build - - find . -type f -exec touch {} + - # Avoid recompiling by hiding ninja with NINJA=":" - - make NINJA=":" $MAKE_CHECK_ARGS - -.acceptance_template: &acceptance_definition - cache: - key: "${CI_JOB_NAME}-cache" - paths: - - ${CI_PROJECT_DIR}/avocado-cache - policy: pull-push - artifacts: - name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" - when: always - expire_in: 2 days - paths: - - build/tests/results/latest/results.xml - - build/tests/results/latest/test-results - reports: - junit: build/tests/results/latest/results.xml - before_script: - - mkdir -p ~/.config/avocado - - echo "[datadir.paths]" > ~/.config/avocado/avocado.conf - - echo "cache_dirs = ['${CI_PROJECT_DIR}/avocado-cache']" - >> ~/.config/avocado/avocado.conf - - echo -e '[job.output.testlogs]\nstatuses = ["FAIL", "INTERRUPT"]' - >> ~/.config/avocado/avocado.conf - - if [ -d ${CI_PROJECT_DIR}/avocado-cache ]; then - du -chs ${CI_PROJECT_DIR}/avocado-cache ; - fi - - export AVOCADO_ALLOW_UNTRUSTED_CODE=1 - after_script: - - cd build - - du -chs ${CI_PROJECT_DIR}/avocado-cache - -build-system-alpine: - <<: *native_build_job_definition - needs: - - job: amd64-alpine-container - variables: - IMAGE: alpine - TARGETS: aarch64-softmmu alpha-softmmu cris-softmmu hppa-softmmu - moxie-softmmu microblazeel-softmmu mips64el-softmmu - MAKE_CHECK_ARGS: check-build - CONFIGURE_ARGS: --enable-docs --enable-trace-backends=log,simple,syslog - artifacts: - expire_in: 2 days - paths: - - .git-submodule-status - - build - -check-system-alpine: - <<: *native_test_job_definition - needs: - - job: build-system-alpine - artifacts: true - variables: - IMAGE: alpine - MAKE_CHECK_ARGS: check - -acceptance-system-alpine: - <<: *native_test_job_definition - needs: - - job: build-system-alpine - artifacts: true - variables: - IMAGE: alpine - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -build-system-ubuntu: - <<: *native_build_job_definition - needs: - job: amd64-ubuntu2004-container - 
variables: - IMAGE: ubuntu2004 - CONFIGURE_ARGS: --enable-docs --enable-fdt=system --enable-slirp=system - TARGETS: aarch64-softmmu alpha-softmmu cris-softmmu hppa-softmmu - moxie-softmmu microblazeel-softmmu mips64el-softmmu - MAKE_CHECK_ARGS: check-build - artifacts: - expire_in: 2 days - paths: - - build - -check-system-ubuntu: - <<: *native_test_job_definition - needs: - - job: build-system-ubuntu - artifacts: true - variables: - IMAGE: ubuntu2004 - MAKE_CHECK_ARGS: check - -acceptance-system-ubuntu: - <<: *native_test_job_definition - needs: - - job: build-system-ubuntu - artifacts: true - variables: - IMAGE: ubuntu2004 - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -build-system-debian: - <<: *native_build_job_definition - needs: - job: amd64-debian-container - variables: - IMAGE: debian-amd64 - CONFIGURE_ARGS: --enable-fdt=system - TARGETS: arm-softmmu avr-softmmu i386-softmmu mipsel-softmmu - riscv64-softmmu sh4eb-softmmu sparc-softmmu xtensaeb-softmmu - MAKE_CHECK_ARGS: check-build - artifacts: - expire_in: 2 days - paths: - - build - -check-system-debian: - <<: *native_test_job_definition - needs: - - job: build-system-debian - artifacts: true - variables: - IMAGE: debian-amd64 - MAKE_CHECK_ARGS: check - -acceptance-system-debian: - <<: *native_test_job_definition - needs: - - job: build-system-debian - artifacts: true - variables: - IMAGE: debian-amd64 - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -build-system-fedora: - <<: *native_build_job_definition - needs: - job: amd64-fedora-container - variables: - IMAGE: fedora - CONFIGURE_ARGS: --disable-gcrypt --enable-nettle --enable-docs - --enable-fdt=system --enable-slirp=system --enable-capstone=system - TARGETS: tricore-softmmu microblaze-softmmu mips-softmmu - xtensa-softmmu m68k-softmmu riscv32-softmmu ppc-softmmu sparc64-softmmu - MAKE_CHECK_ARGS: check-build - artifacts: - expire_in: 2 days - paths: - - build - -check-system-fedora: - <<: *native_test_job_definition - needs: - - job: build-system-fedora - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check - -acceptance-system-fedora: - <<: *native_test_job_definition - needs: - - job: build-system-fedora - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -build-system-centos: - <<: *native_build_job_definition - needs: - job: amd64-centos8-container - variables: - IMAGE: centos8 - CONFIGURE_ARGS: --disable-nettle --enable-gcrypt --enable-fdt=system - --enable-modules --enable-trace-backends=dtrace - TARGETS: ppc64-softmmu or1k-softmmu s390x-softmmu - x86_64-softmmu rx-softmmu sh4-softmmu nios2-softmmu - MAKE_CHECK_ARGS: check-build - artifacts: - expire_in: 2 days - paths: - - build - -check-system-centos: - <<: *native_test_job_definition - needs: - - job: build-system-centos - artifacts: true - variables: - IMAGE: centos8 - MAKE_CHECK_ARGS: check - -acceptance-system-centos: - <<: *native_test_job_definition - needs: - - job: build-system-centos - artifacts: true - variables: - IMAGE: centos8 - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -build-system-opensuse: - <<: *native_build_job_definition - needs: - job: amd64-opensuse-leap-container - variables: - IMAGE: opensuse-leap - CONFIGURE_ARGS: --enable-fdt=system - TARGETS: s390x-softmmu x86_64-softmmu aarch64-softmmu - MAKE_CHECK_ARGS: check-build - artifacts: - expire_in: 2 days - paths: - - build - -check-system-opensuse: - <<: *native_test_job_definition - needs: - - job: 
build-system-opensuse - artifacts: true - variables: - IMAGE: opensuse-leap - MAKE_CHECK_ARGS: check - -acceptance-system-opensuse: - <<: *native_test_job_definition - needs: - - job: build-system-opensuse - artifacts: true - variables: - IMAGE: opensuse-leap - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - - -build-disabled: - <<: *native_build_job_definition - needs: - job: amd64-fedora-container - variables: - IMAGE: fedora - CONFIGURE_ARGS: - --disable-attr - --disable-auth-pam - --disable-avx2 - --disable-bochs - --disable-brlapi - --disable-bzip2 - --disable-cap-ng - --disable-capstone - --disable-cloop - --disable-coroutine-pool - --disable-curl - --disable-curses - --disable-dmg - --disable-docs - --disable-gcrypt - --disable-glusterfs - --disable-gnutls - --disable-gtk - --disable-guest-agent - --disable-iconv - --disable-keyring - --disable-kvm - --disable-libiscsi - --disable-libpmem - --disable-libssh - --disable-libudev - --disable-libusb - --disable-libxml2 - --disable-linux-aio - --disable-live-block-migration - --disable-lzo - --disable-malloc-trim - --disable-mpath - --disable-nettle - --disable-numa - --disable-opengl - --disable-parallels - --disable-pie - --disable-qcow1 - --disable-qed - --disable-qom-cast-debug - --disable-rbd - --disable-rdma - --disable-replication - --disable-sdl - --disable-seccomp - --disable-sheepdog - --disable-slirp - --disable-smartcard - --disable-snappy - --disable-sparse - --disable-spice - --disable-strip - --disable-tpm - --disable-usb-redir - --disable-vdi - --disable-vhost-crypto - --disable-vhost-net - --disable-vhost-scsi - --disable-vhost-kernel - --disable-vhost-user - --disable-vhost-vdpa - --disable-vhost-vsock - --disable-virglrenderer - --disable-vnc - --disable-vte - --disable-vvfat - --disable-xen - --disable-zstd - TARGETS: arm-softmmu i386-softmmu ppc64-softmmu mips64-softmmu - s390x-softmmu i386-linux-user - MAKE_CHECK_ARGS: check-qtest SPEED=slow - -# This jobs explicitly disable TCG (--disable-tcg), KVM is detected by -# the configure script. The container doesn't contain Xen headers so -# Xen accelerator is not detected / selected. As result it build the -# i386-softmmu and x86_64-softmmu with KVM being the single accelerator -# available. -# Also use a different coroutine implementation (which is only really of -# interest to KVM users, i.e. 
with TCG disabled) -build-tcg-disabled: - <<: *native_build_job_definition - needs: - job: amd64-centos8-container - variables: - IMAGE: centos8 - script: - - mkdir build - - cd build - - ../configure --disable-tcg --audio-drv-list="" --with-coroutine=ucontext - || { cat config.log meson-logs/meson-log.txt && exit 1; } - - make -j"$JOBS" - - make check-unit - - make check-qapi-schema - - cd tests/qemu-iotests/ - - ./check -raw 001 002 003 004 005 008 009 010 011 012 021 025 032 033 048 - 052 063 077 086 101 104 106 113 148 150 151 152 157 159 160 163 - 170 171 183 184 192 194 197 208 215 221 222 226 227 236 253 277 - - ./check -qcow2 028 051 056 057 058 065 068 082 085 091 095 096 102 122 - 124 132 139 142 144 145 151 152 155 157 165 194 196 197 200 202 - 208 209 215 216 218 222 227 234 246 247 248 250 254 255 257 258 - 260 261 262 263 264 270 272 273 277 279 - -build-user: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - CONFIGURE_ARGS: --disable-tools --disable-system - MAKE_CHECK_ARGS: check-tcg - -build-user-static: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - CONFIGURE_ARGS: --disable-tools --disable-system --static - MAKE_CHECK_ARGS: check-tcg - -# Only build the softmmu targets we have check-tcg tests for -build-some-softmmu: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - CONFIGURE_ARGS: --disable-tools --enable-debug - TARGETS: xtensa-softmmu arm-softmmu aarch64-softmmu alpha-softmmu - MAKE_CHECK_ARGS: check-tcg - -# Run check-tcg against linux-user (with plugins) -# we skip sparc64-linux-user until it has been fixed somewhat -# we skip cris-linux-user as it doesn't use the common run loop -build-user-plugins: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - CONFIGURE_ARGS: --disable-tools --disable-system --enable-plugins --enable-debug-tcg --target-list-exclude=sparc64-linux-user,cris-linux-user - MAKE_CHECK_ARGS: check-tcg - timeout: 1h 30m - -build-user-centos7: - <<: *native_build_job_definition - needs: - job: amd64-centos7-container - variables: - IMAGE: centos7 - CONFIGURE_ARGS: --disable-system --disable-tools --disable-docs - MAKE_CHECK_ARGS: check-tcg - -build-some-softmmu-plugins: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - CONFIGURE_ARGS: --disable-tools --disable-user --enable-plugins --enable-debug-tcg - TARGETS: xtensa-softmmu arm-softmmu aarch64-softmmu alpha-softmmu - MAKE_CHECK_ARGS: check-tcg - -clang-system: - <<: *native_build_job_definition - needs: - job: amd64-fedora-container - variables: - IMAGE: fedora - CONFIGURE_ARGS: --cc=clang --cxx=clang++ - --extra-cflags=-fsanitize=undefined --extra-cflags=-fno-sanitize-recover=undefined - TARGETS: alpha-softmmu arm-softmmu m68k-softmmu mips64-softmmu - ppc-softmmu s390x-softmmu - MAKE_CHECK_ARGS: check-qtest check-tcg - -clang-user: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - CONFIGURE_ARGS: --cc=clang --cxx=clang++ --disable-system - --target-list-exclude=microblazeel-linux-user,aarch64_be-linux-user,i386-linux-user,m68k-linux-user,mipsn32el-linux-user,xtensaeb-linux-user - --extra-cflags=-fsanitize=undefined 
--extra-cflags=-fno-sanitize-recover=undefined - MAKE_CHECK_ARGS: check-unit check-tcg - -# Set LD_JOBS=1 because this requires LTO and ld consumes a large amount of memory. -# On gitlab runners, default value sometimes end up calling 2 lds concurrently and -# triggers an Out-Of-Memory error # -# Since slirp callbacks are used in QEMU Timers, slirp needs to be compiled together -# with QEMU and linked as a static library to avoid false positives in CFI checks. -# This can be accomplished by using -enable-slirp=git, which avoids the use of -# a system-wide version of the library +# This is the GitLab CI configuration file for the mainstream QEMU +# project: https://gitlab.com/qemu-project/qemu/-/pipelines # -# Split in three sets of build/check/acceptance to limit the execution time of each -# job -build-cfi-aarch64: - <<: *native_build_job_definition - needs: - - job: amd64-fedora-container - variables: - LD_JOBS: 1 - AR: llvm-ar - IMAGE: fedora - CONFIGURE_ARGS: --cc=clang --cxx=clang++ --enable-cfi --enable-cfi-debug - --enable-safe-stack --enable-slirp=git - TARGETS: aarch64-softmmu - MAKE_CHECK_ARGS: check-build - timeout: 70m - artifacts: - expire_in: 2 days - paths: - - build - -check-cfi-aarch64: - <<: *native_test_job_definition - needs: - - job: build-cfi-aarch64 - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check - -acceptance-cfi-aarch64: - <<: *native_test_job_definition - needs: - - job: build-cfi-aarch64 - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -build-cfi-ppc64-s390x: - <<: *native_build_job_definition - needs: - - job: amd64-fedora-container - variables: - LD_JOBS: 1 - AR: llvm-ar - IMAGE: fedora - CONFIGURE_ARGS: --cc=clang --cxx=clang++ --enable-cfi --enable-cfi-debug - --enable-safe-stack --enable-slirp=git - TARGETS: ppc64-softmmu s390x-softmmu - MAKE_CHECK_ARGS: check-build - timeout: 70m - artifacts: - expire_in: 2 days - paths: - - build - -check-cfi-ppc64-s390x: - <<: *native_test_job_definition - needs: - - job: build-cfi-ppc64-s390x - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check - -acceptance-cfi-ppc64-s390x: - <<: *native_test_job_definition - needs: - - job: build-cfi-ppc64-s390x - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -build-cfi-x86_64: - <<: *native_build_job_definition - needs: - - job: amd64-fedora-container - variables: - LD_JOBS: 1 - AR: llvm-ar - IMAGE: fedora - CONFIGURE_ARGS: --cc=clang --cxx=clang++ --enable-cfi --enable-cfi-debug - --enable-safe-stack --enable-slirp=git - TARGETS: x86_64-softmmu - MAKE_CHECK_ARGS: check-build - timeout: 70m - artifacts: - expire_in: 2 days - paths: - - build - -check-cfi-x86_64: - <<: *native_test_job_definition - needs: - - job: build-cfi-x86_64 - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check - -acceptance-cfi-x86_64: - <<: *native_test_job_definition - needs: - - job: build-cfi-x86_64 - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -tsan-build: - <<: *native_build_job_definition - needs: - job: amd64-ubuntu2004-container - variables: - IMAGE: ubuntu2004 - CONFIGURE_ARGS: --enable-tsan --cc=clang-10 --cxx=clang++-10 - --enable-trace-backends=ust --enable-fdt=system --enable-slirp=system - TARGETS: x86_64-softmmu ppc64-softmmu riscv64-softmmu x86_64-linux-user - MAKE_CHECK_ARGS: bench V=1 - -# These targets are on the way out 
-build-deprecated: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - CONFIGURE_ARGS: --disable-tools - MAKE_CHECK_ARGS: build-tcg - TARGETS: ppc64abi32-linux-user lm32-softmmu unicore32-softmmu - artifacts: - expire_in: 2 days - paths: - - build - -# We split the check-tcg step as test failures are expected but we still -# want to catch the build breaking. -check-deprecated: - <<: *native_test_job_definition - needs: - - job: build-deprecated - artifacts: true - variables: - IMAGE: debian-all-test-cross - MAKE_CHECK_ARGS: check-tcg - allow_failure: true - -# gprof/gcov are GCC features -gprof-gcov: - <<: *native_build_job_definition - needs: - job: amd64-ubuntu2004-container - variables: - IMAGE: ubuntu2004 - CONFIGURE_ARGS: --enable-gprof --enable-gcov - MAKE_CHECK_ARGS: check - TARGETS: aarch64-softmmu ppc64-softmmu s390x-softmmu x86_64-softmmu - timeout: 70m - after_script: - - ${CI_PROJECT_DIR}/scripts/ci/coverage-summary.sh - -build-oss-fuzz: - <<: *native_build_job_definition - needs: - job: amd64-fedora-container - variables: - IMAGE: fedora - script: - - mkdir build-oss-fuzz - - CC="clang" CXX="clang++" CFLAGS="-fsanitize=address" - ./scripts/oss-fuzz/build.sh - - export ASAN_OPTIONS="fast_unwind_on_malloc=0" - - for fuzzer in $(find ./build-oss-fuzz/DEST_DIR/ -executable -type f - | grep -v slirp); do - grep "LLVMFuzzerTestOneInput" ${fuzzer} > /dev/null 2>&1 || continue ; - echo Testing ${fuzzer} ... ; - "${fuzzer}" -runs=1 -seed=1 || exit 1 ; - done - # Unrelated to fuzzer: run some tests with -fsanitize=address - - cd build-oss-fuzz && make check-qtest-i386 check-unit - -build-tci: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - script: - - TARGETS="aarch64 alpha arm hppa m68k microblaze moxie ppc64 s390x x86_64" - - mkdir build - - cd build - - ../configure --enable-tcg-interpreter - --target-list="$(for tg in $TARGETS; do echo -n ${tg}'-softmmu '; done)" || { cat config.log meson-logs/meson-log.txt && exit 1; } - - make -j"$JOBS" - - make tests/qtest/boot-serial-test tests/qtest/cdrom-test tests/qtest/pxe-test - - for tg in $TARGETS ; do - export QTEST_QEMU_BINARY="./qemu-system-${tg}" ; - ./tests/qtest/boot-serial-test || exit 1 ; - ./tests/qtest/cdrom-test || exit 1 ; - done - - QTEST_QEMU_BINARY="./qemu-system-x86_64" ./tests/qtest/pxe-test - - QTEST_QEMU_BINARY="./qemu-system-s390x" ./tests/qtest/pxe-test -m slow - - make check-tcg - -# Alternate coroutines implementations are only really of interest to KVM users -# However we can't test against KVM on Gitlab-CI so we can only run unit tests -build-coroutine-sigaltstack: - <<: *native_build_job_definition - needs: - job: amd64-ubuntu2004-container - variables: - IMAGE: ubuntu2004 - CONFIGURE_ARGS: --with-coroutine=sigaltstack --disable-tcg - --enable-trace-backends=ftrace - MAKE_CHECK_ARGS: check-unit - -# Most jobs test latest gcrypt or nettle builds +# !!! DO NOT ADD ANY NEW CONFIGURATION TO THIS FILE !!! +# +# Only documentation or comments is accepted. +# +# To use a different set of jobs than the mainstream QEMU project, +# you need to set the location of your custom yml file at "custom CI/CD +# configuration path", on your GitLab CI namespace: +# https://docs.gitlab.com/ee/ci/pipelines/settings.html#custom-cicd-configuration-path +# +# ---------------------------------------------------------------------- +# +# QEMU CI jobs are based on templates. 
Some templates provide +# user-configurable options, modifiable via configuration variables. +# +# See https://qemu-project.gitlab.io/qemu/devel/ci.html#custom-ci-cd-variables +# for more information. # -# These jobs test old gcrypt and nettle from RHEL7 -# which had some API differences. -crypto-old-nettle: - <<: *native_build_job_definition - needs: - job: amd64-centos7-container - variables: - IMAGE: centos7 - TARGETS: x86_64-softmmu x86_64-linux-user - CONFIGURE_ARGS: --disable-gcrypt --enable-nettle - MAKE_CHECK_ARGS: check - -crypto-old-gcrypt: - <<: *native_build_job_definition - needs: - job: amd64-centos7-container - variables: - IMAGE: centos7 - TARGETS: x86_64-softmmu x86_64-linux-user - CONFIGURE_ARGS: --disable-nettle --enable-gcrypt - MAKE_CHECK_ARGS: check - -crypto-only-gnutls: - <<: *native_build_job_definition - needs: - job: amd64-centos7-container - variables: - IMAGE: centos7 - TARGETS: x86_64-softmmu x86_64-linux-user - CONFIGURE_ARGS: --disable-nettle --disable-gcrypt --enable-gnutls - MAKE_CHECK_ARGS: check - - -# Check our reduced build configurations -build-without-default-devices: - <<: *native_build_job_definition - needs: - job: amd64-centos8-container - variables: - IMAGE: centos8 - CONFIGURE_ARGS: --without-default-devices --disable-user - -build-without-default-features: - <<: *native_build_job_definition - needs: - job: amd64-debian-container - variables: - IMAGE: debian-amd64 - CONFIGURE_ARGS: --without-default-features --disable-user - --target-list-exclude=arm-softmmu,i386-softmmu,mipsel-softmmu,mips64-softmmu,ppc-softmmu - MAKE_CHECK_ARGS: check-unit - -check-patch: - stage: build - image: $CI_REGISTRY_IMAGE/qemu/centos8:latest - needs: - job: amd64-centos8-container - script: .gitlab-ci.d/check-patch.py - except: - variables: - - $CI_PROJECT_NAMESPACE == 'qemu-project' && $CI_COMMIT_BRANCH == 'master' - variables: - GIT_DEPTH: 1000 - allow_failure: true - -check-dco: - stage: build - image: $CI_REGISTRY_IMAGE/qemu/centos8:latest - needs: - job: amd64-centos8-container - script: .gitlab-ci.d/check-dco.py - except: - variables: - - $CI_PROJECT_NAMESPACE == 'qemu-project' && $CI_COMMIT_BRANCH == 'master' - variables: - GIT_DEPTH: 1000 - -build-libvhost-user: - stage: build - image: $CI_REGISTRY_IMAGE/qemu/fedora:latest - needs: - job: amd64-fedora-container - before_script: - - dnf install -y meson ninja-build - script: - - mkdir subprojects/libvhost-user/build - - cd subprojects/libvhost-user/build - - meson - - ninja - -# No targets are built here, just tools, docs, and unit tests. This -# also feeds into the eventual documentation deployment steps later -build-tools-and-docs-debian: - <<: *native_build_job_definition - needs: - job: amd64-debian-container - variables: - IMAGE: debian-amd64 - MAKE_CHECK_ARGS: check-unit check-softfloat ctags TAGS cscope - CONFIGURE_ARGS: --disable-system --disable-user --enable-docs --enable-tools - artifacts: - expire_in: 2 days - paths: - - build -# Prepare for GitLab pages deployment. 
Anything copied into the -# "public" directory will be deployed to $USER.gitlab.io/$PROJECT -pages: - image: $CI_REGISTRY_IMAGE/qemu/debian-amd64:latest - stage: test - needs: - - job: build-tools-and-docs-debian - script: - - mkdir -p public - # HTML-ised source tree - - make gtags - - htags -anT --tree-view=filetree -m qemu_init - -t "Welcome to the QEMU sourcecode" - - mv HTML public/src - # Project documentation - - make -C build install DESTDIR=$(pwd)/temp-install - - mv temp-install/usr/local/share/doc/qemu/* public/ - artifacts: - paths: - - public +include: + - local: '/.gitlab-ci.d/qemu-project.yml' diff --git a/.gitlab/issue_templates/bug.md b/.gitlab/issue_templates/bug.md new file mode 100644 index 00000000000..e910f7b1c29 --- /dev/null +++ b/.gitlab/issue_templates/bug.md @@ -0,0 +1,64 @@ + + +## Host environment + - Operating system: (Windows 10 21H1, Fedora 34, etc.) + - OS/kernel version: (For POSIX hosts, use `uname -a`) + - Architecture: (x86, ARM, s390x, etc.) + - QEMU flavor: (qemu-system-x86_64, qemu-aarch64, qemu-img, etc.) + - QEMU version: (e.g. `qemu-system-x86_64 --version`) + - QEMU command line: + + ``` + ./qemu-system-x86_64 -M q35 -m 4096 -enable-kvm -hda fedora32.qcow2 + ``` + +## Emulated/Virtualized environment + - Operating system: (Windows 10 21H1, Fedora 34, etc.) + - OS/kernel version: (For POSIX guests, use `uname -a`.) + - Architecture: (x86, ARM, s390x, etc.) + + +## Description of problem + + + +## Steps to reproduce +1. +2. +3. + + +## Additional information + + + + +/label ~"kind::Bug" diff --git a/.gitlab/issue_templates/feature_request.md b/.gitlab/issue_templates/feature_request.md new file mode 100644 index 00000000000..7de02dcc2c9 --- /dev/null +++ b/.gitlab/issue_templates/feature_request.md @@ -0,0 +1,32 @@ + + +## Goal + + + +## Technical details + + + +## Additional information + + + +/label ~"kind::Feature Request" diff --git a/.mailmap b/.mailmap index a1bd659817d..8beb2f95ae2 100644 --- a/.mailmap +++ b/.mailmap @@ -27,6 +27,10 @@ Paul Brook pbrook ths malc malc +# Corrupted Author fields +Marek Dolata mkdolata@us.ibm.com +Nick Hudson hnick@vmware.com + # There is also a: # (no author) <(no author)@c046a42c-6fe2-441c-8c8c-71466251a162> # for the cvs2svn initialization commit e63c3dc74bf. @@ -65,6 +69,7 @@ Yongbok Kim # git author config, or had utf8/latin1 encoding issues. Aaron Lindsay Alexey Gerasimenko +Alex Chen Alex Ivanov Andreas Färber Bandan Das @@ -95,8 +100,11 @@ Gautham R. Shenoy Gautham R. Shenoy Gonglei (Arei) Guang Wang +Haibin Zhang Hailiang Zhang +Hanna Reitz Hervé Poussineau +Hyman Huang Jakub Jermář Jakub Jermář Jean-Christophe Dubois @@ -130,6 +138,7 @@ Nicholas Thomas Nikunj A Dadhania Orit Wasserman Paolo Bonzini +Pan Nengyuan Pavel Dovgaluk Pavel Dovgaluk Pavel Dovgaluk diff --git a/.patchew.yml b/.patchew.yml index 988c29261f9..1b78262ce58 100644 --- a/.patchew.yml +++ b/.patchew.yml @@ -88,7 +88,7 @@ email: more information: {{ logtext }} - {% elif test == "docker-mingw@fedora" or test == "docker-quick@centos7" or test == "asan" %} + {% elif test == "docker-mingw@fedora" or test == "docker-quick@centos8" or test == "asan" %} Hi, This series failed the {{ test }} build test. 
Please find the testing commands and @@ -124,13 +124,13 @@ testing: script: | #!/bin/bash time make docker-test-debug@fedora TARGET_LIST=x86_64-softmmu J=14 NETWORK=1 - docker-quick@centos7: + docker-quick@centos8: enabled: false requirements: docker,x86_64 timeout: 3600 script: | #!/bin/bash - time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1 + time make docker-test-quick@centos8 SHOW_ENV=1 J=14 NETWORK=1 checkpatch: enabled: true requirements: '' @@ -138,9 +138,6 @@ testing: script: | #!/bin/bash git rev-parse base > /dev/null || exit 0 - git config --local diff.renamelimit 0 - git config --local diff.renames True - git config --local diff.algorithm histogram ./scripts/checkpatch.pl --mailback base.. docker-mingw@fedora: enabled: true diff --git a/.require_clean_build b/.require_clean_build index aafd98dbfa6..8d5cb63a435 100644 --- a/.require_clean_build +++ b/.require_clean_build @@ -4,3 +4,4 @@ # https://github.com/CTSRD-CHERI/qemu/commit/56c3469a6d63c64fbe966af302de306e82cd5b6a # https://github.com/CTSRD-CHERI/qemu/commit/514635ea947fd44122fa9130782b5fc63873d2f6 20230717 # CGetHigh and CSetHigh added, meson does not track dependencies correctly so we need a full rebuild +20250106 # Merge to 6.2 breaks incremental builds due to missing config options - new configure required diff --git a/.travis.yml b/.travis.yml index 4609240b5aa..41010ebe6bc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,6 +27,7 @@ addons: - libattr1-dev - libbrlapi-dev - libcap-ng-dev + - libcacard-dev - libgcc-7-dev - libgnutls28-dev - libgtk-3-dev @@ -34,7 +35,6 @@ addons: - liblttng-ust-dev - libncurses5-dev - libnfs-dev - - libnss3-dev - libpixman-1-dev - libpng-dev - librados-dev @@ -129,6 +129,7 @@ jobs: - libaio-dev - libattr1-dev - libbrlapi-dev + - libcacard-dev - libcap-ng-dev - libgcrypt20-dev - libgnutls28-dev @@ -137,7 +138,6 @@ jobs: - liblttng-ust-dev - libncurses5-dev - libnfs-dev - - libnss3-dev - libpixman-1-dev - libpng-dev - librados-dev @@ -163,6 +163,7 @@ jobs: - libaio-dev - libattr1-dev - libbrlapi-dev + - libcacard-dev - libcap-ng-dev - libgcrypt20-dev - libgnutls28-dev @@ -171,7 +172,6 @@ jobs: - liblttng-ust-dev - libncurses5-dev - libnfs-dev - - libnss3-dev - libpixman-1-dev - libpng-dev - librados-dev @@ -196,6 +196,7 @@ jobs: - libaio-dev - libattr1-dev - libbrlapi-dev + - libcacard-dev - libcap-ng-dev - libgcrypt20-dev - libgnutls28-dev @@ -204,7 +205,6 @@ jobs: - liblttng-ust-dev - libncurses5-dev - libnfs-dev - - libnss3-dev - libpixman-1-dev - libpng-dev - librados-dev @@ -238,6 +238,7 @@ jobs: apt_packages: - libaio-dev - libattr1-dev + - libcacard-dev - libcap-ng-dev - libgnutls28-dev - libiscsi-dev @@ -245,7 +246,6 @@ jobs: - liblzo2-dev - libncurses-dev - libnfs-dev - - libnss3-dev - libpixman-1-dev - libsdl2-dev - libsdl2-image-dev @@ -281,6 +281,7 @@ jobs: - libaio-dev - libattr1-dev - libbrlapi-dev + - libcacard-dev - libcap-ng-dev - libgcrypt20-dev - libgnutls28-dev @@ -289,7 +290,6 @@ jobs: - liblttng-ust-dev - libncurses5-dev - libnfs-dev - - libnss3-dev - libpixman-1-dev - libpng-dev - librados-dev @@ -305,26 +305,3 @@ jobs: - CONFIG="--disable-containers --disable-tcg --enable-kvm --disable-tools --host-cc=clang --cxx=clang++" - UNRELIABLE=true - - # Release builds - # The make-release script expect a QEMU version, so our tag must start with a 'v'. - # This is the case when release candidate tags are created. 
- - name: "Release tarball" - if: tag IS present AND tag =~ /^v\d+\.\d+(\.\d+)?(-\S*)?$/ - env: - # We want to build from the release tarball - - BUILD_DIR="release/build/dir" SRC_DIR="../../.." - - BASE_CONFIG="--prefix=$PWD/dist" - - CONFIG="--target-list=x86_64-softmmu,aarch64-softmmu,armeb-linux-user,ppc-linux-user" - - TEST_CMD="make install -j${JOBS}" - - QEMU_VERSION="${TRAVIS_TAG:1}" - - CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-default" - script: - - make -C ${SRC_DIR} qemu-${QEMU_VERSION}.tar.bz2 - - ls -l ${SRC_DIR}/qemu-${QEMU_VERSION}.tar.bz2 - - tar -xf ${SRC_DIR}/qemu-${QEMU_VERSION}.tar.bz2 && cd qemu-${QEMU_VERSION} - - mkdir -p release-build && cd release-build - - ../configure ${BASE_CONFIG} ${CONFIG} || { cat config.log meson-logs/meson-log.txt && exit 1; } - - make install - allow_failures: - - env: UNRELIABLE=true diff --git a/Jenkinsfile b/Jenkinsfile index eb33f5d27a8..a065736cd9c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -73,12 +73,13 @@ def addBootJobs(bootJobs, params, String qemuConfig, String architecture, String def bootCheriBSDForAllArchitectures(params, String qemuConfig, boolean isDebug) { stage("Boot CheriBSD (${qemuConfig})") { - bootJobs = [failFast: false] + def bootJobs = [failFast: false] ["riscv64", "riscv64-purecap", "aarch64", "morello-purecap"].each { String architecture -> addBootJobs(bootJobs, params, qemuConfig, architecture, "main") if (!isDebug) { // For the non-ASAN build of QEMU we also boot the latest release - addBootJobs(bootJobs, params, qemuConfig, architecture, "releng%252F22.12", "-latest-release") + def latestRelease = cheribsdInfo.getReleasedVersions()[-1] + addBootJobs(bootJobs, params, qemuConfig, architecture, "releng/${latestRelease}", "-latest-release") } def targetBranch = env.CHANGE_TARGET ? env.CHANGE_TARGET : env.BRANCH_NAME; if (targetBranch == 'dev') { @@ -106,7 +107,8 @@ def bootCheriBSD(params, String qemuConfig, String stageSuffix, String archSuffi def compressedDiskImage = "artifacts-${archSuffix}/cheribsd-${archSuffix}.img.xz" dir (stageSuffix) { sh "rm -rfv artifacts-${archSuffix}/cheribsd-*.img* artifacts-${archSuffix}/kernel*" - copyArtifacts projectName: "CheriBSD-pipeline/${cheribsdBranch}", filter: "${compressedDiskImage}, ${compressedKernel}", + def jenkinsBranchName = cheribsdBranch.replaceAll('/', '%2F') + copyArtifacts projectName: "CheriBSD-pipeline/${jenkinsBranchName}", filter: "${compressedDiskImage}, ${compressedKernel}", target: '.', fingerprintArtifacts: false, flatten: false, selector: lastSuccessful() } def testExtraArgs = [ diff --git a/Kconfig b/Kconfig index d52ebd839b3..fb6a24a2de8 100644 --- a/Kconfig +++ b/Kconfig @@ -1,5 +1,6 @@ source Kconfig.host source backends/Kconfig source accel/Kconfig +source target/Kconfig source hw/Kconfig source semihosting/Kconfig diff --git a/Kconfig.host b/Kconfig.host index 24255ef4419..60b9c07b5ee 100644 --- a/Kconfig.host +++ b/Kconfig.host @@ -41,3 +41,7 @@ config PVRDMA config MULTIPROCESS_ALLOWED bool imply MULTIPROCESS + +config FUZZ + bool + select SPARSE_MEM diff --git a/MAINTAINERS b/MAINTAINERS index 36055f14c59..7543eb4d597 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -87,7 +87,7 @@ S390 general architecture support M: Cornelia Huck M: Thomas Huth S: Supported -F: default-configs/*/s390x-softmmu.mak +F: configs/devices/s390x-softmmu/default.mak F: gdb-xml/s390*.xml F: hw/char/sclp*.[hc] F: hw/char/terminal3270.c @@ -109,6 +109,12 @@ K: ^Subject:.*(?i)s390x? 
T: git https://gitlab.com/cohuck/qemu.git s390-next L: qemu-s390x@nongnu.org +MIPS general architecture support +M: Philippe Mathieu-Daudé +R: Jiaxun Yang +S: Odd Fixes +K: ^Subject:.*(?i)mips + Guest CPU cores (TCG) --------------------- Overall TCG CPUs @@ -128,7 +134,6 @@ F: docs/devel/decodetree.rst F: include/exec/cpu*.h F: include/exec/exec-all.h F: include/exec/helper*.h -F: include/exec/tb-hash.h F: include/sysemu/cpus.h F: include/sysemu/tcg.h F: include/hw/core/tcg-cpu-ops.h @@ -156,6 +161,7 @@ S: Maintained F: target/arm/ F: tests/tcg/arm/ F: tests/tcg/aarch64/ +F: tests/qtest/arm-cpu-features.c F: hw/arm/ F: hw/cpu/a*mpcore.c F: include/hw/cpu/a*mpcore.h @@ -171,6 +177,7 @@ L: qemu-arm@nongnu.org S: Maintained F: hw/arm/smmu* F: include/hw/arm/smmu* +F: tests/avocado/smmu.py AVR TCG CPUs M: Michael Rolnik @@ -178,7 +185,7 @@ S: Maintained F: docs/system/target-avr.rst F: gdb-xml/avr-cpu.xml F: target/avr/ -F: tests/acceptance/machine_avr6.py +F: tests/avocado/machine_avr6.py CRIS TCG CPUs M: Edgar E. Iglesias @@ -196,29 +203,15 @@ F: target/hexagon/ F: linux-user/hexagon/ F: tests/tcg/hexagon/ F: disas/hexagon.c -F: default-configs/targets/hexagon-linux-user.mak +F: configs/targets/hexagon-linux-user/default.mak +F: docker/dockerfiles/debian-hexagon-cross.docker +F: docker/dockerfiles/debian-hexagon-cross.docker.d/build-toolchain.sh HPPA (PA-RISC) TCG CPUs M: Richard Henderson S: Maintained F: target/hppa/ -F: hw/hppa/ F: disas/hppa.c -F: hw/net/*i82596* -F: include/hw/net/lasi_82596.h - -LM32 TCG CPUs -R: Michael Walle -S: Orphan -F: target/lm32/ -F: disas/lm32.c -F: hw/lm32/ -F: hw/*/lm32_* -F: hw/*/milkymist-* -F: include/hw/display/milkymist_tmu2.h -F: include/hw/char/lm32_juart.h -F: include/hw/lm32/ -F: tests/tcg/lm32/ M68K TCG CPUs M: Laurent Vivier @@ -232,6 +225,8 @@ S: Maintained F: target/microblaze/ F: hw/microblaze/ F: disas/microblaze.c +F: tests/docker/dockerfiles/debian-microblaze-cross.d/build-toolchain.sh +F: tests/tcg/nios2/Makefile.target MIPS TCG CPUs M: Philippe Mathieu-Daudé @@ -240,31 +235,14 @@ R: Jiaxun Yang R: Aleksandar Rikalo S: Odd Fixes F: target/mips/ -F: default-configs/*/*mips* F: disas/mips.c F: docs/system/cpu-models-mips.rst.inc -F: hw/intc/mips_gic.c -F: hw/mips/ -F: hw/misc/mips_* -F: hw/timer/mips_gictimer.c -F: include/hw/intc/mips_gic.h -F: include/hw/mips/ -F: include/hw/misc/mips_* -F: include/hw/timer/mips_gictimer.h F: tests/tcg/mips/ -K: ^Subject:.*(?i)mips MIPS TCG CPUs (nanoMIPS ISA) S: Orphan F: disas/nanomips.* - -Moxie TCG CPUs -M: Anthony Green -S: Maintained -F: target/moxie/ -F: disas/moxie.c -F: hw/moxie/ -F: default-configs/*/moxie-softmmu.mak +F: target/mips/tcg/*nanomips* NiosII TCG CPUs M: Chris Wulff @@ -273,7 +251,8 @@ S: Maintained F: target/nios2/ F: hw/nios2/ F: disas/nios2.c -F: default-configs/*/nios2-softmmu.mak +F: configs/devices/nios2-softmmu/default.mak +F: tests/docker/dockerfiles/debian-nios2-cross.d/build-toolchain.sh OpenRISC TCG CPUs M: Stafford Horne @@ -283,21 +262,22 @@ F: hw/openrisc/ F: tests/tcg/openrisc/ PowerPC TCG CPUs -M: David Gibson -M: Greg Kurz +M: Cédric Le Goater +M: Daniel Henrique Barboza +R: David Gibson +R: Greg Kurz L: qemu-ppc@nongnu.org S: Maintained F: target/ppc/ -F: hw/ppc/ -F: include/hw/ppc/ +F: hw/ppc/ppc.c +F: hw/ppc/ppc_booke.c +F: include/hw/ppc/ppc.h F: disas/ppc.c -F: tests/acceptance/machine_ppc.py RISC-V TCG CPUs M: Palmer Dabbelt -M: Alistair Francis -M: Sagar Karandikar -M: Bastian Koppelmann +M: Alistair Francis +M: Bin Meng L: qemu-riscv@nongnu.org S: Supported 
F: target/riscv/ @@ -316,6 +296,8 @@ M: Richard Henderson M: David Hildenbrand S: Maintained F: target/s390x/ +F: target/s390x/tcg +F: target/s390x/cpu_models_*.[ch] F: hw/s390x/ F: disas/s390.c F: tests/tcg/s390x/ @@ -339,24 +321,17 @@ F: hw/sparc64/ F: include/hw/sparc/sparc64.h F: disas/sparc.c -UniCore32 TCG CPUs -M: Guan Xuetao -S: Maintained -F: target/unicore32/ -F: hw/unicore32/ -F: include/hw/unicore32/ - X86 TCG CPUs M: Paolo Bonzini M: Richard Henderson -M: Eduardo Habkost +M: Eduardo Habkost S: Maintained -F: target/i386/ +F: target/i386/tcg/ F: tests/tcg/i386/ F: tests/tcg/x86_64/ F: hw/i386/ F: disas/i386.c -F: docs/system/cpu-models-x86.rst.inc +F: docs/system/cpu-models-x86* T: git https://gitlab.com/ehabkost/qemu.git x86-next Xtensa TCG CPUs @@ -368,7 +343,7 @@ F: hw/xtensa/ F: tests/tcg/xtensa/ F: disas/xtensa.c F: include/hw/xtensa/xtensa-isa.h -F: default-configs/*/xtensa*.mak +F: configs/devices/xtensa*/default.mak TriCore TCG CPUs M: Bastian Koppelmann @@ -376,6 +351,7 @@ S: Maintained F: target/tricore/ F: hw/tricore/ F: include/hw/tricore/ +F: tests/tcg/tricore/ Multiarch Linux User Tests M: Alex Bennée @@ -404,22 +380,22 @@ F: target/arm/kvm.c MIPS KVM CPUs M: Huacai Chen S: Odd Fixes -F: target/mips/kvm.c +F: target/mips/kvm* +F: target/mips/sysemu/ PPC KVM CPUs -M: David Gibson -M: Greg Kurz +M: Cédric Le Goater +M: Daniel Henrique Barboza +R: David Gibson +R: Greg Kurz S: Maintained F: target/ppc/kvm.c S390 KVM CPUs M: Halil Pasic -M: Cornelia Huck M: Christian Borntraeger S: Supported -F: target/s390x/kvm.c -F: target/s390x/kvm_s390x.h -F: target/s390x/kvm-stub.c +F: target/s390x/kvm/ F: target/s390x/ioinst.[ch] F: target/s390x/machine.c F: target/s390x/sigp.c @@ -431,7 +407,6 @@ F: hw/intc/s390_flic.c F: hw/intc/s390_flic_kvm.c F: include/hw/s390x/s390_flic.h F: gdb-xml/s390*.xml -T: git https://gitlab.com/cohuck/qemu.git s390-next T: git https://github.com/borntraeger/qemu.git s390-next L: qemu-s390x@nongnu.org @@ -440,7 +415,10 @@ M: Paolo Bonzini M: Marcelo Tosatti L: kvm@vger.kernel.org S: Supported +F: docs/amd-memory-encryption.txt +F: docs/system/i386/sgx.rst F: target/i386/kvm/ +F: target/i386/sev* F: scripts/kvm/vmxcap Guest CPU Cores (other accelerators) @@ -456,13 +434,26 @@ F: accel/accel-*.c F: accel/Makefile.objs F: accel/stubs/Makefile.objs +Apple Silicon HVF CPUs +M: Alexander Graf +S: Maintained +F: target/arm/hvf/ + X86 HVF CPUs M: Cameron Esfahani M: Roman Bolshakov W: https://wiki.qemu.org/Features/HVF S: Maintained F: target/i386/hvf/ + +HVF +M: Cameron Esfahani +M: Roman Bolshakov +W: https://wiki.qemu.org/Features/HVF +S: Maintained +F: accel/hvf/ F: include/sysemu/hvf.h +F: include/sysemu/hvf_int.h WHPX CPUs M: Sunil Muthuswamy @@ -509,6 +500,15 @@ F: accel/stubs/hax-stub.c F: include/sysemu/hax.h F: target/i386/hax/ +Guest CPU Cores (NVMM) +---------------------- +NetBSD Virtual Machine Monitor (NVMM) CPU support +M: Kamil Rytarowski +M: Reinoud Zandijk +S: Maintained +F: include/sysemu/nvmm.h +F: target/i386/nvmm/ + Hosts ----- LINUX @@ -529,6 +529,8 @@ F: include/qemu/*posix*.h NETBSD M: Kamil Rytarowski +M: Reinoud Zandijk +M: Ryo ONODERA S: Maintained K: ^Subject:.*(?i)NetBSD @@ -564,6 +566,7 @@ S: Odd Fixes F: hw/*/allwinner* F: include/hw/*/allwinner* F: hw/arm/cubieboard.c +F: docs/system/arm/cubieboard.rst Allwinner-h3 M: Niek Linnenbank @@ -620,6 +623,7 @@ F: hw/intc/gic_internal.h F: hw/misc/a9scu.c F: hw/misc/arm11scu.c F: hw/misc/arm_l2x0.c +F: hw/misc/armv7m_ras.c F: hw/timer/a9gtimer* F: hw/timer/arm* F: 
include/hw/arm/arm*.h @@ -629,6 +633,7 @@ F: include/hw/misc/arm11scu.h F: include/hw/timer/a9gtimer.h F: include/hw/timer/arm_mptimer.h F: include/hw/timer/armv7m_systick.h +F: include/hw/misc/armv7m_ras.h F: tests/qtest/test-arm-mptimer.c Exynos @@ -646,6 +651,7 @@ L: qemu-arm@nongnu.org S: Odd Fixes F: hw/arm/highbank.c F: hw/net/xgmac.c +F: docs/system/arm/highbank.rst Canon DIGIC M: Antony Pavlov @@ -655,7 +661,7 @@ S: Odd Fixes F: include/hw/arm/digic.h F: hw/*/digic* F: include/hw/*/digic* -F: tests/acceptance/machine_arm_canona1100.py +F: tests/avocado/machine_arm_canona1100.py F: docs/system/arm/digic.rst Goldfish RTC @@ -686,6 +692,7 @@ F: hw/watchdog/wdt_imx2.c F: include/hw/arm/fsl-imx25.h F: include/hw/misc/imx25_ccm.h F: include/hw/watchdog/wdt_imx2.h +F: docs/system/arm/imx25-pdk.rst i.MX31 (kzm) M: Peter Maydell @@ -696,6 +703,7 @@ F: hw/*/imx_* F: hw/*/*imx31* F: include/hw/*/imx_* F: include/hw/*/*imx31* +F: docs/system/arm/kzm.rst Integrator CP M: Peter Maydell @@ -704,7 +712,7 @@ S: Maintained F: hw/arm/integratorcp.c F: hw/misc/arm_integrator_debug.c F: include/hw/misc/arm_integrator_debug.h -F: tests/acceptance/machine_arm_integratorcp.py +F: tests/avocado/machine_arm_integratorcp.py F: docs/system/arm/integratorcp.rst MCIMX6UL EVK / i.MX6ul @@ -788,7 +796,6 @@ F: roms/vbootrom F: docs/system/arm/nuvoton.rst nSeries -M: Andrzej Zaborowski M: Peter Maydell L: qemu-arm@nongnu.org S: Odd Fixes @@ -799,13 +806,13 @@ F: hw/input/tsc2005.c F: hw/misc/cbus.c F: hw/rtc/twl92230.c F: include/hw/display/blizzard.h +F: include/hw/input/lm832x.h F: include/hw/input/tsc2xxx.h F: include/hw/misc/cbus.h -F: tests/acceptance/machine_arm_n8x0.py +F: tests/avocado/machine_arm_n8x0.py F: docs/system/arm/nseries.rst Palm -M: Andrzej Zaborowski M: Peter Maydell L: qemu-arm@nongnu.org S: Odd Fixes @@ -838,7 +845,6 @@ F: include/hw/intc/realview_gic.h F: docs/system/arm/realview.rst PXA2XX -M: Andrzej Zaborowski M: Peter Maydell L: qemu-arm@nongnu.org S: Odd Fixes @@ -851,12 +857,13 @@ F: hw/display/tc6393xb.c F: hw/gpio/max7310.c F: hw/gpio/zaurus.c F: hw/misc/mst_fpga.c -F: hw/misc/max111x.c -F: include/hw/misc/max111x.h +F: hw/adc/max111x.c +F: include/hw/adc/max111x.h F: include/hw/arm/pxa.h F: include/hw/arm/sharpsl.h F: include/hw/display/tc6393xb.h F: docs/system/arm/xscale.rst +F: docs/system/arm/mainstone.rst SABRELITE / i.MX6 M: Peter Maydell @@ -898,6 +905,13 @@ F: hw/*/stellaris* F: include/hw/input/gamepad.h F: docs/system/arm/stellaris.rst +STM32VLDISCOVERY +M: Alexandre Iooss +L: qemu-arm@nongnu.org +S: Maintained +F: hw/arm/stm32vldiscovery.c +F: docs/system/arm/stm32.rst + Versatile Express M: Peter Maydell L: qemu-arm@nongnu.org @@ -930,8 +944,10 @@ L: qemu-arm@nongnu.org S: Maintained F: hw/*/xilinx_* F: hw/*/cadence_* -F: hw/misc/zynq* -F: include/hw/misc/zynq* +F: hw/misc/zynq_slcr.c +F: hw/adc/zynq-xadc.c +F: include/hw/misc/zynq_slcr.h +F: include/hw/adc/zynq-xadc.h X: hw/ssi/xilinx_* Xilinx ZynqMP and Versal @@ -953,6 +969,12 @@ L: qemu-arm@nongnu.org S: Maintained F: hw/arm/virt-acpi-build.c +STM32F100 +M: Alexandre Iooss +L: qemu-arm@nongnu.org +S: Maintained +F: hw/arm/stm32f100_soc.c + STM32F205 M: Alistair Francis M: Peter Maydell @@ -1011,6 +1033,7 @@ M: Peter Maydell L: qemu-arm@nongnu.org S: Maintained F: hw/arm/msf2-som.c +F: docs/system/arm/emcraft-sf2.rst ASPEED BMCs M: Cédric Le Goater @@ -1026,6 +1049,7 @@ F: include/hw/misc/pca9552*.h F: hw/net/ftgmac100.c F: include/hw/net/ftgmac100.h F: docs/system/arm/aspeed.rst +F: tests/qtest/*aspeed* NRF51 M: Joel 
Stanley @@ -1037,6 +1061,7 @@ F: hw/*/microbit*.c F: include/hw/*/nrf51*.h F: include/hw/*/microbit*.h F: tests/qtest/microbit-test.c +F: docs/system/arm/nrf.rst AVR Machines ------------- @@ -1044,7 +1069,7 @@ AVR Machines AVR MCUs M: Michael Rolnik S: Maintained -F: default-configs/*/avr-softmmu.mak +F: configs/devices/avr-softmmu/default.mak F: hw/avr/ F: include/hw/char/avr_usart.h F: hw/char/avr_usart.c @@ -1072,22 +1097,12 @@ HP B160L M: Richard Henderson R: Helge Deller S: Odd Fixes -F: default-configs/*/hppa-softmmu.mak +F: configs/devices/hppa-softmmu/default.mak F: hw/hppa/ +F: hw/net/*i82596* +F: include/hw/net/lasi_82596.h F: pc-bios/hppa-firmware.img -LM32 Machines -------------- -EVR32 and uclinux BSP -R: Michael Walle -S: Orphan -F: hw/lm32/lm32_boards.c - -milkymist -R: Michael Walle -S: Orphan -F: hw/lm32/milkymist.c - M68K Machines ------------- an5206 @@ -1148,7 +1163,7 @@ M: Edgar E. Iglesias S: Maintained F: hw/microblaze/petalogix_s3adsp1800_mmu.c F: include/hw/char/xilinx_uartlite.h -F: tests/acceptance/machine_microblaze.py +F: tests/avocado/machine_microblaze.py petalogix_ml605 M: Edgar E. Iglesias @@ -1157,6 +1172,13 @@ F: hw/microblaze/petalogix_ml605_mmu.c MIPS Machines ------------- +Overall MIPS Machines +M: Philippe Mathieu-Daudé +S: Odd Fixes +F: configs/devices/mips*/* +F: hw/mips/ +F: include/hw/mips/ + Jazz M: Hervé Poussineau R: Aleksandar Rikalo @@ -1174,8 +1196,8 @@ F: hw/acpi/piix4.c F: hw/mips/malta.c F: hw/mips/gt64xxx_pci.c F: include/hw/southbridge/piix.h -F: tests/acceptance/linux_ssh_mips_malta.py -F: tests/acceptance/machine_mips_malta.py +F: tests/avocado/linux_ssh_mips_malta.py +F: tests/avocado/machine_mips_malta.py Mipssim R: Aleksandar Rikalo @@ -1193,6 +1215,7 @@ F: hw/isa/vt82c686.c F: hw/pci-host/bonito.c F: hw/usb/vt82c686-uhci-pci.c F: include/hw/isa/vt82c686.h +F: tests/avocado/machine_mips_fuloong2e.py Loongson-3 virtual platforms M: Huacai Chen @@ -1202,7 +1225,7 @@ F: hw/intc/loongson_liointc.c F: hw/mips/loongson3_bootp.c F: hw/mips/loongson3_bootp.h F: hw/mips/loongson3_virt.c -F: tests/acceptance/machine_mips_loongson3v.py +F: tests/avocado/machine_mips_loongson3v.py Boston M: Paul Burton @@ -1223,24 +1246,19 @@ F: hw/openrisc/openrisc_sim.c PowerPC Machines ---------------- 405 -M: David Gibson -M: Greg Kurz L: qemu-ppc@nongnu.org -S: Odd Fixes +S: Orphan F: hw/ppc/ppc405_boards.c Bamboo -M: David Gibson -M: Greg Kurz L: qemu-ppc@nongnu.org -S: Odd Fixes +S: Orphan F: hw/ppc/ppc440_bamboo.c +F: tests/avocado/ppc_bamboo.py e500 -M: David Gibson -M: Greg Kurz L: qemu-ppc@nongnu.org -S: Odd Fixes +S: Orphan F: hw/ppc/e500* F: hw/gpio/mpc8xxx.c F: hw/i2c/mpc_i2c.c @@ -1249,19 +1267,18 @@ F: hw/pci-host/ppce500.c F: include/hw/ppc/ppc_e500.h F: include/hw/pci-host/ppce500.h F: pc-bios/u-boot.e500 +F: hw/intc/openpic_kvm.h +F: include/hw/ppc/openpic_kvm.h mpc8544ds -M: David Gibson -M: Greg Kurz L: qemu-ppc@nongnu.org -S: Odd Fixes +S: Orphan F: hw/ppc/mpc8544ds.c F: hw/ppc/mpc8544_guts.c +F: tests/avocado/ppc_mpc8544ds.py New World (mac99) M: Mark Cave-Ayland -R: David Gibson -R: Greg Kurz L: qemu-ppc@nongnu.org S: Odd Fixes F: hw/ppc/mac_newworld.c @@ -1280,8 +1297,6 @@ F: pc-bios/qemu_vga.ndrv Old World (g3beige) M: Mark Cave-Ayland -R: David Gibson -R: Greg Kurz L: qemu-ppc@nongnu.org S: Odd Fixes F: hw/ppc/mac_oldworld.c @@ -1295,27 +1310,27 @@ F: pc-bios/qemu_vga.ndrv PReP M: Hervé Poussineau -R: David Gibson -R: Greg Kurz L: qemu-ppc@nongnu.org S: Maintained F: hw/ppc/prep.c F: hw/ppc/prep_systemio.c F: hw/ppc/rs6000_mc.c -F: 
hw/pci-host/prep.[hc] +F: hw/pci-host/raven.c F: hw/isa/i82378.c F: hw/isa/pc87312.c F: hw/dma/i82374.c F: hw/rtc/m48t59-isa.c F: include/hw/isa/pc87312.h F: include/hw/rtc/m48t59.h -F: tests/acceptance/ppc_prep_40p.py +F: tests/avocado/ppc_prep_40p.py sPAPR -M: David Gibson -M: Greg Kurz +M: Cédric Le Goater +M: Daniel Henrique Barboza +R: David Gibson +R: Greg Kurz L: qemu-ppc@nongnu.org -S: Supported +S: Maintained F: hw/*/spapr* F: include/hw/*/spapr* F: hw/*/xics* @@ -1327,11 +1342,10 @@ F: tests/qtest/spapr* F: tests/qtest/libqos/*spapr* F: tests/qtest/rtas* F: tests/qtest/libqos/rtas* +F: tests/avocado/ppc_pseries.py PowerNV (Non-Virtualized) M: Cédric Le Goater -M: David Gibson -M: Greg Kurz L: qemu-ppc@nongnu.org S: Maintained F: hw/ppc/pnv* @@ -1348,11 +1362,10 @@ M: Edgar E. Iglesias L: qemu-ppc@nongnu.org S: Odd Fixes F: hw/ppc/virtex_ml507.c +F: tests/avocado/ppc_virtex_ml507.py sam460ex M: BALATON Zoltan -R: David Gibson -R: Greg Kurz L: qemu-ppc@nongnu.org S: Maintained F: hw/ppc/sam460ex.c @@ -1364,6 +1377,29 @@ F: pc-bios/canyonlands.dt[sb] F: pc-bios/u-boot-sam460ex-20100605.bin F: roms/u-boot-sam460ex +pegasos2 +M: BALATON Zoltan +L: qemu-ppc@nongnu.org +S: Maintained +F: hw/ppc/pegasos2.c +F: hw/pci-host/mv64361.c +F: hw/pci-host/mv643xx.h +F: include/hw/pci-host/mv64361.h + +Virtual Open Firmware (VOF) +M: Alexey Kardashevskiy +R: Cédric Le Goater +R: Daniel Henrique Barboza +R: David Gibson +R: Greg Kurz +L: qemu-ppc@nongnu.org +S: Maintained +F: hw/ppc/spapr_vof* +F: hw/ppc/vof* +F: include/hw/ppc/vof* +F: pc-bios/vof/* +F: pc-bios/vof* + RISC-V Machines --------------- OpenTitan @@ -1371,11 +1407,9 @@ M: Alistair Francis L: qemu-riscv@nongnu.org S: Supported F: hw/riscv/opentitan.c -F: hw/char/ibex_uart.c -F: hw/intc/ibex_plic.c +F: hw/*/ibex_*.c F: include/hw/riscv/opentitan.h -F: include/hw/char/ibex_uart.h -F: include/hw/intc/ibex_plic.h +F: include/hw/*/ibex_*.h Microchip PolarFire SoC Icicle Kit M: Bin Meng @@ -1392,6 +1426,15 @@ F: include/hw/misc/mchp_pfsoc_dmc.h F: include/hw/misc/mchp_pfsoc_ioscb.h F: include/hw/misc/mchp_pfsoc_sysreg.h +Shakti C class SoC +M: Vijai Kumar K +L: qemu-riscv@nongnu.org +S: Supported +F: hw/riscv/shakti_c.c +F: hw/char/shakti_uart.c +F: include/hw/riscv/shakti_c.h +F: include/hw/char/shakti_uart.h + SiFive Machines M: Alistair Francis M: Bin Meng @@ -1408,7 +1451,7 @@ R: Yoshinori Sato S: Orphan F: docs/system/target-rx.rst F: hw/rx/rx-gdbsim.c -F: tests/acceptance/machine_rx_gdbsim.py +F: tests/avocado/machine_rx_gdbsim.py SH4 Machines ------------ @@ -1462,7 +1505,7 @@ F: include/hw/pci-host/sabre.h F: hw/pci-bridge/simba.c F: include/hw/pci-bridge/simba.h F: pc-bios/openbios-sparc64 -F: tests/acceptance/machine_sparc64_sun4u.py +F: tests/avocado/machine_sparc64_sun4u.py Sun4v M: Artyom Tarasenko @@ -1478,12 +1521,11 @@ S: Maintained F: hw/sparc/leon3.c F: hw/*/grlib* F: include/hw/*/grlib* -F: tests/acceptance/machine_sparc_leon3.py +F: tests/avocado/machine_sparc_leon3.py S390 Machines ------------- S390 Virtio-ccw -M: Cornelia Huck M: Halil Pasic M: Christian Borntraeger S: Supported @@ -1493,9 +1535,8 @@ F: hw/s390x/ F: include/hw/s390x/ F: hw/watchdog/wdt_diag288.c F: include/hw/watchdog/wdt_diag288.h -F: default-configs/*/s390x-softmmu.mak -F: tests/acceptance/machine_s390_ccw_virtio.py -T: git https://gitlab.com/cohuck/qemu.git s390-next +F: configs/devices/s390x-softmmu/default.mak +F: tests/avocado/machine_s390_ccw_virtio.py T: git https://github.com/borntraeger/qemu.git s390-next L: qemu-s390x@nongnu.org @@ -1518,14 
+1559,6 @@ F: hw/s390x/s390-pci* F: include/hw/s390x/s390-pci* L: qemu-s390x@nongnu.org -UniCore32 Machines ------------------- -PKUnity-3 SoC initramfs-with-busybox -M: Guan Xuetao -S: Maintained -F: hw/*/puv3* -F: hw/unicore32/ - X86 Machines ------------ PC @@ -1589,19 +1622,21 @@ microvm M: Sergio Lopez M: Paolo Bonzini S: Maintained -F: docs/microvm.rst +F: docs/system/i386/microvm.rst F: hw/i386/microvm.c F: include/hw/i386/microvm.h F: pc-bios/bios-microvm.bin Machine core -M: Eduardo Habkost +M: Eduardo Habkost M: Marcel Apfelbaum +R: Philippe Mathieu-Daudé S: Supported F: cpu.c F: hw/core/cpu.c F: hw/core/machine-qmp-cmds.c F: hw/core/machine.c +F: hw/core/machine-smp.c F: hw/core/null-machine.c F: hw/core/numa.c F: hw/cpu/cluster.c @@ -1611,6 +1646,7 @@ F: include/hw/boards.h F: include/hw/core/cpu.h F: include/hw/cpu/cluster.h F: include/sysemu/numa.h +F: tests/unit/test-smp-parse.c T: git https://gitlab.com/ehabkost/qemu.git machine-next Xtensa Machines @@ -1633,6 +1669,16 @@ F: hw/net/opencores_eth.c Devices ------- +Overall Audio frontends +M: Gerd Hoffmann +S: Odd Fixes +F: hw/audio/ +F: include/hw/audio/ +F: tests/qtest/ac97-test.c +F: tests/qtest/es1370-test.c +F: tests/qtest/intel-hda-test.c +F: tests/qtest/fuzz-sb16-test.c + Xilinx CAN M: Vikram Garhwal M: Francisco Iglesias @@ -1676,6 +1722,9 @@ M: John Snow L: qemu-block@nongnu.org S: Supported F: hw/block/fdc.c +F: hw/block/fdc-internal.h +F: hw/block/fdc-isa.c +F: hw/block/fdc-sysbus.c F: include/hw/block/fdc.h F: tests/qtest/fdc-test.c T: git https://gitlab.com/jsnow/qemu.git ide @@ -1705,11 +1754,11 @@ F: hw/pci-bridge/* F: qapi/pci.json F: docs/pci* F: docs/specs/*pci* -F: default-configs/pci.mak ACPI/SMBIOS M: Michael S. Tsirkin M: Igor Mammedov +R: Ani Sinha S: Supported F: include/hw/acpi/* F: include/hw/firmware/smbios.h @@ -1722,6 +1771,10 @@ F: qapi/acpi.json F: tests/qtest/bios-tables-test* F: tests/qtest/acpi-utils.[hc] F: tests/data/acpi/ +F: docs/specs/acpi_cpu_hotplug.rst +F: docs/specs/acpi_mem_hotplug.rst +F: docs/specs/acpi_pci_hotplug.rst +F: docs/specs/acpi_hw_reduced_hotplug.rst ACPI/HEST/GHES R: Dongjiu Geng @@ -1732,9 +1785,8 @@ F: include/hw/acpi/ghes.h F: docs/specs/acpi_hest_ghes.rst ppc4xx -M: David Gibson L: qemu-ppc@nongnu.org -S: Odd Fixes +S: Orphan F: hw/ppc/ppc4*.c F: hw/i2c/ppc4xx_i2c.c F: include/hw/ppc/ppc4xx.h @@ -1798,21 +1850,21 @@ F: include/hw/sd/sd* F: hw/sd/core.c F: hw/sd/sd* F: hw/sd/ssi-sd.c -F: tests/qtest/sd* +F: tests/qtest/fuzz-sdcard-test.c +F: tests/qtest/sdhci-test.c USB M: Gerd Hoffmann -S: Maintained +S: Odd Fixes F: hw/usb/* +F: stubs/usb-dev-stub.c F: tests/qtest/usb-*-test.c -F: docs/usb2.txt -F: docs/usb-storage.txt +F: docs/system/devices/usb.rst F: include/hw/usb.h F: include/hw/usb/ -F: default-configs/usb.mak USB (serial adapter) -M: Gerd Hoffmann +R: Gerd Hoffmann M: Samuel Thibault S: Maintained F: hw/usb/dev-serial.c @@ -1823,9 +1875,9 @@ S: Supported F: hw/vfio/* F: include/hw/vfio/ F: docs/igd-assign.txt +F: docs/devel/vfio-migration.rst vfio-ccw -M: Cornelia Huck M: Eric Farman M: Matthew Rosato S: Supported @@ -1833,7 +1885,6 @@ F: hw/vfio/ccw.c F: hw/s390x/s390-ccw.c F: include/hw/s390x/s390-ccw.h F: include/hw/s390x/vfio-ccw.h -T: git https://gitlab.com/cohuck/qemu.git s390-next L: qemu-s390x@nongnu.org vfio-ap @@ -1881,6 +1932,7 @@ virtio-9p M: Greg Kurz M: Christian Schoenebeck S: Odd Fixes +W: https://wiki.qemu.org/Documentation/9p F: hw/9pfs/ X: hw/9pfs/xen-9p* F: fsdev/ @@ -1920,7 +1972,7 @@ L: virtio-fs@redhat.com virtio-input M: Gerd 
Hoffmann -S: Maintained +S: Odd Fixes F: hw/input/vhost-user-input.c F: hw/input/virtio-input*.c F: include/hw/virtio/virtio-input.h @@ -1951,6 +2003,15 @@ F: include/sysemu/rng*.h F: backends/rng*.c F: tests/qtest/virtio-rng-test.c +vhost-user-rng +M: Mathieu Poirier +S: Supported +F: docs/tools/vhost-user-rng.rst +F: hw/virtio/vhost-user-rng.c +F: hw/virtio/vhost-user-rng-pci.c +F: include/hw/virtio/vhost-user-rng.h +F: tools/vhost-user-rng/* + virtio-crypto M: Gonglei S: Supported @@ -1972,7 +2033,7 @@ M: Keith Busch M: Klaus Jensen L: qemu-block@nongnu.org S: Supported -F: hw/block/nvme* +F: hw/nvme/* F: include/block/nvme.h F: tests/qtest/nvme-test.c F: docs/system/nvme.rst @@ -2017,6 +2078,7 @@ F: hw/acpi/nvdimm.c F: hw/mem/nvdimm.c F: include/hw/mem/nvdimm.h F: docs/nvdimm.txt +F: docs/specs/acpi_nvdimm.rst e1000x M: Dmitry Fleytman @@ -2040,6 +2102,12 @@ S: Maintained F: hw/net/tulip.c F: hw/net/tulip.h +pca954x +M: Patrick Venture +S: Maintained +F: hw/i2c/i2c_mux_pca954x.c +F: include/hw/i2c/i2c_mux_pca954x.h + Generic Loader M: Alistair Francis S: Maintained @@ -2052,7 +2120,7 @@ M: Alex Bennée S: Maintained F: hw/core/guest-loader.c F: docs/system/guest-loader.rst -F: tests/acceptance/boot_xen.py +F: tests/avocado/boot_xen.py Intel Hexadecimal Object File Loader M: Su Hang @@ -2111,7 +2179,7 @@ F: include/hw/display/ramfb.h virtio-gpu M: Gerd Hoffmann -S: Maintained +S: Odd Fixes F: hw/display/virtio-gpu* F: hw/display/virtio-vga.* F: include/hw/virtio/virtio-gpu.h @@ -2130,7 +2198,7 @@ F: include/hw/virtio/vhost-user-scsi.h vhost-user-gpu M: Marc-André Lureau -M: Gerd Hoffmann +R: Gerd Hoffmann S: Maintained F: docs/interop/vhost-user-gpu.rst F: contrib/vhost-user-gpu @@ -2158,7 +2226,6 @@ F: include/hw/southbridge/piix.h Firmware configuration (fw_cfg) M: Philippe Mathieu-Daudé -R: Laszlo Ersek R: Gerd Hoffmann S: Supported F: docs/specs/fw_cfg.txt @@ -2172,8 +2239,6 @@ T: git https://github.com/philmd/qemu.git fw_cfg-next XIVE M: Cédric Le Goater -R: David Gibson -R: Greg Kurz L: qemu-ppc@nongnu.org S: Supported F: hw/*/*xive* @@ -2209,22 +2274,84 @@ F: net/can/* F: hw/net/can/* F: include/net/can_*.h +OpenPIC interrupt controller +M: Mark Cave-Ayland +S: Odd Fixes +F: hw/intc/openpic.c +F: include/hw/ppc/openpic.h + +MIPS CPS +M: Philippe Mathieu-Daudé +S: Odd Fixes +F: hw/misc/mips_* +F: include/hw/misc/mips_* + +MIPS GIC +M: Philippe Mathieu-Daudé +S: Odd Fixes +F: hw/intc/mips_gic.c +F: hw/timer/mips_gictimer.c +F: include/hw/intc/mips_gic.h +F: include/hw/timer/mips_gictimer.h + Subsystems ---------- -Audio +Overall Audio backends M: Gerd Hoffmann -S: Maintained +S: Odd Fixes F: audio/ -F: hw/audio/ -F: include/hw/audio/ +X: audio/alsaaudio.c +X: audio/coreaudio.c +X: audio/dsound* +X: audio/jackaudio.c +X: audio/ossaudio.c +X: audio/paaudio.c +X: audio/sdlaudio.c +X: audio/spiceaudio.c F: qapi/audio.json -F: tests/qtest/ac97-test.c -F: tests/qtest/es1370-test.c -F: tests/qtest/intel-hda-test.c + +ALSA Audio backend +M: Gerd Hoffmann +R: Christian Schoenebeck +S: Odd Fixes +F: audio/alsaaudio.c + +Core Audio framework backend +M: Gerd Hoffmann +R: Christian Schoenebeck +S: Odd Fixes +F: audio/coreaudio.c + +DSound Audio backend +M: Gerd Hoffmann +S: Odd Fixes +F: audio/dsound* + +JACK Audio Connection Kit backend +M: Gerd Hoffmann +R: Christian Schoenebeck +S: Odd Fixes +F: audio/jackaudio.c + +Open Sound System (OSS) Audio backend +M: Gerd Hoffmann +S: Odd Fixes +F: audio/ossaudio.c + +PulseAudio backend +M: Gerd Hoffmann +S: Odd Fixes +F: audio/paaudio.c + +SDL Audio 
backend +M: Gerd Hoffmann +R: Thomas Huth +S: Odd Fixes +F: audio/sdlaudio.c Block layer core M: Kevin Wolf -M: Max Reitz +M: Hanna Reitz L: qemu-block@nongnu.org S: Supported F: block* @@ -2291,8 +2418,8 @@ F: block/mirror.c F: qapi/job.json F: block/block-copy.c F: include/block/block-copy.c -F: block/backup-top.h -F: block/backup-top.c +F: block/copy-before-write.h +F: block/copy-before-write.c F: include/block/aio_task.h F: block/aio_task.c F: util/qemu-co-shared-resource.c @@ -2405,22 +2532,27 @@ F: tests/tcg/multiarch/gdbstub/ Memory API M: Paolo Bonzini +M: Peter Xu +M: David Hildenbrand +R: Philippe Mathieu-Daudé S: Supported F: include/exec/ioport.h F: include/exec/memop.h F: include/exec/memory.h F: include/exec/ram_addr.h F: include/exec/ramblock.h +F: include/sysemu/memory_mapping.h F: softmmu/dma-helpers.c F: softmmu/ioport.c F: softmmu/memory.c +F: softmmu/memory_mapping.c F: softmmu/physmem.c F: include/exec/memory-internal.h F: scripts/coccinelle/memory-region-housekeeping.cocci SPICE M: Gerd Hoffmann -S: Supported +S: Odd Fixes F: include/ui/qemu-spice.h F: include/ui/spice-display.h F: ui/spice-*.c @@ -2445,6 +2577,7 @@ F: ui/cocoa.m Main loop M: Paolo Bonzini S: Maintained +F: include/exec/gen-icount.h F: include/qemu/main-loop.h F: include/sysemu/runstate.h F: include/sysemu/runstate-action.h @@ -2499,7 +2632,7 @@ S: Maintained F: net/netmap.c Host Memory Backends -M: Eduardo Habkost +M: David Hildenbrand M: Igor Mammedov S: Maintained F: backends/hostmem*.c @@ -2515,13 +2648,13 @@ F: backends/cryptodev*.c Python library M: John Snow M: Cleber Rosa -R: Eduardo Habkost +R: Eduardo Habkost S: Maintained F: python/ T: git https://gitlab.com/jsnow/qemu.git python Python scripts -M: Eduardo Habkost +M: Eduardo Habkost M: Cleber Rosa S: Odd Fixes F: scripts/*.py @@ -2531,6 +2664,13 @@ Benchmark util M: Vladimir Sementsov-Ogievskiy S: Maintained F: scripts/simplebench/ +T: git https://src.openvz.org/scm/~vsementsov/qemu.git simplebench + +Transactions helper +M: Vladimir Sementsov-Ogievskiy +S: Maintained +F: include/qemu/transactions.h +F: util/transactions.c QAPI M: Markus Armbruster @@ -2590,7 +2730,7 @@ T: git https://github.com/mdroth/qemu.git qga QOM M: Paolo Bonzini R: Daniel P. Berrange -R: Eduardo Habkost +R: Eduardo Habkost S: Supported F: docs/qdev-device-use.txt F: hw/core/qdev* @@ -2610,7 +2750,7 @@ F: tests/unit/check-qom-proplist.c F: tests/unit/test-qdev-global-props.c QOM boilerplate conversion script -M: Eduardo Habkost +M: Eduardo Habkost S: Maintained F: scripts/codeconverter/ @@ -2648,6 +2788,8 @@ R: Paolo Bonzini R: Bandan Das R: Stefan Hajnoczi R: Thomas Huth +R: Darren Kenny +R: Qiuhao Li S: Maintained F: tests/qtest/fuzz/ F: tests/qtest/fuzz-*test.c @@ -2692,14 +2834,13 @@ F: scripts/tracetool.py F: scripts/tracetool/ F: scripts/qemu-trace-stap* F: docs/tools/qemu-trace-stap.rst -F: docs/devel/tracing.txt +F: docs/devel/tracing.rst T: git https://github.com/stefanha/qemu.git tracing TPM M: Stefan Berger S: Maintained F: tpm.c -F: stubs/tpm.c F: hw/tpm/* F: include/hw/acpi/tpm.h F: include/sysemu/tpm* @@ -2786,7 +2927,6 @@ F: tests/unit/test-authz-* Sockets M: Daniel P. 
Berrange -M: Gerd Hoffmann S: Maintained F: include/qemu/sockets.h F: util/qemu-sockets.c @@ -2854,8 +2994,9 @@ F: net/filter-replay.c F: include/sysemu/replay.h F: docs/replay.txt F: stubs/replay.c -F: tests/acceptance/replay_kernel.py -F: tests/acceptance/reverse_debugging.py +F: tests/avocado/replay_kernel.py +F: tests/avocado/replay_linux.py +F: tests/avocado/reverse_debugging.py F: qapi/replay.json IOVA Tree @@ -2882,7 +3023,6 @@ F: include/hw/i2c/smbus_slave.h F: include/hw/i2c/smbus_eeprom.h Firmware schema specifications -M: Laszlo Ersek M: Philippe Mathieu-Daudé R: Daniel P. Berrange R: Kashyap Chamarthy @@ -2890,9 +3030,10 @@ S: Maintained F: docs/interop/firmware.json EDK2 Firmware -M: Laszlo Ersek M: Philippe Mathieu-Daudé +R: Gerd Hoffmann S: Supported +F: hw/i386/*ovmf* F: pc-bios/descriptors/??-edk2-*.json F: pc-bios/edk2-* F: roms/Makefile.edk2 @@ -2943,14 +3084,14 @@ M: Warner Losh R: Kyle Evans S: Maintained F: bsd-user/ -F: default-configs/targets/*-bsd-user.mak +F: configs/targets/*-bsd-user.mak T: git https://github.com/qemu-bsd-user/qemu-bsd-user bsd-user-rebase-3.1 Linux user M: Laurent Vivier S: Maintained F: linux-user/ -F: default-configs/targets/*linux-user.mak +F: configs/targets/*linux-user.mak F: scripts/qemu-binfmt-conf.sh F: scripts/update-syscalltbl.sh F: scripts/update-mips-syscall-args.sh @@ -2966,11 +3107,13 @@ F: include/tcg/ TCG Plugins M: Alex Bennée +R: Alexandre Iooss +R: Mahmoud Mandour S: Maintained F: docs/devel/tcg-plugins.rst F: plugins/ F: tests/plugin/ -F: tests/acceptance/tcg_plugins.py +F: tests/avocado/tcg_plugins.py F: contrib/plugins/ AArch64 TCG target @@ -2982,7 +3125,7 @@ F: disas/arm-a64.cc F: disas/libvixl/ ARM TCG target -M: Andrzej Zaborowski +M: Richard Henderson S: Maintained L: qemu-arm@nongnu.org F: tcg/arm/ @@ -3045,17 +3188,12 @@ S: Supported F: block/vmdk.c RBD -M: Jason Dillaman +M: Ilya Dryomov +R: Peter Lieven L: qemu-block@nongnu.org S: Supported F: block/rbd.c -Sheepdog -M: Liu Yuan -L: qemu-block@nongnu.org -S: Odd Fixes -F: block/sheepdog.c - VHDX M: Jeff Cody L: qemu-block@nongnu.org @@ -3124,6 +3262,7 @@ F: block/null.c NVMe Block Driver M: Stefan Hajnoczi R: Fam Zheng +R: Philippe Mathieu-Daudé L: qemu-block@nongnu.org S: Supported F: block/nvme* @@ -3203,6 +3342,7 @@ Linux io_uring M: Aarushi Mehta M: Julia Suvorova M: Stefan Hajnoczi +R: Stefano Garzarella L: qemu-block@nongnu.org S: Maintained F: block/io_uring.c @@ -3210,7 +3350,7 @@ F: stubs/io_uring.c qcow2 M: Kevin Wolf -M: Max Reitz +M: Hanna Reitz L: qemu-block@nongnu.org S: Supported F: block/qcow2* @@ -3224,7 +3364,7 @@ F: block/qcow.c blkdebug M: Kevin Wolf -M: Max Reitz +M: Hanna Reitz L: qemu-block@nongnu.org S: Supported F: block/blkdebug.c @@ -3254,10 +3394,12 @@ F: block/export/vhost-user-blk-server.c F: block/export/vhost-user-blk-server.h F: include/qemu/vhost-user-server.h F: tests/qtest/libqos/vhost-user-blk.c +F: tests/qtest/libqos/vhost-user-blk.h +F: tests/qtest/vhost-user-blk-test.c F: util/vhost-user-server.c FUSE block device exports -M: Max Reitz +M: Hanna Reitz L: qemu-block@nongnu.org S: Supported F: block/export/fuse.c @@ -3312,6 +3454,14 @@ F: include/hw/remote/proxy-memory-listener.h F: hw/remote/iohub.c F: include/hw/remote/iohub.h +EBPF: +M: Jason Wang +R: Andrew Melnychenko +R: Yuri Benditovich +S: Maintained +F: ebpf/* +F: tools/ebpf/* + Build and test automation ------------------------- Build and test automation, general continuous integration @@ -3319,7 +3469,7 @@ M: Alex Bennée M: Philippe Mathieu-Daudé M: Thomas Huth R: 
Wainer dos Santos Moschetta -R: Willian Rampazzo +R: Beraldo Leal S: Maintained F: .github/lockdown.yml F: .gitlab-ci.yml @@ -3352,20 +3502,27 @@ S: Maintained F: tests/tcg/Makefile F: tests/tcg/Makefile.include -Acceptance (Integration) Testing with the Avocado framework +Integration Testing with the Avocado framework W: https://trello.com/b/6Qi1pxVn/avocado-qemu R: Cleber Rosa R: Philippe Mathieu-Daudé R: Wainer dos Santos Moschetta +R: Beraldo Leal S: Odd Fixes -F: tests/acceptance/ +F: tests/avocado/ + +GitLab custom runner (Works On Arm Sponsored) +M: Alex Bennée +M: Philippe Mathieu-Daudé +S: Maintained +F: .gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml Documentation ------------- Build system architecture M: Daniel P. Berrange S: Odd Fixes -F: docs/devel/build-system.txt +F: docs/devel/build-system.rst GIT Data Mining Config M: Alex Bennée @@ -3375,7 +3532,7 @@ F: contrib/gitdm/* Incompatible changes R: libvir-list@redhat.com -F: docs/system/deprecated.rst +F: docs/about/deprecated.rst Build System ------------ @@ -3394,6 +3551,7 @@ S: Maintained F: docs/conf.py F: docs/*/conf.py F: docs/sphinx/ +F: docs/_templates/ Miscellaneous ------------- diff --git a/Makefile b/Makefile index bcbbec71a1c..74c5b46d38b 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ SRC_PATH=. # we have explicit rules for everything MAKEFLAGS += -rR -SHELL = /usr/bin/env bash -o pipefail +SHELL = bash -o pipefail # Usage: $(call quiet-command,command and args,"NAME","args to print") # This will run "command and args", and either: @@ -48,9 +48,11 @@ Makefile: .git-submodule-status .PHONY: git-submodule-update git-submodule-update: +ifneq ($(GIT_SUBMODULES_ACTION),ignore) $(call quiet-command, \ (GIT="$(GIT)" "$(SRC_PATH)/scripts/git-submodule.sh" $(GIT_SUBMODULES_ACTION) $(GIT_SUBMODULES)), \ "GIT","$(GIT_SUBMODULES)") +endif # 0. ensure the build tree is okay @@ -85,7 +87,7 @@ x := $(shell rm -rf meson-private meson-info meson-logs) endif # 1. ensure config-host.mak is up-to-date -config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/pc-bios $(SRC_PATH)/VERSION +config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/scripts/meson-buildoptions.sh $(SRC_PATH)/pc-bios $(SRC_PATH)/VERSION @echo config-host.mak is out-of-date, running configure @if test -f meson-private/coredata.dat; then \ ./config.status --skip-meson; \ @@ -122,14 +124,22 @@ ifneq ($(MESON),) Makefile.mtest: build.ninja scripts/mtest2make.py $(MESON) introspect --targets --tests --benchmarks | $(PYTHON) scripts/mtest2make.py > $@ -include Makefile.mtest + +.PHONY: update-buildoptions +all update-buildoptions: $(SRC_PATH)/scripts/meson-buildoptions.sh +$(SRC_PATH)/scripts/meson-buildoptions.sh: $(SRC_PATH)/meson_options.txt + $(MESON) introspect --buildoptions $(SRC_PATH)/meson.build | $(PYTHON) \ + scripts/meson-buildoptions.py > $@.tmp && mv $@.tmp $@ endif # 4. 
Rules to bridge to other makefiles ifneq ($(NINJA),) -MAKE.n = $(findstring n,$(firstword $(MAKEFLAGS))) -MAKE.k = $(findstring k,$(firstword $(MAKEFLAGS))) -MAKE.q = $(findstring q,$(firstword $(MAKEFLAGS))) +# Filter out long options to avoid flags like --no-print-directory which +# may result in false positive match for MAKE.n +MAKE.n = $(findstring n,$(firstword $(filter-out --%,$(MAKEFLAGS)))) +MAKE.k = $(findstring k,$(firstword $(filter-out --%,$(MAKEFLAGS)))) +MAKE.q = $(findstring q,$(firstword $(filter-out --%,$(MAKEFLAGS)))) MAKE.nq = $(if $(word 2, $(MAKE.n) $(MAKE.q)),nq) NINJAFLAGS = $(if $V,-v) $(if $(MAKE.n), -n) $(if $(MAKE.k), -k0) \ $(filter-out -j, $(lastword -j1 $(filter -l% -j%, $(MAKEFLAGS)))) \ @@ -213,7 +223,7 @@ qemu-%.tar.bz2: distclean: clean -$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) -t clean -g || : - rm -f config-host.mak config-host.h* + rm -f config-host.mak config-host.h* config-poison.h rm -f tests/tcg/config-*.mak rm -f config-all-disas.mak config.status rm -f roms/seabios/config.mak roms/vgabios/config.mak @@ -225,7 +235,8 @@ distclean: clean rm -f linux-headers/asm rm -Rf .sdk -find-src-path = find "$(SRC_PATH)/" -path "$(SRC_PATH)/meson" -prune -o \( -name "*.[chsS]" -o -name "*.[ch].inc" \) +find-src-path = find "$(SRC_PATH)" -path "$(SRC_PATH)/meson" -prune -o \ + -type l -prune -o \( -name "*.[chsS]" -o -name "*.[ch].inc" \) .PHONY: ctags ctags: @@ -246,7 +257,7 @@ gtags: "GTAGS", "Remove old $@ files") $(call quiet-command, \ (cd $(SRC_PATH) && \ - $(find-src-path) | gtags -f -), \ + $(find-src-path) -print | gtags -f -), \ "GTAGS", "Re-index $(SRC_PATH)") .PHONY: TAGS diff --git a/README.rst b/README.rst index 8fce5774685..668475b8224 100644 --- a/README.rst +++ b/README.rst @@ -82,9 +82,9 @@ of other UNIX targets. The simple steps to build QEMU are: Additional information can also be found online via the QEMU website: -* ``_ -* ``_ -* ``_ +* ``_ +* ``_ +* ``_ Submitting patches @@ -107,8 +107,8 @@ the Developers Guide. Additional information on submitting patches can be found online via the QEMU website -* ``_ -* ``_ +* ``_ +* ``_ The QEMU website is also maintained under source control. @@ -154,20 +154,20 @@ will be tagged as my-feature-v2. Bug reporting ============= -The QEMU project uses Launchpad as its primary upstream bug tracker. Bugs +The QEMU project uses GitLab issues to track bugs. Bugs found when running code built from QEMU git or upstream released sources should be reported via: -* ``_ +* ``_ If using QEMU via an operating system vendor pre-built binary package, it is preferable to report bugs to the vendor's own bug tracker first. If the bug is also known to affect latest upstream code, it can also be -reported via launchpad. +reported via GitLab. 
For additional information on bug reporting consult: -* ``_ +* ``_ ChangeLog @@ -191,4 +191,4 @@ main methods being email and IRC Information on additional methods of contacting the community can be found online via the QEMU website: -* ``_ +* ``_ diff --git a/VERSION b/VERSION index 09b254e90c6..6abaeb2f907 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -6.0.0 +6.2.0 diff --git a/accel/Kconfig b/accel/Kconfig index 461104c7715..8bdedb7d15f 100644 --- a/accel/Kconfig +++ b/accel/Kconfig @@ -1,6 +1,9 @@ config WHPX bool +config NVMM + bool + config HAX bool diff --git a/accel/accel-common.c b/accel/accel-common.c index 9901b0531c4..7b8ec7e0f72 100644 --- a/accel/accel-common.c +++ b/accel/accel-common.c @@ -44,7 +44,7 @@ static const TypeInfo accel_type = { AccelClass *accel_find(const char *opt_name) { char *class_name = g_strdup_printf(ACCEL_CLASS_NAME("%s"), opt_name); - AccelClass *ac = ACCEL_CLASS(object_class_by_name(class_name)); + AccelClass *ac = ACCEL_CLASS(module_object_class_by_name(class_name)); g_free(class_name); return ac; } @@ -54,10 +54,23 @@ static void accel_init_cpu_int_aux(ObjectClass *klass, void *opaque) CPUClass *cc = CPU_CLASS(klass); AccelCPUClass *accel_cpu = opaque; + /* + * The first callback allows accel-cpu to run initializations + * for the CPU, customizing CPU behavior according to the accelerator. + * + * The second one allows the CPU to customize the accel-cpu + * behavior according to the CPU. + * + * The second is currently only used by TCG, to specialize the + * TCGCPUOps depending on the CPU type. + */ cc->accel_cpu = accel_cpu; if (accel_cpu->cpu_class_init) { accel_cpu->cpu_class_init(cc); } + if (cc->init_accel_cpu) { + cc->init_accel_cpu(accel_cpu, cc); + } } /* initialize the arch-specific accel CpuClass interfaces */ @@ -89,6 +102,25 @@ void accel_init_interfaces(AccelClass *ac) accel_init_cpu_interfaces(ac); } +void accel_cpu_instance_init(CPUState *cpu) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->accel_cpu && cc->accel_cpu->cpu_instance_init) { + cc->accel_cpu->cpu_instance_init(cpu); + } +} + +bool accel_cpu_realizefn(CPUState *cpu, Error **errp) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->accel_cpu && cc->accel_cpu->cpu_realizefn) { + return cc->accel_cpu->cpu_realizefn(cpu, errp); + } + return true; +} + static const TypeInfo accel_cpu_type = { .name = TYPE_ACCEL_CPU, .parent = TYPE_OBJECT, diff --git a/accel/accel-softmmu.c b/accel/accel-softmmu.c index 50fa5acaa40..67276e4f522 100644 --- a/accel/accel-softmmu.c +++ b/accel/accel-softmmu.c @@ -72,7 +72,7 @@ void accel_init_ops_interfaces(AccelClass *ac) g_assert(ac_name != NULL); ops_name = g_strdup_printf("%s" ACCEL_OPS_SUFFIX, ac_name); - ops = ACCEL_OPS_CLASS(object_class_by_name(ops_name)); + ops = ACCEL_OPS_CLASS(module_object_class_by_name(ops_name)); g_free(ops_name); /* diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c new file mode 100644 index 00000000000..54457c76c2f --- /dev/null +++ b/accel/hvf/hvf-accel-ops.c @@ -0,0 +1,488 @@ +/* + * Copyright 2008 IBM Corporation + * 2008 Red Hat, Inc. + * Copyright 2011 Intel Corporation + * Copyright 2016 Veertu, Inc. + * Copyright 2017 The Android Open Source Project + * + * QEMU Hypervisor.framework support + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + * This file contain code under public domain from the hvdos project: + * https://github.com/mist64/hvdos + * + * Parts Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/main-loop.h" +#include "exec/address-spaces.h" +#include "exec/exec-all.h" +#include "sysemu/cpus.h" +#include "sysemu/hvf.h" +#include "sysemu/hvf_int.h" +#include "sysemu/runstate.h" +#include "qemu/guest-random.h" + +HVFState *hvf_state; + +#ifdef __aarch64__ +#define HV_VM_DEFAULT NULL +#endif + +/* Memory slots */ + +hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size) +{ + hvf_slot *slot; + int x; + for (x = 0; x < hvf_state->num_slots; ++x) { + slot = &hvf_state->slots[x]; + if (slot->size && start < (slot->start + slot->size) && + (start + size) > slot->start) { + return slot; + } + } + return NULL; +} + +struct mac_slot { + int present; + uint64_t size; + uint64_t gpa_start; + uint64_t gva; +}; + +struct mac_slot mac_slots[32]; + +static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) +{ + struct mac_slot *macslot; + hv_return_t ret; + + macslot = &mac_slots[slot->slot_id]; + + if (macslot->present) { + if (macslot->size != slot->size) { + macslot->present = 0; + ret = hv_vm_unmap(macslot->gpa_start, macslot->size); + assert_hvf_ok(ret); + } + } + + if (!slot->size) { + return 0; + } + + macslot->present = 1; + macslot->gpa_start = slot->start; + macslot->size = slot->size; + ret = hv_vm_map(slot->mem, slot->start, slot->size, flags); + assert_hvf_ok(ret); + return 0; +} + +static void hvf_set_phys_mem(MemoryRegionSection *section, bool add) +{ + hvf_slot *mem; + MemoryRegion *area = section->mr; + bool writeable = !area->readonly && !area->rom_device; + hv_memory_flags_t flags; + uint64_t page_size = qemu_real_host_page_size; + + if (!memory_region_is_ram(area)) { + if (writeable) { + return; + } else if (!memory_region_is_romd(area)) { + /* + * If the memory device is not in romd_mode, then we actually want + * to remove the hvf memory slot so all accesses will trap. + */ + add = false; + } + } + + if (!QEMU_IS_ALIGNED(int128_get64(section->size), page_size) || + !QEMU_IS_ALIGNED(section->offset_within_address_space, page_size)) { + /* Not page aligned, so we can not map as RAM */ + add = false; + } + + mem = hvf_find_overlap_slot( + section->offset_within_address_space, + int128_get64(section->size)); + + if (mem && add) { + if (mem->size == int128_get64(section->size) && + mem->start == section->offset_within_address_space && + mem->mem == (memory_region_get_ram_ptr(area) + + section->offset_within_region)) { + return; /* Same region was attempted to register, go away. */ + } + } + + /* Region needs to be reset. set the size to 0 and remap it. */ + if (mem) { + mem->size = 0; + if (do_hvf_set_memory(mem, 0)) { + error_report("Failed to reset overlapping slot"); + abort(); + } + } + + if (!add) { + return; + } + + if (area->readonly || + (!memory_region_is_ram(area) && memory_region_is_romd(area))) { + flags = HV_MEMORY_READ | HV_MEMORY_EXEC; + } else { + flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; + } + + /* Now make a new slot. 
*/ + int x; + + for (x = 0; x < hvf_state->num_slots; ++x) { + mem = &hvf_state->slots[x]; + if (!mem->size) { + break; + } + } + + if (x == hvf_state->num_slots) { + error_report("No free slots"); + abort(); + } + + mem->size = int128_get64(section->size); + mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region; + mem->start = section->offset_within_address_space; + mem->region = area; + + if (do_hvf_set_memory(mem, flags)) { + error_report("Error registering new memory slot"); + abort(); + } +} + +static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) +{ + if (!cpu->vcpu_dirty) { + hvf_get_registers(cpu); + cpu->vcpu_dirty = true; + } +} + +static void hvf_cpu_synchronize_state(CPUState *cpu) +{ + if (!cpu->vcpu_dirty) { + run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL); + } +} + +static void do_hvf_cpu_synchronize_set_dirty(CPUState *cpu, + run_on_cpu_data arg) +{ + /* QEMU state is the reference, push it to HVF now and on next entry */ + cpu->vcpu_dirty = true; +} + +static void hvf_cpu_synchronize_post_reset(CPUState *cpu) +{ + run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL); +} + +static void hvf_cpu_synchronize_post_init(CPUState *cpu) +{ + run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL); +} + +static void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu) +{ + run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL); +} + +static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) +{ + hvf_slot *slot; + + slot = hvf_find_overlap_slot( + section->offset_within_address_space, + int128_get64(section->size)); + + /* protect region against writes; begin tracking it */ + if (on) { + slot->flags |= HVF_SLOT_LOG; + hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, + HV_MEMORY_READ | HV_MEMORY_EXEC); + /* stop tracking region*/ + } else { + slot->flags &= ~HVF_SLOT_LOG; + hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, + HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC); + } +} + +static void hvf_log_start(MemoryListener *listener, + MemoryRegionSection *section, int old, int new) +{ + if (old != 0) { + return; + } + + hvf_set_dirty_tracking(section, 1); +} + +static void hvf_log_stop(MemoryListener *listener, + MemoryRegionSection *section, int old, int new) +{ + if (new != 0) { + return; + } + + hvf_set_dirty_tracking(section, 0); +} + +static void hvf_log_sync(MemoryListener *listener, + MemoryRegionSection *section) +{ + /* + * sync of dirty pages is handled elsewhere; just make sure we keep + * tracking the region. 
+ */ + hvf_set_dirty_tracking(section, 1); +} + +static void hvf_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + hvf_set_phys_mem(section, true); +} + +static void hvf_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + hvf_set_phys_mem(section, false); +} + +static MemoryListener hvf_memory_listener = { + .name = "hvf", + .priority = 10, + .region_add = hvf_region_add, + .region_del = hvf_region_del, + .log_start = hvf_log_start, + .log_stop = hvf_log_stop, + .log_sync = hvf_log_sync, +}; + +static void dummy_signal(int sig) +{ +} + +bool hvf_allowed; + +static int hvf_accel_init(MachineState *ms) +{ + int x; + hv_return_t ret; + HVFState *s; + + ret = hv_vm_create(HV_VM_DEFAULT); + assert_hvf_ok(ret); + + s = g_new0(HVFState, 1); + + s->num_slots = ARRAY_SIZE(s->slots); + for (x = 0; x < s->num_slots; ++x) { + s->slots[x].size = 0; + s->slots[x].slot_id = x; + } + + hvf_state = s; + memory_listener_register(&hvf_memory_listener, &address_space_memory); + + return hvf_arch_init(); +} + +static void hvf_accel_class_init(ObjectClass *oc, void *data) +{ + AccelClass *ac = ACCEL_CLASS(oc); + ac->name = "HVF"; + ac->init_machine = hvf_accel_init; + ac->allowed = &hvf_allowed; +} + +static const TypeInfo hvf_accel_type = { + .name = TYPE_HVF_ACCEL, + .parent = TYPE_ACCEL, + .class_init = hvf_accel_class_init, +}; + +static void hvf_type_init(void) +{ + type_register_static(&hvf_accel_type); +} + +type_init(hvf_type_init); + +static void hvf_vcpu_destroy(CPUState *cpu) +{ + hv_return_t ret = hv_vcpu_destroy(cpu->hvf->fd); + assert_hvf_ok(ret); + + hvf_arch_vcpu_destroy(cpu); + g_free(cpu->hvf); + cpu->hvf = NULL; +} + +static int hvf_init_vcpu(CPUState *cpu) +{ + int r; + + cpu->hvf = g_malloc0(sizeof(*cpu->hvf)); + + /* init cpu signals */ + struct sigaction sigact; + + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = dummy_signal; + sigaction(SIG_IPI, &sigact, NULL); + + pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask); + sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI); + +#ifdef __aarch64__ + r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL); +#else + r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf->fd, HV_VCPU_DEFAULT); +#endif + cpu->vcpu_dirty = 1; + assert_hvf_ok(r); + + return hvf_arch_init_vcpu(cpu); +} + +/* + * The HVF-specific vCPU thread function. This one should only run when the host + * CPU supports the VMX "unrestricted guest" feature. + */ +static void *hvf_cpu_thread_fn(void *arg) +{ + CPUState *cpu = arg; + + int r; + + assert(hvf_enabled()); + + rcu_register_thread(); + + qemu_mutex_lock_iothread(); + qemu_thread_get_self(cpu->thread); + + cpu->thread_id = qemu_get_thread_id(); + cpu->can_do_io = 1; + current_cpu = cpu; + + hvf_init_vcpu(cpu); + + /* signal CPU creation */ + cpu_thread_signal_created(cpu); + qemu_guest_random_seed_thread_part2(cpu->random_seed); + + do { + if (cpu_can_run(cpu)) { + r = hvf_vcpu_exec(cpu); + if (r == EXCP_DEBUG) { + cpu_handle_guest_debug(cpu); + } + } + qemu_wait_io_event(cpu); + } while (!cpu->unplug || cpu_can_run(cpu)); + + hvf_vcpu_destroy(cpu); + cpu_thread_signal_destroyed(cpu); + qemu_mutex_unlock_iothread(); + rcu_unregister_thread(); + return NULL; +} + +static void hvf_start_vcpu_thread(CPUState *cpu) +{ + char thread_name[VCPU_THREAD_NAME_SIZE]; + + /* + * HVF currently does not support TCG, and only runs in + * unrestricted-guest mode. 
+ */ + assert(hvf_enabled()); + + cpu->thread = g_malloc0(sizeof(QemuThread)); + cpu->halt_cond = g_malloc0(sizeof(QemuCond)); + qemu_cond_init(cpu->halt_cond); + + snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF", + cpu->cpu_index); + qemu_thread_create(cpu->thread, thread_name, hvf_cpu_thread_fn, + cpu, QEMU_THREAD_JOINABLE); +} + +static void hvf_accel_ops_class_init(ObjectClass *oc, void *data) +{ + AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); + + ops->create_vcpu_thread = hvf_start_vcpu_thread; + ops->kick_vcpu_thread = hvf_kick_vcpu_thread; + + ops->synchronize_post_reset = hvf_cpu_synchronize_post_reset; + ops->synchronize_post_init = hvf_cpu_synchronize_post_init; + ops->synchronize_state = hvf_cpu_synchronize_state; + ops->synchronize_pre_loadvm = hvf_cpu_synchronize_pre_loadvm; +}; +static const TypeInfo hvf_accel_ops_type = { + .name = ACCEL_OPS_NAME("hvf"), + + .parent = TYPE_ACCEL_OPS, + .class_init = hvf_accel_ops_class_init, + .abstract = true, +}; +static void hvf_accel_ops_register_types(void) +{ + type_register_static(&hvf_accel_ops_type); +} +type_init(hvf_accel_ops_register_types); diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c new file mode 100644 index 00000000000..f185b0830a7 --- /dev/null +++ b/accel/hvf/hvf-all.c @@ -0,0 +1,47 @@ +/* + * QEMU Hypervisor.framework support + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "sysemu/hvf.h" +#include "sysemu/hvf_int.h" + +void assert_hvf_ok(hv_return_t ret) +{ + if (ret == HV_SUCCESS) { + return; + } + + switch (ret) { + case HV_ERROR: + error_report("Error: HV_ERROR"); + break; + case HV_BUSY: + error_report("Error: HV_BUSY"); + break; + case HV_BAD_ARGUMENT: + error_report("Error: HV_BAD_ARGUMENT"); + break; + case HV_NO_RESOURCES: + error_report("Error: HV_NO_RESOURCES"); + break; + case HV_NO_DEVICE: + error_report("Error: HV_NO_DEVICE"); + break; + case HV_UNSUPPORTED: + error_report("Error: HV_UNSUPPORTED"); + break; + default: + error_report("Unknown Error"); + } + + abort(); +} diff --git a/accel/hvf/meson.build b/accel/hvf/meson.build new file mode 100644 index 00000000000..fc52cb78433 --- /dev/null +++ b/accel/hvf/meson.build @@ -0,0 +1,7 @@ +hvf_ss = ss.source_set() +hvf_ss.add(files( + 'hvf-all.c', + 'hvf-accel-ops.c', +)) + +specific_ss.add_all(when: 'CONFIG_HVF', if_true: hvf_ss) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index b6d9f92f151..eecd8031cf6 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -15,6 +15,7 @@ #include "qemu/osdep.h" #include +#include #include @@ -30,11 +31,9 @@ #include "sysemu/kvm_int.h" #include "sysemu/runstate.h" #include "sysemu/cpus.h" -#include "sysemu/sysemu.h" #include "qemu/bswap.h" #include "exec/memory.h" #include "exec/ram_addr.h" -#include "exec/address-spaces.h" #include "qemu/event_notifier.h" #include "qemu/main-loop.h" #include "trace.h" @@ -80,6 +79,25 @@ struct KVMParkedVcpu { QLIST_ENTRY(KVMParkedVcpu) node; }; +enum KVMDirtyRingReaperState { + KVM_DIRTY_RING_REAPER_NONE = 0, + /* The reaper is sleeping */ + KVM_DIRTY_RING_REAPER_WAIT, + /* The reaper is reaping for dirty pages */ + KVM_DIRTY_RING_REAPER_REAPING, +}; + +/* + * KVM reaper instance, responsible for collecting the KVM dirty bits + * via the dirty ring. 
+ */ +struct KVMDirtyRingReaper { + /* The reaper thread */ + QemuThread reaper_thr; + volatile uint64_t reaper_iteration; /* iteration number of reaper thr */ + volatile enum KVMDirtyRingReaperState reaper_state; /* reap thr state */ +}; + struct KVMState { AccelState parent_obj; @@ -128,6 +146,9 @@ struct KVMState KVMMemoryListener *ml; AddressSpace *as; } *as; + uint64_t kvm_dirty_ring_bytes; /* Size of the per-vcpu dirty ring */ + uint32_t kvm_dirty_ring_size; /* Number of dirty GFNs per ring */ + struct KVMDirtyRingReaper reaper; }; KVMState *kvm_state; @@ -174,8 +195,12 @@ typedef struct KVMResampleFd KVMResampleFd; static QLIST_HEAD(, KVMResampleFd) kvm_resample_fd_list = QLIST_HEAD_INITIALIZER(kvm_resample_fd_list); -#define kvm_slots_lock(kml) qemu_mutex_lock(&(kml)->slots_lock) -#define kvm_slots_unlock(kml) qemu_mutex_unlock(&(kml)->slots_lock) +static QemuMutex kml_slots_lock; + +#define kvm_slots_lock() qemu_mutex_lock(&kml_slots_lock) +#define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) + +static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); static inline void kvm_resample_fd_remove(int gsi) { @@ -241,9 +266,9 @@ bool kvm_has_free_slot(MachineState *ms) bool result; KVMMemoryListener *kml = &s->memory_listener; - kvm_slots_lock(kml); + kvm_slots_lock(); result = !!kvm_get_free_slot(kml); - kvm_slots_unlock(kml); + kvm_slots_unlock(); return result; } @@ -309,7 +334,7 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, KVMMemoryListener *kml = &s->memory_listener; int i, ret = 0; - kvm_slots_lock(kml); + kvm_slots_lock(); for (i = 0; i < s->nr_slots; i++) { KVMSlot *mem = &kml->slots[i]; @@ -319,7 +344,7 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, break; } } - kvm_slots_unlock(kml); + kvm_slots_unlock(); return ret; } @@ -385,6 +410,13 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) goto err; } + if (cpu->kvm_dirty_gfns) { + ret = munmap(cpu->kvm_dirty_gfns, s->kvm_dirty_ring_bytes); + if (ret < 0) { + goto err; + } + } + vcpu = g_malloc0(sizeof(*vcpu)); vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); vcpu->kvm_fd = cpu->kvm_fd; @@ -437,6 +469,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) cpu->kvm_fd = ret; cpu->kvm_state = s; cpu->vcpu_dirty = true; + cpu->dirty_pages = 0; mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); if (mmap_size < 0) { @@ -461,6 +494,19 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) (void *)cpu->kvm_run + s->coalesced_mmio * PAGE_SIZE; } + if (s->kvm_dirty_ring_size) { + /* Use MAP_SHARED to share pages with the kernel */ + cpu->kvm_dirty_gfns = mmap(NULL, s->kvm_dirty_ring_bytes, + PROT_READ | PROT_WRITE, MAP_SHARED, + cpu->kvm_fd, + PAGE_SIZE * KVM_DIRTY_LOG_PAGE_OFFSET); + if (cpu->kvm_dirty_gfns == MAP_FAILED) { + ret = -errno; + DPRINTF("mmap'ing vcpu dirty gfns failed: %d\n", ret); + goto err; + } + } + ret = kvm_arch_init_vcpu(cpu); if (ret < 0) { error_setg_errno(errp, -ret, @@ -500,6 +546,7 @@ static int kvm_slot_update_flags(KVMMemoryListener *kml, KVMSlot *mem, return 0; } + kvm_slot_init_dirty_bitmap(mem); return kvm_set_user_memory_region(kml, mem, false); } @@ -515,7 +562,7 @@ static int kvm_section_update_flags(KVMMemoryListener *kml, return 0; } - kvm_slots_lock(kml); + kvm_slots_lock(); while (size && !ret) { slot_size = MIN(kvm_max_slot_size, size); @@ -531,7 +578,7 @@ static int kvm_section_update_flags(KVMMemoryListener *kml, } out: - kvm_slots_unlock(kml); + kvm_slots_unlock(); return ret; } @@ -570,22 +617,28 @@ static void kvm_log_stop(MemoryListener *listener, } /* get kvm's 
dirty pages bitmap and update qemu's */ -static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, - unsigned long *bitmap) +static void kvm_slot_sync_dirty_pages(KVMSlot *slot) { - ram_addr_t start = section->offset_within_region + - memory_region_get_ram_addr(section->mr); - ram_addr_t pages = int128_get64(section->size) / qemu_real_host_page_size; + ram_addr_t start = slot->ram_start_offset; + ram_addr_t pages = slot->memory_size / qemu_real_host_page_size; - cpu_physical_memory_set_dirty_lebitmap(bitmap, start, pages); - return 0; + cpu_physical_memory_set_dirty_lebitmap(slot->dirty_bmap, start, pages); +} + +static void kvm_slot_reset_dirty_pages(KVMSlot *slot) +{ + memset(slot->dirty_bmap, 0, slot->dirty_bmap_size); } #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) /* Allocate the dirty bitmap for a slot */ -static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) +static void kvm_slot_init_dirty_bitmap(KVMSlot *mem) { + if (!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) || mem->dirty_bmap) { + return; + } + /* * XXX bad kernel interface alert * For dirty bitmap, kernel allocates array of size aligned to @@ -606,6 +659,197 @@ static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) hwaddr bitmap_size = ALIGN(mem->memory_size / qemu_real_host_page_size, /*HOST_LONG_BITS*/ 64) / 8; mem->dirty_bmap = g_malloc0(bitmap_size); + mem->dirty_bmap_size = bitmap_size; +} + +/* + * Sync dirty bitmap from kernel to KVMSlot.dirty_bmap, return true if + * succeeded, false otherwise + */ +static bool kvm_slot_get_dirty_log(KVMState *s, KVMSlot *slot) +{ + struct kvm_dirty_log d = {}; + int ret; + + d.dirty_bitmap = slot->dirty_bmap; + d.slot = slot->slot | (slot->as_id << 16); + ret = kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d); + + if (ret == -ENOENT) { + /* kernel does not have dirty bitmap in this slot */ + ret = 0; + } + if (ret) { + error_report_once("%s: KVM_GET_DIRTY_LOG failed with %d", + __func__, ret); + } + return ret == 0; +} + +/* Should be with all slots_lock held for the address spaces. */ +static void kvm_dirty_ring_mark_page(KVMState *s, uint32_t as_id, + uint32_t slot_id, uint64_t offset) +{ + KVMMemoryListener *kml; + KVMSlot *mem; + + if (as_id >= s->nr_as) { + return; + } + + kml = s->as[as_id].ml; + mem = &kml->slots[slot_id]; + + if (!mem->memory_size || offset >= + (mem->memory_size / qemu_real_host_page_size)) { + return; + } + + set_bit(offset, mem->dirty_bmap); +} + +static bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn) +{ + return gfn->flags == KVM_DIRTY_GFN_F_DIRTY; +} + +static void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn) +{ + gfn->flags = KVM_DIRTY_GFN_F_RESET; +} + +/* + * Should be with all slots_lock held for the address spaces. It returns the + * dirty page we've collected on this dirty ring. 
+ */ +static uint32_t kvm_dirty_ring_reap_one(KVMState *s, CPUState *cpu) +{ + struct kvm_dirty_gfn *dirty_gfns = cpu->kvm_dirty_gfns, *cur; + uint32_t ring_size = s->kvm_dirty_ring_size; + uint32_t count = 0, fetch = cpu->kvm_fetch_index; + + assert(dirty_gfns && ring_size); + trace_kvm_dirty_ring_reap_vcpu(cpu->cpu_index); + + while (true) { + cur = &dirty_gfns[fetch % ring_size]; + if (!dirty_gfn_is_dirtied(cur)) { + break; + } + kvm_dirty_ring_mark_page(s, cur->slot >> 16, cur->slot & 0xffff, + cur->offset); + dirty_gfn_set_collected(cur); + trace_kvm_dirty_ring_page(cpu->cpu_index, fetch, cur->offset); + fetch++; + count++; + } + cpu->kvm_fetch_index = fetch; + cpu->dirty_pages += count; + + return count; +} + +/* Must be with slots_lock held */ +static uint64_t kvm_dirty_ring_reap_locked(KVMState *s) +{ + int ret; + CPUState *cpu; + uint64_t total = 0; + int64_t stamp; + + stamp = get_clock(); + + CPU_FOREACH(cpu) { + total += kvm_dirty_ring_reap_one(s, cpu); + } + + if (total) { + ret = kvm_vm_ioctl(s, KVM_RESET_DIRTY_RINGS); + assert(ret == total); + } + + stamp = get_clock() - stamp; + + if (total) { + trace_kvm_dirty_ring_reap(total, stamp / 1000); + } + + return total; +} + +/* + * Currently for simplicity, we must hold BQL before calling this. We can + * consider to drop the BQL if we're clear with all the race conditions. + */ +static uint64_t kvm_dirty_ring_reap(KVMState *s) +{ + uint64_t total; + + /* + * We need to lock all kvm slots for all address spaces here, + * because: + * + * (1) We need to mark dirty for dirty bitmaps in multiple slots + * and for tons of pages, so it's better to take the lock here + * once rather than once per page. And more importantly, + * + * (2) We must _NOT_ publish dirty bits to the other threads + * (e.g., the migration thread) via the kvm memory slot dirty + * bitmaps before correctly re-protect those dirtied pages. + * Otherwise we can have potential risk of data corruption if + * the page data is read in the other thread before we do + * reset below. + */ + kvm_slots_lock(); + total = kvm_dirty_ring_reap_locked(s); + kvm_slots_unlock(); + + return total; +} + +static void do_kvm_cpu_synchronize_kick(CPUState *cpu, run_on_cpu_data arg) +{ + /* No need to do anything */ +} + +/* + * Kick all vcpus out in a synchronized way. When returned, we + * guarantee that every vcpu has been kicked and at least returned to + * userspace once. + */ +static void kvm_cpu_synchronize_kick_all(void) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + run_on_cpu(cpu, do_kvm_cpu_synchronize_kick, RUN_ON_CPU_NULL); + } +} + +/* + * Flush all the existing dirty pages to the KVM slot buffers. When + * this call returns, we guarantee that all the touched dirty pages + * before calling this function have been put into the per-kvmslot + * dirty bitmap. + * + * This function must be called with BQL held. + */ +static void kvm_dirty_ring_flush(void) +{ + trace_kvm_dirty_ring_flush(0); + /* + * The function needs to be serialized. Since this function + * should always be with BQL held, serialization is guaranteed. + * However, let's be sure of it. + */ + assert(qemu_mutex_iothread_locked()); + /* + * First make sure to flush the hardware buffers by kicking all + * vcpus out in a synchronous way. 
+ */ + kvm_cpu_synchronize_kick_all(); + kvm_dirty_ring_reap(kvm_state); + trace_kvm_dirty_ring_flush(1); } /** @@ -619,53 +863,28 @@ static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) * @kml: the KVM memory listener object * @section: the memory section to sync the dirty bitmap with */ -static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, - MemoryRegionSection *section) +static void kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + MemoryRegionSection *section) { KVMState *s = kvm_state; - struct kvm_dirty_log d = {}; KVMSlot *mem; hwaddr start_addr, size; - hwaddr slot_size, slot_offset = 0; - int ret = 0; + hwaddr slot_size; size = kvm_align_section(section, &start_addr); while (size) { - MemoryRegionSection subsection = *section; - slot_size = MIN(kvm_max_slot_size, size); mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); if (!mem) { /* We don't have a slot if we want to trap every access. */ - goto out; - } - - if (!mem->dirty_bmap) { - /* Allocate on the first log_sync, once and for all */ - kvm_memslot_init_dirty_bitmap(mem); + return; } - - d.dirty_bitmap = mem->dirty_bmap; - d.slot = mem->slot | (kml->as_id << 16); - ret = kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d); - if (ret == -ENOENT) { - /* kernel does not have dirty bitmap in this slot */ - ret = 0; - } else if (ret < 0) { - error_report("ioctl KVM_GET_DIRTY_LOG failed: %d", errno); - goto out; - } else { - subsection.offset_within_region += slot_offset; - subsection.size = int128_make64(slot_size); - kvm_get_dirty_pages_log_range(&subsection, d.dirty_bitmap); + if (kvm_slot_get_dirty_log(s, mem)) { + kvm_slot_sync_dirty_pages(mem); } - - slot_offset += slot_size; start_addr += slot_size; size -= slot_size; } -out: - return ret; } /* Alignment requirement for KVM_CLEAR_DIRTY_LOG - 64 pages */ @@ -812,7 +1031,7 @@ static int kvm_physical_log_clear(KVMMemoryListener *kml, return ret; } - kvm_slots_lock(kml); + kvm_slots_lock(); for (i = 0; i < s->nr_slots; i++) { mem = &kml->slots[i]; @@ -838,7 +1057,7 @@ static int kvm_physical_log_clear(KVMMemoryListener *kml, } } - kvm_slots_unlock(kml); + kvm_slots_unlock(); return ret; } @@ -912,6 +1131,7 @@ static void kvm_coalesce_pio_del(MemoryListener *listener, } static MemoryListener kvm_coalesced_pio_listener = { + .name = "kvm-coalesced-pio", .coalesced_io_add = kvm_coalesce_pio_add, .coalesced_io_del = kvm_coalesce_pio_del, }; @@ -1121,7 +1341,8 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, int err; MemoryRegion *mr = section->mr; bool writeable = !mr->readonly && !mr->rom_device; - hwaddr start_addr, size, slot_size; + hwaddr start_addr, size, slot_size, mr_offset; + ram_addr_t ram_start_offset; void *ram; if (!memory_region_is_ram(mr)) { @@ -1139,11 +1360,15 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, return; } - /* use aligned delta to align the ram address */ - ram = memory_region_get_ram_ptr(mr) + section->offset_within_region + - (start_addr - section->offset_within_address_space); + /* The offset of the kvmslot within the memory region */ + mr_offset = section->offset_within_region + start_addr - + section->offset_within_address_space; + + /* use aligned delta to align the ram address and offset */ + ram = memory_region_get_ram_ptr(mr) + mr_offset; + ram_start_offset = memory_region_get_ram_addr(mr) + mr_offset; - kvm_slots_lock(kml); + kvm_slots_lock(); if (!add) { do { @@ -1153,7 +1378,25 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, goto out; } if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { - 
kvm_physical_sync_dirty_bitmap(kml, section); + /* + * NOTE: We should be aware of the fact that here we're only + * doing a best effort to sync dirty bits. No matter whether + * we're using dirty log or dirty ring, we ignored two facts: + * + * (1) dirty bits can reside in hardware buffers (PML) + * + * (2) after we collected dirty bits here, pages can be dirtied + * again before we do the final KVM_SET_USER_MEMORY_REGION to + * remove the slot. + * + * Not easy. Let's cross the fingers until it's fixed. + */ + if (kvm_state->kvm_dirty_ring_size) { + kvm_dirty_ring_reap_locked(kvm_state); + } else { + kvm_slot_get_dirty_log(kvm_state, mem); + } + kvm_slot_sync_dirty_pages(mem); } /* unregister the slot */ @@ -1177,18 +1420,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, do { slot_size = MIN(kvm_max_slot_size, size); mem = kvm_alloc_slot(kml); + mem->as_id = kml->as_id; mem->memory_size = slot_size; mem->start_addr = start_addr; + mem->ram_start_offset = ram_start_offset; mem->ram = ram; mem->flags = kvm_mem_flags(mr); - - if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { - /* - * Reallocate the bmap; it means it doesn't disappear in - * middle of a migrate. - */ - kvm_memslot_init_dirty_bitmap(mem); - } + kvm_slot_init_dirty_bitmap(mem); err = kvm_set_user_memory_region(kml, mem, true); if (err) { fprintf(stderr, "%s: error registering slot: %s\n", __func__, @@ -1196,12 +1434,58 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, abort(); } start_addr += slot_size; + ram_start_offset += slot_size; ram += slot_size; size -= slot_size; } while (size); out: - kvm_slots_unlock(kml); + kvm_slots_unlock(); +} + +static void *kvm_dirty_ring_reaper_thread(void *data) +{ + KVMState *s = data; + struct KVMDirtyRingReaper *r = &s->reaper; + + rcu_register_thread(); + + trace_kvm_dirty_ring_reaper("init"); + + while (true) { + r->reaper_state = KVM_DIRTY_RING_REAPER_WAIT; + trace_kvm_dirty_ring_reaper("wait"); + /* + * TODO: provide a smarter timeout rather than a constant? + */ + sleep(1); + + trace_kvm_dirty_ring_reaper("wakeup"); + r->reaper_state = KVM_DIRTY_RING_REAPER_REAPING; + + qemu_mutex_lock_iothread(); + kvm_dirty_ring_reap(s); + qemu_mutex_unlock_iothread(); + + r->reaper_iteration++; + } + + trace_kvm_dirty_ring_reaper("exit"); + + rcu_unregister_thread(); + + return NULL; +} + +static int kvm_dirty_ring_reaper_init(KVMState *s) +{ + struct KVMDirtyRingReaper *r = &s->reaper; + + qemu_thread_create(&r->reaper_thr, "kvm-reaper", + kvm_dirty_ring_reaper_thread, + s, QEMU_THREAD_JOINABLE); + + return 0; } static void kvm_region_add(MemoryListener *listener, @@ -1226,14 +1510,40 @@ static void kvm_log_sync(MemoryListener *listener, MemoryRegionSection *section) { KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); - int r; - kvm_slots_lock(kml); - r = kvm_physical_sync_dirty_bitmap(kml, section); - kvm_slots_unlock(kml); - if (r < 0) { - abort(); + kvm_slots_lock(); + kvm_physical_sync_dirty_bitmap(kml, section); + kvm_slots_unlock(); +} + +static void kvm_log_sync_global(MemoryListener *l) +{ + KVMMemoryListener *kml = container_of(l, KVMMemoryListener, listener); + KVMState *s = kvm_state; + KVMSlot *mem; + int i; + + /* Flush all kernel dirty addresses into KVMSlot dirty bitmap */ + kvm_dirty_ring_flush(); + + /* + * TODO: make this faster when nr_slots is big while there are + * only a few used slots (small VMs). 
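The reaper thread added above is a plain "periodic worker under a global lock" pattern. Below is a hedged, generic sketch of that pattern with POSIX threads; the one-second period and the lock taken around each pass mirror the patch, while the mutex and the work function here are placeholders rather than QEMU's BQL or its reap routine.

#include <pthread.h>
#include <unistd.h>
#include <stdbool.h>

static pthread_mutex_t demo_big_lock = PTHREAD_MUTEX_INITIALIZER;
static volatile bool demo_quit;

static void demo_reap_pass(void)
{
    /* placeholder for collecting dirty pages from all vcpu rings */
}

void *demo_reaper_thread(void *arg)
{
    (void)arg;
    while (!demo_quit) {
        sleep(1);                            /* patch TODO: smarter timeout */
        pthread_mutex_lock(&demo_big_lock);  /* the patch takes the BQL here */
        demo_reap_pass();
        pthread_mutex_unlock(&demo_big_lock);
    }
    return NULL;
}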
+ */ + kvm_slots_lock(); + for (i = 0; i < s->nr_slots; i++) { + mem = &kml->slots[i]; + if (mem->memory_size && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { + kvm_slot_sync_dirty_pages(mem); + /* + * This is not needed by KVM_GET_DIRTY_LOG because the + * ioctl will unconditionally overwrite the whole region. + * However kvm dirty ring has no such side effect. + */ + kvm_slot_reset_dirty_pages(mem); + } } + kvm_slots_unlock(); } static void kvm_log_clear(MemoryListener *listener, @@ -1326,11 +1636,10 @@ static void kvm_io_ioeventfd_del(MemoryListener *listener, } void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, - AddressSpace *as, int as_id) + AddressSpace *as, int as_id, const char *name) { int i; - qemu_mutex_init(&kml->slots_lock); kml->slots = g_malloc0(s->nr_slots * sizeof(KVMSlot)); kml->as_id = as_id; @@ -1342,9 +1651,15 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, kml->listener.region_del = kvm_region_del; kml->listener.log_start = kvm_log_start; kml->listener.log_stop = kvm_log_stop; - kml->listener.log_sync = kvm_log_sync; - kml->listener.log_clear = kvm_log_clear; kml->listener.priority = 10; + kml->listener.name = name; + + if (s->kvm_dirty_ring_size) { + kml->listener.log_sync_global = kvm_log_sync_global; + } else { + kml->listener.log_sync = kvm_log_sync; + kml->listener.log_clear = kvm_log_clear; + } memory_listener_register(&kml->listener, as); @@ -1358,6 +1673,7 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, } static MemoryListener kvm_io_listener = { + .name = "kvm-io", .eventfd_add = kvm_io_ioeventfd_add, .eventfd_del = kvm_io_ioeventfd_del, .priority = 10, @@ -1982,6 +2298,11 @@ bool kvm_vcpu_id_is_valid(int vcpu_id) return vcpu_id >= 0 && vcpu_id < kvm_max_vcpu_id(s); } +bool kvm_dirty_ring_enabled(void) +{ + return kvm_state->kvm_dirty_ring_size ? true : false; +} + static int kvm_init(MachineState *ms) { MachineClass *mc = MACHINE_GET_CLASS(ms); @@ -2003,6 +2324,8 @@ static int kvm_init(MachineState *ms) int type = 0; uint64_t dirty_log_manual_caps; + qemu_mutex_init(&kml_slots_lock); + s = KVM_STATE(ms->accelerator); /* @@ -2019,7 +2342,6 @@ static int kvm_init(MachineState *ms) QTAILQ_INIT(&s->kvm_sw_breakpoints); #endif QLIST_INIT(&s->kvm_parked_vcpus); - s->vmfd = -1; s->fd = qemu_open_old("/dev/kvm", O_RDWR); if (s->fd == -1) { fprintf(stderr, "Could not access KVM kernel module: %m\n"); @@ -2085,6 +2407,12 @@ static int kvm_init(MachineState *ms) "- for kernels supporting the vm.allocate_pgste sysctl, " "whether it is enabled\n"); } +#elif defined(TARGET_PPC) + if (ret == -EINVAL) { + fprintf(stderr, + "PPC KVM module is not loaded. Try modprobe kvm_%s.\n", + (type == 2) ? "pr" : "hv"); + } #endif goto err; } @@ -2127,20 +2455,70 @@ static int kvm_init(MachineState *ms) s->coalesced_pio = s->coalesced_mmio && kvm_check_extension(s, KVM_CAP_COALESCED_PIO); - dirty_log_manual_caps = - kvm_check_extension(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); - dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | - KVM_DIRTY_LOG_INITIALLY_SET); - s->manual_dirty_log_protect = dirty_log_manual_caps; - if (dirty_log_manual_caps) { - ret = kvm_vm_enable_cap(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 0, - dirty_log_manual_caps); - if (ret) { - warn_report("Trying to enable capability %"PRIu64" of " - "KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 but failed. " - "Falling back to the legacy mode. 
", - dirty_log_manual_caps); - s->manual_dirty_log_protect = 0; + /* + * Enable KVM dirty ring if supported, otherwise fall back to + * dirty logging mode + */ + if (s->kvm_dirty_ring_size > 0) { + uint64_t ring_bytes; + + ring_bytes = s->kvm_dirty_ring_size * sizeof(struct kvm_dirty_gfn); + + /* Read the max supported pages */ + ret = kvm_vm_check_extension(s, KVM_CAP_DIRTY_LOG_RING); + if (ret > 0) { + if (ring_bytes > ret) { + error_report("KVM dirty ring size %" PRIu32 " too big " + "(maximum is %ld). Please use a smaller value.", + s->kvm_dirty_ring_size, + (long)ret / sizeof(struct kvm_dirty_gfn)); + ret = -EINVAL; + goto err; + } + + ret = kvm_vm_enable_cap(s, KVM_CAP_DIRTY_LOG_RING, 0, ring_bytes); + if (ret) { + error_report("Enabling of KVM dirty ring failed: %s. " + "Suggested minimum value is 1024.", strerror(-ret)); + goto err; + } + + s->kvm_dirty_ring_bytes = ring_bytes; + } else { + warn_report("KVM dirty ring not available, using bitmap method"); + s->kvm_dirty_ring_size = 0; + } + } + + /* + * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is not needed when dirty ring is + * enabled. More importantly, KVM_DIRTY_LOG_INITIALLY_SET will assume no + * page is wr-protected initially, which is against how kvm dirty ring is + * usage - kvm dirty ring requires all pages are wr-protected at the very + * beginning. Enabling this feature for dirty ring causes data corruption. + * + * TODO: Without KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and kvm clear dirty log, + * we may expect a higher stall time when starting the migration. In the + * future we can enable KVM_CLEAR_DIRTY_LOG to work with dirty ring too: + * instead of clearing dirty bit, it can be a way to explicitly wr-protect + * guest pages. + */ + if (!s->kvm_dirty_ring_size) { + dirty_log_manual_caps = + kvm_check_extension(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); + s->manual_dirty_log_protect = dirty_log_manual_caps; + if (dirty_log_manual_caps) { + ret = kvm_vm_enable_cap(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 0, + dirty_log_manual_caps); + if (ret) { + warn_report("Trying to enable capability %"PRIu64" of " + "KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 but failed. " + "Falling back to the legacy mode. ", + dirty_log_manual_caps); + s->manual_dirty_log_protect = 0; + } } } @@ -2211,7 +2589,7 @@ static int kvm_init(MachineState *ms) s->memory_listener.listener.coalesced_io_del = kvm_uncoalesce_mmio_region; kvm_memory_listener_register(s, &s->memory_listener, - &address_space_memory, 0); + &address_space_memory, 0, "kvm-memory"); if (kvm_eventfds_allowed) { memory_listener_register(&kvm_io_listener, &address_space_io); @@ -2226,6 +2604,14 @@ static int kvm_init(MachineState *ms) ret = ram_block_discard_disable(true); assert(!ret); } + + if (s->kvm_dirty_ring_size) { + ret = kvm_dirty_ring_reaper_init(s); + if (ret) { + goto err; + } + } + return 0; err: @@ -2269,7 +2655,7 @@ static int kvm_handle_internal_error(CPUState *cpu, struct kvm_run *run) int i; for (i = 0; i < run->internal.ndata; ++i) { - fprintf(stderr, "extra data[%d]: %"PRIx64"\n", + fprintf(stderr, "extra data[%d]: 0x%016"PRIx64"\n", i, (uint64_t)run->internal.data[i]); } } @@ -2538,6 +2924,17 @@ int kvm_cpu_exec(CPUState *cpu) case KVM_EXIT_INTERNAL_ERROR: ret = kvm_handle_internal_error(cpu, run); break; + case KVM_EXIT_DIRTY_RING_FULL: + /* + * We shouldn't continue if the dirty ring of this vcpu is + * still full. Got kicked by KVM_RESET_DIRTY_RINGS. 
+ */ + trace_kvm_dirty_ring_full(cpu->cpu_index); + qemu_mutex_lock_iothread(); + kvm_dirty_ring_reap(kvm_state); + qemu_mutex_unlock_iothread(); + ret = 0; + break; case KVM_EXIT_SYSTEM_EVENT: switch (run->system_event.type) { case KVM_SYSTEM_EVENT_SHUTDOWN: @@ -3114,6 +3511,11 @@ static void kvm_set_kvm_shadow_mem(Object *obj, Visitor *v, KVMState *s = KVM_STATE(obj); int64_t value; + if (s->fd != -1) { + error_setg(errp, "Cannot set properties after the accelerator has been initialized"); + return; + } + if (!visit_type_int(v, name, &value, errp)) { return; } @@ -3128,6 +3530,11 @@ static void kvm_set_kernel_irqchip(Object *obj, Visitor *v, KVMState *s = KVM_STATE(obj); OnOffSplit mode; + if (s->fd != -1) { + error_setg(errp, "Cannot set properties after the accelerator has been initialized"); + return; + } + if (!visit_type_OnOffSplit(v, name, &mode, errp)) { return; } @@ -3170,13 +3577,53 @@ bool kvm_kernel_irqchip_split(void) return kvm_state->kernel_irqchip_split == ON_OFF_AUTO_ON; } +static void kvm_get_dirty_ring_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + uint32_t value = s->kvm_dirty_ring_size; + + visit_type_uint32(v, name, &value, errp); +} + +static void kvm_set_dirty_ring_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + Error *error = NULL; + uint32_t value; + + if (s->fd != -1) { + error_setg(errp, "Cannot set properties after the accelerator has been initialized"); + return; + } + + visit_type_uint32(v, name, &value, &error); + if (error) { + error_propagate(errp, error); + return; + } + if (value & (value - 1)) { + error_setg(errp, "dirty-ring-size must be a power of two."); + return; + } + + s->kvm_dirty_ring_size = value; +} + static void kvm_accel_instance_init(Object *obj) { KVMState *s = KVM_STATE(obj); + s->fd = -1; + s->vmfd = -1; s->kvm_shadow_mem = -1; s->kernel_irqchip_allowed = true; s->kernel_irqchip_split = ON_OFF_AUTO_AUTO; + /* KVM dirty ring is by default off */ + s->kvm_dirty_ring_size = 0; } static void kvm_accel_class_init(ObjectClass *oc, void *data) @@ -3198,6 +3645,12 @@ static void kvm_accel_class_init(ObjectClass *oc, void *data) NULL, NULL); object_class_property_set_description(oc, "kvm-shadow-mem", "KVM shadow MMU size"); + + object_class_property_add(oc, "dirty-ring-size", "uint32", + kvm_get_dirty_ring_size, kvm_set_dirty_ring_size, + NULL, NULL); + object_class_property_set_description(oc, "dirty-ring-size", + "Size of KVM dirty page ring buffer (default: 0, i.e. use bitmap)"); } static const TypeInfo kvm_accel_type = { diff --git a/accel/kvm/meson.build b/accel/kvm/meson.build index 8d219bea507..397a1fe1fd1 100644 --- a/accel/kvm/meson.build +++ b/accel/kvm/meson.build @@ -3,6 +3,5 @@ kvm_ss.add(files( 'kvm-all.c', 'kvm-accel-ops.c', )) -kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c')) specific_ss.add_all(when: 'CONFIG_KVM', if_true: kvm_ss) diff --git a/accel/kvm/sev-stub.c b/accel/kvm/sev-stub.c deleted file mode 100644 index 9587d1b2a31..00000000000 --- a/accel/kvm/sev-stub.c +++ /dev/null @@ -1,22 +0,0 @@ -/* - * QEMU SEV stub - * - * Copyright Advanced Micro Devices 2018 - * - * Authors: - * Brijesh Singh - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
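The dirty-ring-size setter above rejects values that are not a power of two with the classic value & (value - 1) test; zero passes, which conveniently matches the "ring disabled" default. A tiny illustration; the command-line spelling -accel kvm,dirty-ring-size=4096 is how accelerator properties are normally passed and is an assumption here, not something this hunk shows.

#include <stdint.h>
#include <stdbool.h>
#include <assert.h>

/* True for 0 (ring disabled) and for any power of two. */
static bool demo_ring_size_ok(uint32_t value)
{
    return (value & (value - 1)) == 0;
}

int main(void)
{
    assert(demo_ring_size_ok(0));       /* default: dirty ring off */
    assert(demo_ring_size_ok(4096));    /* e.g. -accel kvm,dirty-ring-size=4096 */
    assert(!demo_ring_size_ok(3000));   /* rejected with an error */
    return 0;
}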
- * - */ - -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "sysemu/sev.h" - -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) -{ - /* If we get here, cgs must be some non-SEV thing */ - return 0; -} diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index e15ae8980d3..399aaeb0ec7 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # kvm-all.c kvm_ioctl(int type, void *arg) "type 0x%x, arg %p" @@ -18,4 +18,11 @@ kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 kvm_resample_fd_notify(int gsi) "gsi %d" +kvm_dirty_ring_full(int id) "vcpu %d" +kvm_dirty_ring_reap_vcpu(int id) "vcpu %d" +kvm_dirty_ring_page(int vcpu, uint32_t slot, uint64_t offset) "vcpu %d fetch %"PRIu32" offset 0x%"PRIx64 +kvm_dirty_ring_reaper(const char *s) "%s" +kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)" +kvm_dirty_ring_reaper_kick(const char *reason) "%s" +kvm_dirty_ring_flush(int finished) "%d" diff --git a/accel/meson.build b/accel/meson.build index b44ba30c864..dfd808d2c8e 100644 --- a/accel/meson.build +++ b/accel/meson.build @@ -2,6 +2,7 @@ specific_ss.add(files('accel-common.c')) softmmu_ss.add(files('accel-softmmu.c')) user_ss.add(files('accel-user.c')) +subdir('hvf') subdir('qtest') subdir('kvm') subdir('tcg') diff --git a/accel/qtest/meson.build b/accel/qtest/meson.build index a2f32764598..4c656002933 100644 --- a/accel/qtest/meson.build +++ b/accel/qtest/meson.build @@ -1,6 +1,2 @@ -qtest_ss = ss.source_set() -qtest_ss.add(files( - 'qtest.c', -)) - -specific_ss.add_all(when: ['CONFIG_SOFTMMU', 'CONFIG_POSIX'], if_true: qtest_ss) +qtest_module_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_POSIX'], + if_true: files('qtest.c')) diff --git a/accel/qtest/qtest.c b/accel/qtest/qtest.c index edb29f6fa4c..7e6b8110d52 100644 --- a/accel/qtest/qtest.c +++ b/accel/qtest/qtest.c @@ -45,6 +45,7 @@ static const TypeInfo qtest_accel_type = { .parent = TYPE_ACCEL, .class_init = qtest_accel_class_init, }; +module_obj(TYPE_QTEST_ACCEL); static void qtest_accel_ops_class_init(ObjectClass *oc, void *data) { @@ -61,6 +62,7 @@ static const TypeInfo qtest_accel_ops_type = { .class_init = qtest_accel_ops_class_init, .abstract = true, }; +module_obj(ACCEL_OPS_NAME("qtest")); static void qtest_type_init(void) { diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index 0f17acfac0f..5319573e003 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -11,7 +11,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "sysemu/kvm.h" #ifndef CONFIG_USER_ONLY @@ -148,4 +147,9 @@ bool kvm_arm_supports_user_irq(void) { return false; } + +bool kvm_dirty_ring_enabled(void) +{ + return false; +} #endif diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c index 2304606f8e0..d8162673ae8 100644 --- a/accel/stubs/tcg-stub.c +++ b/accel/stubs/tcg-stub.c @@ -11,7 +11,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "exec/exec-all.h" void tb_flush(CPUState *cpu) diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc index 
344525b0bb3..1df1f243e91 100644 --- a/accel/tcg/atomic_common.c.inc +++ b/accel/tcg/atomic_common.c.inc @@ -13,42 +13,112 @@ * See the COPYING file in the top-level directory. */ -static inline -void atomic_trace_rmw_pre(CPUArchState *env, target_ulong addr, uint16_t info) +static void atomic_trace_rmw_pre(CPUArchState *env, target_ulong addr, + MemOpIdx oi) { CPUState *cpu = env_cpu(env); - trace_guest_mem_before_exec(cpu, addr, info); - trace_guest_mem_before_exec(cpu, addr, info | TRACE_MEM_ST); + trace_guest_rmw_before_exec(cpu, addr, oi); } -static inline void -atomic_trace_rmw_post(CPUArchState *env, target_ulong addr, uint16_t info) +static void atomic_trace_rmw_post(CPUArchState *env, target_ulong addr, + MemOpIdx oi) { - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info); - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info | TRACE_MEM_ST); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW); } -static inline -void atomic_trace_ld_pre(CPUArchState *env, target_ulong addr, uint16_t info) +#if HAVE_ATOMIC128 +static void atomic_trace_ld_pre(CPUArchState *env, target_ulong addr, + MemOpIdx oi) { - trace_guest_mem_before_exec(env_cpu(env), addr, info); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); } -static inline -void atomic_trace_ld_post(CPUArchState *env, target_ulong addr, uint16_t info) +static void atomic_trace_ld_post(CPUArchState *env, target_ulong addr, + MemOpIdx oi) { - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); } -static inline -void atomic_trace_st_pre(CPUArchState *env, target_ulong addr, uint16_t info) +static void atomic_trace_st_pre(CPUArchState *env, target_ulong addr, + MemOpIdx oi) { - trace_guest_mem_before_exec(env_cpu(env), addr, info); + trace_guest_st_before_exec(env_cpu(env), addr, oi); } -static inline -void atomic_trace_st_post(CPUArchState *env, target_ulong addr, uint16_t info) +static void atomic_trace_st_post(CPUArchState *env, target_ulong addr, + MemOpIdx oi) { - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } +#endif + +/* + * Atomic helpers callable from TCG. + * These have a common interface and all defer to cpu_atomic_* + * using the host return address from GETPC(). 
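The CMPXCHG_HELPER/ATOMIC_HELPER macros in the next hunk only stamp out many one-line trampolines with a common signature. As a rough idea of what a single expansion amounts to, here is a hand-written equivalent with made-up demo_* names; the real helpers call the corresponding cpu_atomic_*_mmu backend and pass the host return address obtained via GETPC().

#include <stdint.h>

/* Hypothetical backend, standing in for cpu_atomic_cmpxchgl_le_mmu();
 * stubbed out so the sketch compiles on its own. */
static uint32_t demo_backend_cmpxchg32(void *env, uint64_t addr,
                                       uint32_t oldv, uint32_t newv,
                                       uint32_t oi, uintptr_t retaddr)
{
    (void)env; (void)addr; (void)newv; (void)oi; (void)retaddr;
    return oldv;   /* a real backend performs the guest-memory cmpxchg */
}

/* Roughly what one CMPXCHG_HELPER(cmpxchgl_le, uint32_t) expansion provides:
 * a thin trampoline that forwards the caller's return address. */
uint32_t demo_helper_atomic_cmpxchgl_le(void *env, uint64_t addr,
                                        uint32_t oldv, uint32_t newv,
                                        uint32_t oi)
{
    uintptr_t host_ra = (uintptr_t)__builtin_return_address(0); /* like GETPC() */
    return demo_backend_cmpxchg32(env, addr, oldv, newv, oi, host_ra);
}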
+ */ + +#define CMPXCHG_HELPER(OP, TYPE) \ + TYPE HELPER(atomic_##OP)(CPUArchState *env, target_ulong addr, \ + TYPE oldv, TYPE newv, uint32_t oi) \ + { return cpu_atomic_##OP##_mmu(env, addr, oldv, newv, oi, GETPC()); } + +CMPXCHG_HELPER(cmpxchgb, uint32_t) +CMPXCHG_HELPER(cmpxchgw_be, uint32_t) +CMPXCHG_HELPER(cmpxchgw_le, uint32_t) +CMPXCHG_HELPER(cmpxchgl_be, uint32_t) +CMPXCHG_HELPER(cmpxchgl_le, uint32_t) + +#ifdef CONFIG_ATOMIC64 +CMPXCHG_HELPER(cmpxchgq_be, uint64_t) +CMPXCHG_HELPER(cmpxchgq_le, uint64_t) +#endif + +#undef CMPXCHG_HELPER + +#define ATOMIC_HELPER(OP, TYPE) \ + TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, target_ulong addr, \ + TYPE val, uint32_t oi) \ + { return glue(glue(cpu_atomic_,OP),_mmu)(env, addr, val, oi, GETPC()); } + +#ifdef CONFIG_ATOMIC64 +#define GEN_ATOMIC_HELPERS(OP) \ + ATOMIC_HELPER(glue(OP,b), uint32_t) \ + ATOMIC_HELPER(glue(OP,w_be), uint32_t) \ + ATOMIC_HELPER(glue(OP,w_le), uint32_t) \ + ATOMIC_HELPER(glue(OP,l_be), uint32_t) \ + ATOMIC_HELPER(glue(OP,l_le), uint32_t) \ + ATOMIC_HELPER(glue(OP,q_be), uint64_t) \ + ATOMIC_HELPER(glue(OP,q_le), uint64_t) +#else +#define GEN_ATOMIC_HELPERS(OP) \ + ATOMIC_HELPER(glue(OP,b), uint32_t) \ + ATOMIC_HELPER(glue(OP,w_be), uint32_t) \ + ATOMIC_HELPER(glue(OP,w_le), uint32_t) \ + ATOMIC_HELPER(glue(OP,l_be), uint32_t) \ + ATOMIC_HELPER(glue(OP,l_le), uint32_t) +#endif + +GEN_ATOMIC_HELPERS(fetch_add) +GEN_ATOMIC_HELPERS(fetch_and) +GEN_ATOMIC_HELPERS(fetch_or) +GEN_ATOMIC_HELPERS(fetch_xor) +GEN_ATOMIC_HELPERS(fetch_smin) +GEN_ATOMIC_HELPERS(fetch_umin) +GEN_ATOMIC_HELPERS(fetch_smax) +GEN_ATOMIC_HELPERS(fetch_umax) + +GEN_ATOMIC_HELPERS(add_fetch) +GEN_ATOMIC_HELPERS(and_fetch) +GEN_ATOMIC_HELPERS(or_fetch) +GEN_ATOMIC_HELPERS(xor_fetch) +GEN_ATOMIC_HELPERS(smin_fetch) +GEN_ATOMIC_HELPERS(umin_fetch) +GEN_ATOMIC_HELPERS(smax_fetch) +GEN_ATOMIC_HELPERS(umax_fetch) + +GEN_ATOMIC_HELPERS(xchg) + +#undef ATOMIC_HELPER +#undef GEN_ATOMIC_HELPERS diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h index 0ff7f913e1f..2d917b6b1fe 100644 --- a/accel/tcg/atomic_template.h +++ b/accel/tcg/atomic_template.h @@ -19,7 +19,6 @@ */ #include "qemu/plugin.h" -#include "trace/mem.h" #if DATA_SIZE == 16 # define SUFFIX o @@ -28,8 +27,8 @@ # define SHIFT 4 #elif DATA_SIZE == 8 # define SUFFIX q -# define DATA_TYPE uint64_t -# define SDATA_TYPE int64_t +# define DATA_TYPE aligned_uint64_t +# define SDATA_TYPE aligned_int64_t # define BSWAP bswap64 # define SHIFT 3 #elif DATA_SIZE == 4 @@ -71,85 +70,78 @@ #endif ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, - ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS) + ABI_TYPE cmpv, ABI_TYPE newv, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_READ | PAGE_WRITE, retaddr); DATA_TYPE ret; - uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, - ATOMIC_MMU_IDX); - atomic_trace_rmw_pre(env, addr, info); + atomic_trace_rmw_pre(env, addr, oi); #if DATA_SIZE == 16 ret = atomic16_cmpxchg(haddr, cmpv, newv); #else ret = qatomic_cmpxchg__nocheck(haddr, cmpv, newv); #endif ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, info); + atomic_trace_rmw_post(env, addr, oi); return ret; } #if DATA_SIZE >= 16 #if HAVE_ATOMIC128 -ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) +ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE val, 
*haddr = ATOMIC_MMU_LOOKUP; - uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, - ATOMIC_MMU_IDX); + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_READ, retaddr); + DATA_TYPE val; - atomic_trace_ld_pre(env, addr, info); + atomic_trace_ld_pre(env, addr, oi); val = atomic16_read(haddr); ATOMIC_MMU_CLEANUP; - atomic_trace_ld_post(env, addr, info); + atomic_trace_ld_post(env, addr, oi); return val; } -void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, - ABI_TYPE val EXTRA_ARGS) +void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; - uint16_t info = trace_mem_build_info(SHIFT, false, 0, true, - ATOMIC_MMU_IDX); + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_WRITE, retaddr); - atomic_trace_st_pre(env, addr, info); + atomic_trace_st_pre(env, addr, oi); atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; - atomic_trace_st_post(env, addr, info); + atomic_trace_st_post(env, addr, oi); } #endif #else -ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, - ABI_TYPE val EXTRA_ARGS) +ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_READ | PAGE_WRITE, retaddr); DATA_TYPE ret; - uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, - ATOMIC_MMU_IDX); - atomic_trace_rmw_pre(env, addr, info); + atomic_trace_rmw_pre(env, addr, oi); ret = qatomic_xchg__nocheck(haddr, val); ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, info); + atomic_trace_rmw_post(env, addr, oi); return ret; } #define GEN_ATOMIC_HELPER(X) \ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ - ABI_TYPE val EXTRA_ARGS) \ + ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \ { \ - ATOMIC_MMU_DECLS; \ - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \ + PAGE_READ | PAGE_WRITE, retaddr); \ DATA_TYPE ret; \ - uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, \ - ATOMIC_MMU_IDX); \ - atomic_trace_rmw_pre(env, addr, info); \ + atomic_trace_rmw_pre(env, addr, oi); \ ret = qatomic_##X(haddr, val); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, info); \ + atomic_trace_rmw_post(env, addr, oi); \ return ret; \ } @@ -164,7 +156,8 @@ GEN_ATOMIC_HELPER(xor_fetch) #undef GEN_ATOMIC_HELPER -/* These helpers are, as a whole, full barriers. Within the helper, +/* + * These helpers are, as a whole, full barriers. Within the helper, * the leading barrier is explicit and the trailing barrier is within * cmpxchg primitive. 
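GEN_ATOMIC_HELPER_FN in the next hunk implements operations that have no single host instruction (fetch-smin, fetch-umax, and friends) as a read/compute/compare-and-swap retry loop. A standalone sketch of the same loop using C11 atomics, shown for an unsigned 32-bit fetch-min; the explicit leading fence corresponds to the smp_mb() in the helper.

#include <stdatomic.h>
#include <stdint.h>

/* Atomically store min(*p, val) and return the previous value. */
uint32_t demo_fetch_umin32(_Atomic uint32_t *p, uint32_t val)
{
    uint32_t old;

    atomic_thread_fence(memory_order_seq_cst);   /* leading barrier (smp_mb) */
    old = atomic_load_explicit(p, memory_order_relaxed);
    for (;;) {
        uint32_t new = old < val ? old : val;
        /* On failure, 'old' is reloaded with the current value and we retry;
         * the successful CAS supplies the trailing barrier. */
        if (atomic_compare_exchange_strong(p, &old, new)) {
            return old;
        }
    }
}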
* @@ -173,14 +166,12 @@ GEN_ATOMIC_HELPER(xor_fetch) */ #define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ - ABI_TYPE xval EXTRA_ARGS) \ + ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \ { \ - ATOMIC_MMU_DECLS; \ - XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \ + PAGE_READ | PAGE_WRITE, retaddr); \ XDATA_TYPE cmp, old, new, val = xval; \ - uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, \ - ATOMIC_MMU_IDX); \ - atomic_trace_rmw_pre(env, addr, info); \ + atomic_trace_rmw_pre(env, addr, oi); \ smp_mb(); \ cmp = qatomic_read__nocheck(haddr); \ do { \ @@ -188,7 +179,7 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ cmp = qatomic_cmpxchg__nocheck(haddr, old, new); \ } while (cmp != old); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, info); \ + atomic_trace_rmw_post(env, addr, oi); \ return RET; \ } @@ -218,87 +209,79 @@ GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new) #endif ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, - ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS) + ABI_TYPE cmpv, ABI_TYPE newv, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_READ | PAGE_WRITE, retaddr); DATA_TYPE ret; - uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, false, - ATOMIC_MMU_IDX); - atomic_trace_rmw_pre(env, addr, info); + atomic_trace_rmw_pre(env, addr, oi); #if DATA_SIZE == 16 ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv)); #else ret = qatomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)); #endif ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, info); + atomic_trace_rmw_post(env, addr, oi); return BSWAP(ret); } #if DATA_SIZE >= 16 #if HAVE_ATOMIC128 -ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) +ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; - uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, false, - ATOMIC_MMU_IDX); + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_READ, retaddr); + DATA_TYPE val; - atomic_trace_ld_pre(env, addr, info); + atomic_trace_ld_pre(env, addr, oi); val = atomic16_read(haddr); ATOMIC_MMU_CLEANUP; - atomic_trace_ld_post(env, addr, info); + atomic_trace_ld_post(env, addr, oi); return BSWAP(val); } -void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, - ABI_TYPE val EXTRA_ARGS) +void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; - uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, true, - ATOMIC_MMU_IDX); + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_WRITE, retaddr); - val = BSWAP(val); - atomic_trace_st_pre(env, addr, info); + atomic_trace_st_pre(env, addr, oi); val = BSWAP(val); atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; - atomic_trace_st_post(env, addr, info); + atomic_trace_st_post(env, addr, oi); } #endif #else -ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, - ABI_TYPE val EXTRA_ARGS) +ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE *haddr = 
atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_READ | PAGE_WRITE, retaddr); ABI_TYPE ret; - uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, false, - ATOMIC_MMU_IDX); - atomic_trace_rmw_pre(env, addr, info); + atomic_trace_rmw_pre(env, addr, oi); ret = qatomic_xchg__nocheck(haddr, BSWAP(val)); ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, info); + atomic_trace_rmw_post(env, addr, oi); return BSWAP(ret); } #define GEN_ATOMIC_HELPER(X) \ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ - ABI_TYPE val EXTRA_ARGS) \ + ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \ { \ - ATOMIC_MMU_DECLS; \ - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \ + PAGE_READ | PAGE_WRITE, retaddr); \ DATA_TYPE ret; \ - uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, \ - false, ATOMIC_MMU_IDX); \ - atomic_trace_rmw_pre(env, addr, info); \ + atomic_trace_rmw_pre(env, addr, oi); \ ret = qatomic_##X(haddr, BSWAP(val)); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, info); \ + atomic_trace_rmw_post(env, addr, oi); \ return BSWAP(ret); \ } @@ -320,14 +303,12 @@ GEN_ATOMIC_HELPER(xor_fetch) */ #define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ - ABI_TYPE xval EXTRA_ARGS) \ + ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \ { \ - ATOMIC_MMU_DECLS; \ - XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \ + PAGE_READ | PAGE_WRITE, retaddr); \ XDATA_TYPE ldo, ldn, old, new, val = xval; \ - uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, \ - false, ATOMIC_MMU_IDX); \ - atomic_trace_rmw_pre(env, addr, info); \ + atomic_trace_rmw_pre(env, addr, oi); \ smp_mb(); \ ldn = qatomic_read__nocheck(haddr); \ do { \ @@ -335,7 +316,7 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ldn = qatomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new)); \ } while (ldo != ldn); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, info); \ + atomic_trace_rmw_post(env, addr, oi); \ return RET; \ } diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c index 12c1e3e9744..be6fe45aa5a 100644 --- a/accel/tcg/cpu-exec-common.c +++ b/accel/tcg/cpu-exec-common.c @@ -18,7 +18,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "sysemu/cpus.h" #include "sysemu/tcg.h" #include "exec/exec-all.h" diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 78da27ffaf9..6405c12a634 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -20,7 +20,9 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/qemu-print.h" -#include "cpu.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/type-helpers.h" #include "hw/core/tcg-cpu-ops.h" #include "trace.h" #include "disas/disas.h" @@ -30,8 +32,6 @@ #include "qemu/compiler.h" #include "qemu/timer.h" #include "qemu/rcu.h" -#include "exec/tb-hash.h" -#include "exec/tb-lookup.h" #include "exec/log.h" #include "exec/log_instr.h" #include "qemu/main-loop.h" @@ -42,6 +42,10 @@ #include "exec/cpu-all.h" #include "sysemu/cpu-timers.h" #include "sysemu/replay.h" +#include "sysemu/tcg.h" +#include "exec/helper-proto.h" +#include "tb-hash.h" +#include "tb-context.h" #include "internal.h" /* -icount align implementation. 
*/ @@ -146,6 +150,202 @@ static void init_delay_params(SyncClocks *sc, const CPUState *cpu) } #endif /* CONFIG USER ONLY */ +uint32_t curr_cflags(CPUState *cpu) +{ + uint32_t cflags = cpu->tcg_cflags; + + /* + * Record gdb single-step. We should be exiting the TB by raising + * EXCP_DEBUG, but to simplify other tests, disable chaining too. + * + * For singlestep and -d nochain, suppress goto_tb so that + * we can log -d cpu,exec after every TB. + */ + if (unlikely(cpu->singlestep_enabled)) { + cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | CF_SINGLE_STEP | 1; + } else if (singlestep) { + cflags |= CF_NO_GOTO_TB | 1; + } else if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + cflags |= CF_NO_GOTO_TB; + } + + return cflags; +} + +/* Might cause an exception, so have a longjmp destination ready */ +static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, + target_ulong cs_base, + target_ulong pcc_base, + target_ulong pcc_top, + uint32_t cheri_flags, + uint32_t flags, uint32_t cflags) +{ + TranslationBlock *tb; + uint32_t hash; + + /* we should never be trying to look up an INVALID tb */ + tcg_debug_assert(!(cflags & CF_INVALID)); + + hash = tb_jmp_cache_hash_func(pc); + tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]); + + if (likely(tb && + tb->pc == pc && + tb->cs_base == cs_base && + tb->pcc_base == pcc_base && + tb->pcc_top == pcc_top && + tb->cheri_flags == cheri_flags && + tb->flags == flags && + tb->trace_vcpu_dstate == *cpu->trace_dstate && + tb_cflags(tb) == cflags)) { + return tb; + } + tb = tb_htable_lookup(cpu, pc, cs_base, pcc_base, pcc_top, cheri_flags, + flags, cflags); + if (tb == NULL) { + return NULL; + } + qatomic_set(&cpu->tb_jmp_cache[hash], tb); + return tb; +} + +static inline void log_cpu_exec(target_ulong pc, CPUState *cpu, + const TranslationBlock *tb) +{ + if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) + && qemu_log_in_addr_range(pc)) { + + qemu_log_mask(CPU_LOG_EXEC, + "Trace %d: %p [" TARGET_FMT_lx "/" TARGET_FMT_lx + "/" TARGET_FMT_lx "-" TARGET_FMT_lx + "/%08x/%08x/%08x] %s\n", + cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc, + tb->pcc_base, tb->pcc_top, tb->cheri_flags, + tb->flags, tb->cflags, lookup_symbol(pc)); + +#if defined(DEBUG_DISAS) + if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) { + FILE *logfile = qemu_log_lock(); + int flags = 0; + + if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) { + flags |= CPU_DUMP_FPU; + } +#if defined(TARGET_I386) + flags |= CPU_DUMP_CCOP; +#endif + log_cpu_state(cpu, flags); + qemu_log_unlock(logfile); + } +#endif /* DEBUG_DISAS */ + } +} + +static bool check_for_breakpoints(CPUState *cpu, target_ulong pc, + uint32_t *cflags) +{ + CPUBreakpoint *bp; + bool match_page = false; + + if (likely(QTAILQ_EMPTY(&cpu->breakpoints))) { + return false; + } + + /* + * Singlestep overrides breakpoints. + * This requirement is visible in the record-replay tests, where + * we would fail to make forward progress in reverse-continue. + * + * TODO: gdb singlestep should only override gdb breakpoints, + * so that one could (gdb) singlestep into the guest kernel's + * architectural breakpoint handler. + */ + if (cpu->singlestep_enabled) { + return false; + } + + QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) { + /* + * If we have an exact pc match, trigger the breakpoint. + * Otherwise, note matches within the page. 
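tb_lookup() above is a two-level cache: a small direct-mapped per-CPU jump cache indexed by a hash of the guest pc, backed by the global hash table, with the jump cache refilled on a miss. A generic sketch of that pattern; the key is reduced to just pc here, whereas the real lookup also matches cs_base, pcc_base/pcc_top, the CHERI flags, flags and cflags, and the slow path is stubbed out so the example stands alone.

#include <stdint.h>
#include <stddef.h>

#define DEMO_JMP_CACHE_SIZE 4096          /* power of two, direct-mapped */

struct demo_tb { uint64_t pc; /* ... generated-code pointer, flags ... */ };

static struct demo_tb *demo_jmp_cache[DEMO_JMP_CACHE_SIZE];

/* Stub for the slow global hash-table lookup. */
static struct demo_tb *demo_htable_lookup(uint64_t pc)
{
    (void)pc;
    return NULL;
}

struct demo_tb *demo_tb_lookup(uint64_t pc)
{
    uint32_t hash = (uint32_t)(pc >> 2) & (DEMO_JMP_CACHE_SIZE - 1);
    struct demo_tb *tb = demo_jmp_cache[hash];

    if (tb && tb->pc == pc) {
        return tb;                        /* fast path: jump-cache hit */
    }
    tb = demo_htable_lookup(pc);          /* slow path: full keyed lookup */
    if (tb) {
        demo_jmp_cache[hash] = tb;        /* refill the direct-mapped cache */
    }
    return tb;
}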
+ */ + if (pc == bp->pc) { + bool match_bp = false; + + if (bp->flags & BP_GDB) { + match_bp = true; + } else if (bp->flags & BP_CPU) { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + CPUClass *cc = CPU_GET_CLASS(cpu); + assert(cc->tcg_ops->debug_check_breakpoint); + match_bp = cc->tcg_ops->debug_check_breakpoint(cpu); +#endif + } + + if (match_bp) { + cpu->exception_index = EXCP_DEBUG; + return true; + } + } else if (((pc ^ bp->pc) & TARGET_PAGE_MASK) == 0) { + match_page = true; + } + } + + /* + * Within the same page as a breakpoint, single-step, + * returning to helper_lookup_tb_ptr after each insn looking + * for the actual breakpoint. + * + * TODO: Perhaps better to record all of the TBs associated + * with a given virtual page that contains a breakpoint, and + * then invalidate them when a new overlapping breakpoint is + * set on the page. Non-overlapping TBs would not be + * invalidated, nor would any TB need to be invalidated as + * breakpoints are removed. + */ + if (match_page) { + *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | 1; + } + return false; +} + +/** + * helper_lookup_tb_ptr: quick check for next tb + * @env: current cpu state + * + * Look for an existing TB matching the current cpu state. + * If found, return the code pointer. If not found, return + * the tcg epilogue so that we return into cpu_tb_exec. + */ +const void *HELPER(lookup_tb_ptr)(CPUArchState *env) +{ + CPUState *cpu = env_cpu(env); + TranslationBlock *tb; + target_ulong cs_base, pcc_base = 0, pcc_top = 0, pc; + uint32_t cheri_flags = 0; + uint32_t flags, cflags; + + cpu_get_tb_cpu_state_ext(env, &pc, &cs_base, &pcc_base, &pcc_top, + &cheri_flags, &flags); + + cflags = curr_cflags(cpu); + if (check_for_breakpoints(cpu, pc, &cflags)) { + cpu_loop_exit(cpu); + } + + tb = tb_lookup(cpu, pc, cs_base, pcc_base, pcc_top, cheri_flags, flags, + cflags); + if (tb == NULL) { + return tcg_code_gen_epilogue; + } + + log_cpu_exec(pc, cpu, tb); + + return tb->tc.ptr; +} + /* Execute a TB, and fix up the CPU state afterwards if necessary */ /* * Disable CFI checks. @@ -164,28 +364,7 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit) TranslationBlock *last_tb; const void *tb_ptr = itb->tc.ptr; - qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc, - "Trace %d: %p [" TARGET_FMT_lx "/" TARGET_FMT_lx - "/" TARGET_FMT_lx "/%#x/%#x] %s\n", - cpu->cpu_index, itb->tc.ptr, itb->cs_base, itb->pc, - itb->cs_top, itb->cheri_flags, itb->flags, - lookup_symbol(itb->pc)); - -#if defined(DEBUG_DISAS) - if (qemu_loglevel_mask(CPU_LOG_TB_CPU) - && qemu_log_in_addr_range(itb->pc)) { - FILE *logfile = qemu_log_lock(); - int flags = 0; - if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) { - flags |= CPU_DUMP_FPU; - } -#if defined(TARGET_I386) - flags |= CPU_DUMP_CCOP; -#endif - log_cpu_state(cpu, flags); - qemu_log_unlock(logfile); - } -#endif /* DEBUG_DISAS */ + log_cpu_exec(itb->pc, cpu, itb); qemu_thread_jit_execute(); ret = tcg_qemu_tb_exec(env, tb_ptr); @@ -221,6 +400,17 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit) cc->set_pc(cpu, last_tb->pc); } } + + /* + * If gdb single-step, and we haven't raised another exception, + * raise a debug exception. Single-step with another exception + * is handled in cpu_handle_exception. 
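check_for_breakpoints() above distinguishes an exact pc hit, which raises EXCP_DEBUG immediately, from a breakpoint elsewhere in the same guest page, which instead forces one-instruction TBs so the exact hit cannot be compiled past. A compressed sketch of that decision, with made-up constants standing in for the real cflags bits and page size:

#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>

#define DEMO_PAGE_MASK      (~(uint64_t)0xfff)   /* assume 4 KiB guest pages */
#define DEMO_CF_COUNT_MASK  0x0000ffffu          /* placeholder for CF_COUNT_MASK */
#define DEMO_CF_NO_GOTO_TB  0x01000000u          /* placeholder for CF_NO_GOTO_TB */

/* Returns true if execution must stop for a debug exception at 'pc'. */
bool demo_check_breakpoints(uint64_t pc, const uint64_t *bps, size_t nbps,
                            uint32_t *cflags)
{
    bool same_page = false;

    for (size_t i = 0; i < nbps; i++) {
        if (bps[i] == pc) {
            return true;                           /* exact hit: EXCP_DEBUG */
        }
        if (((bps[i] ^ pc) & DEMO_PAGE_MASK) == 0) {
            same_page = true;
        }
    }
    if (same_page) {
        /* single-step through the page so the exact hit is seen */
        *cflags = (*cflags & ~DEMO_CF_COUNT_MASK) | DEMO_CF_NO_GOTO_TB | 1;
    }
    return false;
}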
+ */ + if (unlikely(cpu->singlestep_enabled) && cpu->exception_index == -1) { + cpu->exception_index = EXCP_DEBUG; + cpu_loop_exit(cpu); + } + return last_tb; } @@ -247,10 +437,9 @@ void cpu_exec_step_atomic(CPUState *cpu) { CPUArchState *env = (CPUArchState *)cpu->env_ptr; TranslationBlock *tb; - target_ulong cs_base, cs_top = 0, pc; + target_ulong cs_base, pcc_base = 0, pcc_top = 0, pc; uint32_t cheri_flags = 0; - uint32_t flags; - uint32_t cflags = (curr_cflags(cpu) & ~CF_PARALLEL) | 1; + uint32_t flags, cflags; int tb_exit; if (sigsetjmp(cpu->jmp_env, 0) == 0) { @@ -259,13 +448,27 @@ void cpu_exec_step_atomic(CPUState *cpu) g_assert(!cpu->running); cpu->running = true; - cpu_get_tb_cpu_state_6(env, &pc, &cs_base, &cs_top, &cheri_flags, &flags); - tb = tb_lookup(cpu, pc, cs_base, cs_top, cheri_flags, flags, cflags); + cpu_get_tb_cpu_state_ext(env, &pc, &cs_base, &pcc_base, &pcc_top, + &cheri_flags, &flags); + + cflags = curr_cflags(cpu); + /* Execute in a serial context. */ + cflags &= ~CF_PARALLEL; + /* After 1 insn, return and release the exclusive lock. */ + cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1; + /* + * No need to check_for_breakpoints here. + * We only arrive in cpu_exec_step_atomic after beginning execution + * of an insn that includes an atomic operation we can't handle. + * Any breakpoint for this insn will have been recognized earlier. + */ + tb = tb_lookup(cpu, pc, cs_base, pcc_base, pcc_top, cheri_flags, flags, + cflags); if (tb == NULL) { mmap_lock(); - tb = tb_gen_code(cpu, pc, cs_base, cs_top, cheri_flags, flags, - cflags); + tb = tb_gen_code(cpu, pc, cs_base, pcc_base, pcc_top, cheri_flags, + flags, cflags); mmap_unlock(); } @@ -280,6 +483,7 @@ void cpu_exec_step_atomic(CPUState *cpu) * memory. */ #ifndef CONFIG_SOFTMMU + clear_helper_retaddr(); tcg_debug_assert(!have_mmap_lock()); #endif if (qemu_mutex_iothread_locked()) { @@ -289,7 +493,6 @@ void cpu_exec_step_atomic(CPUState *cpu) qemu_plugin_disable_mem_helpers(cpu); } - /* * As we start the exclusive region before codegen we must still * be in the region if we longjump out of either the codegen or @@ -303,7 +506,8 @@ void cpu_exec_step_atomic(CPUState *cpu) struct tb_desc { target_ulong pc; target_ulong cs_base; - target_ulong cs_top; + target_ulong pcc_base; + target_ulong pcc_top; CPUArchState *env; tb_page_addr_t phys_page1; uint32_t cheri_flags; @@ -318,8 +522,9 @@ static bool tb_lookup_cmp(const void *p, const void *d) const struct tb_desc *desc = d; if (tb->pc == desc->pc && tb->page_addr[0] == desc->phys_page1 && - tb->cs_base == desc->cs_base && tb->cs_top == desc->cs_top && - tb->cheri_flags == desc->cheri_flags && tb->flags == desc->flags && + tb->cs_base == desc->cs_base && tb->pcc_base == desc->pcc_base && + tb->pcc_top == desc->pcc_top && tb->cheri_flags == desc->cheri_flags && + tb->flags == desc->flags && tb->trace_vcpu_dstate == desc->trace_vcpu_dstate && tb_cflags(tb) == desc->cflags) { /* check next page if needed */ @@ -340,9 +545,9 @@ static bool tb_lookup_cmp(const void *p, const void *d) } TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, - target_ulong cs_base, target_ulong cs_top, - uint32_t cheri_flags, uint32_t flags, - uint32_t cflags) + target_ulong cs_base, target_ulong pcc_base, + target_ulong pcc_top, uint32_t cheri_flags, + uint32_t flags, uint32_t cflags) { tb_page_addr_t phys_pc; struct tb_desc desc; @@ -350,7 +555,8 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, desc.env = (CPUArchState *)cpu->env_ptr; desc.cs_base = cs_base; - 
desc.cs_top = cs_top; + desc.pcc_base = pcc_base; + desc.pcc_top = pcc_top; desc.cheri_flags = cheri_flags; desc.flags = flags; desc.cflags = cflags; @@ -419,46 +625,11 @@ static inline void tb_add_jump(TranslationBlock *tb, int n, return; } -static inline TranslationBlock *tb_find(CPUState *cpu, - TranslationBlock *last_tb, - int tb_exit, uint32_t cflags) -{ - CPUArchState *env = (CPUArchState *)cpu->env_ptr; - TranslationBlock *tb; - target_ulong cs_base, cs_top = 0, pc; - uint32_t cheri_flags = 0; - uint32_t flags; - - cpu_get_tb_cpu_state_6(env, &pc, &cs_base, &cs_top, &cheri_flags, &flags); - - tb = tb_lookup(cpu, pc, cs_base, cs_top, cheri_flags, flags, cflags); - if (tb == NULL) { - mmap_lock(); - tb = tb_gen_code(cpu, pc, cs_base, cs_top, cheri_flags, flags, cflags); - mmap_unlock(); - /* We add the TB in the virtual pc hash table for the fast lookup */ - qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb); - } -#ifndef CONFIG_USER_ONLY - /* We don't take care of direct jumps when address mapping changes in - * system emulation. So it's not safe to make a direct jump to a TB - * spanning two pages because the mapping for the second page can change. - */ - if (tb->page_addr[1] != -1) { - last_tb = NULL; - } -#endif - /* See if we can patch the calling TB. */ - if (last_tb) { - tb_add_jump(last_tb, tb_exit, tb); - } - return tb; -} - static inline bool cpu_handle_halt(CPUState *cpu) { +#ifndef CONFIG_USER_ONLY if (cpu->halted) { -#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY) +#if defined(TARGET_I386) if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { X86CPU *x86_cpu = X86_CPU(cpu); qemu_mutex_lock_iothread(); @@ -466,13 +637,14 @@ static inline bool cpu_handle_halt(CPUState *cpu) cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); qemu_mutex_unlock_iothread(); } -#endif +#endif /* TARGET_I386 */ if (!cpu_has_work(cpu)) { return true; } cpu->halted = 0; } +#endif /* !CONFIG_USER_ONLY */ return false; } @@ -520,8 +692,8 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) loop */ #if defined(TARGET_I386) CPUClass *cc = CPU_GET_CLASS(cpu); - cc->tcg_ops->do_interrupt(cpu); -#endif + cc->tcg_ops->fake_user_interrupt(cpu); +#endif /* TARGET_I386 */ *ret = cpu->exception_index; cpu->exception_index = -1; return true; @@ -554,6 +726,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) return false; } +#ifndef CONFIG_USER_ONLY /* * CPU_INTERRUPT_POLL is a virtual event which gets converted into a * "real" interrupt event later. It does not need to be recorded for @@ -567,11 +740,19 @@ static inline bool need_replay_interrupt(int interrupt_request) return true; #endif } +#endif /* !CONFIG_USER_ONLY */ static inline bool cpu_handle_interrupt(CPUState *cpu, TranslationBlock **last_tb) { - CPUClass *cc = CPU_GET_CLASS(cpu); + /* + * If we have requested custom cflags with CF_NOIRQ we should + * skip checking here. Any pending interrupts will get picked up + * by the next TB we execute under normal cflags. + */ + if (cpu->cflags_next_tb != -1 && cpu->cflags_next_tb & CF_NOIRQ) { + return false; + } /* Clear the interrupt flag now since we're processing * cpu->interrupt_request and cpu->exit_request. 
@@ -594,6 +775,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, qemu_mutex_unlock_iothread(); return true; } +#if !defined(CONFIG_USER_ONLY) if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) { /* Do nothing */ } else if (interrupt_request & CPU_INTERRUPT_HALT) { @@ -622,12 +804,14 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, qemu_mutex_unlock_iothread(); return true; } -#endif +#endif /* !TARGET_I386 */ /* The target hook has 3 exit conditions: False when the interrupt isn't processed, True when it is, and we should restart on a new TB, and via longjmp via cpu_loop_exit. */ else { + CPUClass *cc = CPU_GET_CLASS(cpu); + if (cc->tcg_ops->cpu_exec_interrupt && cc->tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) { if (need_replay_interrupt(interrupt_request)) { @@ -646,6 +830,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, * reload the 'interrupt_request' value */ interrupt_request = cpu->interrupt_request; } +#endif /* !CONFIG_USER_ONLY */ if (interrupt_request & CPU_INTERRUPT_EXITTB) { cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB; /* ensure that no TB jump will be modified as @@ -703,7 +888,7 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, /* Ensure global icount has gone forward */ icount_update(cpu); /* Refill decrementer and continue execution. */ - insns_left = MIN(CF_COUNT_MASK, cpu->icount_budget); + insns_left = MIN(0xffff, cpu->icount_budget); cpu_neg(cpu)->icount_decr.u16.low = insns_left; cpu->icount_extra = cpu->icount_budget - insns_left; @@ -712,7 +897,9 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, * execute we need to ensure we find/generate a TB with exactly * insns_left instructions in it. */ - if (!cpu->icount_extra && insns_left > 0 && insns_left < tb->icount) { + if (insns_left > 0 && insns_left < tb->icount) { + assert(insns_left <= CF_COUNT_MASK); + assert(cpu->icount_extra == 0); cpu->cflags_next_tb = (tb->cflags & ~CF_COUNT_MASK) | insns_left; } #endif @@ -722,7 +909,6 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, int cpu_exec(CPUState *cpu) { - CPUClass *cc = CPU_GET_CLASS(cpu); int ret; SyncClocks sc = { 0 }; @@ -756,22 +942,18 @@ int cpu_exec(CPUState *cpu) * that we support, but is still unfixed in clang: * https://bugs.llvm.org/show_bug.cgi?id=21183 * - * Reload essential local variables here for those compilers. + * Reload an essential local variable here for those compilers. * Newer versions of gcc would complain about this code (-Wclobbered), * so we only perform the workaround for clang. */ cpu = current_cpu; - cc = CPU_GET_CLASS(cpu); #else - /* - * Non-buggy compilers preserve these locals; assert that - * they have the correct value. - */ + /* Non-buggy compilers preserve this; assert the correct value. */ g_assert(cpu == current_cpu); - g_assert(cc == CPU_GET_CLASS(cpu)); #endif #ifndef CONFIG_SOFTMMU + clear_helper_retaddr(); tcg_debug_assert(!have_mmap_lock()); #endif if (qemu_mutex_iothread_locked()) { @@ -788,22 +970,64 @@ int cpu_exec(CPUState *cpu) int tb_exit = 0; while (!cpu_handle_interrupt(cpu, &last_tb)) { - uint32_t cflags = cpu->cflags_next_tb; TranslationBlock *tb; - - /* When requested, use an exact setting for cflags for the next - execution. This is used for icount, precise smc, and stop- - after-access watchpoints. Since this request should never - have CF_INVALID set, -1 is a convenient invalid value that - does not require tcg headers for cpu_common_reset. 
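The icount refill above splits the remaining budget between a 16-bit decrementer and an "extra" counter, and, when fewer instructions remain than the next TB contains, asks for a TB trimmed to exactly that count via the cflags count field. A small arithmetic sketch with hypothetical numbers; the cflags constant is a placeholder:

#include <stdint.h>
#include <stdio.h>

#define DEMO_CF_COUNT_MASK 0xffffu        /* placeholder for CF_COUNT_MASK */

int main(void)
{
    uint64_t budget    = 5;               /* hypothetical instructions left */
    uint32_t tb_icount = 12;              /* instructions in the next TB */

    /* Large budgets are capped at 0xffff in the 16-bit decrementer and the
     * remainder carried in icount_extra; here the budget fits outright. */
    uint32_t insns_left = budget < 0xffff ? (uint32_t)budget : 0xffff;
    uint64_t extra      = budget - insns_left;

    printf("decrementer=%u extra=%llu\n", insns_left, (unsigned long long)extra);

    if (insns_left > 0 && insns_left < tb_icount) {
        /* Request a TB of exactly 'insns_left' instructions. */
        uint32_t cflags = /* (tb->cflags & ~CF_COUNT_MASK) | */ insns_left;
        printf("trimmed TB: %u instructions\n", cflags & DEMO_CF_COUNT_MASK);
    }
    return 0;
}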
*/ + target_ulong cs_base, pcc_base = 0, pcc_top = 0, pc; + uint32_t cheri_flags = 0; + uint32_t flags, cflags; + + cpu_get_tb_cpu_state_ext(cpu->env_ptr, &pc, &cs_base, &pcc_base, + &pcc_top, &cheri_flags, &flags); + + /* + * When requested, use an exact setting for cflags for the next + * execution. This is used for icount, precise smc, and stop- + * after-access watchpoints. Since this request should never + * have CF_INVALID set, -1 is a convenient invalid value that + * does not require tcg headers for cpu_common_reset. + */ + cflags = cpu->cflags_next_tb; if (cflags == -1) { cflags = curr_cflags(cpu); } else { cpu->cflags_next_tb = -1; } - tb = tb_find(cpu, last_tb, tb_exit, cflags); + if (check_for_breakpoints(cpu, pc, &cflags)) { + break; + } + + tb = tb_lookup(cpu, pc, cs_base, pcc_base, pcc_top, cheri_flags, + flags, cflags); + if (tb == NULL) { + mmap_lock(); + tb = tb_gen_code(cpu, pc, cs_base, pcc_base, pcc_top, cheri_flags, + flags, cflags); + mmap_unlock(); + /* + * We add the TB in the virtual pc hash table + * for the fast lookup + */ + qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb); + } + +#ifndef CONFIG_USER_ONLY + /* + * We don't take care of direct jumps when address mapping + * changes in system emulation. So it's not safe to make a + * direct jump to a TB spanning two pages because the mapping + * for the second page can change. + */ + if (tb->page_addr[1] != -1) { + last_tb = NULL; + } +#endif + /* See if we can patch the calling TB. */ + if (last_tb) { + tb_add_jump(last_tb, tb_exit, tb); + } + cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit); + /* Try to align the host and virtual clocks if the guest is in advance */ align_clocks(&sc, cpu); @@ -847,23 +1071,52 @@ void tcg_exec_unrealizefn(CPUState *cpu) #ifndef CONFIG_USER_ONLY -void dump_drift_info(void) +void dump_drift_info(GString *buf) { if (!icount_enabled()) { return; } - qemu_printf("Host - Guest clock %"PRIi64" ms\n", - (cpu_get_clock() - icount_get()) / SCALE_MS); + g_string_append_printf(buf, "Host - Guest clock %"PRIi64" ms\n", + (cpu_get_clock() - icount_get()) / SCALE_MS); if (icount_align_option) { - qemu_printf("Max guest delay %"PRIi64" ms\n", - -max_delay / SCALE_MS); - qemu_printf("Max guest advance %"PRIi64" ms\n", - max_advance / SCALE_MS); + g_string_append_printf(buf, "Max guest delay %"PRIi64" ms\n", + -max_delay / SCALE_MS); + g_string_append_printf(buf, "Max guest advance %"PRIi64" ms\n", + max_advance / SCALE_MS); } else { - qemu_printf("Max guest delay NA\n"); - qemu_printf("Max guest advance NA\n"); + g_string_append_printf(buf, "Max guest delay NA\n"); + g_string_append_printf(buf, "Max guest advance NA\n"); + } +} + +HumanReadableText *qmp_x_query_jit(Error **errp) +{ + g_autoptr(GString) buf = g_string_new(""); + + if (!tcg_enabled()) { + error_setg(errp, "JIT information is only available with accel=tcg"); + return NULL; } + + dump_exec_info(buf); + dump_drift_info(buf); + + return human_readable_text_from_str(buf); +} + +HumanReadableText *qmp_x_query_opcount(Error **errp) +{ + g_autoptr(GString) buf = g_string_new(""); + + if (!tcg_enabled()) { + error_setg(errp, "Opcode count information is only available with accel=tcg"); + return NULL; + } + + dump_opcount_info(buf); + + return human_readable_text_from_str(buf); } #endif /* !CONFIG_USER_ONLY */ diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index e7bfaac8c43..e8d3429c26d 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -19,14 +19,11 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" -#include 
"cpu.h" #include "hw/core/tcg-cpu-ops.h" #include "exec/exec-all.h" #include "exec/memory.h" -#include "exec/address-spaces.h" #include "exec/cpu_ldst.h" #include "exec/cputlb.h" -#include "exec/tb-hash.h" #include "exec/memory-internal.h" #include "exec/ram_addr.h" #include "tcg/tcg.h" @@ -37,12 +34,13 @@ #include "qemu/atomic128.h" #include "exec/translate-all.h" #include "trace/trace-root.h" -#include "trace/mem.h" #include "cheri_tagmem.h" +#include "tb-hash.h" #include "internal.h" #ifdef CONFIG_PLUGIN #include "qemu/plugin-memory.h" #endif +#include "tcg/tcg-ldst.h" /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ /* #define DEBUG_TLB */ @@ -710,8 +708,9 @@ void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr) tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS); } -static void tlb_flush_page_bits_locked(CPUArchState *env, int midx, - target_ulong page, unsigned bits) +static void tlb_flush_range_locked(CPUArchState *env, int midx, + target_ulong addr, target_ulong len, + unsigned bits) { CPUTLBDesc *d = &env_tlb(env)->d[midx]; CPUTLBDescFast *f = &env_tlb(env)->f[midx]; @@ -721,20 +720,26 @@ static void tlb_flush_page_bits_locked(CPUArchState *env, int midx, * If @bits is smaller than the tlb size, there may be multiple entries * within the TLB; otherwise all addresses that match under @mask hit * the same TLB entry. - * * TODO: Perhaps allow bits to be a few bits less than the size. * For now, just flush the entire TLB. + * + * If @len is larger than the tlb size, then it will take longer to + * test all of the entries in the TLB than it will to flush it all. */ - if (mask < f->mask) { + if (mask < f->mask || len > f->mask) { tlb_debug("forcing full flush midx %d (" - TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", - midx, page, mask); + TARGET_FMT_lx "/" TARGET_FMT_lx "+" TARGET_FMT_lx ")\n", + midx, addr, mask, len); tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); return; } - /* Check if we need to flush due to large pages. */ - if ((page & d->large_page_mask) == d->large_page_addr) { + /* + * Check if we need to flush due to large pages. + * Because large_page_mask contains all 1's from the msb, + * we only need to test the end of the range. 
+ */ + if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) { tlb_debug("forcing full flush midx %d (" TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", midx, d->large_page_addr, d->large_page_mask); @@ -742,85 +747,67 @@ static void tlb_flush_page_bits_locked(CPUArchState *env, int midx, return; } - if (tlb_flush_entry_mask_locked(tlb_entry(env, midx, page), page, mask)) { - tlb_n_used_entries_dec(env, midx); + for (target_ulong i = 0; i < len; i += TARGET_PAGE_SIZE) { + target_ulong page = addr + i; + CPUTLBEntry *entry = tlb_entry(env, midx, page); + + if (tlb_flush_entry_mask_locked(entry, page, mask)) { + tlb_n_used_entries_dec(env, midx); + } + tlb_flush_vtlb_page_mask_locked(env, midx, page, mask); } - tlb_flush_vtlb_page_mask_locked(env, midx, page, mask); } typedef struct { target_ulong addr; + target_ulong len; uint16_t idxmap; uint16_t bits; -} TLBFlushPageBitsByMMUIdxData; +} TLBFlushRangeData; -static void -tlb_flush_page_bits_by_mmuidx_async_0(CPUState *cpu, - TLBFlushPageBitsByMMUIdxData d) +static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu, + TLBFlushRangeData d) { CPUArchState *env = cpu->env_ptr; int mmu_idx; assert_cpu_is_self(cpu); - tlb_debug("page addr:" TARGET_FMT_lx "/%u mmu_map:0x%x\n", - d.addr, d.bits, d.idxmap); + tlb_debug("range:" TARGET_FMT_lx "/%u+" TARGET_FMT_lx " mmu_map:0x%x\n", + d.addr, d.bits, d.len, d.idxmap); qemu_spin_lock(&env_tlb(env)->c.lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { if ((d.idxmap >> mmu_idx) & 1) { - tlb_flush_page_bits_locked(env, mmu_idx, d.addr, d.bits); + tlb_flush_range_locked(env, mmu_idx, d.addr, d.len, d.bits); } } qemu_spin_unlock(&env_tlb(env)->c.lock); - tb_flush_jmp_cache(cpu, d.addr); -} - -static bool encode_pbm_to_runon(run_on_cpu_data *out, - TLBFlushPageBitsByMMUIdxData d) -{ - /* We need 6 bits to hold to hold @bits up to 63. */ - if (d.idxmap <= MAKE_64BIT_MASK(0, TARGET_PAGE_BITS - 6)) { - *out = RUN_ON_CPU_TARGET_PTR(d.addr | (d.idxmap << 6) | d.bits); - return true; + for (target_ulong i = 0; i < d.len; i += TARGET_PAGE_SIZE) { + tb_flush_jmp_cache(cpu, d.addr + i); } - return false; -} - -static TLBFlushPageBitsByMMUIdxData -decode_runon_to_pbm(run_on_cpu_data data) -{ - target_ulong addr_map_bits = (target_ulong) data.target_ptr; - return (TLBFlushPageBitsByMMUIdxData){ - .addr = addr_map_bits & TARGET_PAGE_MASK, - .idxmap = (addr_map_bits & ~TARGET_PAGE_MASK) >> 6, - .bits = addr_map_bits & 0x3f - }; } -static void tlb_flush_page_bits_by_mmuidx_async_1(CPUState *cpu, - run_on_cpu_data runon) +static void tlb_flush_range_by_mmuidx_async_1(CPUState *cpu, + run_on_cpu_data data) { - tlb_flush_page_bits_by_mmuidx_async_0(cpu, decode_runon_to_pbm(runon)); -} - -static void tlb_flush_page_bits_by_mmuidx_async_2(CPUState *cpu, - run_on_cpu_data data) -{ - TLBFlushPageBitsByMMUIdxData *d = data.host_ptr; - tlb_flush_page_bits_by_mmuidx_async_0(cpu, *d); + TLBFlushRangeData *d = data.host_ptr; + tlb_flush_range_by_mmuidx_async_0(cpu, *d); g_free(d); } -void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr, - uint16_t idxmap, unsigned bits) +void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr, + target_ulong len, uint16_t idxmap, + unsigned bits) { - TLBFlushPageBitsByMMUIdxData d; - run_on_cpu_data runon; + TLBFlushRangeData d; - /* If all bits are significant, this devolves to tlb_flush_page. */ - if (bits >= TARGET_LONG_BITS) { + /* + * If all bits are significant, and len is small, + * this devolves to tlb_flush_page. 
+ */ + if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) { tlb_flush_page_by_mmuidx(cpu, addr, idxmap); return; } @@ -832,34 +819,38 @@ void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr, /* This should already be page aligned */ d.addr = addr & TARGET_PAGE_MASK; + d.len = len; d.idxmap = idxmap; d.bits = bits; if (qemu_cpu_is_self(cpu)) { - tlb_flush_page_bits_by_mmuidx_async_0(cpu, d); - } else if (encode_pbm_to_runon(&runon, d)) { - async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); + tlb_flush_range_by_mmuidx_async_0(cpu, d); } else { - TLBFlushPageBitsByMMUIdxData *p - = g_new(TLBFlushPageBitsByMMUIdxData, 1); - /* Otherwise allocate a structure, freed by the worker. */ - *p = d; - async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_2, + TLBFlushRangeData *p = g_memdup(&d, sizeof(d)); + async_run_on_cpu(cpu, tlb_flush_range_by_mmuidx_async_1, RUN_ON_CPU_HOST_PTR(p)); } } -void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu, - target_ulong addr, - uint16_t idxmap, - unsigned bits) +void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr, + uint16_t idxmap, unsigned bits) { - TLBFlushPageBitsByMMUIdxData d; - run_on_cpu_data runon; + tlb_flush_range_by_mmuidx(cpu, addr, TARGET_PAGE_SIZE, idxmap, bits); +} + +void tlb_flush_range_by_mmuidx_all_cpus(CPUState *src_cpu, + target_ulong addr, target_ulong len, + uint16_t idxmap, unsigned bits) +{ + TLBFlushRangeData d; + CPUState *dst_cpu; - /* If all bits are significant, this devolves to tlb_flush_page. */ - if (bits >= TARGET_LONG_BITS) { + /* + * If all bits are significant, and len is small, + * this devolves to tlb_flush_page. + */ + if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) { tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap); return; } @@ -871,40 +862,45 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu, /* This should already be page aligned */ d.addr = addr & TARGET_PAGE_MASK; + d.len = len; d.idxmap = idxmap; d.bits = bits; - if (encode_pbm_to_runon(&runon, d)) { - flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); - } else { - CPUState *dst_cpu; - TLBFlushPageBitsByMMUIdxData *p; - - /* Allocate a separate data block for each destination cpu. */ - CPU_FOREACH(dst_cpu) { - if (dst_cpu != src_cpu) { - p = g_new(TLBFlushPageBitsByMMUIdxData, 1); - *p = d; - async_run_on_cpu(dst_cpu, - tlb_flush_page_bits_by_mmuidx_async_2, - RUN_ON_CPU_HOST_PTR(p)); - } + /* Allocate a separate data block for each destination cpu. */ + CPU_FOREACH(dst_cpu) { + if (dst_cpu != src_cpu) { + TLBFlushRangeData *p = g_memdup(&d, sizeof(d)); + async_run_on_cpu(dst_cpu, + tlb_flush_range_by_mmuidx_async_1, + RUN_ON_CPU_HOST_PTR(p)); } } - tlb_flush_page_bits_by_mmuidx_async_0(src_cpu, d); + tlb_flush_range_by_mmuidx_async_0(src_cpu, d); } -void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu, - target_ulong addr, - uint16_t idxmap, - unsigned bits) +void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu, + target_ulong addr, + uint16_t idxmap, unsigned bits) +{ + tlb_flush_range_by_mmuidx_all_cpus(src_cpu, addr, TARGET_PAGE_SIZE, + idxmap, bits); +} + +void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu, + target_ulong addr, + target_ulong len, + uint16_t idxmap, + unsigned bits) { - TLBFlushPageBitsByMMUIdxData d; - run_on_cpu_data runon; + TLBFlushRangeData d, *p; + CPUState *dst_cpu; - /* If all bits are significant, this devolves to tlb_flush_page. 
*/ - if (bits >= TARGET_LONG_BITS) { + /* + * If all bits are significant, and len is small, + * this devolves to tlb_flush_page. + */ + if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) { tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap); return; } @@ -916,32 +912,31 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu, /* This should already be page aligned */ d.addr = addr & TARGET_PAGE_MASK; + d.len = len; d.idxmap = idxmap; d.bits = bits; - if (encode_pbm_to_runon(&runon, d)) { - flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); - async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, - runon); - } else { - CPUState *dst_cpu; - TLBFlushPageBitsByMMUIdxData *p; - - /* Allocate a separate data block for each destination cpu. */ - CPU_FOREACH(dst_cpu) { - if (dst_cpu != src_cpu) { - p = g_new(TLBFlushPageBitsByMMUIdxData, 1); - *p = d; - async_run_on_cpu(dst_cpu, tlb_flush_page_bits_by_mmuidx_async_2, - RUN_ON_CPU_HOST_PTR(p)); - } + /* Allocate a separate data block for each destination cpu. */ + CPU_FOREACH(dst_cpu) { + if (dst_cpu != src_cpu) { + p = g_memdup(&d, sizeof(d)); + async_run_on_cpu(dst_cpu, tlb_flush_range_by_mmuidx_async_1, + RUN_ON_CPU_HOST_PTR(p)); } - - p = g_new(TLBFlushPageBitsByMMUIdxData, 1); - *p = d; - async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_2, - RUN_ON_CPU_HOST_PTR(p)); } + + p = g_memdup(&d, sizeof(d)); + async_safe_run_on_cpu(src_cpu, tlb_flush_range_by_mmuidx_async_1, + RUN_ON_CPU_HOST_PTR(p)); +} + +void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu, + target_ulong addr, + uint16_t idxmap, + unsigned bits) +{ + tlb_flush_range_by_mmuidx_all_cpus_synced(src_cpu, addr, TARGET_PAGE_SIZE, + idxmap, bits); } /* update the TLBs so that writes to code in the virtual page 'addr' @@ -1804,7 +1799,7 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; } else { data->is_io = false; - data->v.ram.hostaddr = addr + tlbe->addend; + data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend); } return true; } else { @@ -1818,18 +1813,22 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, #endif -/* Probe for a read-modify-write atomic operation. Do not allow unaligned - * operations, or io operations to proceed. Return the host address. */ +/* + * Probe for an atomic operation. Do not allow unaligned operations, + * or io operations to proceed. Return the host address. + * + * @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE. + */ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, int size, int prot, + uintptr_t retaddr) { size_t mmu_idx = get_mmuidx(oi); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = tlb_addr_write(tlbe); MemOp mop = get_memop(oi); int a_bits = get_alignment_bits(mop); - int s_bits = mop & MO_SIZE; + uintptr_t index; + CPUTLBEntry *tlbe; + target_ulong tlb_addr; void *hostaddr; /* Adjust the given return address. */ @@ -1843,7 +1842,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, } /* Enforce qemu required alignment. */ - if (unlikely(addr & ((1 << s_bits) - 1))) { + if (unlikely(addr & (size - 1))) { /* We get here if guest alignment was not requested, or was not enforced by cpu_unaligned_access above. 
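For orientation, a minimal sketch of how a target might drive the ranged-flush entry points introduced above; the function and the values below are hypothetical and not part of this patch:

/* Illustrative only: flush a four-page region for MMU index 1 on all
 * vCPUs and wait for completion.  With bits == TARGET_LONG_BITS every
 * address bit is significant, and a one-page len would fall back to
 * the plain tlb_flush_page_by_mmuidx_* path. */
static void example_invalidate_region(CPUState *cs, target_ulong base)
{
    uint16_t idxmap = 1 << 1;                /* hypothetical: MMU index 1 */
    target_ulong len = 4 * TARGET_PAGE_SIZE; /* hypothetical: four pages */

    tlb_flush_range_by_mmuidx_all_cpus_synced(cs, base & TARGET_PAGE_MASK,
                                              len, idxmap, TARGET_LONG_BITS);
}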
We might widen the access and emulate, but for now @@ -1851,15 +1850,45 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, goto stop_the_world; } + index = tlb_index(env, mmu_idx, addr); + tlbe = tlb_entry(env, mmu_idx, addr); + /* Check TLB entry and enforce page permissions. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(addr_write, addr)) { - tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - tlbe = tlb_entry(env, mmu_idx, addr); + if (prot & PAGE_WRITE) { + tlb_addr = tlb_addr_write(tlbe); + if (!tlb_hit(tlb_addr, addr)) { + if (!VICTIM_TLB_HIT(addr_write, addr)) { + tlb_fill(env_cpu(env), addr, size, + MMU_DATA_STORE, mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + tlbe = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; + } + + /* Let the guest notice RMW on a write-only page. */ + if ((prot & PAGE_READ) && + unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { + tlb_fill(env_cpu(env), addr, size, + MMU_DATA_LOAD, mmu_idx, retaddr); + /* + * Since we don't support reads and writes to different addresses, + * and we do have the proper page loaded for write, this shouldn't + * ever return. But just in case, handle via stop-the-world. + */ + goto stop_the_world; + } + } else /* if (prot & PAGE_READ) */ { + tlb_addr = tlbe->addr_read; + if (!tlb_hit(tlb_addr, addr)) { + if (!VICTIM_TLB_HIT(addr_write, addr)) { + tlb_fill(env_cpu(env), addr, size, + MMU_DATA_LOAD, mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + tlbe = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = tlbe->addr_read & ~TLB_INVALID_MASK; } - tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; } /* Notice an IO access or a needs-MMU-lookup access */ @@ -1869,20 +1898,10 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, goto stop_the_world; } - /* Let the guest notice RMW on a write-only page. */ - if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { - tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD, - mmu_idx, retaddr); - /* Since we don't support reads and writes to different addresses, - and we do have the proper page loaded for write, this shouldn't - ever return. But just in case, handle via stop-the-world. */ - goto stop_the_world; - } - hostaddr = (void *)((uintptr_t)addr + tlbe->addend); if (unlikely(tlb_addr & TLB_NOTDIRTY)) { - notdirty_write(env_cpu(env), addr, 1 << s_bits, + notdirty_write(env_cpu(env), addr, size, &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr); } @@ -1892,6 +1911,25 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, cpu_loop_exit_atomic(env_cpu(env), retaddr); } +/* + * Verify that we have passed the correct MemOp to the correct function. + * + * In the case of the helper_*_mmu functions, we will have done this by + * using the MemOp to look up the helper during code generation. + * + * In the case of the cpu_*_mmu functions, this is up to the caller. + * We could present one function to target code, and dispatch based on + * the MemOp, but so far we have worked hard to avoid an indirect function + * call along the memory path. 
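As background for validate_memop() below, a MemOpIdx is simply a MemOp packed together with an MMU index; the sketch that follows (the function is invented purely for illustration) shows the round trip the helpers rely on:

/* Illustration only: pack and unpack a MemOpIdx the way the helpers do. */
static void example_memopidx_roundtrip(int mmu_idx)
{
    MemOpIdx oi = make_memop_idx(MO_LEUL, mmu_idx);

    MemOp op = get_memop(oi);   /* yields MO_LEUL again */
    int idx = get_mmuidx(oi);   /* yields mmu_idx again */

    /* validate_memop(oi, MO_LEUL) would pass for this oi: it compares
     * only the MO_SIZE and MO_BSWAP bits of the packed MemOp. */
    (void)op;
    (void)idx;
}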
+ */ +static void validate_memop(MemOpIdx oi, MemOp expected) +{ +#ifdef CONFIG_DEBUG_TCG + MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP); + assert(have == expected); +#endif +} + /* * Load Helpers * @@ -1902,7 +1940,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, */ typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); + MemOpIdx oi, uintptr_t retaddr); static inline uint64_t QEMU_ALWAYS_INLINE load_memop(const void *haddr, MemOp op) @@ -1964,10 +2002,10 @@ static void check_address_space_wrap(CPUArchState *env, target_ulong addr, #endif static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); + MemOpIdx oi, uintptr_t retaddr); static inline uint64_t QEMU_ALWAYS_INLINE -load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, +load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr, MemOp op, bool code_read, FullLoadHelper *full_load) { @@ -2103,79 +2141,86 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, */ static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_UB); return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu); } tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return full_ldub_mmu(env, addr, oi, retaddr); } static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_LEUW); return load_helper(env, addr, oi, retaddr, MO_LEUW, false, full_le_lduw_mmu); } tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return full_le_lduw_mmu(env, addr, oi, retaddr); } static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_BEUW); return load_helper(env, addr, oi, retaddr, MO_BEUW, false, full_be_lduw_mmu); } tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return full_be_lduw_mmu(env, addr, oi, retaddr); } static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_LEUL); return load_helper(env, addr, oi, retaddr, MO_LEUL, false, full_le_ldul_mmu); } tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return full_le_ldul_mmu(env, addr, oi, retaddr); } static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_BEUL); return load_helper(env, addr, oi, retaddr, MO_BEUL, false, full_be_ldul_mmu); } tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return full_be_ldul_mmu(env, addr, oi, retaddr); } uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_LEQ); return load_helper(env, 
addr, oi, retaddr, MO_LEQ, false, helper_le_ldq_mmu); } uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_BEQ); return load_helper(env, addr, oi, retaddr, MO_BEQ, false, helper_be_ldq_mmu); } @@ -2187,31 +2232,31 @@ uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr); } tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr); } tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr); } tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr); } tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr); } @@ -2224,238 +2269,97 @@ tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, /* * Log a target memory load via cpu_ldst. */ -#define log_instr_load_int(env, addr, value, op) \ - helper_qemu_log_instr_load64(env, addr, value, op) +#define log_instr_load_int(env, addr, value, oi) \ + helper_qemu_log_instr_load64(env, addr, value, oi) #else -#define log_instr_load_int(env, addr, val, op) ((void)0) +#define log_instr_load_int(env, addr, val, oi) ((void)0) #endif static inline uint64_t cpu_load_helper_no_log(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t retaddr, - MemOp op, + MemOpIdx oi, uintptr_t retaddr, FullLoadHelper *full_load) { - uint16_t meminfo; - TCGMemOpIdx oi; uint64_t ret; - meminfo = trace_mem_get_info(op, mmu_idx, false); - trace_guest_mem_before_exec(env_cpu(env), addr, meminfo); - - op &= ~MO_SIGN; - oi = make_memop_idx(op, mmu_idx); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); ret = full_load(env, addr, oi, retaddr); - - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo); - + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t retaddr, MemOp op, + MemOpIdx oi, uintptr_t retaddr, FullLoadHelper *full_load) { - uint64_t ret = - cpu_load_helper_no_log(env, addr, mmu_idx, retaddr, op, full_load); - log_instr_load_int(env, addr, ret, op); + uint64_t ret = cpu_load_helper_no_log(env, addr, oi, retaddr, full_load); + log_instr_load_int(env, addr, ret, oi); return ret; } -uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu); -} - -int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB, - full_ldub_mmu); -} - -uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu); -} - -int 
cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW, - full_be_lduw_mmu); -} - -uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu); -} - -uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu); -} - -uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu); -} - -int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW, - full_le_lduw_mmu); -} - -uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu); -} - -uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu); -} - -uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) +uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra) { - return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_load_helper(env, addr, oi, ra, full_ldub_mmu); } -int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_load_helper(env, addr, oi, ra, full_be_lduw_mmu); } -uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) +uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_load_helper(env, addr, oi, ra, full_be_ldul_mmu); } -int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_load_helper(env, addr, oi, ra, helper_be_ldq_mmu); } -uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) +uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_load_helper(env, addr, oi, ra, full_le_lduw_mmu); } -uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) +uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_load_helper(env, addr, oi, ra, full_le_ldul_mmu); } -uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) +uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); -} - -int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) -{ - return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false),
retaddr); -} - -uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) -{ - return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); -} - -uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) -{ - return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu); } #ifdef TARGET_CHERI /* - * TODO(am2419): Ugly hack to avoid logging memory accesses that load capability + * Hack to avoid logging memory accesses that load capability * components as normal memory accesses. The caller is responsible for logging. */ -target_ulong cpu_ld_cap_word_ra(CPUArchState *env, target_ulong ptr, +target_ulong cpu_ld_cap_word_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) { FullLoadHelper *full_load; - MemOp op; + MemOpIdx oi; #if TARGET_LONG_BITS == 32 - op = MO_TEUW; + oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, false)); full_load = MO_TE == MO_LE ? helper_le_lduw_mmu : helper_be_lduw_mmu; #elif TARGET_LONG_BITS == 64 - op = MO_TEQ; + oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, false)); full_load = MO_TE == MO_LE ? helper_le_ldq_mmu : helper_be_ldq_mmu; #else #error "Unhandled target long width" #endif - return cpu_load_helper_no_log( - env, ptr, cpu_mmu_index(env, false), retaddr, MO_TEQ, - MO_TE == MO_LE ? helper_le_ldq_mmu : helper_be_ldq_mmu); + return cpu_load_helper_no_log(env, ptr, oi, retaddr, full_load); } #endif -uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldub_data_ra(env, ptr, 0); -} - -int cpu_ldsb_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldsb_data_ra(env, ptr, 0); -} - -uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_lduw_be_data_ra(env, ptr, 0); -} - -int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldsw_be_data_ra(env, ptr, 0); -} - -uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldl_be_data_ra(env, ptr, 0); -} - -uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldq_be_data_ra(env, ptr, 0); -} - -uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_lduw_le_data_ra(env, ptr, 0); -} - -int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldsw_le_data_ra(env, ptr, 0); -} - -uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldl_le_data_ra(env, ptr, 0); -} - -uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldq_le_data_ra(env, ptr, 0); -} - /* * Store Helpers */ @@ -2495,6 +2399,9 @@ store_memop(void *haddr, uint64_t val, MemOp op) } } +static void full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr); + static void __attribute__((noinline)) store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val, uintptr_t retaddr, size_t size, uintptr_t mmu_idx, @@ -2504,7 +2411,7 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val, uintptr_t index, index2; CPUTLBEntry *entry, *entry2; target_ulong page2, tlb_addr, tlb_addr2; - TCGMemOpIdx oi; + MemOpIdx oi; size_t size2; int i; @@ -2561,20 +2468,20 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val, for (i = 0; i < size; ++i) { /* Big-endian extract. 
*/ uint8_t val8 = val >> (((size - 1) * 8) - (i * 8)); - helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr); + full_stb_mmu(env, addr + i, val8, oi, retaddr); } } else { for (i = 0; i < size; ++i) { /* Little-endian extract. */ uint8_t val8 = val >> (i * 8); - helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr); + full_stb_mmu(env, addr + i, val8, oi, retaddr); } } } static inline void QEMU_ALWAYS_INLINE store_helper(CPUArchState *env, target_ulong addr, uint64_t val, - TCGMemOpIdx oi, uintptr_t retaddr, MemOp op) + MemOpIdx oi, uintptr_t retaddr, MemOp op) { uintptr_t mmu_idx = get_mmuidx(oi); uintptr_t index = tlb_index(env, mmu_idx, addr); @@ -2670,46 +2577,83 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, store_memop(haddr, val, op); } -void __attribute__((noinline)) -helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, - TCGMemOpIdx oi, uintptr_t retaddr) +static void __attribute__((noinline)) +full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_UB); store_helper(env, addr, val, oi, retaddr, MO_UB); } -void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, - TCGMemOpIdx oi, uintptr_t retaddr) +void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, + MemOpIdx oi, uintptr_t retaddr) +{ + full_stb_mmu(env, addr, val, oi, retaddr); +} + +static void full_le_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_LEUW); store_helper(env, addr, val, oi, retaddr, MO_LEUW); } -void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, - TCGMemOpIdx oi, uintptr_t retaddr) +void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + MemOpIdx oi, uintptr_t retaddr) +{ + full_le_stw_mmu(env, addr, val, oi, retaddr); +} + +static void full_be_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_BEUW); store_helper(env, addr, val, oi, retaddr, MO_BEUW); } -void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, - TCGMemOpIdx oi, uintptr_t retaddr) +void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + MemOpIdx oi, uintptr_t retaddr) { + full_be_stw_mmu(env, addr, val, oi, retaddr); +} + +static void full_le_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr) +{ + validate_memop(oi, MO_LEUL); store_helper(env, addr, val, oi, retaddr, MO_LEUL); } -void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, - TCGMemOpIdx oi, uintptr_t retaddr) +void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + MemOpIdx oi, uintptr_t retaddr) +{ + full_le_stl_mmu(env, addr, val, oi, retaddr); +} + +static void full_be_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_BEUL); store_helper(env, addr, val, oi, retaddr, MO_BEUL); } +void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + MemOpIdx oi, uintptr_t retaddr) +{ + full_be_stl_mmu(env, addr, val, oi, retaddr); +} + void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_LEQ); store_helper(env, addr, val, oi, retaddr, MO_LEQ); } void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - TCGMemOpIdx oi, 
uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_BEQ); store_helper(env, addr, val, oi, retaddr, MO_BEQ); } @@ -2727,177 +2671,102 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, #define log_instr_store_int(env, addr, val, op) ((void)0) #endif -static inline void QEMU_ALWAYS_INLINE -cpu_store_helper_no_log(CPUArchState *env, target_ulong addr, uint64_t val, - int mmu_idx, uintptr_t retaddr, MemOp op) -{ - TCGMemOpIdx oi; - uint16_t meminfo; - - meminfo = trace_mem_get_info(op, mmu_idx, true); - trace_guest_mem_before_exec(env_cpu(env), addr, meminfo); - - oi = make_memop_idx(op, mmu_idx); - store_helper(env, addr, val, oi, retaddr, op); - - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo); -} - -static inline void QEMU_ALWAYS_INLINE -cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val, - int mmu_idx, uintptr_t retaddr, MemOp op) -{ - cpu_store_helper_no_log(env, addr, val, mmu_idx, retaddr, op); - log_instr_store_int(env, addr, val, op); -} - -void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) -{ - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB); -} - -void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) -{ - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW); -} +typedef void FullStoreHelper(CPUArchState *env, target_ulong addr, + uint64_t val, MemOpIdx oi, uintptr_t retaddr); -void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) +static inline void cpu_store_helper_no_log(CPUArchState *env, target_ulong addr, + uint64_t val, MemOpIdx oi, + uintptr_t ra, + FullStoreHelper *full_store) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + full_store(env, addr, val, oi, ra); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } -void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, - int mmu_idx, uintptr_t retaddr) +static inline void cpu_store_helper(CPUArchState *env, target_ulong addr, + uint64_t val, MemOpIdx oi, uintptr_t ra, + FullStoreHelper *full_store) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ); + cpu_store_helper_no_log(env, addr, val, oi, ra, full_store); + log_instr_store_int(env, addr, val, oi); } -void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) +void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, + MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW); + cpu_store_helper(env, addr, val, oi, retaddr, full_stb_mmu); } -void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) +void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL); + cpu_store_helper(env, addr, val, oi, retaddr, full_be_stw_mmu); } -void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, - int mmu_idx, uintptr_t retaddr) +void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ); + cpu_store_helper(env, addr, val, oi, retaddr, full_be_stl_mmu); } -void cpu_stb_data_ra(CPUArchState *env, target_ulong 
ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr) { - cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_store_helper(env, addr, val, oi, retaddr, helper_be_stq_mmu); } -void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + MemOpIdx oi, uintptr_t retaddr) { - cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_store_helper(env, addr, val, oi, retaddr, full_le_stw_mmu); } -void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + MemOpIdx oi, uintptr_t retaddr) { - cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_store_helper(env, addr, val, oi, retaddr, full_le_stl_mmu); } -void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr, - uint64_t val, uintptr_t retaddr) +void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr) { - cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); -} - -void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr, - uint32_t val, uintptr_t retaddr) -{ - cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); -} - -void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr, - uint32_t val, uintptr_t retaddr) -{ - cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); -} - -void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr, - uint64_t val, uintptr_t retaddr) -{ - cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu); } #ifdef TARGET_CHERI /* - * TODO(am2419): Ugly hack to avoid logging memory accesses that store capability + * Hack to avoid logging memory accesses that store capability * components as normal memory accesses. The caller is responsible for logging. */ void cpu_st_cap_word_ra(CPUArchState *env, target_ulong ptr, target_ulong val, uintptr_t retaddr) { - MemOp op; + FullStoreHelper *full_store; + MemOpIdx oi; #if TARGET_LONG_BITS == 32 - op = MO_TEUW; + oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, false)); + full_store = MO_TE == MO_LE ? full_le_stw_mmu : full_be_stw_mmu; #elif TARGET_LONG_BITS == 64 - op = MO_TEQ; + oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, false)); + full_store = MO_TE == MO_LE ? 
helper_le_stq_mmu : helper_be_stq_mmu; #else #error "Unhandled target long width" #endif - cpu_store_helper_no_log(env, ptr, val, cpu_mmu_index(env, false), retaddr, op); + cpu_store_helper_no_log(env, ptr, val, oi, retaddr, full_store); } #endif -void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val) -{ - cpu_stb_data_ra(env, ptr, val, 0); -} - -void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val) -{ - cpu_stw_be_data_ra(env, ptr, val, 0); -} - -void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val) -{ - cpu_stl_be_data_ra(env, ptr, val, 0); -} - -void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val) -{ - cpu_stq_be_data_ra(env, ptr, val, 0); -} - -void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val) -{ - cpu_stw_le_data_ra(env, ptr, val, 0); -} - -void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val) -{ - cpu_stl_le_data_ra(env, ptr, val, 0); -} - -void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val) -{ - cpu_stq_le_data_ra(env, ptr, val, 0); -} +#include "ldst_common.c.inc" -/* First set of helpers allows passing in of OI and RETADDR. This makes - them callable from other helpers. */ +/* + * First set of functions passes in OI and RETADDR. + * This makes them callable from other helpers. + */ -#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr #define ATOMIC_NAME(X) \ - HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) -#define ATOMIC_MMU_DECLS -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr) + glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu) + #define ATOMIC_MMU_CLEANUP #define ATOMIC_MMU_IDX get_mmuidx(oi) @@ -2922,76 +2791,52 @@ void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val) #include "atomic_template.h" #endif -/* Second set of helpers are directly callable from TCG as helpers. */ - -#undef EXTRA_ARGS -#undef ATOMIC_NAME -#undef ATOMIC_MMU_LOOKUP -#define EXTRA_ARGS , TCGMemOpIdx oi -#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC()) - -#define DATA_SIZE 1 -#include "atomic_template.h" - -#define DATA_SIZE 2 -#include "atomic_template.h" - -#define DATA_SIZE 4 -#include "atomic_template.h" - -#ifdef CONFIG_ATOMIC64 -#define DATA_SIZE 8 -#include "atomic_template.h" -#endif -#undef ATOMIC_MMU_IDX - /* Code access functions. 
*/ static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code); } uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr) { - TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true)); + MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true)); return full_ldub_code(env, addr, oi, 0); } static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code); } uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr) { - TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true)); + MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true)); return full_lduw_code(env, addr, oi, 0); } static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code); } uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr) { - TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true)); + MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true)); return full_ldl_code(env, addr, oi, 0); } static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code); } uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr) { - TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true)); + MemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true)); return full_ldq_code(env, addr, oi, 0); } diff --git a/accel/tcg/hmp.c b/accel/tcg/hmp.c new file mode 100644 index 00000000000..d2ea3526553 --- /dev/null +++ b/accel/tcg/hmp.c @@ -0,0 +1,15 @@ +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "exec/exec-all.h" +#include "monitor/monitor.h" +#include "sysemu/tcg.h" + +static void hmp_tcg_register(void) +{ + monitor_register_hmp_info_hrt("jit", qmp_x_query_jit); + monitor_register_hmp_info_hrt("opcount", qmp_x_query_opcount); +} + +type_init(hmp_tcg_register); diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h index 301d9682ee3..d1c4e3ac330 100644 --- a/accel/tcg/internal.h +++ b/accel/tcg/internal.h @@ -12,9 +12,12 @@ #include "exec/exec-all.h" TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, - target_ulong cs_base, target_ulong cs_top, - uint32_t cheri_flags, uint32_t flags, int cflags); + target_ulong cs_base, target_ulong pcc_base, + target_ulong pcc_top, uint32_t cheri_flags, + uint32_t flags, int cflags); void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr); +void page_init(void); +void tb_htable_init(void); #endif /* ACCEL_TCG_INTERNAL_H */ diff --git a/accel/tcg/ldst_common.c.inc b/accel/tcg/ldst_common.c.inc new file mode 100644 index 00000000000..bfefb275e7e --- /dev/null +++ b/accel/tcg/ldst_common.c.inc @@ -0,0 +1,307 @@ +/* + * Routines common to user and system emulation of load/store. + * + * Copyright (c) 2003 Fabrice Bellard + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + return cpu_ldb_mmu(env, addr, oi, ra); +} + +int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return (int8_t)cpu_ldub_mmuidx_ra(env, addr, mmu_idx, ra); +} + +uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx); + return cpu_ldw_be_mmu(env, addr, oi, ra); +} + +int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return (int16_t)cpu_lduw_be_mmuidx_ra(env, addr, mmu_idx, ra); +} + +uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx); + return cpu_ldl_be_mmu(env, addr, oi, ra); +} + +uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_BEQ | MO_UNALN, mmu_idx); + return cpu_ldq_be_mmu(env, addr, oi, ra); +} + +uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx); + return cpu_ldw_le_mmu(env, addr, oi, ra); +} + +int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return (int16_t)cpu_lduw_le_mmuidx_ra(env, addr, mmu_idx, ra); +} + +uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx); + return cpu_ldl_le_mmu(env, addr, oi, ra); +} + +uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_LEQ | MO_UNALN, mmu_idx); + return cpu_ldq_le_mmu(env, addr, oi, ra); +} + +void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + cpu_stb_mmu(env, addr, val, oi, ra); +} + +void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx); + cpu_stw_be_mmu(env, addr, val, oi, ra); +} + +void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx); + cpu_stl_be_mmu(env, addr, val, oi, ra); +} + +void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_BEQ | MO_UNALN, mmu_idx); + cpu_stq_be_mmu(env, addr, val, oi, ra); +} + +void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx); + cpu_stw_le_mmu(env, addr, val, oi, ra); +} + +void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx); + cpu_stl_le_mmu(env, addr, val, oi, ra); +} + +void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_LEQ | MO_UNALN, mmu_idx); + cpu_stq_le_mmu(env, addr, val, oi, ra); +} + +/*--------------------------*/ + +uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return cpu_ldub_mmuidx_ra(env, addr, 
cpu_mmu_index(env, false), ra); +} + +int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return (int8_t)cpu_ldub_data_ra(env, addr, ra); +} + +uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return cpu_lduw_be_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra); +} + +int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return (int16_t)cpu_lduw_be_data_ra(env, addr, ra); +} + +uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return cpu_ldl_be_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra); +} + +uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return cpu_ldq_be_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra); +} + +uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return cpu_lduw_le_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra); +} + +int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return (int16_t)cpu_lduw_le_data_ra(env, addr, ra); +} + +uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return cpu_ldl_le_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra); +} + +uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return cpu_ldq_le_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra); +} + +void cpu_stb_data_ra(CPUArchState *env, abi_ptr addr, + uint32_t val, uintptr_t ra) +{ + cpu_stb_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra); +} + +void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr addr, + uint32_t val, uintptr_t ra) +{ + cpu_stw_be_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra); +} + +void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr addr, + uint32_t val, uintptr_t ra) +{ + cpu_stl_be_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra); +} + +void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr addr, + uint64_t val, uintptr_t ra) +{ + cpu_stq_be_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra); +} + +void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr addr, + uint32_t val, uintptr_t ra) +{ + cpu_stw_le_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra); +} + +void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr addr, + uint32_t val, uintptr_t ra) +{ + cpu_stl_le_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra); +} + +void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr addr, + uint64_t val, uintptr_t ra) +{ + cpu_stq_le_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra); +} + +/*--------------------------*/ + +uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr addr) +{ + return cpu_ldub_data_ra(env, addr, 0); +} + +int cpu_ldsb_data(CPUArchState *env, abi_ptr addr) +{ + return (int8_t)cpu_ldub_data(env, addr); +} + +uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr addr) +{ + return cpu_lduw_be_data_ra(env, addr, 0); +} + +int cpu_ldsw_be_data(CPUArchState *env, abi_ptr addr) +{ + return (int16_t)cpu_lduw_be_data(env, addr); +} + +uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr addr) +{ + return cpu_ldl_be_data_ra(env, addr, 0); +} + +uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr addr) +{ + return cpu_ldq_be_data_ra(env, addr, 0); +} + +uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr addr) +{ + return cpu_lduw_le_data_ra(env, addr, 0); +} + +int cpu_ldsw_le_data(CPUArchState *env, abi_ptr addr) +{ + return (int16_t)cpu_lduw_le_data(env, addr); +} + +uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr addr) +{ + return cpu_ldl_le_data_ra(env, addr, 0); 
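The layering in this new file is mechanical: each *_data helper defers to *_data_ra with ra = 0, which defers to *_mmuidx_ra with the current MMU index, which builds a MemOpIdx and calls the cpu_*_mmu core. A hypothetical caller, shown only to make that equivalence concrete:

/* Illustration only: for a normal RAM mapping these three calls perform
 * the same little-endian 32-bit load, differing only in how much of the
 * MemOpIdx and return address the caller supplies explicitly. */
static uint32_t example_equivalent_loads(CPUArchState *env, abi_ptr addr,
                                         uintptr_t ra)
{
    int mmu_idx = cpu_mmu_index(env, false);
    MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx);

    uint32_t a = cpu_ldl_le_mmu(env, addr, oi, ra);
    uint32_t b = cpu_ldl_le_mmuidx_ra(env, addr, mmu_idx, ra);
    uint32_t c = cpu_ldl_le_data_ra(env, addr, ra);

    g_assert(a == b && b == c);
    return a;
}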
+} + +uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr addr) +{ + return cpu_ldq_le_data_ra(env, addr, 0); +} + +void cpu_stb_data(CPUArchState *env, abi_ptr addr, uint32_t val) +{ + cpu_stb_data_ra(env, addr, val, 0); +} + +void cpu_stw_be_data(CPUArchState *env, abi_ptr addr, uint32_t val) +{ + cpu_stw_be_data_ra(env, addr, val, 0); +} + +void cpu_stl_be_data(CPUArchState *env, abi_ptr addr, uint32_t val) +{ + cpu_stl_be_data_ra(env, addr, val, 0); +} + +void cpu_stq_be_data(CPUArchState *env, abi_ptr addr, uint64_t val) +{ + cpu_stq_be_data_ra(env, addr, val, 0); +} + +void cpu_stw_le_data(CPUArchState *env, abi_ptr addr, uint32_t val) +{ + cpu_stw_le_data_ra(env, addr, val, 0); +} + +void cpu_stl_le_data(CPUArchState *env, abi_ptr addr, uint32_t val) +{ + cpu_stl_le_data_ra(env, addr, val, 0); +} + +void cpu_stq_le_data(CPUArchState *env, abi_ptr addr, uint64_t val) +{ + cpu_stq_le_data_ra(env, addr, val, 0); +} diff --git a/accel/tcg/log_instr.c b/accel/tcg/log_instr.c index edffa5d87a0..445505612d2 100644 --- a/accel/tcg/log_instr.c +++ b/accel/tcg/log_instr.c @@ -992,7 +992,7 @@ void qemu_log_instr_cap_int(CPUArchState *env, const char *reg_name, #endif static inline void qemu_log_instr_mem_int(CPUArchState *env, target_ulong addr, - int flags, TCGMemOpIdx oi, + int flags, MemOpIdx oi, target_ulong value) { cpu_log_instr_info_t *iinfo = get_cpu_log_instr_info(env); @@ -1005,13 +1005,13 @@ static inline void qemu_log_instr_mem_int(CPUArchState *env, target_ulong addr, g_array_append_val(iinfo->mem, m); } -void qemu_log_instr_ld_int(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, +void qemu_log_instr_ld_int(CPUArchState *env, target_ulong addr, MemOpIdx oi, target_ulong value) { qemu_log_instr_mem_int(env, addr, LMI_LD, oi, value); } -void qemu_log_instr_st_int(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, +void qemu_log_instr_st_int(CPUArchState *env, target_ulong addr, MemOpIdx oi, target_ulong value) { qemu_log_instr_mem_int(env, addr, LMI_ST, oi, value); @@ -1616,28 +1616,28 @@ void helper_qemu_log_instr_commit(CPUArchState *env) } void helper_qemu_log_instr_load64(CPUArchState *env, target_ulong addr, - uint64_t value, TCGMemOpIdx oi) + uint64_t value, MemOpIdx oi) { if (qemu_log_instr_enabled(env)) qemu_log_instr_mem_int(env, addr, LMI_LD, oi, value); } void helper_qemu_log_instr_store64(CPUArchState *env, target_ulong addr, - uint64_t value, TCGMemOpIdx oi) + uint64_t value, MemOpIdx oi) { if (qemu_log_instr_enabled(env)) qemu_log_instr_mem_int(env, addr, LMI_ST, oi, value); } void helper_qemu_log_instr_load32(CPUArchState *env, target_ulong addr, - uint32_t value, TCGMemOpIdx oi) + uint32_t value, MemOpIdx oi) { if (qemu_log_instr_enabled(env)) qemu_log_instr_mem_int(env, addr, LMI_LD, oi, (uint64_t)value); } void helper_qemu_log_instr_store32(CPUArchState *env, target_ulong addr, - uint32_t value, TCGMemOpIdx oi) + uint32_t value, MemOpIdx oi) { if (qemu_log_instr_enabled(env)) qemu_log_instr_mem_int(env, addr, LMI_ST, oi, (uint64_t)value); diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build index 7b1ba57a7b1..3e2f52c939a 100644 --- a/accel/tcg/meson.build +++ b/accel/tcg/meson.build @@ -10,14 +10,18 @@ tcg_ss.add(files( )) tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c')) tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c')) -tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c'), libdl]) +tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c')]) specific_ss.add_all(when: 'CONFIG_TCG', if_true: 
tcg_ss) specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files( 'cputlb.c', + 'hmp.c', +)) + +tcg_module_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files( 'tcg-accel-ops.c', 'tcg-accel-ops-mttcg.c', 'tcg-accel-ops-icount.c', - 'tcg-accel-ops-rr.c' + 'tcg-accel-ops-rr.c', )) specific_ss.add(when: ['CONFIG_TCG_LOG_INSTR', 'CONFIG_TCG'], if_true: files('log_instr.c')) diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c index c3dc3effe7e..22d95fe1c35 100644 --- a/accel/tcg/plugin-gen.c +++ b/accel/tcg/plugin-gen.c @@ -43,10 +43,8 @@ * CPU's index into a TCG temp, since the first callback did it already. */ #include "qemu/osdep.h" -#include "cpu.h" #include "tcg/tcg.h" #include "tcg/tcg-op.h" -#include "trace/mem.h" #include "exec/exec-all.h" #include "exec/plugin-gen.h" #include "exec/translator.h" @@ -161,15 +159,10 @@ static void gen_empty_mem_helper(void) tcg_temp_free_ptr(ptr); } -static inline -void gen_plugin_cb_start(enum plugin_gen_from from, - enum plugin_gen_cb type, unsigned wr) +static void gen_plugin_cb_start(enum plugin_gen_from from, + enum plugin_gen_cb type, unsigned wr) { - TCGOp *op; - tcg_gen_plugin_cb_start(from, type, wr); - op = tcg_last_op(); - QSIMPLEQ_INSERT_TAIL(&tcg_ctx->plugin_ops, op, plugin_link); } static void gen_wrapped(enum plugin_gen_from from, @@ -180,7 +173,7 @@ static void gen_wrapped(enum plugin_gen_from from, tcg_gen_plugin_cb_end(); } -static inline void plugin_gen_empty_callback(enum plugin_gen_from from) +static void plugin_gen_empty_callback(enum plugin_gen_from from) { switch (from) { case PLUGIN_GEN_AFTER_INSN: @@ -213,9 +206,9 @@ static void gen_mem_wrapped(enum plugin_gen_cb type, const union mem_gen_fn *f, TCGv addr, uint32_t info, bool is_mem) { - int wr = !!(info & TRACE_MEM_ST); + enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info); - gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, type, wr); + gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, type, rw); if (is_mem) { f->mem_fn(addr, info); } else { @@ -386,7 +379,7 @@ static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op) } static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func, - void *func, unsigned tcg_flags, int *cb_idx) + void *func, int *cb_idx) { /* copy all ops until the call */ do { @@ -413,7 +406,7 @@ static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func, tcg_debug_assert(i < MAX_OPC_PARAM_ARGS); } op->args[*cb_idx] = (uintptr_t)func; - op->args[*cb_idx + 1] = tcg_flags; + op->args[*cb_idx + 1] = (*begin_op)->args[*cb_idx + 1]; return op; } @@ -440,7 +433,7 @@ static TCGOp *append_udata_cb(const struct qemu_plugin_dyn_cb *cb, /* call */ op = copy_call(&begin_op, op, HELPER(plugin_vcpu_udata_cb), - cb->f.vcpu_udata, cb->tcg_flags, cb_idx); + cb->f.vcpu_udata, cb_idx); return op; } @@ -491,7 +484,7 @@ static TCGOp *append_mem_cb(const struct qemu_plugin_dyn_cb *cb, if (type == PLUGIN_GEN_CB_MEM) { /* call */ op = copy_call(&begin_op, op, HELPER(plugin_vcpu_mem_cb), - cb->f.vcpu_udata, cb->tcg_flags, cb_idx); + cb->f.vcpu_udata, cb_idx); } return op; @@ -514,9 +507,8 @@ static bool op_rw(const TCGOp *op, const struct qemu_plugin_dyn_cb *cb) return !!(cb->rw & (w + 1)); } -static inline -void inject_cb_type(const GArray *cbs, TCGOp *begin_op, inject_fn inject, - op_ok_fn ok) +static void inject_cb_type(const GArray *cbs, TCGOp *begin_op, + inject_fn inject, op_ok_fn ok) { TCGOp *end_op; TCGOp *op; @@ -710,62 +702,6 @@ static void plugin_gen_disable_mem_helper(const struct qemu_plugin_tb *ptb, inject_mem_disable_helper(insn, 
begin_op); } -static void plugin_inject_cb(const struct qemu_plugin_tb *ptb, TCGOp *begin_op, - int insn_idx) -{ - enum plugin_gen_from from = begin_op->args[0]; - enum plugin_gen_cb type = begin_op->args[1]; - - switch (from) { - case PLUGIN_GEN_FROM_TB: - switch (type) { - case PLUGIN_GEN_CB_UDATA: - plugin_gen_tb_udata(ptb, begin_op); - return; - case PLUGIN_GEN_CB_INLINE: - plugin_gen_tb_inline(ptb, begin_op); - return; - default: - g_assert_not_reached(); - } - case PLUGIN_GEN_FROM_INSN: - switch (type) { - case PLUGIN_GEN_CB_UDATA: - plugin_gen_insn_udata(ptb, begin_op, insn_idx); - return; - case PLUGIN_GEN_CB_INLINE: - plugin_gen_insn_inline(ptb, begin_op, insn_idx); - return; - case PLUGIN_GEN_ENABLE_MEM_HELPER: - plugin_gen_enable_mem_helper(ptb, begin_op, insn_idx); - return; - default: - g_assert_not_reached(); - } - case PLUGIN_GEN_FROM_MEM: - switch (type) { - case PLUGIN_GEN_CB_MEM: - plugin_gen_mem_regular(ptb, begin_op, insn_idx); - return; - case PLUGIN_GEN_CB_INLINE: - plugin_gen_mem_inline(ptb, begin_op, insn_idx); - return; - default: - g_assert_not_reached(); - } - case PLUGIN_GEN_AFTER_INSN: - switch (type) { - case PLUGIN_GEN_DISABLE_MEM_HELPER: - plugin_gen_disable_mem_helper(ptb, begin_op, insn_idx); - return; - default: - g_assert_not_reached(); - } - default: - g_assert_not_reached(); - } -} - /* #define DEBUG_PLUGIN_GEN_OPS */ static void pr_ops(void) { @@ -823,21 +759,95 @@ static void pr_ops(void) static void plugin_gen_inject(const struct qemu_plugin_tb *plugin_tb) { TCGOp *op; - int insn_idx; + int insn_idx = -1; pr_ops(); - insn_idx = -1; - QSIMPLEQ_FOREACH(op, &tcg_ctx->plugin_ops, plugin_link) { - enum plugin_gen_from from = op->args[0]; - enum plugin_gen_cb type = op->args[1]; - - tcg_debug_assert(op->opc == INDEX_op_plugin_cb_start); - /* ENABLE_MEM_HELPER is the first callback of an instruction */ - if (from == PLUGIN_GEN_FROM_INSN && - type == PLUGIN_GEN_ENABLE_MEM_HELPER) { + + QTAILQ_FOREACH(op, &tcg_ctx->ops, link) { + switch (op->opc) { + case INDEX_op_insn_start: insn_idx++; + break; + case INDEX_op_plugin_cb_start: + { + enum plugin_gen_from from = op->args[0]; + enum plugin_gen_cb type = op->args[1]; + + switch (from) { + case PLUGIN_GEN_FROM_TB: + { + g_assert(insn_idx == -1); + + switch (type) { + case PLUGIN_GEN_CB_UDATA: + plugin_gen_tb_udata(plugin_tb, op); + break; + case PLUGIN_GEN_CB_INLINE: + plugin_gen_tb_inline(plugin_tb, op); + break; + default: + g_assert_not_reached(); + } + break; + } + case PLUGIN_GEN_FROM_INSN: + { + g_assert(insn_idx >= 0); + + switch (type) { + case PLUGIN_GEN_CB_UDATA: + plugin_gen_insn_udata(plugin_tb, op, insn_idx); + break; + case PLUGIN_GEN_CB_INLINE: + plugin_gen_insn_inline(plugin_tb, op, insn_idx); + break; + case PLUGIN_GEN_ENABLE_MEM_HELPER: + plugin_gen_enable_mem_helper(plugin_tb, op, insn_idx); + break; + default: + g_assert_not_reached(); + } + break; + } + case PLUGIN_GEN_FROM_MEM: + { + g_assert(insn_idx >= 0); + + switch (type) { + case PLUGIN_GEN_CB_MEM: + plugin_gen_mem_regular(plugin_tb, op, insn_idx); + break; + case PLUGIN_GEN_CB_INLINE: + plugin_gen_mem_inline(plugin_tb, op, insn_idx); + break; + default: + g_assert_not_reached(); + } + + break; + } + case PLUGIN_GEN_AFTER_INSN: + { + g_assert(insn_idx >= 0); + + switch (type) { + case PLUGIN_GEN_DISABLE_MEM_HELPER: + plugin_gen_disable_mem_helper(plugin_tb, op, insn_idx); + break; + default: + g_assert_not_reached(); + } + break; + } + default: + g_assert_not_reached(); + } + break; + } + default: + /* plugins don't care about any 
other ops */ + break; } - plugin_inject_cb(plugin_tb, op, insn_idx); } pr_ops(); } @@ -850,7 +860,6 @@ bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_onl if (test_bit(QEMU_PLUGIN_EV_VCPU_TB_TRANS, cpu->plugin_mask)) { ret = true; - QSIMPLEQ_INIT(&tcg_ctx->plugin_ops); ptb->vaddr = tb->pc; ptb->vaddr2 = -1; get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1); @@ -867,9 +876,8 @@ void plugin_gen_insn_start(CPUState *cpu, const DisasContextBase *db) struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb; struct qemu_plugin_insn *pinsn; - pinsn = qemu_plugin_tb_insn_get(ptb); + pinsn = qemu_plugin_tb_insn_get(ptb, db->pc_next); tcg_ctx->plugin_insn = pinsn; - pinsn->vaddr = db->pc_next; plugin_gen_empty_callback(PLUGIN_GEN_FROM_INSN); /* diff --git a/accel/tcg/plugin-helpers.h b/accel/tcg/plugin-helpers.h index 1916ee79206..9829abe4a95 100644 --- a/accel/tcg/plugin-helpers.h +++ b/accel/tcg/plugin-helpers.h @@ -1,5 +1,4 @@ #ifdef CONFIG_PLUGIN -/* Note: no TCG flags because those are overwritten later */ -DEF_HELPER_2(plugin_vcpu_udata_cb, void, i32, ptr) -DEF_HELPER_4(plugin_vcpu_mem_cb, void, i32, i32, i64, ptr) +DEF_HELPER_FLAGS_2(plugin_vcpu_udata_cb, TCG_CALL_NO_RWG, void, i32, ptr) +DEF_HELPER_FLAGS_4(plugin_vcpu_mem_cb, TCG_CALL_NO_RWG, void, i32, i32, i64, ptr) #endif diff --git a/include/exec/tb-context.h b/accel/tcg/tb-context.h similarity index 96% rename from include/exec/tb-context.h rename to accel/tcg/tb-context.h index cc339791138..cac62d97491 100644 --- a/include/exec/tb-context.h +++ b/accel/tcg/tb-context.h @@ -34,6 +34,7 @@ struct TBContext { /* statistics */ unsigned tb_flush_count; + unsigned tb_phys_invalidate_count; }; extern TBContext tb_ctx; diff --git a/include/exec/tb-hash.h b/accel/tcg/tb-hash.h similarity index 100% rename from include/exec/tb-hash.h rename to accel/tcg/tb-hash.h diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c index 13b8fbeb699..ea42d1d51b1 100644 --- a/accel/tcg/tcg-accel-ops-icount.c +++ b/accel/tcg/tcg-accel-ops-icount.c @@ -30,7 +30,6 @@ #include "qemu/main-loop.h" #include "qemu/guest-random.h" #include "exec/exec-all.h" -#include "hw/boards.h" #include "tcg-accel-ops.h" #include "tcg-accel-ops-icount.h" diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c index 847d2079d21..29632bd4c0a 100644 --- a/accel/tcg/tcg-accel-ops-mttcg.c +++ b/accel/tcg/tcg-accel-ops-mttcg.c @@ -28,6 +28,7 @@ #include "sysemu/tcg.h" #include "sysemu/replay.h" #include "qemu/main-loop.h" +#include "qemu/notify.h" #include "qemu/guest-random.h" #include "exec/exec-all.h" #include "hw/boards.h" @@ -35,6 +36,26 @@ #include "tcg-accel-ops.h" #include "tcg-accel-ops-mttcg.h" +typedef struct MttcgForceRcuNotifier { + Notifier notifier; + CPUState *cpu; +} MttcgForceRcuNotifier; + +static void do_nothing(CPUState *cpu, run_on_cpu_data d) +{ +} + +static void mttcg_force_rcu(Notifier *notify, void *data) +{ + CPUState *cpu = container_of(notify, MttcgForceRcuNotifier, notifier)->cpu; + + /* + * Called with rcu_registry_lock held, using async_run_on_cpu() ensures + * that there are no deadlocks. + */ + async_run_on_cpu(cpu, do_nothing, RUN_ON_CPU_NULL); +} + /* * In the multi-threaded case each vCPU has its own thread. 
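
Note: the tcg-accel-ops-mttcg.c hunk above registers a per-vCPU force_rcu notifier so that the RCU machinery can nudge a vCPU that is sitting in its execution loop and holding up a grace period; mttcg_force_rcu() performs the nudge by queuing a no-op with async_run_on_cpu(). The following is a minimal standalone sketch of the notifier-list pattern this relies on; the types and function names are illustrative, not QEMU's actual qemu/notify.h implementation.

#include <stdio.h>

/* Illustrative single-linked notifier list, roughly the shape of the
 * interface used by rcu_add_force_rcu_notifier() in the hunk above. */
typedef struct Notifier Notifier;
struct Notifier {
    void (*notify)(Notifier *notifier, void *data);
    Notifier *next;
};

typedef struct {
    Notifier *head;
} NotifierList;

static void notifier_list_add(NotifierList *list, Notifier *n)
{
    n->next = list->head;
    list->head = n;
}

static void notifier_list_notify(NotifierList *list, void *data)
{
    for (Notifier *n = list->head; n; n = n->next) {
        n->notify(n, data);
    }
}

/* Stand-in for mttcg_force_rcu(): QEMU queues async work on the vCPU so it
 * leaves its RCU read-side critical section and the grace period can end. */
static void force_rcu_cb(Notifier *n, void *data)
{
    (void)n;
    (void)data;
    printf("force_rcu notifier fired\n");
}

int main(void)
{
    NotifierList force_rcu_notifiers = { NULL };
    Notifier n = { .notify = force_rcu_cb };

    notifier_list_add(&force_rcu_notifiers, &n);
    /* The RCU thread would call this when a grace period stalls. */
    notifier_list_notify(&force_rcu_notifiers, NULL);
    return 0;
}
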
The TLS * variable current_cpu can be used deep in the code to find the @@ -43,12 +64,16 @@ static void *mttcg_cpu_thread_fn(void *arg) { + MttcgForceRcuNotifier force_rcu; CPUState *cpu = arg; assert(tcg_enabled()); g_assert(!icount_enabled()); rcu_register_thread(); + force_rcu.notifier.notify = mttcg_force_rcu; + force_rcu.cpu = cpu; + rcu_add_force_rcu_notifier(&force_rcu.notifier); tcg_register_thread(); qemu_mutex_lock_iothread(); @@ -100,6 +125,7 @@ static void *mttcg_cpu_thread_fn(void *arg) tcg_cpus_destroy(cpu); qemu_mutex_unlock_iothread(); + rcu_remove_force_rcu_notifier(&force_rcu.notifier); rcu_unregister_thread(); return NULL; } diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c index 018b54c508f..bf59f53dbc2 100644 --- a/accel/tcg/tcg-accel-ops-rr.c +++ b/accel/tcg/tcg-accel-ops-rr.c @@ -28,9 +28,9 @@ #include "sysemu/tcg.h" #include "sysemu/replay.h" #include "qemu/main-loop.h" +#include "qemu/notify.h" #include "qemu/guest-random.h" #include "exec/exec-all.h" -#include "hw/boards.h" #include "tcg-accel-ops.h" #include "tcg-accel-ops-rr.h" @@ -61,8 +61,6 @@ void rr_kick_vcpu_thread(CPUState *unused) static QEMUTimer *rr_kick_vcpu_timer; static CPUState *rr_current_cpu; -#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10) - static inline int64_t rr_next_kick_time(void) { return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD; @@ -136,6 +134,11 @@ static void rr_deal_with_unplugged_cpus(void) } } +static void rr_force_rcu(Notifier *notify, void *data) +{ + rr_kick_next_cpu(); +} + /* * In the single-threaded case each vCPU is simulated in turn. If * there is more than a single vCPU we create a simple timer to kick @@ -146,10 +149,13 @@ static void rr_deal_with_unplugged_cpus(void) static void *rr_cpu_thread_fn(void *arg) { + Notifier force_rcu; CPUState *cpu = arg; assert(tcg_enabled()); rcu_register_thread(); + force_rcu.notify = rr_force_rcu; + rcu_add_force_rcu_notifier(&force_rcu); tcg_register_thread(); qemu_mutex_lock_iothread(); @@ -258,6 +264,7 @@ static void *rr_cpu_thread_fn(void *arg) rr_deal_with_unplugged_cpus(); } + rcu_remove_force_rcu_notifier(&force_rcu); rcu_unregister_thread(); return NULL; } diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c index fdea83e4fe7..0e3f29884bd 100644 --- a/accel/tcg/tcg-accel-ops.c +++ b/accel/tcg/tcg-accel-ops.c @@ -32,7 +32,6 @@ #include "qemu/main-loop.h" #include "qemu/guest-random.h" #include "exec/exec-all.h" -#include "hw/boards.h" #include "tcg-accel-ops.h" #include "tcg-accel-ops-mttcg.h" @@ -129,6 +128,7 @@ static const TypeInfo tcg_accel_ops_type = { .class_init = tcg_accel_ops_class_init, .abstract = true, }; +module_obj(ACCEL_OPS_NAME("tcg")); static void tcg_accel_ops_register_types(void) { diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c index e378c2db73f..d6336a9c966 100644 --- a/accel/tcg/tcg-all.c +++ b/accel/tcg/tcg-all.c @@ -32,6 +32,11 @@ #include "qemu/error-report.h" #include "qemu/accel.h" #include "qapi/qapi-builtin-visit.h" +#include "qemu/units.h" +#if !defined(CONFIG_USER_ONLY) +#include "hw/boards.h" +#endif +#include "internal.h" struct TCGState { AccelState parent_obj; @@ -105,22 +110,29 @@ static void tcg_accel_instance_init(Object *obj) bool mttcg_enabled; -static int tcg_init(MachineState *ms) +static int tcg_init_machine(MachineState *ms) { TCGState *s = TCG_STATE(current_accel()); +#ifdef CONFIG_USER_ONLY + unsigned max_cpus = 1; +#else + unsigned max_cpus = ms->smp.max_cpus; +#endif - tcg_exec_init(s->tb_size * 1024 * 1024, 
s->splitwx_enabled); + tcg_allowed = true; mttcg_enabled = s->mttcg_enabled; + page_init(); + tb_htable_init(); + tcg_init(s->tb_size * MiB, s->splitwx_enabled, max_cpus); + +#if defined(CONFIG_SOFTMMU) /* - * Initialize TCG regions only for softmmu. - * - * This needs to be done later for user mode, because the prologue - * generation needs to be delayed so that GUEST_BASE is already set. + * There's no guest base to take into account, so go ahead and + * initialize the prologue now. */ -#ifndef CONFIG_USER_ONLY - tcg_region_init(); -#endif /* !CONFIG_USER_ONLY */ + tcg_prologue_init(tcg_ctx); +#endif return 0; } @@ -200,7 +212,7 @@ static void tcg_accel_class_init(ObjectClass *oc, void *data) { AccelClass *ac = ACCEL_CLASS(oc); ac->name = "tcg"; - ac->init_machine = tcg_init; + ac->init_machine = tcg_init_machine; ac->allowed = &tcg_allowed; object_class_property_add_str(oc, "thread", @@ -226,6 +238,7 @@ static const TypeInfo tcg_accel_type = { .class_init = tcg_accel_class_init, .instance_size = sizeof(TCGState), }; +module_obj(TYPE_TCG_ACCEL); static void register_accel_types(void) { diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c index 521da4a8137..ac7d28c251e 100644 --- a/accel/tcg/tcg-runtime-gvec.c +++ b/accel/tcg/tcg-runtime-gvec.c @@ -1073,9 +1073,8 @@ void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(int32_t)) { int32_t ai = *(int32_t *)(a + i); int32_t bi = *(int32_t *)(b + i); - int32_t di = ai + bi; - if (((di ^ ai) &~ (ai ^ bi)) < 0) { - /* Signed overflow. */ + int32_t di; + if (sadd32_overflow(ai, bi, &di)) { di = (di < 0 ? INT32_MAX : INT32_MIN); } *(int32_t *)(d + i) = di; @@ -1091,9 +1090,8 @@ void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(int64_t)) { int64_t ai = *(int64_t *)(a + i); int64_t bi = *(int64_t *)(b + i); - int64_t di = ai + bi; - if (((di ^ ai) &~ (ai ^ bi)) < 0) { - /* Signed overflow. */ + int64_t di; + if (sadd64_overflow(ai, bi, &di)) { di = (di < 0 ? INT64_MAX : INT64_MIN); } *(int64_t *)(d + i) = di; @@ -1143,9 +1141,8 @@ void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(int32_t)) { int32_t ai = *(int32_t *)(a + i); int32_t bi = *(int32_t *)(b + i); - int32_t di = ai - bi; - if (((di ^ ai) & (ai ^ bi)) < 0) { - /* Signed overflow. */ + int32_t di; + if (ssub32_overflow(ai, bi, &di)) { di = (di < 0 ? INT32_MAX : INT32_MIN); } *(int32_t *)(d + i) = di; @@ -1161,9 +1158,8 @@ void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(int64_t)) { int64_t ai = *(int64_t *)(a + i); int64_t bi = *(int64_t *)(b + i); - int64_t di = ai - bi; - if (((di ^ ai) & (ai ^ bi)) < 0) { - /* Signed overflow. */ + int64_t di; + if (ssub64_overflow(ai, bi, &di)) { di = (di < 0 ? 
INT64_MAX : INT64_MIN); } *(int64_t *)(d + i) = di; @@ -1209,8 +1205,8 @@ void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(uint32_t)) { uint32_t ai = *(uint32_t *)(a + i); uint32_t bi = *(uint32_t *)(b + i); - uint32_t di = ai + bi; - if (di < ai) { + uint32_t di; + if (uadd32_overflow(ai, bi, &di)) { di = UINT32_MAX; } *(uint32_t *)(d + i) = di; @@ -1226,8 +1222,8 @@ void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(uint64_t)) { uint64_t ai = *(uint64_t *)(a + i); uint64_t bi = *(uint64_t *)(b + i); - uint64_t di = ai + bi; - if (di < ai) { + uint64_t di; + if (uadd64_overflow(ai, bi, &di)) { di = UINT64_MAX; } *(uint64_t *)(d + i) = di; @@ -1273,8 +1269,8 @@ void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(uint32_t)) { uint32_t ai = *(uint32_t *)(a + i); uint32_t bi = *(uint32_t *)(b + i); - uint32_t di = ai - bi; - if (ai < bi) { + uint32_t di; + if (usub32_overflow(ai, bi, &di)) { di = 0; } *(uint32_t *)(d + i) = di; @@ -1290,8 +1286,8 @@ void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(uint64_t)) { uint64_t ai = *(uint64_t *)(a + i); uint64_t bi = *(uint64_t *)(b + i); - uint64_t di = ai - bi; - if (ai < bi) { + uint64_t di; + if (usub64_overflow(ai, bi, &di)) { di = 0; } *(uint64_t *)(d + i) = di; diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c index 12ba4585619..e4e030043fb 100644 --- a/accel/tcg/tcg-runtime.c +++ b/accel/tcg/tcg-runtime.c @@ -27,15 +27,9 @@ #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" #include "exec/exec-all.h" -#ifdef TARGET_CHERI -#include "cheri-helper-utils.h" -#endif #include "disas/disas.h" #include "exec/log.h" #include "tcg/tcg.h" -#include "exec/tb-lookup.h" -#include "exec/log_instr.h" - /* 32-bit helpers */ @@ -150,28 +144,6 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg) return ctpop64(arg); } -const void *HELPER(lookup_tb_ptr)(CPUArchState *env) -{ - CPUState *cpu = env_cpu(env); - TranslationBlock *tb; - target_ulong cs_base, cs_top = 0, pc; - uint32_t cheri_flags = 0; - uint32_t flags; - - cpu_get_tb_cpu_state_6(env, &pc, &cs_base, &cs_top, &cheri_flags, &flags); - - tb = tb_lookup(cpu, pc, cs_base, cs_top, cheri_flags, flags, curr_cflags(cpu)); - if (tb == NULL) { - return tcg_code_gen_epilogue; - } - qemu_log_mask_and_addr(CPU_LOG_EXEC, pc, - "Chain %d: %p [" TARGET_FMT_lx "/" TARGET_FMT_lx - "/" TARGET_FMT_lx "/%#x/%#x] %s\n", - cpu->cpu_index, tb->tc.ptr, cs_base, pc, cs_top, - cheri_flags, flags, lookup_symbol(pc)); - return tb->tc.ptr; -} - void HELPER(exit_atomic)(CPUArchState *env) { cpu_loop_exit_atomic(env_cpu(env), GETPC()); diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h index d76d98d8e18..a12e1a6d4e7 100644 --- a/accel/tcg/tcg-runtime.h +++ b/accel/tcg/tcg-runtime.h @@ -39,8 +39,6 @@ DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr) #endif /* IN_HELPER_PROTO */ -#ifdef CONFIG_SOFTMMU - DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG, i32, env, tl, i32, i32, i32) DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG, @@ -88,50 +86,6 @@ DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG, TCG_CALL_NO_WG, i32, env, tl, i32, i32) #endif /* CONFIG_ATOMIC64 */ -#else - -DEF_HELPER_FLAGS_4(atomic_cmpxchgb, TCG_CALL_NO_WG, i32, env, tl, i32, i32) -DEF_HELPER_FLAGS_4(atomic_cmpxchgw_be, TCG_CALL_NO_WG, i32, env, tl, 
i32, i32) -DEF_HELPER_FLAGS_4(atomic_cmpxchgw_le, TCG_CALL_NO_WG, i32, env, tl, i32, i32) -DEF_HELPER_FLAGS_4(atomic_cmpxchgl_be, TCG_CALL_NO_WG, i32, env, tl, i32, i32) -DEF_HELPER_FLAGS_4(atomic_cmpxchgl_le, TCG_CALL_NO_WG, i32, env, tl, i32, i32) -#ifdef CONFIG_ATOMIC64 -DEF_HELPER_FLAGS_4(atomic_cmpxchgq_be, TCG_CALL_NO_WG, i64, env, tl, i64, i64) -DEF_HELPER_FLAGS_4(atomic_cmpxchgq_le, TCG_CALL_NO_WG, i64, env, tl, i64, i64) -#endif - -#ifdef CONFIG_ATOMIC64 -#define GEN_ATOMIC_HELPERS(NAME) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), b), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_le), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_be), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_le), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_be), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), q_le), \ - TCG_CALL_NO_WG, i64, env, tl, i64) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), q_be), \ - TCG_CALL_NO_WG, i64, env, tl, i64) -#else -#define GEN_ATOMIC_HELPERS(NAME) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), b), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_le), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_be), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_le), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_be), \ - TCG_CALL_NO_WG, i32, env, tl, i32) -#endif /* CONFIG_ATOMIC64 */ - -#endif /* CONFIG_SOFTMMU */ - GEN_ATOMIC_HELPERS(fetch_add) GEN_ATOMIC_HELPERS(fetch_and) GEN_ATOMIC_HELPERS(fetch_or) diff --git a/accel/tcg/trace-events b/accel/tcg/trace-events index 6eefb37f5d0..59eab96f264 100644 --- a/accel/tcg/trace-events +++ b/accel/tcg/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # TCG related tracing # cpu-exec.c diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 2a254563802..dd3e38b01d5 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -18,11 +18,9 @@ */ #include "qemu/osdep.h" -#include "qemu/units.h" #include "qemu-common.h" #define NO_CPU_IO_DEFS -#include "cpu.h" #include "trace.h" #include "disas/disas.h" #include "exec/exec-all.h" @@ -48,10 +46,8 @@ #endif #include "exec/cputlb.h" -#include "exec/tb-hash.h" #include "exec/translate-all.h" #include "qemu/bitmap.h" -#include "qemu/error-report.h" #include "qemu/qemu-print.h" #include "qemu/timer.h" #include "qemu/main-loop.h" @@ -61,6 +57,8 @@ #include "sysemu/tcg.h" #include "qapi/error.h" #include "hw/core/tcg-cpu-ops.h" +#include "tb-hash.h" +#include "tb-context.h" #include "internal.h" /* #define DEBUG_TB_INVALIDATE */ @@ -220,9 +218,6 @@ static int v_l2_levels; static void *l1_map[V_L1_MAX_SIZE]; -/* code generation context */ -TCGContext tcg_init_ctx; -__thread TCGContext *tcg_ctx; TBContext tb_ctx; static void page_table_config_init(void) @@ -245,11 +240,6 @@ static void page_table_config_init(void) assert(v_l2_levels >= 0); } -static void cpu_gen_init(void) -{ - tcg_context_init(&tcg_init_ctx); -} - /* Encode VAL as a signed leb128 sequence at P. Return P incremented past the encoded value. 
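
Note: the comment ending above describes encode_sleb128(), which compresses the per-instruction (host PC, guest state) deltas appended to each translated block. A standalone sketch of signed LEB128 encoding and decoding in that spirit follows; it mirrors the idea rather than QEMU's exact code.

#include <stdint.h>
#include <stdio.h>

/* Signed LEB128: 7 payload bits per byte, high bit set on all but the last
 * byte, bit 0x40 of the last byte carries the sign.  Relies on arithmetic
 * right shift of negative values, as the QEMU code does. */
static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
{
    int more;
    do {
        uint8_t byte = val & 0x7f;
        val >>= 7;
        more = !((val == 0 && !(byte & 0x40)) ||
                 (val == -1 && (byte & 0x40)));
        if (more) {
            byte |= 0x80;
        }
        *p++ = byte;
    } while (more);
    return p;
}

static const uint8_t *decode_sleb128(const uint8_t *p, int64_t *val)
{
    int64_t result = 0;
    int shift = 0;
    uint8_t byte;

    do {
        byte = *p++;
        result |= (int64_t)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    if (shift < 64 && (byte & 0x40)) {
        result |= -((int64_t)1 << shift);   /* sign-extend */
    }
    *val = result;
    return p;
}

int main(void)
{
    uint8_t buf[16];
    int64_t out;

    encode_sleb128(buf, -129);
    decode_sleb128(buf, &out);
    printf("%lld\n", (long long)out);       /* prints -129 */
    return 0;
}
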
*/ static uint8_t *encode_sleb128(uint8_t *p, target_long val) @@ -388,11 +378,6 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, return 0; } -void tb_destroy(TranslationBlock *tb) -{ - qemu_spin_destroy(&tb->jmp_lock); -} - bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) { /* @@ -415,7 +400,7 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) return false; } -static void page_init(void) +void page_init(void) { page_size_init(); page_table_config_init(); @@ -900,408 +885,6 @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, } } -/* Minimum size of the code gen buffer. This number is randomly chosen, - but not so small that we can't have a fair number of TB's live. */ -#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB) - -/* Maximum size of the code gen buffer we'd like to use. Unless otherwise - indicated, this is constrained by the range of direct branches on the - host cpu, as used by the TCG implementation of goto_tb. */ -#if defined(__x86_64__) -# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) -#elif defined(__sparc__) -# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) -#elif defined(__powerpc64__) -# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) -#elif defined(__powerpc__) -# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB) -#elif defined(__aarch64__) -# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) -#elif defined(__s390x__) - /* We have a +- 4GB range on the branches; leave some slop. */ -# define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB) -#elif defined(__mips__) - /* We have a 256MB branch region, but leave room to make sure the - main executable is also within that region. */ -# define MAX_CODE_GEN_BUFFER_SIZE (128 * MiB) -#else -# define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) -#endif - -#if TCG_TARGET_REG_BITS == 32 -#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB) -#ifdef CONFIG_USER_ONLY -/* - * For user mode on smaller 32 bit systems we may run into trouble - * allocating big chunks of data in the right place. On these systems - * we utilise a static code generation buffer directly in the binary. - */ -#define USE_STATIC_CODE_GEN_BUFFER -#endif -#else /* TCG_TARGET_REG_BITS == 64 */ -#ifdef CONFIG_USER_ONLY -/* - * As user-mode emulation typically means running multiple instances - * of the translator don't go too nuts with our default code gen - * buffer lest we make things too hard for the OS. - */ -#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB) -#else -/* - * We expect most system emulation to run one or two guests per host. - * Users running large scale system emulation may want to tweak their - * runtime setup via the tb-size control on the command line. - */ -#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB) -#endif -#endif - -#define DEFAULT_CODE_GEN_BUFFER_SIZE \ - (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \ - ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE) - -static size_t size_code_gen_buffer(size_t tb_size) -{ - /* Size the buffer. */ - if (tb_size == 0) { - size_t phys_mem = qemu_get_host_physmem(); - if (phys_mem == 0) { - tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE; - } else { - tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, phys_mem / 8); - } - } - if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) { - tb_size = MIN_CODE_GEN_BUFFER_SIZE; - } - if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) { - tb_size = MAX_CODE_GEN_BUFFER_SIZE; - } - return tb_size; -} - -#ifdef __mips__ -/* In order to use J and JAL within the code_gen_buffer, we require - that the buffer not cross a 256MB boundary. 
*/ -static inline bool cross_256mb(void *addr, size_t size) -{ - return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful; -} - -/* We weren't able to allocate a buffer without crossing that boundary, - so make do with the larger portion of the buffer that doesn't cross. - Returns the new base of the buffer, and adjusts code_gen_buffer_size. */ -static inline void *split_cross_256mb(void *buf1, size_t size1) -{ - void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful); - size_t size2 = buf1 + size1 - buf2; - - size1 = buf2 - buf1; - if (size1 < size2) { - size1 = size2; - buf1 = buf2; - } - - tcg_ctx->code_gen_buffer_size = size1; - return buf1; -} -#endif - -#ifdef USE_STATIC_CODE_GEN_BUFFER -static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] - __attribute__((aligned(CODE_GEN_ALIGN))); - -static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp) -{ - void *buf, *end; - size_t size; - - if (splitwx > 0) { - error_setg(errp, "jit split-wx not supported"); - return false; - } - - /* page-align the beginning and end of the buffer */ - buf = static_code_gen_buffer; - end = static_code_gen_buffer + sizeof(static_code_gen_buffer); - buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size); - end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size); - - size = end - buf; - - /* Honor a command-line option limiting the size of the buffer. */ - if (size > tb_size) { - size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size); - } - tcg_ctx->code_gen_buffer_size = size; - -#ifdef __mips__ - if (cross_256mb(buf, size)) { - buf = split_cross_256mb(buf, size); - size = tcg_ctx->code_gen_buffer_size; - } -#endif - - if (qemu_mprotect_rwx(buf, size)) { - error_setg_errno(errp, errno, "mprotect of jit buffer"); - return false; - } - qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE); - - tcg_ctx->code_gen_buffer = buf; - return true; -} -#elif defined(_WIN32) -static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp) -{ - void *buf; - - if (splitwx > 0) { - error_setg(errp, "jit split-wx not supported"); - return false; - } - - buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, - PAGE_EXECUTE_READWRITE); - if (buf == NULL) { - error_setg_win32(errp, GetLastError(), - "allocate %zu bytes for jit buffer", size); - return false; - } - - tcg_ctx->code_gen_buffer = buf; - tcg_ctx->code_gen_buffer_size = size; - return true; -} -#else -static bool alloc_code_gen_buffer_anon(size_t size, int prot, - int flags, Error **errp) -{ - void *buf; - - buf = mmap(NULL, size, prot, flags, -1, 0); - if (buf == MAP_FAILED) { - error_setg_errno(errp, errno, - "allocate %zu bytes for jit buffer", size); - return false; - } - tcg_ctx->code_gen_buffer_size = size; - -#ifdef __mips__ - if (cross_256mb(buf, size)) { - /* - * Try again, with the original still mapped, to avoid re-acquiring - * the same 256mb crossing. - */ - size_t size2; - void *buf2 = mmap(NULL, size, prot, flags, -1, 0); - switch ((int)(buf2 != MAP_FAILED)) { - case 1: - if (!cross_256mb(buf2, size)) { - /* Success! Use the new buffer. */ - munmap(buf, size); - break; - } - /* Failure. Work with what we had. */ - munmap(buf2, size); - /* fallthru */ - default: - /* Split the original buffer. Free the smaller half. */ - buf2 = split_cross_256mb(buf, size); - size2 = tcg_ctx->code_gen_buffer_size; - if (buf == buf2) { - munmap(buf + size2, size - size2); - } else { - munmap(buf, size - size2); - } - size = size2; - break; - } - buf = buf2; - } -#endif - - /* Request large pages for the buffer. 
*/ - qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE); - - tcg_ctx->code_gen_buffer = buf; - return true; -} - -#ifndef CONFIG_TCG_INTERPRETER -#ifdef CONFIG_POSIX -#include "qemu/memfd.h" - -static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp) -{ - void *buf_rw = NULL, *buf_rx = MAP_FAILED; - int fd = -1; - -#ifdef __mips__ - /* Find space for the RX mapping, vs the 256MiB regions. */ - if (!alloc_code_gen_buffer_anon(size, PROT_NONE, - MAP_PRIVATE | MAP_ANONYMOUS | - MAP_NORESERVE, errp)) { - return false; - } - /* The size of the mapping may have been adjusted. */ - size = tcg_ctx->code_gen_buffer_size; - buf_rx = tcg_ctx->code_gen_buffer; -#endif - - buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp); - if (buf_rw == NULL) { - goto fail; - } - -#ifdef __mips__ - void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC, - MAP_SHARED | MAP_FIXED, fd, 0); - if (tmp != buf_rx) { - goto fail_rx; - } -#else - buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); - if (buf_rx == MAP_FAILED) { - goto fail_rx; - } -#endif - - close(fd); - tcg_ctx->code_gen_buffer = buf_rw; - tcg_ctx->code_gen_buffer_size = size; - tcg_splitwx_diff = buf_rx - buf_rw; - - /* Request large pages for the buffer and the splitwx. */ - qemu_madvise(buf_rw, size, QEMU_MADV_HUGEPAGE); - qemu_madvise(buf_rx, size, QEMU_MADV_HUGEPAGE); - return true; - - fail_rx: - error_setg_errno(errp, errno, "failed to map shared memory for execute"); - fail: - if (buf_rx != MAP_FAILED) { - munmap(buf_rx, size); - } - if (buf_rw) { - munmap(buf_rw, size); - } - if (fd >= 0) { - close(fd); - } - return false; -} -#endif /* CONFIG_POSIX */ - -#ifdef CONFIG_DARWIN -#include - -extern kern_return_t mach_vm_remap(vm_map_t target_task, - mach_vm_address_t *target_address, - mach_vm_size_t size, - mach_vm_offset_t mask, - int flags, - vm_map_t src_task, - mach_vm_address_t src_address, - boolean_t copy, - vm_prot_t *cur_protection, - vm_prot_t *max_protection, - vm_inherit_t inheritance); - -static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp) -{ - kern_return_t ret; - mach_vm_address_t buf_rw, buf_rx; - vm_prot_t cur_prot, max_prot; - - /* Map the read-write portion via normal anon memory. */ - if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, errp)) { - return false; - } - - buf_rw = (mach_vm_address_t)tcg_ctx->code_gen_buffer; - buf_rx = 0; - ret = mach_vm_remap(mach_task_self(), - &buf_rx, - size, - 0, - VM_FLAGS_ANYWHERE, - mach_task_self(), - buf_rw, - false, - &cur_prot, - &max_prot, - VM_INHERIT_NONE); - if (ret != KERN_SUCCESS) { - /* TODO: Convert "ret" to a human readable error message. 
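
Note: alloc_code_gen_buffer_splitwx_memfd(), shown above as it is dropped from translate-all.c, implements the split-W^X JIT buffer: a single memfd backs two mappings, a writable one used while emitting code and an executable one used when running it, with tcg_splitwx_diff holding the constant offset between the two. A minimal Linux-only sketch of the same idea; it assumes memfd_create() is exposed by the C library (glibc 2.27 or newer).

/* Sketch of a split read-write / read-execute JIT buffer on Linux. */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <unistd.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
    size_t size = 64 * 1024;
    int fd = memfd_create("jit-demo", 0);

    if (fd < 0 || ftruncate(fd, size) < 0) {
        perror("memfd");
        return 1;
    }

    void *rw = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    void *rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
    close(fd);
    if (rw == MAP_FAILED || rx == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    /* Code is emitted through the writable alias ... */
    memset(rw, 0xc3, 16);                  /* x86 'ret' bytes, purely a demo */

    /* ... and executed through the executable alias; this delta plays the
     * role of tcg_splitwx_diff in the code above. */
    ptrdiff_t splitwx_diff = (uint8_t *)rx - (uint8_t *)rw;
    printf("rw=%p rx=%p diff=%td\n", rw, rx, splitwx_diff);
    return 0;
}
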
*/ - error_setg(errp, "vm_remap for jit splitwx failed"); - munmap((void *)buf_rw, size); - return false; - } - - if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) { - error_setg_errno(errp, errno, "mprotect for jit splitwx"); - munmap((void *)buf_rx, size); - munmap((void *)buf_rw, size); - return false; - } - - tcg_splitwx_diff = buf_rx - buf_rw; - return true; -} -#endif /* CONFIG_DARWIN */ -#endif /* CONFIG_TCG_INTERPRETER */ - -static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp) -{ -#ifndef CONFIG_TCG_INTERPRETER -# ifdef CONFIG_DARWIN - return alloc_code_gen_buffer_splitwx_vmremap(size, errp); -# endif -# ifdef CONFIG_POSIX - return alloc_code_gen_buffer_splitwx_memfd(size, errp); -# endif -#endif - error_setg(errp, "jit split-wx not supported"); - return false; -} - -static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp) -{ - ERRP_GUARD(); - int prot, flags; - - if (splitwx) { - if (alloc_code_gen_buffer_splitwx(size, errp)) { - return true; - } - /* - * If splitwx force-on (1), fail; - * if splitwx default-on (-1), fall through to splitwx off. - */ - if (splitwx > 0) { - return false; - } - error_free_or_abort(errp); - } - - prot = PROT_READ | PROT_WRITE | PROT_EXEC; - flags = MAP_PRIVATE | MAP_ANONYMOUS; -#ifdef CONFIG_TCG_INTERPRETER - /* The tcg interpreter does not need execute permission. */ - prot = PROT_READ | PROT_WRITE; -#elif defined(CONFIG_DARWIN) - /* Applicable to both iOS and macOS (Apple Silicon). */ - if (!splitwx) { - flags |= MAP_JIT; - } -#endif - - return alloc_code_gen_buffer_anon(size, prot, flags, errp); -} -#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */ - static bool tb_cmp(const void *ap, const void *bp) { const TranslationBlock *a = ap; @@ -1309,7 +892,8 @@ static bool tb_cmp(const void *ap, const void *bp) return a->pc == b->pc && a->cs_base == b->cs_base && - a->cs_top == b->cs_top && + a->pcc_base == b->pcc_base && + a->pcc_top == b->pcc_top && a->cheri_flags == b->cheri_flags && a->flags == b->flags && (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) && @@ -1318,36 +902,13 @@ static bool tb_cmp(const void *ap, const void *bp) a->page_addr[1] == b->page_addr[1]; } -static void tb_htable_init(void) +void tb_htable_init(void) { unsigned int mode = QHT_MODE_AUTO_RESIZE; qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode); } -/* Must be called before using the QEMU cpus. 'tb_size' is the size - (in bytes) allocated to the translation buffer. Zero means default - size. */ -void tcg_exec_init(unsigned long tb_size, int splitwx) -{ - bool ok; - - tcg_allowed = true; - cpu_gen_init(); - page_init(); - tb_htable_init(); - - ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size), - splitwx, &error_fatal); - assert(ok); - -#if defined(CONFIG_SOFTMMU) - /* There's no guest base to take into account, so go ahead and - initialize the prologue now. 
*/ - tcg_prologue_init(tcg_ctx); -#endif -} - /* call with @p->lock held */ static inline void invalidate_page_bitmap(PageDesc *p) { @@ -1661,8 +1222,8 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) /* suppress any remaining jumps to this TB */ tb_jmp_unlink(tb); - qatomic_set(&tcg_ctx->tb_phys_invalidate_count, - tcg_ctx->tb_phys_invalidate_count + 1); + qatomic_set(&tb_ctx.tb_phys_invalidate_count, + tb_ctx.tb_phys_invalidate_count + 1); } static void tb_phys_invalidate__locked(TranslationBlock *tb) @@ -1739,31 +1300,8 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb, invalidate_page_bitmap(p); #if defined(CONFIG_USER_ONLY) - if (p->flags & PAGE_WRITE) { - target_ulong addr; - PageDesc *p2; - int prot; - - /* force the host page as non writable (writes will have a - page fault + mprotect overhead) */ - page_addr &= qemu_host_page_mask; - prot = 0; - for (addr = page_addr; addr < page_addr + qemu_host_page_size; - addr += TARGET_PAGE_SIZE) { - - p2 = page_find(addr >> TARGET_PAGE_BITS); - if (!p2) { - continue; - } - prot |= p2->flags; - p2->flags &= ~PAGE_WRITE; - } - mprotect(g2h_untagged(page_addr), qemu_host_page_size, - (prot & PAGE_BITS) & ~PAGE_WRITE); - if (DEBUG_TB_INVALIDATE_GATE) { - printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr); - } - } + /* translator_loop() must have made all TB pages non-writable */ + assert(!(p->flags & PAGE_WRITE)); #else /* if some code is already present, then the pages are already protected. So we handle the case where only the first TB is @@ -1844,8 +1382,9 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, /* Called with mmap_lock held for user mode emulation. */ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, - target_ulong cs_base, target_ulong cs_top, - uint32_t cheri_flags, uint32_t flags, int cflags) + target_ulong cs_base, target_ulong pcc_base, + target_ulong pcc_top, uint32_t cheri_flags, + uint32_t flags, int cflags) { CPUArchState *env = cpu->env_ptr; TranslationBlock *tb, *existing_tb; @@ -1870,14 +1409,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, max_insns = cflags & CF_COUNT_MASK; if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } - if (max_insns > TCG_MAX_INSNS) { max_insns = TCG_MAX_INSNS; } - if (cpu->singlestep_enabled || singlestep) { - max_insns = 1; - } + QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS); buffer_overflow: tb = tcg_tb_alloc(tcg_ctx); @@ -1894,7 +1428,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf); tb->pc = pc; tb->cs_base = cs_base; - tb->cs_top = cs_top; + tb->pcc_base = pcc_base; + tb->pcc_top = pcc_top; tb->cheri_flags = cheri_flags; tb->flags = flags; tb->cflags = cflags; @@ -1917,6 +1452,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, tcg_ctx->cpu = env_cpu(env); gen_intermediate_code(cpu, tb, max_insns); + assert(tb->size != 0); tcg_ctx->cpu = NULL; max_insns = tb->icount; @@ -2047,8 +1583,15 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, int i; qemu_log(" data: [size=%d]\n", data_size); for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) { - qemu_log("0x%08" PRIxPTR ": .quad 0x%" TCG_PRIlx "\n", - (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); + if (sizeof(tcg_target_ulong) == 8) { + qemu_log("0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n", + (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); + } else if (sizeof(tcg_target_ulong) == 4) { + qemu_log("0x%08" PRIxPTR 
": .long 0x%08" TCG_PRIlx "\n", + (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); + } else { + qemu_build_not_reached(); + } } } qemu_log("\n"); @@ -2088,6 +1631,13 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, return tb; } + /* + * Insert TB into the corresponding region tree before publishing it + * through QHT. Otherwise rewinding happened in the TB might fail to + * lookup itself using host PC. + */ + tcg_tb_insert(tb); + /* check next page if needed */ virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK; phys_page2 = -1; @@ -2105,10 +1655,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize); qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned); - tb_destroy(tb); + tcg_tb_remove(tb); return existing_tb; } - tcg_tb_insert(tb); return tb; } @@ -2134,7 +1683,8 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, TranslationBlock *current_tb = NULL; target_ulong current_pc = 0; target_ulong current_cs_base = 0; - target_ulong current_cs_top = 0; + target_ulong current_pcc_base = 0; + target_ulong current_pcc_top = 0; uint32_t current_cheri_flags = 0; uint32_t current_flags = 0; #endif /* TARGET_HAS_PRECISE_SMC */ @@ -2180,9 +1730,9 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, */ current_tb_modified = true; cpu_restore_state_from_tb(cpu, current_tb, retaddr, true); - cpu_get_tb_cpu_state_6(env, ¤t_pc, ¤t_cs_base, - ¤t_cs_top, ¤t_cheri_flags, - ¤t_flags); + cpu_get_tb_cpu_state_ext(env, ¤t_pc, ¤t_cs_base, + ¤t_pcc_base, ¤t_pcc_top, + ¤t_cheri_flags, ¤t_flags); } #endif /* TARGET_HAS_PRECISE_SMC */ tb_phys_invalidate__locked(tb); @@ -2199,7 +1749,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, if (current_tb_modified) { page_collection_unlock(pages); /* Force execution of one insn next time. */ - cpu->cflags_next_tb = 1 | curr_cflags(cpu); + cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu); mmap_unlock(); cpu_loop_exit_noexc(cpu); } @@ -2325,8 +1875,8 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) int current_tb_modified = 0; target_ulong current_pc = 0; target_ulong current_cs_base = 0; - target_ulong current_cs_top = 0; - target_ulong current_ds_base = 0; + target_ulong current_pcc_base = 0; + target_ulong current_pcc_top = 0; uint32_t current_cheri_flags = 0; uint32_t current_flags = 0; #endif @@ -2360,9 +1910,9 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) current_tb_modified = 1; cpu_restore_state_from_tb(cpu, current_tb, pc, true); - cpu_get_tb_cpu_state_6(env, ¤t_pc, ¤t_cs_base, - ¤t_cs_top, ¤t_cheri_flags, - ¤t_flags); + cpu_get_tb_cpu_state_ext(env, ¤t_pc, ¤t_cs_base, + ¤t_pcc_base, ¤t_pcc_top, + ¤t_cheri_flags, ¤t_flags); } #endif /* TARGET_HAS_PRECISE_SMC */ tb_phys_invalidate(tb, addr); @@ -2371,7 +1921,7 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) #ifdef TARGET_HAS_PRECISE_SMC if (current_tb_modified) { /* Force execution of one insn next time. */ - cpu->cflags_next_tb = 1 | curr_cflags(cpu); + cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu); return true; } #endif @@ -2396,12 +1946,13 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) /* The exception probably happened in a helper. The CPU state should have been saved before calling it. Fetch the PC from there. 
*/ CPUArchState *env = cpu->env_ptr; - target_ulong pc, cs_base, cs_top = 0; + target_ulong pc, cs_base, pcc_base = 0, pcc_top = 0; uint32_t cheri_flags = 0; tb_page_addr_t addr; uint32_t flags; - cpu_get_tb_cpu_state_6(env, &pc, &cs_base, &cs_top, &cheri_flags, &flags); + cpu_get_tb_cpu_state_ext(env, &pc, &cs_base, &pcc_base, &pcc_top, + &cheri_flags, &flags); addr = get_page_addr_code(env, pc); if (addr != -1) { tb_invalidate_phys_range(addr, addr + 1); @@ -2457,7 +2008,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) cpu_loop_exit_noexc(cpu); } -static void print_qht_statistics(struct qht_stats hst) +static void print_qht_statistics(struct qht_stats hst, GString *buf) { uint32_t hgram_opts; size_t hgram_bins; @@ -2466,9 +2017,11 @@ static void print_qht_statistics(struct qht_stats hst) if (!hst.head_buckets) { return; } - qemu_printf("TB hash buckets %zu/%zu (%0.2f%% head buckets used)\n", - hst.used_head_buckets, hst.head_buckets, - (double)hst.used_head_buckets / hst.head_buckets * 100); + g_string_append_printf(buf, "TB hash buckets %zu/%zu " + "(%0.2f%% head buckets used)\n", + hst.used_head_buckets, hst.head_buckets, + (double)hst.used_head_buckets / + hst.head_buckets * 100); hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT; @@ -2476,8 +2029,9 @@ static void print_qht_statistics(struct qht_stats hst) hgram_opts |= QDIST_PR_NODECIMAL; } hgram = qdist_pr(&hst.occupancy, 10, hgram_opts); - qemu_printf("TB hash occupancy %0.2f%% avg chain occ. Histogram: %s\n", - qdist_avg(&hst.occupancy) * 100, hgram); + g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. " + "Histogram: %s\n", + qdist_avg(&hst.occupancy) * 100, hgram); g_free(hgram); hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; @@ -2489,8 +2043,9 @@ static void print_qht_statistics(struct qht_stats hst) hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE; } hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts); - qemu_printf("TB hash avg chain %0.3f buckets. Histogram: %s\n", - qdist_avg(&hst.chain), hgram); + g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. " + "Histogram: %s\n", + qdist_avg(&hst.chain), hgram); g_free(hgram); } @@ -2527,7 +2082,7 @@ static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) return false; } -void dump_exec_info(void) +void dump_exec_info(GString *buf) { struct tb_tree_stats tst = {}; struct qht_stats hst; @@ -2536,49 +2091,53 @@ void dump_exec_info(void) tcg_tb_foreach(tb_tree_stats_iter, &tst); nb_tbs = tst.nb_tbs; /* XXX: avoid using doubles ? */ - qemu_printf("Translation buffer state:\n"); + g_string_append_printf(buf, "Translation buffer state:\n"); /* * Report total code size including the padding and TB structs; * otherwise users might think "-accel tcg,tb-size" is not honoured. * For avg host size we use the precise numbers from tb_tree_stats though. */ - qemu_printf("gen code size %zu/%zu\n", - tcg_code_size(), tcg_code_capacity()); - qemu_printf("TB count %zu\n", nb_tbs); - qemu_printf("TB avg target size %zu max=%zu bytes\n", - nb_tbs ? tst.target_size / nb_tbs : 0, - tst.max_target_size); - qemu_printf("TB avg host size %zu bytes (expansion ratio: %0.1f)\n", - nb_tbs ? tst.host_size / nb_tbs : 0, - tst.target_size ? (double)tst.host_size / tst.target_size : 0); - qemu_printf("cross page TB count %zu (%zu%%)\n", tst.cross_page, - nb_tbs ? 
(tst.cross_page * 100) / nb_tbs : 0); - qemu_printf("direct jump count %zu (%zu%%) (2 jumps=%zu %zu%%)\n", - tst.direct_jmp_count, - nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, - tst.direct_jmp2_count, - nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0); + g_string_append_printf(buf, "gen code size %zu/%zu\n", + tcg_code_size(), tcg_code_capacity()); + g_string_append_printf(buf, "TB count %zu\n", nb_tbs); + g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n", + nb_tbs ? tst.target_size / nb_tbs : 0, + tst.max_target_size); + g_string_append_printf(buf, "TB avg host size %zu bytes " + "(expansion ratio: %0.1f)\n", + nb_tbs ? tst.host_size / nb_tbs : 0, + tst.target_size ? + (double)tst.host_size / tst.target_size : 0); + g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n", + tst.cross_page, + nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); + g_string_append_printf(buf, "direct jump count %zu (%zu%%) " + "(2 jumps=%zu %zu%%)\n", + tst.direct_jmp_count, + nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, + tst.direct_jmp2_count, + nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0); qht_statistics_init(&tb_ctx.htable, &hst); - print_qht_statistics(hst); + print_qht_statistics(hst, buf); qht_statistics_destroy(&hst); - qemu_printf("\nStatistics:\n"); - qemu_printf("TB flush count %u\n", - qatomic_read(&tb_ctx.tb_flush_count)); - qemu_printf("TB invalidate count %zu\n", - tcg_tb_phys_invalidate_count()); + g_string_append_printf(buf, "\nStatistics:\n"); + g_string_append_printf(buf, "TB flush count %u\n", + qatomic_read(&tb_ctx.tb_flush_count)); + g_string_append_printf(buf, "TB invalidate count %u\n", + qatomic_read(&tb_ctx.tb_phys_invalidate_count)); tlb_flush_counts(&flush_full, &flush_part, &flush_elide); - qemu_printf("TLB full flushes %zu\n", flush_full); - qemu_printf("TLB partial flushes %zu\n", flush_part); - qemu_printf("TLB elided flushes %zu\n", flush_elide); - tcg_dump_info(); + g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); + g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); + g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); + tcg_dump_info(buf); } -void dump_opcount_info(void) +void dump_opcount_info(GString *buf) { - tcg_dump_op_count(); + tcg_dump_op_count(buf); } #else /* CONFIG_USER_ONLY */ @@ -2837,6 +2396,38 @@ int page_check_range(target_ulong start, target_ulong len, int flags) return 0; } +void page_protect(tb_page_addr_t page_addr) +{ + target_ulong addr; + PageDesc *p; + int prot; + + p = page_find(page_addr >> TARGET_PAGE_BITS); + if (p && (p->flags & PAGE_WRITE)) { + /* + * Force the host page as non writable (writes will have a page fault + + * mprotect overhead). + */ + page_addr &= qemu_host_page_mask; + prot = 0; + for (addr = page_addr; addr < page_addr + qemu_host_page_size; + addr += TARGET_PAGE_SIZE) { + + p = page_find(addr >> TARGET_PAGE_BITS); + if (!p) { + continue; + } + prot |= p->flags; + p->flags &= ~PAGE_WRITE; + } + mprotect(g2h_untagged(page_addr), qemu_host_page_size, + (prot & PAGE_BITS) & ~PAGE_WRITE); + if (DEBUG_TB_INVALIDATE_GATE) { + printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr); + } + } +} + /* called from signal handler: invalidate the code and unprotect the * page. 
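
Note: print_qht_statistics() and dump_exec_info() above now append their report to a caller-supplied GString instead of printing directly with qemu_printf(), so different consumers can route the same text. A tiny GLib sketch of the pattern (build against glib-2.0 via pkg-config); the statistic names are just examples.

#include <glib.h>
#include <stdio.h>

/* Same shape as the converted dump_exec_info(): append into a GString. */
static void dump_stats(GString *buf, unsigned tb_count, unsigned flushes)
{
    g_string_append_printf(buf, "TB count             %u\n", tb_count);
    g_string_append_printf(buf, "TB flush count       %u\n", flushes);
}

int main(void)
{
    GString *buf = g_string_new("");

    dump_stats(buf, 1234, 5);
    fputs(buf->str, stdout);      /* a monitor/QMP caller would wrap this */
    g_string_free(buf, TRUE);
    return 0;
}
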
Return 0 if the fault was not handled, 1 if it was handled, * and 2 if it was handled but the caller must cause the TB to be diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index 4fd503b9ff2..60f136de60e 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -9,7 +9,6 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" -#include "cpu.h" #include "tcg/tcg.h" #include "tcg/tcg-op.h" #include "exec/exec-all.h" @@ -35,10 +34,30 @@ void translator_loop_temp_check(DisasContextBase *db) } } +bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) +{ + /* Suppress goto_tb if requested. */ + if (tb_cflags(db->tb) & CF_NO_GOTO_TB) { + return false; + } + + /* Check for the dest on the same page as the start of the TB. */ + return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0; +} + +static inline void translator_page_protect(DisasContextBase *dcbase, + target_ulong pc) +{ +#ifdef CONFIG_USER_ONLY + dcbase->page_protect_end = pc | ~TARGET_PAGE_MASK; + page_protect(pc); +#endif +} + void translator_loop(const TranslatorOps *ops, DisasContextBase *db, CPUState *cpu, TranslationBlock *tb, int max_insns) { - int bp_insn = 0; + uint32_t cflags = tb_cflags(tb); bool plugin_enabled; #ifdef CONFIG_TCG_LOG_INSTR /* @@ -56,10 +75,10 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, db->is_jmp = DISAS_NEXT; db->num_insns = 0; db->max_insns = max_insns; - db->singlestep_enabled = cpu->singlestep_enabled; + db->singlestep_enabled = cflags & CF_SINGLE_STEP; #ifdef TARGET_CHERI - db->pcc_base = tb->cs_base; - db->pcc_top = tb->cs_top; + db->pcc_base = tb->pcc_base; + db->pcc_top = tb->pcc_top; cheri_debug_assert(db->pcc_base == cap_get_base(cheri_get_recent_pcc(cpu->env_ptr))); cheri_debug_assert(db->pcc_top == @@ -68,6 +87,8 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, disas_capreg_reset_all(db); // TODO: verify cheri_flags are correct? #endif + translator_page_protect(db, db->pc_next); + ops->init_disas_context(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ #ifdef CONFIG_TCG_LOG_INSTR @@ -110,8 +131,7 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, #endif tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ - plugin_enabled = plugin_gen_tb_start(cpu, tb, - tb_cflags(db->tb) & CF_MEMI_ONLY); + plugin_enabled = plugin_gen_tb_start(cpu, tb, cflags & CF_MEMI_ONLY); while (true) { db->num_insns++; @@ -128,39 +148,17 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, plugin_gen_insn_start(cpu, db); } - /* Pass breakpoint hits to target for further processing */ - if (!db->singlestep_enabled - && unlikely(!QTAILQ_EMPTY(&cpu->breakpoints))) { - CPUBreakpoint *bp; - QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) { - if (bp->pc == db->pc_next) { - if (ops->breakpoint_check(db, cpu, bp)) { - bp_insn = 1; - break; - } - } - } - /* The breakpoint_check hook may use DISAS_TOO_MANY to indicate - that only one more instruction is to be executed. Otherwise - it should use DISAS_NORETURN when generating an exception, - but may use a DISAS_TARGET_* value for Something Else. */ - if (db->is_jmp > DISAS_TOO_MANY) { - break; - } - } - /* Disassemble one instruction. The translate_insn hook should update db->pc_next and db->is_jmp to indicate what should be done next -- either exiting this loop or locate the start of the next instruction. 
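
Note: translator_use_goto_tb(), added above, centralises the decision of whether a translated block may chain directly to its destination: never when CF_NO_GOTO_TB is set, and otherwise only when the destination lies on the same guest page as the start of the TB. A standalone illustration of the page test follows; the TARGET_PAGE_BITS value here is only an example.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TARGET_PAGE_BITS 12                        /* example: 4 KiB pages */
#define TARGET_PAGE_MASK ((uint64_t)-1 << TARGET_PAGE_BITS)

/* XOR keeps only the differing address bits, so masking with the page mask
 * yields zero exactly when both addresses fall on the same guest page. */
static bool same_guest_page(uint64_t pc_first, uint64_t dest)
{
    return ((pc_first ^ dest) & TARGET_PAGE_MASK) == 0;
}

int main(void)
{
    printf("%d\n", same_guest_page(0x1000, 0x1ffc));   /* 1: same page     */
    printf("%d\n", same_guest_page(0x1ffc, 0x2000));   /* 0: page crossed  */
    return 0;
}
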
*/ - if (db->num_insns == db->max_insns - && (tb_cflags(db->tb) & CF_LAST_IO)) { + if (db->num_insns == db->max_insns && (cflags & CF_LAST_IO)) { /* Accept I/O on the last instruction. */ gen_io_start(); ops->translate_insn(db, cpu); } else { /* we should only see CF_MEMI_ONLY for io_recompile */ - tcg_debug_assert(!(tb_cflags(db->tb) & CF_MEMI_ONLY)); + tcg_debug_assert(!(cflags & CF_MEMI_ONLY)); ops->translate_insn(db, cpu); } @@ -209,7 +207,7 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, /* Emit code to exit the TB, as indicated by db->is_jmp. */ ops->tb_stop(db, cpu); - gen_tb_end(db->tb, db->num_insns - bp_insn); + gen_tb_end(db->tb, db->num_insns); if (plugin_enabled) { plugin_gen_tb_end(cpu); @@ -230,3 +228,32 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, } #endif } + +static inline void translator_maybe_page_protect(DisasContextBase *dcbase, + target_ulong pc, size_t len) +{ +#ifdef CONFIG_USER_ONLY + target_ulong end = pc + len - 1; + + if (end > dcbase->page_protect_end) { + translator_page_protect(dcbase, end); + } +#endif +} + +#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn) \ + type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \ + abi_ptr pc, bool do_swap) \ + { \ + translator_maybe_page_protect(dcbase, pc, sizeof(type)); \ + type ret = load_fn(env, pc); \ + if (do_swap) { \ + ret = swap_fn(ret); \ + } \ + plugin_insn_append(pc, &ret, sizeof(ret)); \ + return ret; \ + } + +FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD) + +#undef GEN_TRANSLATOR_LD diff --git a/accel/tcg/user-exec-stub.c b/accel/tcg/user-exec-stub.c index b876f5c1e45..968cd3ca60d 100644 --- a/accel/tcg/user-exec-stub.c +++ b/accel/tcg/user-exec-stub.c @@ -1,7 +1,6 @@ #include "qemu/osdep.h" #include "hw/core/cpu.h" #include "sysemu/replay.h" -#include "sysemu/sysemu.h" bool enable_cpu_pm = false; diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index f96fd31c61b..8569a431cac 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -17,7 +17,6 @@ * License along with this library; if not, see . */ #include "qemu/osdep.h" -#include "cpu.h" #include "hw/core/tcg-cpu-ops.h" #include "disas/disas.h" #include "exec/exec-all.h" @@ -28,48 +27,18 @@ #include "exec/helper-proto.h" #include "qemu/atomic128.h" #include "trace/trace-root.h" -#include "trace/mem.h" - -#undef EAX -#undef ECX -#undef EDX -#undef EBX -#undef ESP -#undef EBP -#undef ESI -#undef EDI -#undef EIP -#ifdef __linux__ -#include -#endif +#include "tcg/tcg-ldst.h" +#include "internal.h" __thread uintptr_t helper_retaddr; //#define DEBUG_SIGNAL -/* exit the current TB from a signal handler. The host registers are - restored in a state compatible with the CPU emulator +/* + * Adjust the pc to pass to cpu_restore_state; return the memop type. */ -static void QEMU_NORETURN cpu_exit_tb_from_sighandler(CPUState *cpu, - sigset_t *old_set) -{ - /* XXX: use siglongjmp ? */ - sigprocmask(SIG_SETMASK, old_set, NULL); - cpu_loop_exit_noexc(cpu); -} - -/* 'pc' is the host PC at which the exception was raised. 'address' is - the effective address of the memory exception. 'is_write' is 1 if a - write caused the exception and otherwise 0'. 
'old_set' is the - signal set which should be restored */ -static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, - int is_write, sigset_t *old_set) +MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write) { - CPUState *cpu = current_cpu; - CPUClass *cc; - unsigned long address = (unsigned long)info->si_addr; - MMUAccessType access_type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD; - switch (helper_retaddr) { default: /* @@ -78,7 +47,7 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, * pointer into the generated code that will unwind to the * correct guest pc. */ - pc = helper_retaddr; + *pc = helper_retaddr; break; case 0: @@ -98,7 +67,7 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, * Therefore, adjust to compensate for what will be done later * by cpu_restore_state_from_tb. */ - pc += GETPC_ADJ; + *pc += GETPC_ADJ; break; case 1: @@ -114,118 +83,97 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, * * Like tb_gen_code, release the memory lock before cpu_loop_exit. */ - pc = 0; - access_type = MMU_INST_FETCH; mmap_unlock(); - break; + *pc = 0; + return MMU_INST_FETCH; } - /* For synchronous signals we expect to be coming from the vCPU - * thread (so current_cpu should be valid) and either from running - * code or during translation which can fault as we cross pages. - * - * If neither is true then something has gone wrong and we should - * abort rather than try and restart the vCPU execution. - */ - if (!cpu || !cpu->running) { - printf("qemu:%s received signal outside vCPU context @ pc=0x%" - PRIxPTR "\n", __func__, pc); - abort(); - } + return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD; +} -#if defined(DEBUG_SIGNAL) - printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n", - pc, address, is_write, *(unsigned long *)old_set); -#endif - /* XXX: locking issue */ - /* Note that it is important that we don't call page_unprotect() unless - * this is really a "write to nonwriteable page" fault, because - * page_unprotect() assumes that if it is called for an access to - * a page that's writeable this means we had two threads racing and - * another thread got there first and already made the page writeable; - * so we will retry the access. If we were to call page_unprotect() - * for some other kind of fault that should really be passed to the - * guest, we'd end up in an infinite loop of retrying the faulting - * access. - */ - if (is_write && info->si_signo == SIGSEGV && info->si_code == SEGV_ACCERR && - h2g_valid(address)) { - switch (page_unprotect(h2g(address), pc)) { - case 0: - /* Fault not caused by a page marked unwritable to protect - * cached translations, must be the guest binary's problem. - */ - break; - case 1: - /* Fault caused by protection of cached translation; TBs - * invalidated, so resume execution. Retain helper_retaddr - * for a possible second fault. - */ - return 1; - case 2: - /* Fault caused by protection of cached translation, and the - * currently executing TB was modified and must be exited - * immediately. Clear helper_retaddr for next execution. 
- */ - clear_helper_retaddr(); - cpu_exit_tb_from_sighandler(cpu, old_set); - /* NORETURN */ - - default: - g_assert_not_reached(); - } +/** + * handle_sigsegv_accerr_write: + * @cpu: the cpu context + * @old_set: the sigset_t from the signal ucontext_t + * @host_pc: the host pc, adjusted for the signal + * @guest_addr: the guest address of the fault + * + * Return true if the write fault has been handled, and should be re-tried. + * + * Note that it is important that we don't call page_unprotect() unless + * this is really a "write to nonwriteable page" fault, because + * page_unprotect() assumes that if it is called for an access to + * a page that's writeable this means we had two threads racing and + * another thread got there first and already made the page writeable; + * so we will retry the access. If we were to call page_unprotect() + * for some other kind of fault that should really be passed to the + * guest, we'd end up in an infinite loop of retrying the faulting access. + */ +bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set, + uintptr_t host_pc, abi_ptr guest_addr) +{ + switch (page_unprotect(guest_addr, host_pc)) { + case 0: + /* + * Fault not caused by a page marked unwritable to protect + * cached translations, must be the guest binary's problem. + */ + return false; + case 1: + /* + * Fault caused by protection of cached translation; TBs + * invalidated, so resume execution. + */ + return true; + case 2: + /* + * Fault caused by protection of cached translation, and the + * currently executing TB was modified and must be exited immediately. + */ + sigprocmask(SIG_SETMASK, old_set, NULL); + cpu_loop_exit_noexc(cpu); + /* NORETURN */ + default: + g_assert_not_reached(); } - - /* Convert forcefully to guest address space, invalid addresses - are still valid segv ones */ - address = h2g_nocheck(address); - - /* - * There is no way the target can handle this other than raising - * an exception. Undo signal and retaddr state prior to longjmp. 
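
Note: the probe_access_internal() rewrite in this hunk maps the access type to a required page flag, consults the user-mode page table, and distinguishes an unmapped page (reported as SEGV_MAPERR) from a mapped page with insufficient permissions before raising the guest fault via cpu_loop_exit_sigsegv(). A standalone sketch of that decision logic; the flag names and values are illustrative rather than QEMU's.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative page-flag bits and access types (not QEMU's definitions). */
enum { PAGE_VALID = 1, PAGE_READ = 2, PAGE_WRITE_ORG = 4, PAGE_EXEC = 8 };
typedef enum { ACC_LOAD, ACC_STORE, ACC_FETCH } AccessType;

typedef struct {
    bool fault;     /* would raise a guest SIGSEGV            */
    bool maperr;    /* true: SEGV_MAPERR, false: SEGV_ACCERR  */
} ProbeResult;

static ProbeResult probe(int page_flags, AccessType type)
{
    int need = (type == ACC_STORE) ? PAGE_WRITE_ORG
             : (type == ACC_FETCH) ? PAGE_EXEC
                                   : PAGE_READ;

    if (page_flags & need) {
        return (ProbeResult){ .fault = false };      /* access allowed */
    }
    return (ProbeResult){ .fault = true,
                          .maperr = !(page_flags & PAGE_VALID) };
}

int main(void)
{
    /* Mapped read-only page, attempted store: ACCERR, not MAPERR. */
    ProbeResult r = probe(PAGE_VALID | PAGE_READ, ACC_STORE);
    printf("fault=%d maperr=%d\n", r.fault, r.maperr);
    return 0;
}
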
- */ - sigprocmask(SIG_SETMASK, old_set, NULL); - clear_helper_retaddr(); - - cc = CPU_GET_CLASS(cpu); - cc->tcg_ops->tlb_fill(cpu, address, 0, access_type, - MMU_USER_IDX, false, pc); - g_assert_not_reached(); } static QEMU_ALWAYS_INLINE int probe_access_internal(CPUArchState *env, target_ulong addr, int fault_size, MMUAccessType access_type, bool nonfault, uintptr_t ra) { - int flags; + int acc_flag; + bool maperr; switch (access_type) { case MMU_DATA_STORE: - flags = PAGE_WRITE; + acc_flag = PAGE_WRITE_ORG; break; case MMU_DATA_LOAD: - flags = PAGE_READ; + acc_flag = PAGE_READ; break; case MMU_INST_FETCH: - flags = PAGE_EXEC; + acc_flag = PAGE_EXEC; break; default: g_assert_not_reached(); } - if (!guest_addr_valid_untagged(addr) || - page_check_range(addr, 1, flags) < 0) { - if (nonfault) { - return TLB_INVALID_MASK; - } else { - CPUState *cpu = env_cpu(env); - CPUClass *cc = CPU_GET_CLASS(cpu); - cc->tcg_ops->tlb_fill(cpu, addr, fault_size, access_type, - MMU_USER_IDX, false, ra); - g_assert_not_reached(); + if (guest_addr_valid_untagged(addr)) { + int page_flags = page_get_flags(addr); + if (page_flags & acc_flag) { + return 0; /* success */ } + maperr = !(page_flags & PAGE_VALID); + } else { + maperr = true; } - return 0; + + if (nonfault) { + return TLB_INVALID_MASK; + } + + cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra); } int probe_access_flags(CPUArchState *env, target_ulong addr, @@ -254,919 +202,244 @@ probe_access_inlined(CPUArchState *env, target_ulong addr, int size, #include "probe-access.inc.c" -#if defined(__i386__) - -#if defined(__NetBSD__) -#include - -#define EIP_sig(context) ((context)->uc_mcontext.__gregs[_REG_EIP]) -#define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO]) -#define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR]) -#define MASK_sig(context) ((context)->uc_sigmask) -#elif defined(__FreeBSD__) || defined(__DragonFly__) -#include - -#define EIP_sig(context) (*((unsigned long *)&(context)->uc_mcontext.mc_eip)) -#define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno) -#define ERROR_sig(context) ((context)->uc_mcontext.mc_err) -#define MASK_sig(context) ((context)->uc_sigmask) -#elif defined(__OpenBSD__) -#define EIP_sig(context) ((context)->sc_eip) -#define TRAP_sig(context) ((context)->sc_trapno) -#define ERROR_sig(context) ((context)->sc_err) -#define MASK_sig(context) ((context)->sc_mask) -#else -#define EIP_sig(context) ((context)->uc_mcontext.gregs[REG_EIP]) -#define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO]) -#define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR]) -#define MASK_sig(context) ((context)->uc_sigmask) -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; -#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) - ucontext_t *uc = puc; -#elif defined(__OpenBSD__) - struct sigcontext *uc = puc; -#else - ucontext_t *uc = puc; -#endif - unsigned long pc; - int trapno; - -#ifndef REG_EIP -/* for glibc 2.1 */ -#define REG_EIP EIP -#define REG_ERR ERR -#define REG_TRAPNO TRAPNO -#endif - pc = EIP_sig(uc); - trapno = TRAP_sig(uc); - return handle_cpu_signal(pc, info, - trapno == 0xe ? 
(ERROR_sig(uc) >> 1) & 1 : 0, - &MASK_sig(uc)); -} - -#elif defined(__x86_64__) - -#ifdef __NetBSD__ -#define PC_sig(context) _UC_MACHINE_PC(context) -#define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO]) -#define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR]) -#define MASK_sig(context) ((context)->uc_sigmask) -#elif defined(__OpenBSD__) -#define PC_sig(context) ((context)->sc_rip) -#define TRAP_sig(context) ((context)->sc_trapno) -#define ERROR_sig(context) ((context)->sc_err) -#define MASK_sig(context) ((context)->sc_mask) -#elif defined(__FreeBSD__) || defined(__DragonFly__) -#include - -#define PC_sig(context) (*((unsigned long *)&(context)->uc_mcontext.mc_rip)) -#define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno) -#define ERROR_sig(context) ((context)->uc_mcontext.mc_err) -#define MASK_sig(context) ((context)->uc_sigmask) -#else -#define PC_sig(context) ((context)->uc_mcontext.gregs[REG_RIP]) -#define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO]) -#define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR]) -#define MASK_sig(context) ((context)->uc_sigmask) -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - unsigned long pc; -#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) - ucontext_t *uc = puc; -#elif defined(__OpenBSD__) - struct sigcontext *uc = puc; -#else - ucontext_t *uc = puc; -#endif - - pc = PC_sig(uc); - return handle_cpu_signal(pc, info, - TRAP_sig(uc) == 0xe ? (ERROR_sig(uc) >> 1) & 1 : 0, - &MASK_sig(uc)); -} - -#elif defined(_ARCH_PPC) +/* The softmmu versions of these helpers are in cputlb.c. */ -/*********************************************************************** - * signal context platform-specific definitions - * From Wine +/* + * Verify that we have passed the correct MemOp to the correct function. + * + * We could present one function to target code, and dispatch based on + * the MemOp, but so far we have worked hard to avoid an indirect function + * call along the memory path. 
*/ -#ifdef linux -/* All Registers access - only for local access */ -#define REG_sig(reg_name, context) \ - ((context)->uc_mcontext.regs->reg_name) -/* Gpr Registers access */ -#define GPR_sig(reg_num, context) REG_sig(gpr[reg_num], context) -/* Program counter */ -#define IAR_sig(context) REG_sig(nip, context) -/* Machine State Register (Supervisor) */ -#define MSR_sig(context) REG_sig(msr, context) -/* Count register */ -#define CTR_sig(context) REG_sig(ctr, context) -/* User's integer exception register */ -#define XER_sig(context) REG_sig(xer, context) -/* Link register */ -#define LR_sig(context) REG_sig(link, context) -/* Condition register */ -#define CR_sig(context) REG_sig(ccr, context) - -/* Float Registers access */ -#define FLOAT_sig(reg_num, context) \ - (((double *)((char *)((context)->uc_mcontext.regs + 48 * 4)))[reg_num]) -#define FPSCR_sig(context) \ - (*(int *)((char *)((context)->uc_mcontext.regs + (48 + 32 * 2) * 4))) -/* Exception Registers access */ -#define DAR_sig(context) REG_sig(dar, context) -#define DSISR_sig(context) REG_sig(dsisr, context) -#define TRAP_sig(context) REG_sig(trap, context) -#endif /* linux */ - -#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) -#include -#define IAR_sig(context) ((context)->uc_mcontext.mc_srr0) -#define MSR_sig(context) ((context)->uc_mcontext.mc_srr1) -#define CTR_sig(context) ((context)->uc_mcontext.mc_ctr) -#define XER_sig(context) ((context)->uc_mcontext.mc_xer) -#define LR_sig(context) ((context)->uc_mcontext.mc_lr) -#define CR_sig(context) ((context)->uc_mcontext.mc_cr) -/* Exception Registers access */ -#define DAR_sig(context) ((context)->uc_mcontext.mc_dar) -#define DSISR_sig(context) ((context)->uc_mcontext.mc_dsisr) -#define TRAP_sig(context) ((context)->uc_mcontext.mc_exc) -#endif /* __FreeBSD__|| __FreeBSD_kernel__ */ - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) +static void validate_memop(MemOpIdx oi, MemOp expected) { - siginfo_t *info = pinfo; -#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) - ucontext_t *uc = puc; -#else - ucontext_t *uc = puc; +#ifdef CONFIG_DEBUG_TCG + MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP); + assert(have == expected); #endif - unsigned long pc; - int is_write; - - pc = IAR_sig(uc); - is_write = 0; -#if 0 - /* ppc 4xx case */ - if (DSISR_sig(uc) & 0x00800000) { - is_write = 1; - } -#else - if (TRAP_sig(uc) != 0x400 && (DSISR_sig(uc) & 0x02000000)) { - is_write = 1; - } -#endif - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); } -#elif defined(__alpha__) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) +void helper_unaligned_ld(CPUArchState *env, target_ulong addr) { - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - uint32_t *pc = uc->uc_mcontext.sc_pc; - uint32_t insn = *pc; - int is_write = 0; - - /* XXX: need kernel patch to get write flag faster */ - switch (insn >> 26) { - case 0x0d: /* stw */ - case 0x0e: /* stb */ - case 0x0f: /* stq_u */ - case 0x24: /* stf */ - case 0x25: /* stg */ - case 0x26: /* sts */ - case 0x27: /* stt */ - case 0x2c: /* stl */ - case 0x2d: /* stq */ - case 0x2e: /* stl_c */ - case 0x2f: /* stq_c */ - is_write = 1; - } - - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); + cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC()); } -#elif defined(__sparc__) -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) +void helper_unaligned_st(CPUArchState *env, target_ulong addr) { - siginfo_t *info = pinfo; - int is_write; - uint32_t insn; -#if 
!defined(__arch64__) || defined(CONFIG_SOLARIS) - uint32_t *regs = (uint32_t *)(info + 1); - void *sigmask = (regs + 20); - /* XXX: is there a standard glibc define ? */ - unsigned long pc = regs[1]; -#else -#ifdef __linux__ - struct sigcontext *sc = puc; - unsigned long pc = sc->sigc_regs.tpc; - void *sigmask = (void *)sc->sigc_mask; -#elif defined(__OpenBSD__) - struct sigcontext *uc = puc; - unsigned long pc = uc->sc_pc; - void *sigmask = (void *)(long)uc->sc_mask; -#elif defined(__NetBSD__) - ucontext_t *uc = puc; - unsigned long pc = _UC_MACHINE_PC(uc); - void *sigmask = (void *)&uc->uc_sigmask; -#endif -#endif - - /* XXX: need kernel patch to get write flag faster */ - is_write = 0; - insn = *(uint32_t *)pc; - if ((insn >> 30) == 3) { - switch ((insn >> 19) & 0x3f) { - case 0x05: /* stb */ - case 0x15: /* stba */ - case 0x06: /* sth */ - case 0x16: /* stha */ - case 0x04: /* st */ - case 0x14: /* sta */ - case 0x07: /* std */ - case 0x17: /* stda */ - case 0x0e: /* stx */ - case 0x1e: /* stxa */ - case 0x24: /* stf */ - case 0x34: /* stfa */ - case 0x27: /* stdf */ - case 0x37: /* stdfa */ - case 0x26: /* stqf */ - case 0x36: /* stqfa */ - case 0x25: /* stfsr */ - case 0x3c: /* casa */ - case 0x3e: /* casxa */ - is_write = 1; - break; - } - } - return handle_cpu_signal(pc, info, is_write, sigmask); + cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC()); } -#elif defined(__arm__) - -#if defined(__NetBSD__) -#include -#include -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) +static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t ra, MMUAccessType type) { - siginfo_t *info = pinfo; -#if defined(__NetBSD__) - ucontext_t *uc = puc; - siginfo_t *si = pinfo; -#else - ucontext_t *uc = puc; -#endif - unsigned long pc; - uint32_t fsr; - int is_write; - -#if defined(__NetBSD__) - pc = uc->uc_mcontext.__gregs[_REG_R15]; -#elif defined(__GLIBC__) && (__GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ <= 3)) - pc = uc->uc_mcontext.gregs[R15]; -#else - pc = uc->uc_mcontext.arm_pc; -#endif - -#ifdef __NetBSD__ - fsr = si->si_trap; -#else - fsr = uc->uc_mcontext.error_code; -#endif - /* - * In the FSR, bit 11 is WnR, assuming a v6 or - * later processor. On v5 we will always report - * this as a read, which will fail later. 
- */ - is_write = extract32(fsr, 11, 1); - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} + MemOp mop = get_memop(oi); + int a_bits = get_alignment_bits(mop); + void *ret; -#elif defined(__aarch64__) - -#if defined(__NetBSD__) - -#include -#include - -int cpu_signal_handler(int host_signum, void *pinfo, void *puc) -{ - ucontext_t *uc = puc; - siginfo_t *si = pinfo; - unsigned long pc; - int is_write; - uint32_t esr; - - pc = uc->uc_mcontext.__gregs[_REG_PC]; - esr = si->si_trap; - - /* - * siginfo_t::si_trap is the ESR value, for data aborts ESR.EC - * is 0b10010x: then bit 6 is the WnR bit - */ - is_write = extract32(esr, 27, 5) == 0x12 && extract32(esr, 6, 1) == 1; - return handle_cpu_signal(pc, si, is_write, &uc->uc_sigmask); -} - -#else - -#ifndef ESR_MAGIC -/* Pre-3.16 kernel headers don't have these, so provide fallback definitions */ -#define ESR_MAGIC 0x45535201 -struct esr_context { - struct _aarch64_ctx head; - uint64_t esr; -}; -#endif - -static inline struct _aarch64_ctx *first_ctx(ucontext_t *uc) -{ - return (struct _aarch64_ctx *)&uc->uc_mcontext.__reserved; -} - -static inline struct _aarch64_ctx *next_ctx(struct _aarch64_ctx *hdr) -{ - return (struct _aarch64_ctx *)((char *)hdr + hdr->size); -} - -int cpu_signal_handler(int host_signum, void *pinfo, void *puc) -{ - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - uintptr_t pc = uc->uc_mcontext.pc; - bool is_write; - struct _aarch64_ctx *hdr; - struct esr_context const *esrctx = NULL; - - /* Find the esr_context, which has the WnR bit in it */ - for (hdr = first_ctx(uc); hdr->magic; hdr = next_ctx(hdr)) { - if (hdr->magic == ESR_MAGIC) { - esrctx = (struct esr_context const *)hdr; - break; - } + /* Enforce guest required alignment. */ + if (unlikely(addr & ((1 << a_bits) - 1))) { + cpu_loop_exit_sigbus(env_cpu(env), addr, type, ra); } - if (esrctx) { - /* For data aborts ESR.EC is 0b10010x: then bit 6 is the WnR bit */ - uint64_t esr = esrctx->esr; - is_write = extract32(esr, 27, 5) == 0x12 && extract32(esr, 6, 1) == 1; - } else { - /* - * Fall back to parsing instructions; will only be needed - * for really ancient (pre-3.16) kernels. - */ - uint32_t insn = *(uint32_t *)pc; - - is_write = ((insn & 0xbfff0000) == 0x0c000000 /* C3.3.1 */ - || (insn & 0xbfe00000) == 0x0c800000 /* C3.3.2 */ - || (insn & 0xbfdf0000) == 0x0d000000 /* C3.3.3 */ - || (insn & 0xbfc00000) == 0x0d800000 /* C3.3.4 */ - || (insn & 0x3f400000) == 0x08000000 /* C3.3.6 */ - || (insn & 0x3bc00000) == 0x39000000 /* C3.3.13 */ - || (insn & 0x3fc00000) == 0x3d800000 /* ... 128bit */ - /* Ignore bits 10, 11 & 21, controlling indexing. */ - || (insn & 0x3bc00000) == 0x38000000 /* C3.3.8-12 */ - || (insn & 0x3fe00000) == 0x3c800000 /* ... 128bit */ - /* Ignore bits 23 & 24, controlling indexing. */ - || (insn & 0x3a400000) == 0x28000000); /* C3.3.7,14-16 */ - } - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} -#endif - -#elif defined(__s390__) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - unsigned long pc; - uint16_t *pinsn; - int is_write = 0; - - pc = uc->uc_mcontext.psw.addr; - - /* ??? On linux, the non-rt signal handler has 4 (!) arguments instead - of the normal 2 arguments. The 3rd argument contains the "int_code" - from the hardware which does in fact contain the is_write value. - The rt signal handler, as far as I can tell, does not give this value - at all. Not that we could get to it from here even if it were. */ - /* ??? 
This is not even close to complete, since it ignores all - of the read-modify-write instructions. */ - pinsn = (uint16_t *)pc; - switch (pinsn[0] >> 8) { - case 0x50: /* ST */ - case 0x42: /* STC */ - case 0x40: /* STH */ - is_write = 1; - break; - case 0xc4: /* RIL format insns */ - switch (pinsn[0] & 0xf) { - case 0xf: /* STRL */ - case 0xb: /* STGRL */ - case 0x7: /* STHRL */ - is_write = 1; - } - break; - case 0xe3: /* RXY format insns */ - switch (pinsn[2] & 0xff) { - case 0x50: /* STY */ - case 0x24: /* STG */ - case 0x72: /* STCY */ - case 0x70: /* STHY */ - case 0x8e: /* STPQ */ - case 0x3f: /* STRVH */ - case 0x3e: /* STRV */ - case 0x2f: /* STRVG */ - is_write = 1; - } - break; - } - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} - -#elif defined(__mips__) - -#if defined(__misp16) || defined(__mips_micromips) -#error "Unsupported encoding" -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - uintptr_t pc = uc->uc_mcontext.pc; - uint32_t insn = *(uint32_t *)pc; - int is_write = 0; - - /* Detect all store instructions at program counter. */ - switch((insn >> 26) & 077) { - case 050: /* SB */ - case 051: /* SH */ - case 052: /* SWL */ - case 053: /* SW */ - case 054: /* SDL */ - case 055: /* SDR */ - case 056: /* SWR */ - case 070: /* SC */ - case 071: /* SWC1 */ - case 074: /* SCD */ - case 075: /* SDC1 */ - case 077: /* SD */ -#if !defined(__mips_isa_rev) || __mips_isa_rev < 6 - case 072: /* SWC2 */ - case 076: /* SDC2 */ -#endif - is_write = 1; - break; - case 023: /* COP1X */ - /* Required in all versions of MIPS64 since - MIPS64r1 and subsequent versions of MIPS32r2. */ - switch (insn & 077) { - case 010: /* SWXC1 */ - case 011: /* SDXC1 */ - case 015: /* SUXC1 */ - is_write = 1; - } - break; - } - - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} - -#elif defined(__riscv) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - greg_t pc = uc->uc_mcontext.__gregs[REG_PC]; - uint32_t insn = *(uint32_t *)pc; - int is_write = 0; - - /* Detect store by reading the instruction at the program - counter. Note: we currently only generate 32-bit - instructions so we thus only detect 32-bit stores */ - switch (((insn >> 0) & 0b11)) { - case 3: - switch (((insn >> 2) & 0b11111)) { - case 8: - switch (((insn >> 12) & 0b111)) { - case 0: /* sb */ - case 1: /* sh */ - case 2: /* sw */ - case 3: /* sd */ - case 4: /* sq */ - is_write = 1; - break; - default: - break; - } - break; - case 9: - switch (((insn >> 12) & 0b111)) { - case 2: /* fsw */ - case 3: /* fsd */ - case 4: /* fsq */ - is_write = 1; - break; - default: - break; - } - break; - default: - break; - } - } - - /* Check for compressed instructions */ - switch (((insn >> 13) & 0b111)) { - case 7: - switch (insn & 0b11) { - case 0: /*c.sd */ - case 2: /* c.sdsp */ - is_write = 1; - break; - default: - break; - } - break; - case 6: - switch (insn & 0b11) { - case 0: /* c.sw */ - case 3: /* c.swsp */ - is_write = 1; - break; - default: - break; - } - break; - default: - break; - } - - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} - -#else - -#error host CPU specific signal handler needed - -#endif - -/* The softmmu versions of these helpers are in cputlb.c. 
*/ - -uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr ptr) -{ - uint32_t ret; - uint16_t meminfo = trace_mem_get_info(MO_UB, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldub_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -int cpu_ldsb_data(CPUArchState *env, abi_ptr ptr) -{ - int ret; - uint16_t meminfo = trace_mem_get_info(MO_SB, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldsb_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr ptr) -{ - uint32_t ret; - uint16_t meminfo = trace_mem_get_info(MO_BEUW, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = lduw_be_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -int cpu_ldsw_be_data(CPUArchState *env, abi_ptr ptr) -{ - int ret; - uint16_t meminfo = trace_mem_get_info(MO_BESW, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldsw_be_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr) -{ - uint32_t ret; - uint16_t meminfo = trace_mem_get_info(MO_BEUL, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldl_be_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr) -{ - uint64_t ret; - uint16_t meminfo = trace_mem_get_info(MO_BEQ, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldq_be_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr ptr) -{ - uint32_t ret; - uint16_t meminfo = trace_mem_get_info(MO_LEUW, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = lduw_le_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); + ret = g2h(env_cpu(env), addr); + set_helper_retaddr(ra); return ret; } -int cpu_ldsw_le_data(CPUArchState *env, abi_ptr ptr) +uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - int ret; - uint16_t meminfo = trace_mem_get_info(MO_LESW, MMU_USER_IDX, false); + void *haddr; + uint8_t ret; - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldsw_le_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr) -{ - uint32_t ret; - uint16_t meminfo = trace_mem_get_info(MO_LEUL, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldl_le_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr ptr) -{ - uint64_t ret; - uint16_t meminfo = trace_mem_get_info(MO_LEQ, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldq_le_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) -{ - uint32_t ret; - - set_helper_retaddr(retaddr); - ret = cpu_ldub_data(env, ptr); + 
validate_memop(oi, MO_UB); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + ret = ldub_p(haddr); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } -int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) +uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - int ret; + void *haddr; + uint16_t ret; - set_helper_retaddr(retaddr); - ret = cpu_ldsb_data(env, ptr); + validate_memop(oi, MO_BEUW); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + ret = lduw_be_p(haddr); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } -uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) +uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { + void *haddr; uint32_t ret; - set_helper_retaddr(retaddr); - ret = cpu_lduw_be_data(env, ptr); + validate_memop(oi, MO_BEUL); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + ret = ldl_be_p(haddr); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } -int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) -{ - int ret; - - set_helper_retaddr(retaddr); - ret = cpu_ldsw_be_data(env, ptr); - clear_helper_retaddr(); - return ret; -} - -uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) -{ - uint32_t ret; - - set_helper_retaddr(retaddr); - ret = cpu_ldl_be_data(env, ptr); - clear_helper_retaddr(); - return ret; -} - -uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) +uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { + void *haddr; uint64_t ret; - set_helper_retaddr(retaddr); - ret = cpu_ldq_be_data(env, ptr); - clear_helper_retaddr(); - return ret; -} - -uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) -{ - uint32_t ret; - - set_helper_retaddr(retaddr); - ret = cpu_lduw_le_data(env, ptr); + validate_memop(oi, MO_BEQ); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + ret = ldq_be_p(haddr); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } -int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) +uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - int ret; + void *haddr; + uint16_t ret; - set_helper_retaddr(retaddr); - ret = cpu_ldsw_le_data(env, ptr); + validate_memop(oi, MO_LEUW); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + ret = lduw_le_p(haddr); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } -uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) +uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { + void *haddr; uint32_t ret; - set_helper_retaddr(retaddr); - ret = cpu_ldl_le_data(env, ptr); + validate_memop(oi, MO_LEUL); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + ret = ldl_le_p(haddr); clear_helper_retaddr(); + 
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } -uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) +uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { + void *haddr; uint64_t ret; - set_helper_retaddr(retaddr); - ret = cpu_ldq_le_data(env, ptr); + validate_memop(oi, MO_LEQ); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + ret = ldq_le_p(haddr); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } -void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val) +void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val, + MemOpIdx oi, uintptr_t ra) { - uint16_t meminfo = trace_mem_get_info(MO_UB, MMU_USER_IDX, true); + void *haddr; - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - stb_p(g2h(env_cpu(env), ptr), val); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); -} - -void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val) -{ - uint16_t meminfo = trace_mem_get_info(MO_BEUW, MMU_USER_IDX, true); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - stw_be_p(g2h(env_cpu(env), ptr), val); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); -} - -void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val) -{ - uint16_t meminfo = trace_mem_get_info(MO_BEUL, MMU_USER_IDX, true); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - stl_be_p(g2h(env_cpu(env), ptr), val); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); -} - -void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val) -{ - uint16_t meminfo = trace_mem_get_info(MO_BEQ, MMU_USER_IDX, true); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - stq_be_p(g2h(env_cpu(env), ptr), val); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); + validate_memop(oi, MO_UB); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + stb_p(haddr, val); + clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } -void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val) +void cpu_stw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t val, + MemOpIdx oi, uintptr_t ra) { - uint16_t meminfo = trace_mem_get_info(MO_LEUW, MMU_USER_IDX, true); + void *haddr; - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - stw_le_p(g2h(env_cpu(env), ptr), val); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); + validate_memop(oi, MO_BEUW); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + stw_be_p(haddr, val); + clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } -void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val) +void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val, + MemOpIdx oi, uintptr_t ra) { - uint16_t meminfo = trace_mem_get_info(MO_LEUL, MMU_USER_IDX, true); + void *haddr; - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - stl_le_p(g2h(env_cpu(env), ptr), val); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); + validate_memop(oi, MO_BEUL); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + stl_be_p(haddr, val); + clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } -void 
cpu_stq_le_data(CPUArchState *env, abi_ptr ptr, uint64_t val) +void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val, + MemOpIdx oi, uintptr_t ra) { - uint16_t meminfo = trace_mem_get_info(MO_LEQ, MMU_USER_IDX, true); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - stq_le_p(g2h(env_cpu(env), ptr), val); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); -} + void *haddr; -void cpu_stb_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr) -{ - set_helper_retaddr(retaddr); - cpu_stb_data(env, ptr, val); + validate_memop(oi, MO_BEQ); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + stq_be_p(haddr, val); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } -void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stw_le_mmu(CPUArchState *env, abi_ptr addr, uint16_t val, + MemOpIdx oi, uintptr_t ra) { - set_helper_retaddr(retaddr); - cpu_stw_be_data(env, ptr, val); - clear_helper_retaddr(); -} + void *haddr; -void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr) -{ - set_helper_retaddr(retaddr); - cpu_stl_be_data(env, ptr, val); + validate_memop(oi, MO_LEUW); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + stw_le_p(haddr, val); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } -void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr ptr, - uint64_t val, uintptr_t retaddr) +void cpu_stl_le_mmu(CPUArchState *env, abi_ptr addr, uint32_t val, + MemOpIdx oi, uintptr_t ra) { - set_helper_retaddr(retaddr); - cpu_stq_be_data(env, ptr, val); - clear_helper_retaddr(); -} + void *haddr; -void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr) -{ - set_helper_retaddr(retaddr); - cpu_stw_le_data(env, ptr, val); + validate_memop(oi, MO_LEUL); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + stl_le_p(haddr, val); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } -void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val, + MemOpIdx oi, uintptr_t ra) { - set_helper_retaddr(retaddr); - cpu_stl_le_data(env, ptr, val); - clear_helper_retaddr(); -} + void *haddr; -void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr ptr, - uint64_t val, uintptr_t retaddr) -{ - set_helper_retaddr(retaddr); - cpu_stq_le_data(env, ptr, val); + validate_memop(oi, MO_LEQ); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + stq_le_p(haddr, val); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr) @@ -1209,29 +482,48 @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr) return ret; } -/* Do not allow unaligned operations to proceed. Return the host address. */ +#include "ldst_common.c.inc" + +/* + * Do not allow unaligned operations to proceed. Return the host address. + * + * @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE. 
+ */ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, - int size, uintptr_t retaddr) + MemOpIdx oi, int size, int prot, + uintptr_t retaddr) { + MemOp mop = get_memop(oi); + int a_bits = get_alignment_bits(mop); + void *ret; + + /* Enforce guest required alignment. */ + if (unlikely(addr & ((1 << a_bits) - 1))) { + MMUAccessType t = prot == PAGE_READ ? MMU_DATA_LOAD : MMU_DATA_STORE; + cpu_loop_exit_sigbus(env_cpu(env), addr, t, retaddr); + } + /* Enforce qemu required alignment. */ if (unlikely(addr & (size - 1))) { cpu_loop_exit_atomic(env_cpu(env), retaddr); } - void *ret = g2h(env_cpu(env), addr); + + ret = g2h(env_cpu(env), addr); set_helper_retaddr(retaddr); return ret; } -/* Macro to call the above, with local variables from the use context. */ -#define ATOMIC_MMU_DECLS do {} while (0) -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC()) -#define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0) -#define ATOMIC_MMU_IDX MMU_USER_IDX +#include "atomic_common.c.inc" -#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) -#define EXTRA_ARGS +/* + * First set of functions passes in OI and RETADDR. + * This makes them callable from other helpers. + */ -#include "atomic_common.c.inc" +#define ATOMIC_NAME(X) \ + glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu) +#define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0) +#define ATOMIC_MMU_IDX MMU_USER_IDX #define DATA_SIZE 1 #include "atomic_template.h" @@ -1247,20 +539,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, #include "atomic_template.h" #endif -/* The following is only callable from other helpers, and matches up - with the softmmu version. */ - #if HAVE_ATOMIC128 || HAVE_CMPXCHG128 - -#undef EXTRA_ARGS -#undef ATOMIC_NAME -#undef ATOMIC_MMU_LOOKUP - -#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr -#define ATOMIC_NAME(X) \ - HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, retaddr) - #define DATA_SIZE 16 #include "atomic_template.h" #endif diff --git a/audio/alsaaudio.c b/audio/alsaaudio.c index fcc2f62864f..2b9789e6477 100644 --- a/audio/alsaaudio.c +++ b/audio/alsaaudio.c @@ -34,6 +34,8 @@ #define AUDIO_CAP "alsa" #include "audio_int.h" +#define DEBUG_ALSA 0 + struct pollhlp { snd_pcm_t *handle; struct pollfd *pfds; @@ -587,16 +589,12 @@ static int alsa_open(bool in, struct alsa_params_req *req, *handlep = handle; - if (obtfmt != req->fmt || - obt->nchannels != req->nchannels || - obt->freq != req->freq) { + if (DEBUG_ALSA || obtfmt != req->fmt || + obt->nchannels != req->nchannels || obt->freq != req->freq) { dolog ("Audio parameters for %s\n", typ); alsa_dump_info(req, obt, obtfmt, apdo); } -#ifdef DEBUG - alsa_dump_info(req, obt, obtfmt, apdo); -#endif return 0; err: diff --git a/audio/audio.c b/audio/audio.c index 534278edfed..54a153c0ef0 100644 --- a/audio/audio.c +++ b/audio/audio.c @@ -32,6 +32,7 @@ #include "qapi/qapi-visit-audio.h" #include "qemu/cutils.h" #include "qemu/module.h" +#include "qemu-common.h" #include "sysemu/replay.h" #include "sysemu/runstate.h" #include "ui/qemu-spice.h" @@ -704,7 +705,7 @@ static size_t audio_pcm_sw_write(SWVoiceOut *sw, void *buf, size_t size) if (live == hwsamples) { #ifdef DEBUG_OUT - dolog ("%s is full %d\n", sw->name, live); + dolog ("%s is full %zu\n", sw->name, live); #endif return 0; } @@ -994,7 +995,7 @@ static size_t audio_get_avail (SWVoiceIn *sw) } ldebug ( - "%s: get_avail live %d ret 
%" PRId64 "\n", + "%s: get_avail live %zu ret %" PRId64 "\n", SW_NAME (sw), live, (((int64_t) live << 32) / sw->ratio) * sw->info.bytes_per_frame ); @@ -1021,7 +1022,7 @@ static size_t audio_get_free(SWVoiceOut *sw) dead = sw->hw->mix_buf->size - live; #ifdef DEBUG_OUT - dolog ("%s: get_free live %d dead %d ret %" PRId64 "\n", + dolog ("%s: get_free live %zu dead %zu ret %" PRId64 "\n", SW_NAME (sw), live, dead, (((int64_t) dead << 32) / sw->ratio) * sw->info.bytes_per_frame); @@ -1621,10 +1622,20 @@ void audio_cleanup(void) } } +static bool vmstate_audio_needed(void *opaque) +{ + /* + * Never needed, this vmstate only exists in case + * an old qemu sends it to us. + */ + return false; +} + static const VMStateDescription vmstate_audio = { .name = "audio", .version_id = 1, .minimum_version_id = 1, + .needed = vmstate_audio_needed, .fields = (VMStateField[]) { VMSTATE_END_OF_LIST() } @@ -2172,6 +2183,14 @@ const char *audio_get_id(QEMUSoundCard *card) } } +const char *audio_application_name(void) +{ + const char *vm_name; + + vm_name = qemu_get_vm_name(); + return vm_name ? vm_name : "qemu"; +} + void audio_rate_start(RateCtl *rate) { memset(rate, 0, sizeof(RateCtl)); diff --git a/audio/audio_int.h b/audio/audio_int.h index 06f0913835b..6d685e24a38 100644 --- a/audio/audio_int.h +++ b/audio/audio_int.h @@ -243,6 +243,8 @@ void *audio_calloc (const char *funcname, int nmemb, size_t size); void audio_run(AudioState *s, const char *msg); +const char *audio_application_name(void); + typedef struct RateCtl { int64_t start_ticks; int64_t bytes_sent; diff --git a/audio/coreaudio.c b/audio/coreaudio.c index 578ec9b8b2e..d8a21d3e507 100644 --- a/audio/coreaudio.c +++ b/audio/coreaudio.c @@ -26,6 +26,7 @@ #include #include /* pthread_X */ +#include "qemu/main-loop.h" #include "qemu/module.h" #include "audio.h" @@ -34,12 +35,11 @@ typedef struct coreaudioVoiceOut { HWVoiceOut hw; - pthread_mutex_t mutex; + pthread_mutex_t buf_mutex; AudioDeviceID outputDeviceID; int frameSizeSetting; uint32_t bufferCount; UInt32 audioDevicePropertyBufferFrameSize; - AudioStreamBasicDescription outputStreamBasicDescription; AudioDeviceIOProcID ioprocid; bool enabled; } coreaudioVoiceOut; @@ -114,24 +114,6 @@ static OSStatus coreaudio_set_framesize(AudioDeviceID id, UInt32 *framesize) framesize); } -static OSStatus coreaudio_get_streamformat(AudioDeviceID id, - AudioStreamBasicDescription *d) -{ - UInt32 size = sizeof(*d); - AudioObjectPropertyAddress addr = { - kAudioDevicePropertyStreamFormat, - kAudioDevicePropertyScopeOutput, - kAudioObjectPropertyElementMaster - }; - - return AudioObjectGetPropertyData(id, - &addr, - 0, - NULL, - &size, - d); -} - static OSStatus coreaudio_set_streamformat(AudioDeviceID id, AudioStreamBasicDescription *d) { @@ -260,11 +242,11 @@ static void GCC_FMT_ATTR (3, 4) coreaudio_logerr2 ( #define coreaudio_playback_logerr(status, ...) 
\ coreaudio_logerr2(status, "playback", __VA_ARGS__) -static int coreaudio_lock (coreaudioVoiceOut *core, const char *fn_name) +static int coreaudio_buf_lock (coreaudioVoiceOut *core, const char *fn_name) { int err; - err = pthread_mutex_lock (&core->mutex); + err = pthread_mutex_lock (&core->buf_mutex); if (err) { dolog ("Could not lock voice for %s\nReason: %s\n", fn_name, strerror (err)); @@ -273,11 +255,11 @@ static int coreaudio_lock (coreaudioVoiceOut *core, const char *fn_name) return 0; } -static int coreaudio_unlock (coreaudioVoiceOut *core, const char *fn_name) +static int coreaudio_buf_unlock (coreaudioVoiceOut *core, const char *fn_name) { int err; - err = pthread_mutex_unlock (&core->mutex); + err = pthread_mutex_unlock (&core->buf_mutex); if (err) { dolog ("Could not unlock voice for %s\nReason: %s\n", fn_name, strerror (err)); @@ -292,13 +274,13 @@ static int coreaudio_unlock (coreaudioVoiceOut *core, const char *fn_name) coreaudioVoiceOut *core = (coreaudioVoiceOut *) hw; \ ret_type ret; \ \ - if (coreaudio_lock(core, "coreaudio_" #name)) { \ + if (coreaudio_buf_lock(core, "coreaudio_" #name)) { \ return 0; \ } \ \ ret = glue(audio_generic_, name)args; \ \ - coreaudio_unlock(core, "coreaudio_" #name); \ + coreaudio_buf_unlock(core, "coreaudio_" #name); \ return ret; \ } COREAUDIO_WRAPPER_FUNC(get_buffer_out, void *, (HWVoiceOut *hw, size_t *size), @@ -310,7 +292,10 @@ COREAUDIO_WRAPPER_FUNC(write, size_t, (HWVoiceOut *hw, void *buf, size_t size), (hw, buf, size)) #undef COREAUDIO_WRAPPER_FUNC -/* callback to feed audiooutput buffer */ +/* + * callback to feed audiooutput buffer. called without iothread lock. + * allowed to lock "buf_mutex", but disallowed to have any other locks. + */ static OSStatus audioDeviceIOProc( AudioDeviceID inDevice, const AudioTimeStamp *inNow, @@ -326,13 +311,13 @@ static OSStatus audioDeviceIOProc( coreaudioVoiceOut *core = (coreaudioVoiceOut *) hwptr; size_t len; - if (coreaudio_lock (core, "audioDeviceIOProc")) { + if (coreaudio_buf_lock (core, "audioDeviceIOProc")) { inInputTime = 0; return 0; } if (inDevice != core->outputDeviceID) { - coreaudio_unlock (core, "audioDeviceIOProc(old device)"); + coreaudio_buf_unlock (core, "audioDeviceIOProc(old device)"); return 0; } @@ -342,7 +327,7 @@ static OSStatus audioDeviceIOProc( /* if there are not enough samples, set signal and return */ if (pending_frames < frameCount) { inInputTime = 0; - coreaudio_unlock (core, "audioDeviceIOProc(empty)"); + coreaudio_buf_unlock (core, "audioDeviceIOProc(empty)"); return 0; } @@ -364,7 +349,7 @@ static OSStatus audioDeviceIOProc( out += write_len; } - coreaudio_unlock (core, "audioDeviceIOProc"); + coreaudio_buf_unlock (core, "audioDeviceIOProc"); return 0; } @@ -373,6 +358,17 @@ static OSStatus init_out_device(coreaudioVoiceOut *core) OSStatus status; AudioValueRange frameRange; + AudioStreamBasicDescription streamBasicDescription = { + .mBitsPerChannel = core->hw.info.bits, + .mBytesPerFrame = core->hw.info.bytes_per_frame, + .mBytesPerPacket = core->hw.info.bytes_per_frame, + .mChannelsPerFrame = core->hw.info.nchannels, + .mFormatFlags = kLinearPCMFormatFlagIsFloat, + .mFormatID = kAudioFormatLinearPCM, + .mFramesPerPacket = 1, + .mSampleRate = core->hw.info.freq + }; + status = coreaudio_get_voice(&core->outputDeviceID); if (status != kAudioHardwareNoError) { coreaudio_playback_logerr (status, @@ -432,34 +428,30 @@ static OSStatus init_out_device(coreaudioVoiceOut *core) } core->hw.samples = core->bufferCount * core->audioDevicePropertyBufferFrameSize; - /* 
get StreamFormat */ - status = coreaudio_get_streamformat(core->outputDeviceID, - &core->outputStreamBasicDescription); - if (status == kAudioHardwareBadObjectError) { - return 0; - } - if (status != kAudioHardwareNoError) { - coreaudio_playback_logerr (status, - "Could not get Device Stream properties\n"); - core->outputDeviceID = kAudioDeviceUnknown; - return status; - } - /* set Samplerate */ status = coreaudio_set_streamformat(core->outputDeviceID, - &core->outputStreamBasicDescription); + &streamBasicDescription); if (status == kAudioHardwareBadObjectError) { return 0; } if (status != kAudioHardwareNoError) { coreaudio_playback_logerr (status, "Could not set samplerate %lf\n", - core->outputStreamBasicDescription.mSampleRate); + streamBasicDescription.mSampleRate); core->outputDeviceID = kAudioDeviceUnknown; return status; } - /* set Callback */ + /* + * set Callback. + * + * On macOS 11.3.1, Core Audio calls AudioDeviceIOProc after calling an + * internal function named HALB_Mutex::Lock(), which locks a mutex in + * HALB_IOThread::Entry(void*). HALB_Mutex::Lock() is also called in + * AudioObjectGetPropertyData, which is called by coreaudio driver. + * Therefore, the specified callback must be designed to avoid a deadlock + * with the callers of AudioObjectGetPropertyData. + */ core->ioprocid = NULL; status = AudioDeviceCreateIOProcID(core->outputDeviceID, audioDeviceIOProc, @@ -542,6 +534,7 @@ static void update_device_playback_state(coreaudioVoiceOut *core) } } +/* called without iothread lock. */ static OSStatus handle_voice_change( AudioObjectID in_object_id, UInt32 in_number_addresses, @@ -551,9 +544,7 @@ static OSStatus handle_voice_change( OSStatus status; coreaudioVoiceOut *core = in_client_data; - if (coreaudio_lock(core, __func__)) { - abort(); - } + qemu_mutex_lock_iothread(); if (core->outputDeviceID) { fini_out_device(core); @@ -564,7 +555,7 @@ static OSStatus handle_voice_change( update_device_playback_state(core); } - coreaudio_unlock (core, __func__); + qemu_mutex_unlock_iothread(); return status; } @@ -579,14 +570,10 @@ static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as, struct audsettings obt_as; /* create mutex */ - err = pthread_mutex_init(&core->mutex, NULL); + err = pthread_mutex_init(&core->buf_mutex, NULL); if (err) { dolog("Could not create mutex\nReason: %s\n", strerror (err)); - goto mutex_error; - } - - if (coreaudio_lock(core, __func__)) { - goto lock_error; + return -1; } obt_as = *as; @@ -598,7 +585,6 @@ static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as, qapi_AudiodevCoreaudioPerDirectionOptions_base(cpdo), as, 11610); core->bufferCount = cpdo->has_buffer_count ? 
cpdo->buffer_count : 4; - core->outputStreamBasicDescription.mSampleRate = (Float64) as->freq; status = AudioObjectAddPropertyListener(kAudioObjectSystemObject, &voice_addr, handle_voice_change, @@ -606,37 +592,21 @@ static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as, if (status != kAudioHardwareNoError) { coreaudio_playback_logerr (status, "Could not listen to voice property change\n"); - goto listener_error; + return -1; } if (init_out_device(core)) { - goto device_error; + status = AudioObjectRemovePropertyListener(kAudioObjectSystemObject, + &voice_addr, + handle_voice_change, + core); + if (status != kAudioHardwareNoError) { + coreaudio_playback_logerr(status, + "Could not remove voice property change listener\n"); + } } - coreaudio_unlock(core, __func__); return 0; - -device_error: - status = AudioObjectRemovePropertyListener(kAudioObjectSystemObject, - &voice_addr, - handle_voice_change, - core); - if (status != kAudioHardwareNoError) { - coreaudio_playback_logerr(status, - "Could not remove voice property change listener\n"); - } - -listener_error: - coreaudio_unlock(core, __func__); - -lock_error: - err = pthread_mutex_destroy(&core->mutex); - if (err) { - dolog("Could not destroy mutex\nReason: %s\n", strerror (err)); - } - -mutex_error: - return -1; } static void coreaudio_fini_out (HWVoiceOut *hw) @@ -645,10 +615,6 @@ static void coreaudio_fini_out (HWVoiceOut *hw) int err; coreaudioVoiceOut *core = (coreaudioVoiceOut *) hw; - if (coreaudio_lock(core, __func__)) { - abort(); - } - status = AudioObjectRemovePropertyListener(kAudioObjectSystemObject, &voice_addr, handle_voice_change, @@ -659,10 +625,8 @@ static void coreaudio_fini_out (HWVoiceOut *hw) fini_out_device(core); - coreaudio_unlock(core, __func__); - /* destroy mutex */ - err = pthread_mutex_destroy(&core->mutex); + err = pthread_mutex_destroy(&core->buf_mutex); if (err) { dolog("Could not destroy mutex\nReason: %s\n", strerror (err)); } @@ -672,14 +636,8 @@ static void coreaudio_enable_out(HWVoiceOut *hw, bool enable) { coreaudioVoiceOut *core = (coreaudioVoiceOut *) hw; - if (coreaudio_lock(core, __func__)) { - abort(); - } - core->enabled = enable; update_device_playback_state(core); - - coreaudio_unlock(core, __func__); } static void *coreaudio_audio_init(Audiodev *dev) diff --git a/audio/jackaudio.c b/audio/jackaudio.c index 3031c4e29bd..e7de6d5433e 100644 --- a/audio/jackaudio.c +++ b/audio/jackaudio.c @@ -26,7 +26,6 @@ #include "qemu/module.h" #include "qemu/atomic.h" #include "qemu/main-loop.h" -#include "qemu-common.h" #include "audio.h" #define AUDIO_CAP "jack" @@ -412,7 +411,7 @@ static int qjack_client_init(QJackClient *c) snprintf(client_name, sizeof(client_name), "%s-%s", c->out ? "out" : "in", - c->opt->client_name ? c->opt->client_name : qemu_get_vm_name()); + c->opt->client_name ? 
c->opt->client_name : audio_application_name()); if (c->opt->exact_name) { options |= JackUseExactName; diff --git a/audio/meson.build b/audio/meson.build index 7d53b0f920f..462533bb8c2 100644 --- a/audio/meson.build +++ b/audio/meson.build @@ -7,23 +7,22 @@ softmmu_ss.add(files( 'wavcapture.c', )) -softmmu_ss.add(when: [coreaudio, 'CONFIG_AUDIO_COREAUDIO'], if_true: files('coreaudio.c')) -softmmu_ss.add(when: [dsound, 'CONFIG_AUDIO_DSOUND'], if_true: files('dsoundaudio.c')) -softmmu_ss.add(when: ['CONFIG_AUDIO_WIN_INT'], if_true: files('audio_win_int.c')) +softmmu_ss.add(when: coreaudio, if_true: files('coreaudio.c')) +softmmu_ss.add(when: dsound, if_true: files('dsoundaudio.c', 'audio_win_int.c')) audio_modules = {} foreach m : [ - ['CONFIG_AUDIO_ALSA', 'alsa', alsa, 'alsaaudio.c'], - ['CONFIG_AUDIO_OSS', 'oss', oss, 'ossaudio.c'], - ['CONFIG_AUDIO_PA', 'pa', pulse, 'paaudio.c'], - ['CONFIG_AUDIO_SDL', 'sdl', sdl, 'sdlaudio.c'], - ['CONFIG_AUDIO_JACK', 'jack', jack, 'jackaudio.c'], - ['CONFIG_SPICE', 'spice', spice, 'spiceaudio.c'] + ['alsa', alsa, files('alsaaudio.c')], + ['oss', oss, files('ossaudio.c')], + ['pa', pulse, files('paaudio.c')], + ['sdl', sdl, files('sdlaudio.c')], + ['jack', jack, files('jackaudio.c')], + ['spice', spice, files('spiceaudio.c')] ] - if config_host.has_key(m[0]) + if m[1].found() module_ss = ss.source_set() - module_ss.add(when: m[2], if_true: files(m[3])) - audio_modules += {m[1] : module_ss} + module_ss.add(m[1], m[2]) + audio_modules += {m[0] : module_ss} endif endforeach diff --git a/audio/paaudio.c b/audio/paaudio.c index c97b22e970d..75401d53910 100644 --- a/audio/paaudio.c +++ b/audio/paaudio.c @@ -2,7 +2,6 @@ #include "qemu/osdep.h" #include "qemu/module.h" -#include "qemu-common.h" #include "audio.h" #include "qapi/opts-visitor.h" @@ -463,10 +462,7 @@ static pa_stream *qpa_simple_new ( pa_stream_set_state_callback(stream, stream_state_cb, c); - flags = - PA_STREAM_INTERPOLATE_TIMING - | PA_STREAM_AUTO_TIMING_UPDATE - | PA_STREAM_EARLY_REQUESTS; + flags = PA_STREAM_EARLY_REQUESTS; if (dev) { /* don't move the stream if the user specified a sink/source */ @@ -756,7 +752,6 @@ static int qpa_validate_per_direction_opts(Audiodev *dev, /* common */ static void *qpa_conn_init(const char *server) { - const char *vm_name; PAConnection *c = g_malloc0(sizeof(PAConnection)); QTAILQ_INSERT_TAIL(&pa_conns, c, list); @@ -765,9 +760,8 @@ static void *qpa_conn_init(const char *server) goto fail; } - vm_name = qemu_get_vm_name(); c->context = pa_context_new(pa_threaded_mainloop_get_api(c->mainloop), - vm_name ? vm_name : "qemu"); + audio_application_name()); if (!c->context) { goto fail; } diff --git a/audio/spiceaudio.c b/audio/spiceaudio.c index 999bfbde47c..a8d370fe6f3 100644 --- a/audio/spiceaudio.c +++ b/audio/spiceaudio.c @@ -317,3 +317,5 @@ static void register_audio_spice(void) audio_driver_register(&spice_audio_driver); } type_init(register_audio_spice); + +module_dep("ui-spice-core"); diff --git a/audio/trace-events b/audio/trace-events index 6aec5357638..957c92337be 100644 --- a/audio/trace-events +++ b/audio/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
+# See docs/devel/tracing.rst for syntax documentation.
# alsaaudio.c alsa_revents(int revents) "revents = %d" diff --git a/authz/meson.build b/authz/meson.build index 88fa7769cb1..42a1ec0ff62 100644 --- a/authz/meson.build +++ b/authz/meson.build @@ -6,4 +6,4 @@ authz_ss.add(files( 'simple.c', )) -authz_ss.add(when: ['CONFIG_AUTH_PAM', pam], if_true: files('pamacct.c')) +authz_ss.add(when: pam, if_true: files('pamacct.c')) diff --git a/authz/trace-events b/authz/trace-events index e62ebb36b7e..9c255dafb64 100644 --- a/authz/trace-events +++ b/authz/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # base.c qauthz_is_allowed(void *authz, const char *identity, bool allowed) "AuthZ %p check identity=%s allowed=%d" diff --git a/backends/cryptodev-vhost.c b/backends/cryptodev-vhost.c index 8231e7f1bca..bc13e466b4f 100644 --- a/backends/cryptodev-vhost.c +++ b/backends/cryptodev-vhost.c @@ -52,6 +52,7 @@ cryptodev_vhost_init( { int r; CryptoDevBackendVhost *crypto; + Error *local_err = NULL; crypto = g_new(CryptoDevBackendVhost, 1); crypto->dev.max_queues = 1; @@ -66,8 +67,10 @@ cryptodev_vhost_init( /* vhost-user needs vq_index to initiate a specific queue pair */ crypto->dev.vq_index = crypto->cc->queue_index * crypto->dev.nvqs; - r = vhost_dev_init(&crypto->dev, options->opaque, options->backend_type, 0); + r = vhost_dev_init(&crypto->dev, options->opaque, options->backend_type, 0, + &local_err); if (r < 0) { + error_report_err(local_err); goto fail; } diff --git a/backends/hostmem-epc.c b/backends/hostmem-epc.c new file mode 100644 index 00000000000..b47f98b6a3a --- /dev/null +++ b/backends/hostmem-epc.c @@ -0,0 +1,82 @@ +/* + * QEMU host SGX EPC memory backend + * + * Copyright (C) 2019 Intel Corporation + * + * Authors: + * Sean Christopherson + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qom/object_interfaces.h" +#include "qapi/error.h" +#include "sysemu/hostmem.h" +#include "hw/i386/hostmem-epc.h" + +static void +sgx_epc_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) +{ + uint32_t ram_flags; + char *name; + int fd; + + if (!backend->size) { + error_setg(errp, "can't create backend with size 0"); + return; + } + + fd = qemu_open_old("/dev/sgx_vepc", O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, + "failed to open /dev/sgx_vepc to alloc SGX EPC"); + return; + } + + name = object_get_canonical_path(OBJECT(backend)); + ram_flags = (backend->share ? 
RAM_SHARED : 0) | RAM_PROTECTED; + memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), + name, backend->size, ram_flags, + fd, 0, errp); + g_free(name); +} + +static void sgx_epc_backend_instance_init(Object *obj) +{ + HostMemoryBackend *m = MEMORY_BACKEND(obj); + + m->share = true; + m->merge = false; + m->dump = false; +} + +static void sgx_epc_backend_class_init(ObjectClass *oc, void *data) +{ + HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc); + + bc->alloc = sgx_epc_backend_memory_alloc; +} + +static const TypeInfo sgx_epc_backed_info = { + .name = TYPE_MEMORY_BACKEND_EPC, + .parent = TYPE_MEMORY_BACKEND, + .instance_init = sgx_epc_backend_instance_init, + .class_init = sgx_epc_backend_class_init, + .instance_size = sizeof(HostMemoryBackendEpc), +}; + +static void register_types(void) +{ + int fd = qemu_open_old("/dev/sgx_vepc", O_RDWR); + if (fd >= 0) { + close(fd); + + type_register_static(&sgx_epc_backed_info); + } +} + +type_init(register_types); diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index b683da9daf8..cd038024fae 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -15,7 +15,6 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "sysemu/hostmem.h" -#include "sysemu/sysemu.h" #include "qom/object_interfaces.h" #include "qom/object.h" @@ -40,6 +39,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) object_get_typename(OBJECT(backend))); #else HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(backend); + uint32_t ram_flags; gchar *name; if (!backend->size) { @@ -52,11 +52,11 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) } name = host_memory_backend_get_name(backend); - memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), - name, - backend->size, fb->align, - (backend->share ? RAM_SHARED : 0) | - (fb->is_pmem ? RAM_PMEM : 0), + ram_flags = backend->share ? RAM_SHARED : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; + ram_flags |= fb->is_pmem ? RAM_PMEM : 0; + memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name, + backend->size, fb->align, ram_flags, fb->mem_path, fb->readonly, errp); g_free(name); #endif diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c index 69b0ae30bb0..3fc85c3db81 100644 --- a/backends/hostmem-memfd.c +++ b/backends/hostmem-memfd.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "sysemu/hostmem.h" -#include "sysemu/sysemu.h" #include "qom/object_interfaces.h" #include "qemu/memfd.h" #include "qemu/module.h" @@ -36,6 +35,7 @@ static void memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) { HostMemoryBackendMemfd *m = MEMORY_BACKEND_MEMFD(backend); + uint32_t ram_flags; char *name; int fd; @@ -53,9 +53,10 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) } name = host_memory_backend_get_name(backend); - memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), - name, backend->size, - backend->share, fd, 0, errp); + ram_flags = backend->share ? RAM_SHARED : 0; + ram_flags |= backend->reserve ? 
0 : RAM_NORESERVE; + memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, + backend->size, ram_flags, fd, 0, errp); g_free(name); } diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c index 5cc53e76c9d..b8e55cdbd0f 100644 --- a/backends/hostmem-ram.c +++ b/backends/hostmem-ram.c @@ -19,6 +19,7 @@ static void ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) { + uint32_t ram_flags; char *name; if (!backend->size) { @@ -27,8 +28,10 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) } name = host_memory_backend_get_name(backend); - memory_region_init_ram_shared_nomigrate(&backend->mr, OBJECT(backend), name, - backend->size, backend->share, errp); + ram_flags = backend->share ? RAM_SHARED : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; + memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), name, + backend->size, ram_flags, errp); g_free(name); } diff --git a/backends/hostmem.c b/backends/hostmem.c index 984560ed343..e031c6208b8 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "sysemu/hostmem.h" -#include "sysemu/sysemu.h" #include "hw/boards.h" #include "qapi/error.h" #include "qapi/qapi-builtin-visit.h" @@ -217,6 +216,11 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value, Error *local_err = NULL; HostMemoryBackend *backend = MEMORY_BACKEND(obj); + if (!backend->reserve && value) { + error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible"); + return; + } + if (!host_memory_backend_mr_inited(backend)) { backend->prealloc = value; return; @@ -268,6 +272,7 @@ static void host_memory_backend_init(Object *obj) /* TODO: convert access to globals to compat properties */ backend->merge = machine_mem_merge(machine); backend->dump = machine_dump_guest_core(machine); + backend->reserve = true; backend->prealloc_threads = 1; } @@ -435,6 +440,30 @@ static void host_memory_backend_set_share(Object *o, bool value, Error **errp) backend->share = value; } +#ifdef CONFIG_LINUX +static bool host_memory_backend_get_reserve(Object *o, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(o); + + return backend->reserve; +} + +static void host_memory_backend_set_reserve(Object *o, bool value, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(o); + + if (host_memory_backend_mr_inited(backend)) { + error_setg(errp, "cannot change property value"); + return; + } + if (backend->prealloc && !value) { + error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible"); + return; + } + backend->reserve = value; +} +#endif /* CONFIG_LINUX */ + static bool host_memory_backend_get_use_canonical_path(Object *obj, Error **errp) { @@ -518,6 +547,12 @@ host_memory_backend_class_init(ObjectClass *oc, void *data) host_memory_backend_get_share, host_memory_backend_set_share); object_class_property_set_description(oc, "share", "Mark the memory as private to QEMU or shared"); +#ifdef CONFIG_LINUX + object_class_property_add_bool(oc, "reserve", + host_memory_backend_get_reserve, host_memory_backend_set_reserve); + object_class_property_set_description(oc, "reserve", + "Reserve swap space (or huge pages) if applicable"); +#endif /* CONFIG_LINUX */ /* * Do not delete/rename option. 
This option must be considered stable * (as if it didn't have the 'x-' prefix including deprecation period) as diff --git a/backends/meson.build b/backends/meson.build index d4221831fc3..6e689455280 100644 --- a/backends/meson.build +++ b/backends/meson.build @@ -16,5 +16,6 @@ softmmu_ss.add(when: ['CONFIG_VHOST_USER', 'CONFIG_VIRTIO'], if_true: files('vho softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c')) softmmu_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 'CONFIG_VHOST_CRYPTO'], if_true: files('cryptodev-vhost-user.c')) softmmu_ss.add(when: 'CONFIG_GIO', if_true: [files('dbus-vmstate.c'), gio]) +softmmu_ss.add(when: 'CONFIG_SGX', if_true: files('hostmem-epc.c')) subdir('tpm') diff --git a/backends/tpm/tpm_emulator.c b/backends/tpm/tpm_emulator.c index a012adc1934..87d061e9bbd 100644 --- a/backends/tpm/tpm_emulator.c +++ b/backends/tpm/tpm_emulator.c @@ -30,6 +30,7 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "qemu/sockets.h" +#include "qemu/lockable.h" #include "io/channel-socket.h" #include "sysemu/tpm_backend.h" #include "sysemu/tpm_util.h" @@ -124,31 +125,26 @@ static int tpm_emulator_ctrlcmd(TPMEmulator *tpm, unsigned long cmd, void *msg, uint32_t cmd_no = cpu_to_be32(cmd); ssize_t n = sizeof(uint32_t) + msg_len_in; uint8_t *buf = NULL; - int ret = -1; - qemu_mutex_lock(&tpm->mutex); + WITH_QEMU_LOCK_GUARD(&tpm->mutex) { + buf = g_alloca(n); + memcpy(buf, &cmd_no, sizeof(cmd_no)); + memcpy(buf + sizeof(cmd_no), msg, msg_len_in); - buf = g_alloca(n); - memcpy(buf, &cmd_no, sizeof(cmd_no)); - memcpy(buf + sizeof(cmd_no), msg, msg_len_in); - - n = qemu_chr_fe_write_all(dev, buf, n); - if (n <= 0) { - goto end; - } - - if (msg_len_out != 0) { - n = qemu_chr_fe_read_all(dev, msg, msg_len_out); + n = qemu_chr_fe_write_all(dev, buf, n); if (n <= 0) { - goto end; + return -1; } - } - ret = 0; + if (msg_len_out != 0) { + n = qemu_chr_fe_read_all(dev, msg, msg_len_out); + if (n <= 0) { + return -1; + } + } + } -end: - qemu_mutex_unlock(&tpm->mutex); - return ret; + return 0; } static int tpm_emulator_unix_tx_bufs(TPMEmulator *tpm_emu, @@ -496,8 +492,7 @@ static int tpm_emulator_block_migration(TPMEmulator *tpm_emu) error_setg(&tpm_emu->migration_blocker, "Migration disabled: TPM emulator does not support " "migration"); - migrate_add_blocker(tpm_emu->migration_blocker, &err); - if (err) { + if (migrate_add_blocker(tpm_emu->migration_blocker, &err) < 0) { error_report_err(err); error_free(tpm_emu->migration_blocker); tpm_emu->migration_blocker = NULL; @@ -628,7 +623,7 @@ static TpmTypeOptions *tpm_emulator_get_tpm_options(TPMBackend *tb) TPMEmulator *tpm_emu = TPM_EMULATOR(tb); TpmTypeOptions *options = g_new0(TpmTypeOptions, 1); - options->type = TPM_TYPE_OPTIONS_KIND_EMULATOR; + options->type = TPM_TYPE_EMULATOR; options->u.emulator.data = QAPI_CLONE(TPMEmulatorOptions, tpm_emu->options); return options; diff --git a/backends/tpm/tpm_passthrough.c b/backends/tpm/tpm_passthrough.c index 21b74591838..d5558fae6cc 100644 --- a/backends/tpm/tpm_passthrough.c +++ b/backends/tpm/tpm_passthrough.c @@ -321,7 +321,7 @@ static TpmTypeOptions *tpm_passthrough_get_tpm_options(TPMBackend *tb) { TpmTypeOptions *options = g_new0(TpmTypeOptions, 1); - options->type = TPM_TYPE_OPTIONS_KIND_PASSTHROUGH; + options->type = TPM_TYPE_PASSTHROUGH; options->u.passthrough.data = QAPI_CLONE(TPMPassthroughOptions, TPM_PASSTHROUGH(tb)->options); diff --git a/backends/tpm/trace-events b/backends/tpm/trace-events index 0a2591fb2d6..3298766dd79 100644 --- 
a/backends/tpm/trace-events +++ b/backends/tpm/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # tpm_passthrough.c tpm_passthrough_handle_request(void *cmd) "processing command %p" diff --git a/backends/trace-events b/backends/trace-events index 59058f76303..652eb76a572 100644 --- a/backends/trace-events +++ b/backends/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # dbus-vmstate.c dbus_vmstate_pre_save(void) diff --git a/backends/vhost-user.c b/backends/vhost-user.c index b366610e16e..10b39992d21 100644 --- a/backends/vhost-user.c +++ b/backends/vhost-user.c @@ -48,9 +48,9 @@ vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev, b->dev.nvqs = nvqs; b->dev.vqs = g_new0(struct vhost_virtqueue, nvqs); - ret = vhost_dev_init(&b->dev, &b->vhost_user, VHOST_BACKEND_TYPE_USER, 0); + ret = vhost_dev_init(&b->dev, &b->vhost_user, VHOST_BACKEND_TYPE_USER, 0, + errp); if (ret < 0) { - error_setg_errno(errp, -ret, "vhost initialization failed"); return -1; } diff --git a/block.c b/block.c index c5b887cec19..0ac5b163d2a 100644 --- a/block.c +++ b/block.c @@ -2,6 +2,7 @@ * QEMU System Emulator block driver * * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2020 Virtuozzo International GmbH. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -41,7 +42,6 @@ #include "qapi/qobject-output-visitor.h" #include "qapi/qapi-visit-block-core.h" #include "sysemu/block-backend.h" -#include "sysemu/sysemu.h" #include "qemu/notify.h" #include "qemu/option.h" #include "qemu/coroutine.h" @@ -49,12 +49,14 @@ #include "qemu/timer.h" #include "qemu/cutils.h" #include "qemu/id.h" +#include "qemu/range.h" +#include "qemu/rcu.h" #include "block/coroutines.h" #ifdef CONFIG_BSD #include <sys/ioctl.h> #include <sys/queue.h> -#ifndef __DragonFly__ +#if defined(HAVE_SYS_DISK_H) #include <sys/disk.h> #endif #endif @@ -82,6 +84,25 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, BdrvChildRole child_role, Error **errp); +static bool bdrv_recurse_has_child(BlockDriverState *bs, + BlockDriverState *child); + +static void bdrv_child_free(BdrvChild *child); +static void bdrv_replace_child_noperm(BdrvChild **child, + BlockDriverState *new_bs, + bool free_empty_child); +static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, + BdrvChild *child, + Transaction *tran); +static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, + Transaction *tran); + +static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, + Transaction *change_child_tran, Error **errp); +static void bdrv_reopen_commit(BDRVReopenState *reopen_state); +static void bdrv_reopen_abort(BDRVReopenState *reopen_state); + /* If non-zero, use only whitelisted block drivers */ static int use_bdrv_whitelist; @@ -246,7 +267,7 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, * image is inactivated.
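/*
 * Minimal sketch of the WITH_QEMU_LOCK_GUARD pattern that the
 * tpm_emulator_ctrlcmd() conversion above switches to (this function and its
 * "fail" parameter are hypothetical; only the TPMEmulator mutex comes from
 * the patch): early returns inside the guarded block drop the mutex
 * automatically, so the old goto-based unlock path becomes unnecessary.
 */
static int tpm_emulator_do_locked(TPMEmulator *tpm, bool fail)
{
    WITH_QEMU_LOCK_GUARD(&tpm->mutex) {
        if (fail) {
            return -1;          /* lock is released on this return */
        }
        /* ... work that must hold tpm->mutex ... */
    }

    return 0;                   /* lock already released here */
}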
*/ bool bdrv_is_read_only(BlockDriverState *bs) { - return bs->read_only; + return !(bs->open_flags & BDRV_O_RDWR); } int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, @@ -298,7 +319,6 @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, goto fail; } - bs->read_only = true; bs->open_flags &= ~BDRV_O_RDWR; return 0; @@ -381,7 +401,6 @@ BlockDriverState *bdrv_new(void) for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { QLIST_INIT(&bs->op_blockers[i]); } - notifier_with_return_list_init(&bs->before_write_notifiers); qemu_co_mutex_init(&bs->reqs_lock); qemu_mutex_init(&bs->dirty_bitmap_mutex); bs->refcnt = 1; @@ -389,6 +408,9 @@ BlockDriverState *bdrv_new(void) qemu_co_queue_init(&bs->flush_queue); + qemu_co_mutex_init(&bs->bsc_modify_lock); + bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1); + for (i = 0; i < bdrv_drain_all_count; i++) { bdrv_drained_begin(bs); } @@ -1140,7 +1162,7 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) static char *bdrv_child_get_parent_desc(BdrvChild *c) { BlockDriverState *parent = c->opaque; - return g_strdup(bdrv_get_device_or_node_name(parent)); + return g_strdup_printf("node '%s'", bdrv_get_node_name(parent)); } static void bdrv_child_cb_drained_begin(BdrvChild *child) @@ -1367,6 +1389,8 @@ static void bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; + QLIST_INSERT_HEAD(&bs->children, child, next); + if (child->role & BDRV_CHILD_COW) { bdrv_backing_attach(child); } @@ -1383,6 +1407,8 @@ static void bdrv_child_cb_detach(BdrvChild *child) } bdrv_unapply_subtree_drain(child, bs); + + QLIST_REMOVE(child, next); } static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, @@ -1394,6 +1420,13 @@ static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, return 0; } +AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c) +{ + BlockDriverState *bs = c->opaque; + + return bdrv_get_aio_context(bs); +} + const BdrvChildClass child_of_bds = { .parent_is_bds = true, .get_parent_desc = bdrv_child_get_parent_desc, @@ -1407,8 +1440,14 @@ const BdrvChildClass child_of_bds = { .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, .set_aio_ctx = bdrv_child_cb_set_aio_ctx, .update_filename = bdrv_child_cb_update_filename, + .get_parent_aio_context = child_of_bds_get_parent_aio_context, }; +AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) +{ + return c->klass->get_parent_aio_context(c); +} + static int bdrv_open_flags(BlockDriverState *bs, int flags) { int open_flags = flags; @@ -1518,7 +1557,6 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, } bs->drv = drv; - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); bs->opaque = g_malloc0(drv->instance_size); if (drv->bdrv_file_open) { @@ -1547,7 +1585,7 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, return ret; } - bdrv_refresh_limits(bs, &local_err); + bdrv_refresh_limits(bs, NULL, &local_err); if (local_err) { error_propagate(errp, local_err); return -EINVAL; @@ -1575,16 +1613,26 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, return ret; } -BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, - int flags, Error **errp) +/* + * Create and open a block node. + * + * @options is a QDict of options to pass to the block drivers, or NULL for an + * empty set of options. 
The reference to the QDict belongs to the block layer + * after the call (even on failure), so if the caller intends to reuse the + * dictionary, it needs to use qobject_ref() before calling bdrv_open. + */ +BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, + const char *node_name, + QDict *options, int flags, + Error **errp) { BlockDriverState *bs; int ret; bs = bdrv_new(); bs->open_flags = flags; - bs->explicit_options = qdict_new(); - bs->options = qdict_new(); + bs->options = options ?: qdict_new(); + bs->explicit_options = qdict_clone_shallow(bs->options); bs->opaque = NULL; update_options_from_flags(bs->options, flags); @@ -1602,6 +1650,13 @@ BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, return bs; } +/* Create and open a block node. */ +BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, + int flags, Error **errp) +{ + return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp); +} + QemuOptsList bdrv_runtime_opts = { .name = "bdrv_common", .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), @@ -1689,6 +1744,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, QemuOpts *opts; BlockDriver *drv; Error *local_err = NULL; + bool ro; assert(bs->file == NULL); assert(options != NULL && bs->options != options); @@ -1739,17 +1795,17 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, drv->format_name); - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); + ro = bdrv_is_read_only(bs); - if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { - if (!bs->read_only && bdrv_is_whitelisted(drv, true)) { + if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) { + if (!ro && bdrv_is_whitelisted(drv, true)) { ret = bdrv_apply_auto_read_only(bs, NULL, NULL); } else { ret = -ENOTSUP; } if (ret < 0) { error_setg(errp, - !bs->read_only && bdrv_is_whitelisted(drv, true) + !ro && bdrv_is_whitelisted(drv, true) ? "Driver '%s' can only be used for read-only devices" : "Driver '%s' is not whitelisted", drv->format_name); @@ -1761,7 +1817,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, assert(qatomic_read(&bs->copy_on_read) == 0); if (bs->open_flags & BDRV_O_COPY_ON_READ) { - if (!bs->read_only) { + if (!ro) { bdrv_enable_copy_on_read(bs); } else { error_setg(errp, "Can't use copy-on-read on read-only device"); @@ -1955,12 +2011,6 @@ static int bdrv_fill_options(QDict **options, const char *filename, return 0; } -static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, - uint64_t perm, uint64_t shared, - GSList *ignore_children, Error **errp); -static void bdrv_child_abort_perm_update(BdrvChild *c); -static void bdrv_child_set_perm(BdrvChild *c); - typedef struct BlockReopenQueueEntry { bool prepared; bool perms_checked; @@ -2008,6 +2058,68 @@ bool bdrv_is_writable(BlockDriverState *bs) return bdrv_is_writable_after_reopen(bs, NULL); } +static char *bdrv_child_user_desc(BdrvChild *c) +{ + return c->klass->get_parent_desc(c); +} + +/* + * Check that @a allows everything that @b needs. @a and @b must reference same + * child node. 
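/*
 * Hypothetical caller sketch for bdrv_new_open_driver_opts() above: the
 * function takes over the reference to @options even on failure, so a caller
 * that wants to keep using the dictionary must grab its own reference first.
 */
static BlockDriverState *open_node_keep_opts(BlockDriver *drv,
                                             const char *node_name,
                                             QDict *opts, Error **errp)
{
    /* the callee owns one reference from now on; keep ours */
    qobject_ref(opts);

    return bdrv_new_open_driver_opts(drv, node_name, opts, BDRV_O_RDWR, errp);
}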
+ */ +static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) +{ + const char *child_bs_name; + g_autofree char *a_user = NULL; + g_autofree char *b_user = NULL; + g_autofree char *perms = NULL; + + assert(a->bs); + assert(a->bs == b->bs); + + if ((b->perm & a->shared_perm) == b->perm) { + return true; + } + + child_bs_name = bdrv_get_node_name(b->bs); + a_user = bdrv_child_user_desc(a); + b_user = bdrv_child_user_desc(b); + perms = bdrv_perm_names(b->perm & ~a->shared_perm); + + error_setg(errp, "Permission conflict on node '%s': permissions '%s' are " + "both required by %s (uses node '%s' as '%s' child) and " + "unshared by %s (uses node '%s' as '%s' child).", + child_bs_name, perms, + b_user, child_bs_name, b->name, + a_user, child_bs_name, a->name); + + return false; +} + +static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp) +{ + BdrvChild *a, *b; + + /* + * During the loop we'll look at each pair twice. That's correct because + * bdrv_a_allow_b() is asymmetric and we should check each pair in both + * directions. + */ + QLIST_FOREACH(a, &bs->parents, next_parent) { + QLIST_FOREACH(b, &bs->parents, next_parent) { + if (a == b) { + continue; + } + + if (!bdrv_a_allow_b(a, b, errp)) { + return true; + } + } + } + + return false; +} + static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, BdrvChild *c, BdrvChildRole role, BlockReopenQueue *reopen_queue, @@ -2025,22 +2137,243 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, } /* - * Check whether permissions on this node can be changed in a way that - * @cumulative_perms and @cumulative_shared_perms are the new cumulative - * permissions of all its parents. This involves checking whether all necessary - * permission changes to child nodes can be performed. + * Adds the whole subtree of @bs (including @bs itself) to the @list (except for + * nodes that are already in the @list, of course) so that final list is + * topologically sorted. Return the result (GSList @list object is updated, so + * don't use old reference after function call). + * + * On function start @list must be already topologically sorted and for any node + * in the @list the whole subtree of the node must be in the @list as well. The + * simplest way to satisfy this criteria: use only result of + * bdrv_topological_dfs() or NULL as @list parameter. 
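/*
 * Usage sketch for bdrv_topological_dfs() (mirrors what bdrv_refresh_perms()
 * does further down; the walker function itself is hypothetical): pass NULL
 * for both @list and @found to start a fresh walk.  The returned list puts
 * every node before its own children and only borrows the nodes, so
 * g_autoptr(GSList) / g_slist_free() is enough to clean it up.
 */
static void walk_subtree_example(BlockDriverState *bs)
{
    g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
    GSList *l;

    for (l = list; l; l = l->next) {
        BlockDriverState *node = l->data;

        /* parents are always visited before their children here */
        (void)node;
    }
}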
+ */ +static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found, + BlockDriverState *bs) +{ + BdrvChild *child; + g_autoptr(GHashTable) local_found = NULL; + + if (!found) { + assert(!list); + found = local_found = g_hash_table_new(NULL, NULL); + } + + if (g_hash_table_contains(found, bs)) { + return list; + } + g_hash_table_add(found, bs); + + QLIST_FOREACH(child, &bs->children, next) { + list = bdrv_topological_dfs(list, found, child->bs); + } + + return g_slist_prepend(list, bs); +} + +typedef struct BdrvChildSetPermState { + BdrvChild *child; + uint64_t old_perm; + uint64_t old_shared_perm; +} BdrvChildSetPermState; + +static void bdrv_child_set_perm_abort(void *opaque) +{ + BdrvChildSetPermState *s = opaque; + + s->child->perm = s->old_perm; + s->child->shared_perm = s->old_shared_perm; +} + +static TransactionActionDrv bdrv_child_set_pem_drv = { + .abort = bdrv_child_set_perm_abort, + .clean = g_free, +}; + +static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, + uint64_t shared, Transaction *tran) +{ + BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1); + + *s = (BdrvChildSetPermState) { + .child = c, + .old_perm = c->perm, + .old_shared_perm = c->shared_perm, + }; + + c->perm = perm; + c->shared_perm = shared; + + tran_add(tran, &bdrv_child_set_pem_drv, s); +} + +static void bdrv_drv_set_perm_commit(void *opaque) +{ + BlockDriverState *bs = opaque; + uint64_t cumulative_perms, cumulative_shared_perms; + + if (bs->drv->bdrv_set_perm) { + bdrv_get_cumulative_perm(bs, &cumulative_perms, + &cumulative_shared_perms); + bs->drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); + } +} + +static void bdrv_drv_set_perm_abort(void *opaque) +{ + BlockDriverState *bs = opaque; + + if (bs->drv->bdrv_abort_perm_update) { + bs->drv->bdrv_abort_perm_update(bs); + } +} + +TransactionActionDrv bdrv_drv_set_perm_drv = { + .abort = bdrv_drv_set_perm_abort, + .commit = bdrv_drv_set_perm_commit, +}; + +static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, Transaction *tran, + Error **errp) +{ + if (!bs->drv) { + return 0; + } + + if (bs->drv->bdrv_check_perm) { + int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp); + if (ret < 0) { + return ret; + } + } + + if (tran) { + tran_add(tran, &bdrv_drv_set_perm_drv, bs); + } + + return 0; +} + +typedef struct BdrvReplaceChildState { + BdrvChild *child; + BdrvChild **childp; + BlockDriverState *old_bs; + bool free_empty_child; +} BdrvReplaceChildState; + +static void bdrv_replace_child_commit(void *opaque) +{ + BdrvReplaceChildState *s = opaque; + + if (s->free_empty_child && !s->child->bs) { + bdrv_child_free(s->child); + } + bdrv_unref(s->old_bs); +} + +static void bdrv_replace_child_abort(void *opaque) +{ + BdrvReplaceChildState *s = opaque; + BlockDriverState *new_bs = s->child->bs; + + /* + * old_bs reference is transparently moved from @s to s->child. + * + * Pass &s->child here instead of s->childp, because: + * (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not + * modify the BdrvChild * pointer we indirectly pass to it, i.e. it + * will not modify s->child. From that perspective, it does not matter + * whether we pass s->childp or &s->child. + * (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use + * it here. + * (3) If new_bs is NULL, *s->childp will have been NULLed by + * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we + * must not pass a NULL *s->childp here. 
+ * + * So whether new_bs was NULL or not, we cannot pass s->childp here; and in + * any case, there is no reason to pass it anyway. + */ + bdrv_replace_child_noperm(&s->child, s->old_bs, true); + /* + * The child was pre-existing, so s->old_bs must be non-NULL, and + * s->child thus must not have been freed + */ + assert(s->child != NULL); + if (!new_bs) { + /* As described above, *s->childp was cleared, so restore it */ + assert(s->childp != NULL); + *s->childp = s->child; + } + bdrv_unref(new_bs); +} + +static TransactionActionDrv bdrv_replace_child_drv = { + .commit = bdrv_replace_child_commit, + .abort = bdrv_replace_child_abort, + .clean = g_free, +}; + +/* + * bdrv_replace_child_tran + * + * Note: real unref of old_bs is done only on commit. + * + * The function doesn't update permissions, caller is responsible for this. + * + * (*childp)->bs must not be NULL. * - * A call to this function must always be followed by a call to bdrv_set_perm() - * or bdrv_abort_perm_update(). + * Note that if new_bs == NULL, @childp is stored in a state object attached + * to @tran, so that the old child can be reinstated in the abort handler. + * Therefore, if @new_bs can be NULL, @childp must stay valid until the + * transaction is committed or aborted. + * + * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is + * freed (on commit). @free_empty_child should only be false if the + * caller will free the BDrvChild themselves (which may be important + * if this is in turn called in another transactional context). + */ +static void bdrv_replace_child_tran(BdrvChild **childp, + BlockDriverState *new_bs, + Transaction *tran, + bool free_empty_child) +{ + BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); + *s = (BdrvReplaceChildState) { + .child = *childp, + .childp = new_bs == NULL ? childp : NULL, + .old_bs = (*childp)->bs, + .free_empty_child = free_empty_child, + }; + tran_add(tran, &bdrv_replace_child_drv, s); + + /* The abort handler relies on this */ + assert(s->old_bs != NULL); + + if (new_bs) { + bdrv_ref(new_bs); + } + /* + * Pass free_empty_child=false, we will free the child (if + * necessary) in bdrv_replace_child_commit() (if our + * @free_empty_child parameter was true). + */ + bdrv_replace_child_noperm(childp, new_bs, false); + /* old_bs reference is transparently moved from *childp to @s */ +} + +/* + * Refresh permissions in @bs subtree. The function is intended to be called + * after some graph modification that was done without permission update. 
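/*
 * Minimal sketch of the Transaction pattern this patch introduces (the names
 * below are hypothetical; only Transaction, TransactionActionDrv, tran_new(),
 * tran_add() and tran_finalize() come from the patch, and the shape mirrors
 * bdrv_child_set_perm()/bdrv_replace_child_tran() above): record the old
 * state, register an action whose .abort handler restores it, and let the
 * caller decide the outcome with a single tran_finalize() call.
 */
typedef struct SetIntState {
    int *field;
    int old_val;
} SetIntState;

static void set_int_abort(void *opaque)
{
    SetIntState *s = opaque;

    *s->field = s->old_val;     /* roll the change back */
}

static TransactionActionDrv set_int_drv = {
    .abort = set_int_abort,
    .clean = g_free,            /* state is freed on commit and on abort */
};

static void set_int_tran(int *field, int val, Transaction *tran)
{
    SetIntState *s = g_new(SetIntState, 1);

    *s = (SetIntState) { .field = field, .old_val = *field };
    *field = val;
    tran_add(tran, &set_int_drv, s);
}

static int set_int_checked(int *field, int val)
{
    Transaction *tran = tran_new();
    int ret = (val >= 0) ? 0 : -EINVAL;  /* stand-in for a real check */

    set_int_tran(field, val, tran);
    tran_finalize(tran, ret);            /* commit on 0, abort on error */
    return ret;
}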
*/ -static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, - uint64_t cumulative_perms, - uint64_t cumulative_shared_perms, - GSList *ignore_children, Error **errp) +static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q, + Transaction *tran, Error **errp) { BlockDriver *drv = bs->drv; BdrvChild *c; int ret; + uint64_t cumulative_perms, cumulative_shared_perms; + + bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms); /* Write permissions never work with read-only images */ if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && @@ -2049,15 +2382,8 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, if (!bdrv_is_writable_after_reopen(bs, NULL)) { error_setg(errp, "Block node is read-only"); } else { - uint64_t current_perms, current_shared; - bdrv_get_cumulative_perm(bs, ¤t_perms, ¤t_shared); - if (current_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) { - error_setg(errp, "Cannot make block node read-only, there is " - "a writer on it"); - } else { - error_setg(errp, "Cannot make block node read-only and create " - "a writer on it"); - } + error_setg(errp, "Read-only block node '%s' cannot support " + "read-write users", bdrv_get_node_name(bs)); } return -EPERM; @@ -2084,12 +2410,10 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, return 0; } - if (drv->bdrv_check_perm) { - ret = drv->bdrv_check_perm(bs, cumulative_perms, - cumulative_shared_perms, errp); - if (ret < 0) { - return ret; - } + ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran, + errp); + if (ret < 0) { + return ret; } /* Drivers that never have children can omit .bdrv_child_perm() */ @@ -2105,68 +2429,32 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, bdrv_child_perm(bs, c->bs, c, c->role, q, cumulative_perms, cumulative_shared_perms, &cur_perm, &cur_shared); - ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, ignore_children, - errp); - if (ret < 0) { - return ret; - } + bdrv_child_set_perm(c, cur_perm, cur_shared, tran); } return 0; } -/* - * Notifies drivers that after a previous bdrv_check_perm() call, the - * permission update is not performed and any preparations made for it (e.g. - * taken file locks) need to be undone. - * - * This function recursively notifies all child nodes. 
- */ -static void bdrv_abort_perm_update(BlockDriverState *bs) -{ - BlockDriver *drv = bs->drv; - BdrvChild *c; - - if (!drv) { - return; - } - - if (drv->bdrv_abort_perm_update) { - drv->bdrv_abort_perm_update(bs); - } - - QLIST_FOREACH(c, &bs->children, next) { - bdrv_child_abort_perm_update(c); - } -} - -static void bdrv_set_perm(BlockDriverState *bs) +static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, + Transaction *tran, Error **errp) { - uint64_t cumulative_perms, cumulative_shared_perms; - BlockDriver *drv = bs->drv; - BdrvChild *c; - - if (!drv) { - return; - } + int ret; + BlockDriverState *bs; - bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms); + for ( ; list; list = list->next) { + bs = list->data; - /* Update this node */ - if (drv->bdrv_set_perm) { - drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); - } + if (bdrv_parent_perms_conflict(bs, errp)) { + return -EINVAL; + } - /* Drivers that never have children can omit .bdrv_child_perm() */ - if (!drv->bdrv_child_perm) { - assert(QLIST_EMPTY(&bs->children)); - return; + ret = bdrv_node_refresh_perm(bs, q, tran, errp); + if (ret < 0) { + return ret; + } } - /* Update all children */ - QLIST_FOREACH(c, &bs->children, next) { - bdrv_child_set_perm(c); - } + return 0; } void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, @@ -2185,15 +2473,6 @@ void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, *shared_perm = cumulative_shared_perms; } -static char *bdrv_child_user_desc(BdrvChild *c) -{ - if (c->klass->get_parent_desc) { - return c->klass->get_parent_desc(c); - } - - return g_strdup("another user"); -} - char *bdrv_perm_names(uint64_t perm) { struct perm_name { @@ -2223,162 +2502,52 @@ char *bdrv_perm_names(uint64_t perm) return g_string_free(result, FALSE); } -/* - * Checks whether a new reference to @bs can be added if the new user requires - * @new_used_perm/@new_shared_perm as its permissions. If @ignore_children is - * set, the BdrvChild objects in this list are ignored in the calculations; - * this allows checking permission updates for an existing reference. - * - * Needs to be followed by a call to either bdrv_set_perm() or - * bdrv_abort_perm_update(). 
*/ -static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, - uint64_t new_used_perm, - uint64_t new_shared_perm, - GSList *ignore_children, - Error **errp) + +static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp) { - BdrvChild *c; - uint64_t cumulative_perms = new_used_perm; - uint64_t cumulative_shared_perms = new_shared_perm; + int ret; + Transaction *tran = tran_new(); + g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs); + ret = bdrv_list_refresh_perms(list, NULL, tran, errp); + tran_finalize(tran, ret); - /* There is no reason why anyone couldn't tolerate write_unchanged */ - assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED); + return ret; +} - QLIST_FOREACH(c, &bs->parents, next_parent) { - if (g_slist_find(ignore_children, c)) { - continue; - } +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp) +{ + Error *local_err = NULL; + Transaction *tran = tran_new(); + int ret; - if ((new_used_perm & c->shared_perm) != new_used_perm) { - char *user = bdrv_child_user_desc(c); - char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm); + bdrv_child_set_perm(c, perm, shared, tran); - error_setg(errp, "Conflicts with use by %s as '%s', which does not " - "allow '%s' on %s", - user, c->name, perm_names, bdrv_get_node_name(c->bs)); - g_free(user); - g_free(perm_names); - return -EPERM; - } + ret = bdrv_refresh_perms(c->bs, &local_err); - if ((c->perm & new_shared_perm) != c->perm) { - char *user = bdrv_child_user_desc(c); - char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm); + tran_finalize(tran, ret); - error_setg(errp, "Conflicts with use by %s as '%s', which uses " - "'%s' on %s", - user, c->name, perm_names, bdrv_get_node_name(c->bs)); - g_free(user); - g_free(perm_names); - return -EPERM; + if (ret < 0) { + if ((perm & ~c->perm) || (c->shared_perm & ~shared)) { + /* tighten permissions */ + error_propagate(errp, local_err); + } else { + /* + * Our caller may intend to only loosen restrictions and + * does not expect this function to fail. Errors are not + * fatal in such a case, so we can just hide them from our + * caller. + */ + error_free(local_err); + ret = 0; } - - cumulative_perms |= c->perm; - cumulative_shared_perms &= c->shared_perm; } - return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms, - ignore_children, errp); + return ret; } -/* Needs to be followed by a call to either bdrv_child_set_perm() or - * bdrv_child_abort_perm_update(). 
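/*
 * Composition sketch (the shape used by bdrv_set_backing_hd() and the new
 * attach functions further down; the "..._noperm" call is only a
 * placeholder): graph manipulations are done first and registered on one
 * Transaction, permissions are then refreshed once with the new
 * bdrv_refresh_perms(), and tran_finalize() either commits everything or
 * rolls it all back.
 */
static int change_graph_and_refresh(BlockDriverState *bs, Error **errp)
{
    Transaction *tran = tran_new();
    int ret;

    /* ret = some_graph_change_noperm(bs, ..., tran, errp); */
    ret = 0;

    if (ret == 0) {
        ret = bdrv_refresh_perms(bs, errp);
    }

    tran_finalize(tran, ret);   /* commit on success, abort on failure */
    return ret;
}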
*/ -static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, - uint64_t perm, uint64_t shared, - GSList *ignore_children, Error **errp) -{ - int ret; - - ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c); - ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children, errp); - g_slist_free(ignore_children); - - if (ret < 0) { - return ret; - } - - if (!c->has_backup_perm) { - c->has_backup_perm = true; - c->backup_perm = c->perm; - c->backup_shared_perm = c->shared_perm; - } - /* - * Note: it's OK if c->has_backup_perm was already set, as we can find the - * same child twice during check_perm procedure - */ - - c->perm = perm; - c->shared_perm = shared; - - return 0; -} - -static void bdrv_child_set_perm(BdrvChild *c) -{ - c->has_backup_perm = false; - - bdrv_set_perm(c->bs); -} - -static void bdrv_child_abort_perm_update(BdrvChild *c) -{ - if (c->has_backup_perm) { - c->perm = c->backup_perm; - c->shared_perm = c->backup_shared_perm; - c->has_backup_perm = false; - } - - bdrv_abort_perm_update(c->bs); -} - -static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp) -{ - int ret; - uint64_t perm, shared_perm; - - bdrv_get_cumulative_perm(bs, &perm, &shared_perm); - ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, errp); - if (ret < 0) { - bdrv_abort_perm_update(bs); - return ret; - } - bdrv_set_perm(bs); - - return 0; -} - -int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, - Error **errp) -{ - Error *local_err = NULL; - int ret; - - ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL, &local_err); - if (ret < 0) { - bdrv_child_abort_perm_update(c); - if ((perm & ~c->perm) || (c->shared_perm & ~shared)) { - /* tighten permissions */ - error_propagate(errp, local_err); - } else { - /* - * Our caller may intend to only loosen restrictions and - * does not expect this function to fail. Errors are not - * fatal in such a case, so we can just hide them from our - * caller. - */ - error_free(local_err); - ret = 0; - } - return ret; - } - - bdrv_child_set_perm(c); - - return 0; -} - -int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) +int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) { uint64_t parent_perms, parent_shared; uint64_t perms, shared; @@ -2560,14 +2729,30 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) return permissions[qapi_perm]; } -static void bdrv_replace_child_noperm(BdrvChild *child, - BlockDriverState *new_bs) +/** + * Replace (*childp)->bs by @new_bs. + * + * If @new_bs is NULL, *childp will be set to NULL, too: BDS parents + * generally cannot handle a BdrvChild with .bs == NULL, so clearing + * BdrvChild.bs should generally immediately be followed by the + * BdrvChild pointer being cleared as well. + * + * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is + * freed. @free_empty_child should only be false if the caller will + * free the BdrvChild themselves (this may be important in a + * transactional context, where it may only be freed on commit). 
+ */ +static void bdrv_replace_child_noperm(BdrvChild **childp, + BlockDriverState *new_bs, + bool free_empty_child) { + BdrvChild *child = *childp; BlockDriverState *old_bs = child->bs; int new_bs_quiesce_counter; int drain_saldo; assert(!child->frozen); + assert(old_bs != new_bs); if (old_bs && new_bs) { assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); @@ -2596,6 +2781,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child, } child->bs = new_bs; + if (!new_bs) { + *childp = NULL; + } if (new_bs) { QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); @@ -2625,40 +2813,208 @@ static void bdrv_replace_child_noperm(BdrvChild *child, bdrv_parent_drained_end_single(child); drain_saldo++; } + + if (free_empty_child && !child->bs) { + bdrv_child_free(child); + } } +/** + * Free the given @child. + * + * The child must be empty (i.e. `child->bs == NULL`) and it must be + * unused (i.e. not in a children list). + */ +static void bdrv_child_free(BdrvChild *child) +{ + assert(!child->bs); + assert(!child->next.le_prev); /* not in children list */ + + g_free(child->name); + g_free(child); +} + +typedef struct BdrvAttachChildCommonState { + BdrvChild **child; + AioContext *old_parent_ctx; + AioContext *old_child_ctx; +} BdrvAttachChildCommonState; + +static void bdrv_attach_child_common_abort(void *opaque) +{ + BdrvAttachChildCommonState *s = opaque; + BdrvChild *child = *s->child; + BlockDriverState *bs = child->bs; + + /* + * Pass free_empty_child=false, because we still need the child + * for the AioContext operations on the parent below; those + * BdrvChildClass methods all work on a BdrvChild object, so we + * need to keep it as an empty shell (after this function, it will + * not be attached to any parent, and it will not have a .bs). + */ + bdrv_replace_child_noperm(s->child, NULL, false); + + if (bdrv_get_aio_context(bs) != s->old_child_ctx) { + bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort); + } + + if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) { + GSList *ignore; + + /* No need to ignore `child`, because it has been detached already */ + ignore = NULL; + child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore, + &error_abort); + g_slist_free(ignore); + + ignore = NULL; + child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore); + g_slist_free(ignore); + } + + bdrv_unref(bs); + bdrv_child_free(child); +} + +static TransactionActionDrv bdrv_attach_child_common_drv = { + .abort = bdrv_attach_child_common_abort, + .clean = g_free, +}; + /* - * Updates @child to change its reference to point to @new_bs, including - * checking and applying the necessary permission updates both to the old node - * and to @new_bs. + * Common part of attaching bdrv child to bs or to blk or to job * - * NULL is passed as @new_bs for removing the reference before freeing @child. + * Resulting new child is returned through @child. + * At start *@child must be NULL. + * @child is saved to a new entry of @tran, so that *@child could be reverted to + * NULL on abort(). So referenced variable must live at least until transaction + * end. * - * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this - * function uses bdrv_set_perm() to update the permissions according to the new - * reference that @new_bs gets. + * Function doesn't update permissions, caller is responsible for this. 
+ */ +static int bdrv_attach_child_common(BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + uint64_t perm, uint64_t shared_perm, + void *opaque, BdrvChild **child, + Transaction *tran, Error **errp) +{ + BdrvChild *new_child; + AioContext *parent_ctx; + AioContext *child_ctx = bdrv_get_aio_context(child_bs); + + assert(child); + assert(*child == NULL); + assert(child_class->get_parent_desc); + + new_child = g_new(BdrvChild, 1); + *new_child = (BdrvChild) { + .bs = NULL, + .name = g_strdup(child_name), + .klass = child_class, + .role = child_role, + .perm = perm, + .shared_perm = shared_perm, + .opaque = opaque, + }; + + /* + * If the AioContexts don't match, first try to move the subtree of + * child_bs into the AioContext of the new parent. If this doesn't work, + * try moving the parent into the AioContext of child_bs instead. + */ + parent_ctx = bdrv_child_get_parent_aio_context(new_child); + if (child_ctx != parent_ctx) { + Error *local_err = NULL; + int ret = bdrv_try_set_aio_context(child_bs, parent_ctx, &local_err); + + if (ret < 0 && child_class->can_set_aio_ctx) { + GSList *ignore = g_slist_prepend(NULL, new_child); + if (child_class->can_set_aio_ctx(new_child, child_ctx, &ignore, + NULL)) + { + error_free(local_err); + ret = 0; + g_slist_free(ignore); + ignore = g_slist_prepend(NULL, new_child); + child_class->set_aio_ctx(new_child, child_ctx, &ignore); + } + g_slist_free(ignore); + } + + if (ret < 0) { + error_propagate(errp, local_err); + bdrv_child_free(new_child); + return ret; + } + } + + bdrv_ref(child_bs); + bdrv_replace_child_noperm(&new_child, child_bs, true); + /* child_bs was non-NULL, so new_child must not have been freed */ + assert(new_child != NULL); + + *child = new_child; + + BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); + *s = (BdrvAttachChildCommonState) { + .child = child, + .old_parent_ctx = parent_ctx, + .old_child_ctx = child_ctx, + }; + tran_add(tran, &bdrv_attach_child_common_drv, s); + + return 0; +} + +/* + * Variable referenced by @child must live at least until transaction end. + * (see bdrv_attach_child_common() doc for details) * - * Callers must ensure that child->frozen is false. + * Function doesn't update permissions, caller is responsible for this. */ -static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) +static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + BdrvChild **child, + Transaction *tran, + Error **errp) { - BlockDriverState *old_bs = child->bs; + int ret; + uint64_t perm, shared_perm; - /* Asserts that child->frozen == false */ - bdrv_replace_child_noperm(child, new_bs); + assert(parent_bs->drv); - /* - * Start with the new node's permissions. If @new_bs is a (direct - * or indirect) child of @old_bs, we must complete the permission - * update on @new_bs before we loosen the restrictions on @old_bs. - * Otherwise, bdrv_check_perm() on @old_bs would re-initiate - * updating the permissions of @new_bs, and thus not purely loosen - * restrictions. 
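/*
 * Sketch of the can_set_aio_ctx()/set_aio_ctx() dance used by the attach code
 * above (this helper is hypothetical): the ignore list carries the edge that
 * is currently being reconfigured so the walk does not recurse back through
 * it, and it is rebuilt before the second call because the callbacks add the
 * edges they visit to it.
 */
static bool move_parent_to_child_ctx(BdrvChild *child, AioContext *ctx)
{
    GSList *ignore;
    bool ok;

    if (!child->klass->can_set_aio_ctx) {
        return false;           /* this parent cannot be moved at all */
    }

    ignore = g_slist_prepend(NULL, child);
    ok = child->klass->can_set_aio_ctx(child, ctx, &ignore, NULL);
    g_slist_free(ignore);
    if (!ok) {
        return false;
    }

    ignore = g_slist_prepend(NULL, child);
    child->klass->set_aio_ctx(child, ctx, &ignore);
    g_slist_free(ignore);

    return true;
}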
- */ - if (new_bs) { - bdrv_set_perm(new_bs); + if (bdrv_recurse_has_child(child_bs, parent_bs)) { + error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle", + child_bs->node_name, child_name, parent_bs->node_name); + return -EINVAL; } + bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); + bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, + perm, shared_perm, &perm, &shared_perm); + + ret = bdrv_attach_child_common(child_bs, child_name, child_class, + child_role, perm, shared_perm, parent_bs, + child, tran, errp); + if (ret < 0) { + return ret; + } + + return 0; +} + +static void bdrv_detach_child(BdrvChild **childp) +{ + BlockDriverState *old_bs = (*childp)->bs; + + bdrv_replace_child_noperm(childp, NULL, true); + if (old_bs) { /* * Update permissions for old node. We're just taking a parent away, so @@ -2667,8 +3023,10 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) */ bdrv_refresh_perms(old_bs, NULL); - /* When the parent requiring a non-default AioContext is removed, the - * node moves back to the main AioContext */ + /* + * When the parent requiring a non-default AioContext is removed, the + * node moves back to the main AioContext + */ bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL); } } @@ -2687,61 +3045,28 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildClass *child_class, BdrvChildRole child_role, - AioContext *ctx, uint64_t perm, uint64_t shared_perm, void *opaque, Error **errp) { - BdrvChild *child; - Error *local_err = NULL; int ret; + BdrvChild *child = NULL; + Transaction *tran = tran_new(); - ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp); + ret = bdrv_attach_child_common(child_bs, child_name, child_class, + child_role, perm, shared_perm, opaque, + &child, tran, errp); if (ret < 0) { - bdrv_abort_perm_update(child_bs); - bdrv_unref(child_bs); - return NULL; + goto out; } - child = g_new(BdrvChild, 1); - *child = (BdrvChild) { - .bs = NULL, - .name = g_strdup(child_name), - .klass = child_class, - .role = child_role, - .perm = perm, - .shared_perm = shared_perm, - .opaque = opaque, - }; + ret = bdrv_refresh_perms(child_bs, errp); - /* If the AioContexts don't match, first try to move the subtree of - * child_bs into the AioContext of the new parent. If this doesn't work, - * try moving the parent into the AioContext of child_bs instead. */ - if (bdrv_get_aio_context(child_bs) != ctx) { - ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err); - if (ret < 0 && child_class->can_set_aio_ctx) { - GSList *ignore = g_slist_prepend(NULL, child); - ctx = bdrv_get_aio_context(child_bs); - if (child_class->can_set_aio_ctx(child, ctx, &ignore, NULL)) { - error_free(local_err); - ret = 0; - g_slist_free(ignore); - ignore = g_slist_prepend(NULL, child); - child_class->set_aio_ctx(child, ctx, &ignore); - } - g_slist_free(ignore); - } - if (ret < 0) { - error_propagate(errp, local_err); - g_free(child); - bdrv_abort_perm_update(child_bs); - bdrv_unref(child_bs); - return NULL; - } - } - - /* This performs the matching bdrv_set_perm() for the above check. 
*/ - bdrv_replace_child(child, child_bs); +out: + tran_finalize(tran, ret); + /* child is unset on failure by bdrv_attach_child_common_abort() */ + assert((ret < 0) == !child); + bdrv_unref(child_bs); return child; } @@ -2763,34 +3088,29 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BdrvChildRole child_role, Error **errp) { - BdrvChild *child; - uint64_t perm, shared_perm; - - bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); - - assert(parent_bs->drv); - bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, - perm, shared_perm, &perm, &shared_perm); + int ret; + BdrvChild *child = NULL; + Transaction *tran = tran_new(); - child = bdrv_root_attach_child(child_bs, child_name, child_class, - child_role, bdrv_get_aio_context(parent_bs), - perm, shared_perm, parent_bs, errp); - if (child == NULL) { - return NULL; + ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, child_class, + child_role, &child, tran, errp); + if (ret < 0) { + goto out; } - QLIST_INSERT_HEAD(&parent_bs->children, child, next); - return child; -} + ret = bdrv_refresh_perms(parent_bs, errp); + if (ret < 0) { + goto out; + } -static void bdrv_detach_child(BdrvChild *child) -{ - QLIST_SAFE_REMOVE(child, next); +out: + tran_finalize(tran, ret); + /* child is unset on failure by bdrv_attach_child_common_abort() */ + assert((ret < 0) == !child); - bdrv_replace_child(child, NULL); + bdrv_unref(child_bs); - g_free(child->name); - g_free(child); + return child; } /* Callers must ensure that child->frozen is false. */ @@ -2799,15 +3119,53 @@ void bdrv_root_unref_child(BdrvChild *child) BlockDriverState *child_bs; child_bs = child->bs; - bdrv_detach_child(child); + bdrv_detach_child(&child); bdrv_unref(child_bs); } +typedef struct BdrvSetInheritsFrom { + BlockDriverState *bs; + BlockDriverState *old_inherits_from; +} BdrvSetInheritsFrom; + +static void bdrv_set_inherits_from_abort(void *opaque) +{ + BdrvSetInheritsFrom *s = opaque; + + s->bs->inherits_from = s->old_inherits_from; +} + +static TransactionActionDrv bdrv_set_inherits_from_drv = { + .abort = bdrv_set_inherits_from_abort, + .clean = g_free, +}; + +/* @tran is allowed to be NULL. In this case no rollback is possible */ +static void bdrv_set_inherits_from(BlockDriverState *bs, + BlockDriverState *new_inherits_from, + Transaction *tran) +{ + if (tran) { + BdrvSetInheritsFrom *s = g_new(BdrvSetInheritsFrom, 1); + + *s = (BdrvSetInheritsFrom) { + .bs = bs, + .old_inherits_from = bs->inherits_from, + }; + + tran_add(tran, &bdrv_set_inherits_from_drv, s); + } + + bs->inherits_from = new_inherits_from; +} + /** * Clear all inherits_from pointers from children and grandchildren of * @root that point to @root, where necessary. + * @tran is allowed to be NULL. 
In this case no rollback is possible */ -static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child) +static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child, + Transaction *tran) { BdrvChild *c; @@ -2822,12 +3180,12 @@ static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child) } } if (c == NULL) { - child->bs->inherits_from = NULL; + bdrv_set_inherits_from(child->bs, NULL, tran); } } QLIST_FOREACH(c, &child->bs->children, next) { - bdrv_unset_inherits_from(root, c); + bdrv_unset_inherits_from(root, c, tran); } } @@ -2838,7 +3196,7 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) return; } - bdrv_unset_inherits_from(parent, child); + bdrv_unset_inherits_from(parent, child, NULL); bdrv_root_unref_child(child); } @@ -2880,50 +3238,118 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) } /* - * Sets the bs->backing link of a BDS. A new reference is created; callers - * which don't need their own reference any more must call bdrv_unref(). + * Sets the bs->backing or bs->file link of a BDS. A new reference is created; + * callers which don't need their own reference any more must call bdrv_unref(). + * + * Function doesn't update permissions, caller is responsible for this. */ -int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - Error **errp) +static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + bool is_backing, + Transaction *tran, Error **errp) { int ret = 0; - bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) && - bdrv_inherits_from_recursive(backing_hd, bs); + bool update_inherits_from = + bdrv_inherits_from_recursive(child_bs, parent_bs); + BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file; + BdrvChildRole role; - if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) { + if (!parent_bs->drv) { + /* + * Node without drv is an object without a class :/. TODO: finally fix + * qcow2 driver to never clear bs->drv and implement format corruption + * handling in other way. + */ + error_setg(errp, "Node corrupted"); + return -EINVAL; + } + + if (child && child->frozen) { + error_setg(errp, "Cannot change frozen '%s' link from '%s' to '%s'", + child->name, parent_bs->node_name, child->bs->node_name); return -EPERM; } - if (backing_hd) { - bdrv_ref(backing_hd); + if (is_backing && !parent_bs->drv->is_filter && + !parent_bs->drv->supports_backing) + { + error_setg(errp, "Driver '%s' of node '%s' does not support backing " + "files", parent_bs->drv->format_name, parent_bs->node_name); + return -EINVAL; } - if (bs->backing) { - /* Cannot be frozen, we checked that above */ - bdrv_unref_child(bs, bs->backing); - bs->backing = NULL; + if (parent_bs->drv->is_filter) { + role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY; + } else if (is_backing) { + role = BDRV_CHILD_COW; + } else { + /* + * We only can use same role as it is in existing child. 
We don't have + * infrastructure to determine role of file child in generic way + */ + if (!child) { + error_setg(errp, "Cannot set file child to format node without " + "file child"); + return -EINVAL; + } + role = child->role; } - if (!backing_hd) { - goto out; + if (child) { + bdrv_unset_inherits_from(parent_bs, child, tran); + bdrv_remove_file_or_backing_child(parent_bs, child, tran); } - bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_of_bds, - bdrv_backing_role(bs), errp); - if (!bs->backing) { - ret = -EPERM; + if (!child_bs) { goto out; } - /* If backing_hd was already part of bs's backing chain, and - * inherits_from pointed recursively to bs then let's update it to - * point directly to bs (else it will become NULL). */ + ret = bdrv_attach_child_noperm(parent_bs, child_bs, + is_backing ? "backing" : "file", + &child_of_bds, role, + is_backing ? &parent_bs->backing : + &parent_bs->file, + tran, errp); + if (ret < 0) { + return ret; + } + + + /* + * If inherits_from pointed recursively to bs then let's update it to + * point directly to bs (else it will become NULL). + */ if (update_inherits_from) { - backing_hd->inherits_from = bs; + bdrv_set_inherits_from(child_bs, parent_bs, tran); } out: - bdrv_refresh_limits(bs, NULL); + bdrv_refresh_limits(parent_bs, tran, NULL); + + return 0; +} + +static int bdrv_set_backing_noperm(BlockDriverState *bs, + BlockDriverState *backing_hd, + Transaction *tran, Error **errp) +{ + return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); +} + +int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp) +{ + int ret; + Transaction *tran = tran_new(); + + ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); + if (ret < 0) { + goto out; + } + + ret = bdrv_refresh_perms(bs, errp); +out: + tran_finalize(tran, ret); return ret; } @@ -3213,11 +3639,6 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, goto out; } - /* bdrv_append() consumes a strong reference to bs_snapshot - * (i.e. it will call bdrv_unref() on it) even on error, so in - * order to be able to return one, we have to increase - * bs_snapshot's refcount here */ - bdrv_ref(bs_snapshot); ret = bdrv_append(bs_snapshot, bs, errp); if (ret < 0) { bs_snapshot = NULL; @@ -3729,10 +4150,6 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, bs_entry->state.explicit_options = explicit_options; bs_entry->state.flags = flags; - /* This needs to be overwritten in bdrv_reopen_prepare() */ - bs_entry->state.perm = UINT64_MAX; - bs_entry->state.shared_perm = 0; - /* * If keep_old_opts is false then it means that unspecified * options must be reset to their original value. We don't allow @@ -3796,6 +4213,19 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, NULL, 0, keep_old_opts); } +void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) +{ + if (bs_queue) { + BlockReopenQueueEntry *bs_entry, *next; + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + qobject_unref(bs_entry->state.explicit_options); + qobject_unref(bs_entry->state.options); + g_free(bs_entry); + } + g_free(bs_queue); + } +} + /* * Reopen multiple BlockDriverStates atomically & transactionally. * @@ -3812,43 +4242,68 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, * * All affected nodes must be drained between bdrv_reopen_queue() and * bdrv_reopen_multiple(). + * + * To be called from the main thread, with all other AioContexts unlocked. 
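/*
 * Caller sketch (hypothetical except for the drain/queue/reopen calls, which
 * mirror what bdrv_reopen() and bdrv_reopen_set_read_only() further down do,
 * minus their AioContext juggling): the queue built by bdrv_reopen_queue() is
 * normally consumed and freed by bdrv_reopen_multiple(), so the new
 * bdrv_reopen_queue_free() helper is there for paths that bail out before
 * ever calling it.
 */
static int reopen_read_only_unless(BlockDriverState *bs, bool bail_out,
                                   Error **errp)
{
    QDict *opts = qdict_new();
    BlockReopenQueue *queue;
    int ret;

    qdict_put_bool(opts, BDRV_OPT_READ_ONLY, true);

    bdrv_subtree_drained_begin(bs);
    queue = bdrv_reopen_queue(NULL, bs, opts, true);

    if (bail_out) {
        error_setg(errp, "reopen cancelled before it was started");
        bdrv_reopen_queue_free(queue);
        ret = -ECANCELED;
    } else {
        ret = bdrv_reopen_multiple(queue, errp);
    }

    bdrv_subtree_drained_end(bs);
    return ret;
}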
*/ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) { int ret = -1; BlockReopenQueueEntry *bs_entry, *next; + AioContext *ctx; + Transaction *tran = tran_new(); + g_autoptr(GHashTable) found = NULL; + g_autoptr(GSList) refresh_list = NULL; + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); assert(bs_queue != NULL); + QTAILQ_FOREACH(bs_entry, bs_queue, entry) { + ctx = bdrv_get_aio_context(bs_entry->state.bs); + aio_context_acquire(ctx); + ret = bdrv_flush(bs_entry->state.bs); + aio_context_release(ctx); + if (ret < 0) { + error_setg_errno(errp, -ret, "Error flushing drive"); + goto abort; + } + } + QTAILQ_FOREACH(bs_entry, bs_queue, entry) { assert(bs_entry->state.bs->quiesce_counter > 0); - if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, errp)) { - goto cleanup; + ctx = bdrv_get_aio_context(bs_entry->state.bs); + aio_context_acquire(ctx); + ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp); + aio_context_release(ctx); + if (ret < 0) { + goto abort; } bs_entry->prepared = true; } + found = g_hash_table_new(NULL, NULL); QTAILQ_FOREACH(bs_entry, bs_queue, entry) { BDRVReopenState *state = &bs_entry->state; - ret = bdrv_check_perm(state->bs, bs_queue, state->perm, - state->shared_perm, NULL, errp); - if (ret < 0) { - goto cleanup_perm; - } - /* Check if new_backing_bs would accept the new permissions */ - if (state->replace_backing_bs && state->new_backing_bs) { - uint64_t nperm, nshared; - bdrv_child_perm(state->bs, state->new_backing_bs, - NULL, bdrv_backing_role(state->bs), - bs_queue, state->perm, state->shared_perm, - &nperm, &nshared); - ret = bdrv_check_update_perm(state->new_backing_bs, NULL, - nperm, nshared, NULL, errp); - if (ret < 0) { - goto cleanup_perm; - } + + refresh_list = bdrv_topological_dfs(refresh_list, found, state->bs); + if (state->old_backing_bs) { + refresh_list = bdrv_topological_dfs(refresh_list, found, + state->old_backing_bs); + } + if (state->old_file_bs) { + refresh_list = bdrv_topological_dfs(refresh_list, found, + state->old_file_bs); } - bs_entry->perms_checked = true; + } + + /* + * Note that file-posix driver rely on permission update done during reopen + * (even if no permission changed), because it wants "new" permissions for + * reconfiguring the fd and that's why it does it in raw_check_perm(), not + * in raw_reopen_prepare() which is called with "old" permissions. + */ + ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp); + if (ret < 0) { + goto abort; } /* @@ -3861,146 +4316,76 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) * to first element. 
*/ QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { + ctx = bdrv_get_aio_context(bs_entry->state.bs); + aio_context_acquire(ctx); bdrv_reopen_commit(&bs_entry->state); + aio_context_release(ctx); } - ret = 0; -cleanup_perm: - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { - BDRVReopenState *state = &bs_entry->state; - - if (!bs_entry->perms_checked) { - continue; - } - - if (ret == 0) { - uint64_t perm, shared; + tran_commit(tran); - bdrv_get_cumulative_perm(state->bs, &perm, &shared); - assert(perm == state->perm); - assert(shared == state->shared_perm); + QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { + BlockDriverState *bs = bs_entry->state.bs; - bdrv_set_perm(state->bs); - } else { - bdrv_abort_perm_update(state->bs); - if (state->replace_backing_bs && state->new_backing_bs) { - bdrv_abort_perm_update(state->new_backing_bs); - } + if (bs->drv->bdrv_reopen_commit_post) { + ctx = bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + bs->drv->bdrv_reopen_commit_post(&bs_entry->state); + aio_context_release(ctx); } } - if (ret == 0) { - QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { - BlockDriverState *bs = bs_entry->state.bs; + ret = 0; + goto cleanup; - if (bs->drv->bdrv_reopen_commit_post) - bs->drv->bdrv_reopen_commit_post(&bs_entry->state); - } - } -cleanup: +abort: + tran_abort(tran); QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { - if (ret) { - if (bs_entry->prepared) { - bdrv_reopen_abort(&bs_entry->state); - } - qobject_unref(bs_entry->state.explicit_options); - qobject_unref(bs_entry->state.options); + if (bs_entry->prepared) { + ctx = bdrv_get_aio_context(bs_entry->state.bs); + aio_context_acquire(ctx); + bdrv_reopen_abort(&bs_entry->state); + aio_context_release(ctx); } - if (bs_entry->state.new_backing_bs) { - bdrv_unref(bs_entry->state.new_backing_bs); - } - g_free(bs_entry); } - g_free(bs_queue); + +cleanup: + bdrv_reopen_queue_free(bs_queue); return ret; } -int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, - Error **errp) +int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + Error **errp) { - int ret; + AioContext *ctx = bdrv_get_aio_context(bs); BlockReopenQueue *queue; - QDict *opts = qdict_new(); - - qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only); + int ret; bdrv_subtree_drained_begin(bs); - queue = bdrv_reopen_queue(NULL, bs, opts, true); - ret = bdrv_reopen_multiple(queue, errp); - bdrv_subtree_drained_end(bs); - - return ret; -} - -static BlockReopenQueueEntry *find_parent_in_reopen_queue(BlockReopenQueue *q, - BdrvChild *c) -{ - BlockReopenQueueEntry *entry; - - QTAILQ_FOREACH(entry, q, entry) { - BlockDriverState *bs = entry->state.bs; - BdrvChild *child; - - QLIST_FOREACH(child, &bs->children, next) { - if (child == c) { - return entry; - } - } + if (ctx != qemu_get_aio_context()) { + aio_context_release(ctx); } - return NULL; -} - -static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs, - uint64_t *perm, uint64_t *shared) -{ - BdrvChild *c; - BlockReopenQueueEntry *parent; - uint64_t cumulative_perms = 0; - uint64_t cumulative_shared_perms = BLK_PERM_ALL; - - QLIST_FOREACH(c, &bs->parents, next_parent) { - parent = find_parent_in_reopen_queue(q, c); - if (!parent) { - cumulative_perms |= c->perm; - cumulative_shared_perms &= c->shared_perm; - } else { - uint64_t nperm, nshared; - - bdrv_child_perm(parent->state.bs, bs, c, c->role, q, - parent->state.perm, parent->state.shared_perm, - &nperm, &nshared); + queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); + ret = 
bdrv_reopen_multiple(queue, errp); - cumulative_perms |= nperm; - cumulative_shared_perms &= nshared; - } + if (ctx != qemu_get_aio_context()) { + aio_context_acquire(ctx); } - *perm = cumulative_perms; - *shared = cumulative_shared_perms; + bdrv_subtree_drained_end(bs); + + return ret; } -static bool bdrv_reopen_can_attach(BlockDriverState *parent, - BdrvChild *child, - BlockDriverState *new_child, - Error **errp) +int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, + Error **errp) { - AioContext *parent_ctx = bdrv_get_aio_context(parent); - AioContext *child_ctx = bdrv_get_aio_context(new_child); - GSList *ignore; - bool ret; + QDict *opts = qdict_new(); - ignore = g_slist_prepend(NULL, child); - ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL); - g_slist_free(ignore); - if (ret) { - return ret; - } + qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only); - ignore = g_slist_prepend(NULL, child); - ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp); - g_slist_free(ignore); - return ret; + return bdrv_reopen(bs, opts, true, errp); } /* @@ -4020,110 +4405,81 @@ static bool bdrv_reopen_can_attach(BlockDriverState *parent, * * Return 0 on success, otherwise return < 0 and set @errp. */ -static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, - Error **errp) +static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, + bool is_backing, Transaction *tran, + Error **errp) { BlockDriverState *bs = reopen_state->bs; - BlockDriverState *overlay_bs, *below_bs, *new_backing_bs; + BlockDriverState *new_child_bs; + BlockDriverState *old_child_bs = is_backing ? child_bs(bs->backing) : + child_bs(bs->file); + const char *child_name = is_backing ? "backing" : "file"; QObject *value; const char *str; - value = qdict_get(reopen_state->options, "backing"); + value = qdict_get(reopen_state->options, child_name); if (value == NULL) { return 0; } switch (qobject_type(value)) { case QTYPE_QNULL: - new_backing_bs = NULL; + assert(is_backing); /* The 'file' option does not allow a null value */ + new_child_bs = NULL; break; case QTYPE_QSTRING: str = qstring_get_str(qobject_to(QString, value)); - new_backing_bs = bdrv_lookup_bs(NULL, str, errp); - if (new_backing_bs == NULL) { - return -EINVAL; - } else if (bdrv_recurse_has_child(new_backing_bs, bs)) { - error_setg(errp, "Making '%s' a backing file of '%s' " - "would create a cycle", str, bs->node_name); - return -EINVAL; - } - break; - default: - /* 'backing' does not allow any other data type */ - g_assert_not_reached(); - } - - /* - * Check AioContext compatibility so that the bdrv_set_backing_hd() call in - * bdrv_reopen_commit() won't fail. 
- */ - if (new_backing_bs) { - if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) { + new_child_bs = bdrv_lookup_bs(NULL, str, errp); + if (new_child_bs == NULL) { return -EINVAL; - } - } - - /* - * Ensure that @bs can really handle backing files, because we are - * about to give it one (or swap the existing one) - */ - if (bs->drv->is_filter) { - /* Filters always have a file or a backing child */ - if (!bs->backing) { - error_setg(errp, "'%s' is a %s filter node that does not support a " - "backing child", bs->node_name, bs->drv->format_name); + } else if (bdrv_recurse_has_child(new_child_bs, bs)) { + error_setg(errp, "Making '%s' a %s child of '%s' would create a " + "cycle", str, child_name, bs->node_name); return -EINVAL; } - } else if (!bs->drv->supports_backing) { - error_setg(errp, "Driver '%s' of node '%s' does not support backing " - "files", bs->drv->format_name, bs->node_name); - return -EINVAL; + break; + default: + /* + * The options QDict has been flattened, so 'backing' and 'file' + * do not allow any other data type here. + */ + g_assert_not_reached(); } - /* - * Find the "actual" backing file by skipping all links that point - * to an implicit node, if any (e.g. a commit filter node). - * We cannot use any of the bdrv_skip_*() functions here because - * those return the first explicit node, while we are looking for - * its overlay here. - */ - overlay_bs = bs; - for (below_bs = bdrv_filter_or_cow_bs(overlay_bs); - below_bs && below_bs->implicit; - below_bs = bdrv_filter_or_cow_bs(overlay_bs)) - { - overlay_bs = below_bs; + if (old_child_bs == new_child_bs) { + return 0; } - /* If we want to replace the backing file we need some extra checks */ - if (new_backing_bs != bdrv_filter_or_cow_bs(overlay_bs)) { - /* Check for implicit nodes between bs and its backing file */ - if (bs != overlay_bs) { - error_setg(errp, "Cannot change backing link if '%s' has " - "an implicit backing file", bs->node_name); + if (old_child_bs) { + if (bdrv_skip_implicit_filters(old_child_bs) == new_child_bs) { + return 0; + } + + if (old_child_bs->implicit) { + error_setg(errp, "Cannot replace implicit %s child of %s", + child_name, bs->node_name); return -EPERM; } + } + + if (bs->drv->is_filter && !old_child_bs) { /* - * Check if the backing link that we want to replace is frozen. - * Note that - * bdrv_filter_or_cow_child(overlay_bs) == overlay_bs->backing, - * because we know that overlay_bs == bs, and that @bs - * either is a filter that uses ->backing or a COW format BDS - * with bs->drv->supports_backing == true. 
+ * Filters always have a file or a backing child, so we are trying to + * change wrong child */ - if (bdrv_is_backing_chain_frozen(overlay_bs, - child_bs(overlay_bs->backing), errp)) - { - return -EPERM; - } - reopen_state->replace_backing_bs = true; - if (new_backing_bs) { - bdrv_ref(new_backing_bs); - reopen_state->new_backing_bs = new_backing_bs; - } + error_setg(errp, "'%s' is a %s filter node that does not support a " + "%s child", bs->node_name, bs->drv->format_name, child_name); + return -EINVAL; } - return 0; + if (is_backing) { + reopen_state->old_backing_bs = old_child_bs; + } else { + reopen_state->old_file_bs = old_child_bs; + } + + return bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing, + tran, errp); } /* @@ -4143,8 +4499,9 @@ static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, * commit() for any other BDS that have been left in a prepare() state * */ -int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, - Error **errp) +static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, + Transaction *change_child_tran, Error **errp) { int ret = -1; int old_flags; @@ -4211,16 +4568,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, goto error; } - /* Calculate required permissions after reopening */ - bdrv_reopen_perm(queue, reopen_state->bs, - &reopen_state->perm, &reopen_state->shared_perm); - - ret = bdrv_flush(reopen_state->bs); - if (ret) { - error_setg_errno(errp, -ret, "Error flushing drive"); - goto error; - } - if (drv->bdrv_reopen_prepare) { /* * If a driver-specific option is missing, it means that we @@ -4274,12 +4621,21 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, * either a reference to an existing node (using its node name) * or NULL to simply detach the current backing file. */ - ret = bdrv_reopen_parse_backing(reopen_state, errp); + ret = bdrv_reopen_parse_file_or_backing(reopen_state, true, + change_child_tran, errp); if (ret < 0) { goto error; } qdict_del(reopen_state->options, "backing"); + /* Allow changing the 'file' option. In this case NULL is not allowed */ + ret = bdrv_reopen_parse_file_or_backing(reopen_state, false, + change_child_tran, errp); + if (ret < 0) { + goto error; + } + qdict_del(reopen_state->options, "file"); + /* Options that are not handled are only okay if they are unchanged * compared to the old state. It is expected that some options are only * used for the initial open, but not reopen (e.g. filename) */ @@ -4359,7 +4715,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, * makes them final by swapping the staging BlockDriverState contents into * the active BlockDriverState contents. 
*/ -void bdrv_reopen_commit(BDRVReopenState *reopen_state) +static void bdrv_reopen_commit(BDRVReopenState *reopen_state) { BlockDriver *drv; BlockDriverState *bs; @@ -4378,48 +4734,32 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state) /* set BDS specific flags now */ qobject_unref(bs->explicit_options); qobject_unref(bs->options); + qobject_ref(reopen_state->explicit_options); + qobject_ref(reopen_state->options); bs->explicit_options = reopen_state->explicit_options; bs->options = reopen_state->options; bs->open_flags = reopen_state->flags; - bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); bs->detect_zeroes = reopen_state->detect_zeroes; - if (reopen_state->replace_backing_bs) { - qdict_del(bs->explicit_options, "backing"); - qdict_del(bs->options, "backing"); - } - /* Remove child references from bs->options and bs->explicit_options. * Child options were already removed in bdrv_reopen_queue_child() */ QLIST_FOREACH(child, &bs->children, next) { qdict_del(bs->explicit_options, child->name); qdict_del(bs->options, child->name); } + /* backing is probably removed, so it's not handled by previous loop */ + qdict_del(bs->explicit_options, "backing"); + qdict_del(bs->options, "backing"); - /* - * Change the backing file if a new one was specified. We do this - * after updating bs->options, so bdrv_refresh_filename() (called - * from bdrv_set_backing_hd()) has the new values. - */ - if (reopen_state->replace_backing_bs) { - BlockDriverState *old_backing_bs = child_bs(bs->backing); - assert(!old_backing_bs || !old_backing_bs->implicit); - /* Abort the permission update on the backing bs we're detaching */ - if (old_backing_bs) { - bdrv_abort_perm_update(old_backing_bs); - } - bdrv_set_backing_hd(bs, reopen_state->new_backing_bs, &error_abort); - } - - bdrv_refresh_limits(bs, NULL); + bdrv_refresh_limits(bs, NULL, NULL); } /* * Abort the reopen, and delete and free the staged changes in * reopen_state */ -void bdrv_reopen_abort(BDRVReopenState *reopen_state) +static void bdrv_reopen_abort(BDRVReopenState *reopen_state) { BlockDriver *drv; @@ -4472,6 +4812,8 @@ static void bdrv_close(BlockDriverState *bs) bs->explicit_options = NULL; qobject_unref(bs->full_open_options); bs->full_open_options = NULL; + g_free(bs->block_status_cache); + bs->block_status_cache = NULL; bdrv_release_named_dirty_bitmaps(bs); assert(QLIST_EMPTY(&bs->dirty_bitmaps)); @@ -4585,88 +4927,237 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to) return ret; } +typedef struct BdrvRemoveFilterOrCowChild { + BdrvChild *child; + BlockDriverState *bs; + bool is_backing; +} BdrvRemoveFilterOrCowChild; + +static void bdrv_remove_filter_or_cow_child_abort(void *opaque) +{ + BdrvRemoveFilterOrCowChild *s = opaque; + BlockDriverState *parent_bs = s->child->opaque; + + if (s->is_backing) { + parent_bs->backing = s->child; + } else { + parent_bs->file = s->child; + } + + /* + * We don't have to restore child->bs here to undo bdrv_replace_child_tran() + * because that function is transactionable and it registered own completion + * entries in @tran, so .abort() for bdrv_replace_child_safe() will be + * called automatically. 
+ */ +} + +static void bdrv_remove_filter_or_cow_child_commit(void *opaque) +{ + BdrvRemoveFilterOrCowChild *s = opaque; + + bdrv_child_free(s->child); +} + +static void bdrv_remove_filter_or_cow_child_clean(void *opaque) +{ + BdrvRemoveFilterOrCowChild *s = opaque; + + /* Drop the bs reference after the transaction is done */ + bdrv_unref(s->bs); + g_free(s); +} + +static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = { + .abort = bdrv_remove_filter_or_cow_child_abort, + .commit = bdrv_remove_filter_or_cow_child_commit, + .clean = bdrv_remove_filter_or_cow_child_clean, +}; + +/* + * A function to remove backing or file child of @bs. + * Function doesn't update permissions, caller is responsible for this. + */ +static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, + BdrvChild *child, + Transaction *tran) +{ + BdrvChild **childp; + BdrvRemoveFilterOrCowChild *s; + + if (!child) { + return; + } + + /* + * Keep a reference to @bs so @childp will stay valid throughout the + * transaction (required by bdrv_replace_child_tran()) + */ + bdrv_ref(bs); + if (child == bs->backing) { + childp = &bs->backing; + } else if (child == bs->file) { + childp = &bs->file; + } else { + g_assert_not_reached(); + } + + if (child->bs) { + /* + * Pass free_empty_child=false, we will free the child in + * bdrv_remove_filter_or_cow_child_commit() + */ + bdrv_replace_child_tran(childp, NULL, tran, false); + } + + s = g_new(BdrvRemoveFilterOrCowChild, 1); + *s = (BdrvRemoveFilterOrCowChild) { + .child = child, + .bs = bs, + .is_backing = (childp == &bs->backing), + }; + tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); +} + /* - * With auto_skip=true bdrv_replace_node_common skips updating from parents - * if it creates a parent-child relation loop or if parent is block-job. - * - * With auto_skip=false the error is returned if from has a parent which should - * not be updated. + * A function to remove backing-chain child of @bs if exists: cow child for + * format nodes (always .backing) and filter child for filters (may be .file or + * .backing) */ -static int bdrv_replace_node_common(BlockDriverState *from, +static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, + Transaction *tran) +{ + bdrv_remove_file_or_backing_child(bs, bdrv_filter_or_cow_child(bs), tran); +} + +static int bdrv_replace_node_noperm(BlockDriverState *from, BlockDriverState *to, - bool auto_skip, Error **errp) + bool auto_skip, Transaction *tran, + Error **errp) { BdrvChild *c, *next; - GSList *list = NULL, *p; - uint64_t perm = 0, shared = BLK_PERM_ALL; - int ret; - - /* Make sure that @from doesn't go away until we have successfully attached - * all of its parents to @to. 
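[Editor's note, not part of the patch] The abort/commit/clean callbacks above are instances of QEMU's Transaction API, which this series uses for every graph manipulation: the change is applied speculatively, an action with undo/finish/cleanup hooks is registered with tran_add(), and tran_finalize() later either commits or rolls everything back. A condensed, hypothetical action showing that shape; DemoUndo and demo_change() are invented for illustration:

typedef struct DemoUndo {
    BlockDriverState *bs;   /* kept referenced while the change is pending */
} DemoUndo;

static void demo_abort(void *opaque)
{
    /* Undo the speculative change made before tran_add(). */
}

static void demo_commit(void *opaque)
{
    /* The change is final; drop anything kept only for rollback. */
}

static void demo_clean(void *opaque)
{
    DemoUndo *s = opaque;

    /* Runs after either .commit or .abort. */
    bdrv_unref(s->bs);
    g_free(s);
}

static TransactionActionDrv demo_drv = {
    .abort  = demo_abort,
    .commit = demo_commit,
    .clean  = demo_clean,
};

static int demo_change(BlockDriverState *bs, Error **errp)
{
    Transaction *tran = tran_new();
    DemoUndo *s = g_new(DemoUndo, 1);
    int ret = 0;

    bdrv_ref(bs);
    s->bs = bs;
    /* ... apply the change speculatively ... */
    tran_add(tran, &demo_drv, s);

    /* ret == 0 commits, ret < 0 aborts; .clean runs in both cases. */
    tran_finalize(tran, ret);
    return ret;
}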
*/ - bdrv_ref(from); - assert(qemu_get_current_aio_context() == qemu_get_aio_context()); - assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); - bdrv_drained_begin(from); + assert(to != NULL); - /* Put all parents into @list and calculate their cumulative permissions */ QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->bs == from); if (!should_update_child(c, to)) { if (auto_skip) { continue; } - ret = -EINVAL; error_setg(errp, "Should not change '%s' link to '%s'", c->name, from->node_name); - goto out; + return -EINVAL; } if (c->frozen) { - ret = -EPERM; error_setg(errp, "Cannot change '%s' link to '%s'", c->name, from->node_name); - goto out; + return -EPERM; + } + + /* + * Passing a pointer to the local variable @c is fine here, because + * @to is not NULL, and so &c will not be attached to the transaction. + */ + bdrv_replace_child_tran(&c, to, tran, true); + } + + return 0; +} + +/* + * With auto_skip=true bdrv_replace_node_common skips updating from parents + * if it creates a parent-child relation loop or if parent is block-job. + * + * With auto_skip=false the error is returned if from has a parent which should + * not be updated. + * + * With @detach_subchain=true @to must be in a backing chain of @from. In this + * case backing link of the cow-parent of @to is removed. + * + * @to must not be NULL. + */ +static int bdrv_replace_node_common(BlockDriverState *from, + BlockDriverState *to, + bool auto_skip, bool detach_subchain, + Error **errp) +{ + Transaction *tran = tran_new(); + g_autoptr(GHashTable) found = NULL; + g_autoptr(GSList) refresh_list = NULL; + BlockDriverState *to_cow_parent = NULL; + int ret; + + assert(to != NULL); + + if (detach_subchain) { + assert(bdrv_chain_contains(from, to)); + assert(from != to); + for (to_cow_parent = from; + bdrv_filter_or_cow_bs(to_cow_parent) != to; + to_cow_parent = bdrv_filter_or_cow_bs(to_cow_parent)) + { + ; } - list = g_slist_prepend(list, c); - perm |= c->perm; - shared &= c->shared_perm; } - /* Check whether the required permissions can be granted on @to, ignoring - * all BdrvChild in @list so that they can't block themselves. */ - ret = bdrv_check_update_perm(to, NULL, perm, shared, list, errp); + /* Make sure that @from doesn't go away until we have successfully attached + * all of its parents to @to. */ + bdrv_ref(from); + + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); + bdrv_drained_begin(from); + + /* + * Do the replacement without permission update. + * Replacement may influence the permissions, we should calculate new + * permissions based on new graph. If we fail, we'll roll-back the + * replacement. + */ + ret = bdrv_replace_node_noperm(from, to, auto_skip, tran, errp); if (ret < 0) { - bdrv_abort_perm_update(to); goto out; } - /* Now actually perform the change. We performed the permission check for - * all elements of @list at once, so set the permissions all at once at the - * very end. 
*/ - for (p = list; p != NULL; p = p->next) { - c = p->data; - - bdrv_ref(to); - bdrv_replace_child_noperm(c, to); - bdrv_unref(from); + if (detach_subchain) { + bdrv_remove_filter_or_cow_child(to_cow_parent, tran); } - bdrv_set_perm(to); + found = g_hash_table_new(NULL, NULL); + + refresh_list = bdrv_topological_dfs(refresh_list, found, to); + refresh_list = bdrv_topological_dfs(refresh_list, found, from); + + ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); + if (ret < 0) { + goto out; + } ret = 0; out: - g_slist_free(list); + tran_finalize(tran, ret); + bdrv_drained_end(from); bdrv_unref(from); return ret; } +/** + * Replace node @from by @to (where neither may be NULL). + */ int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp) { - return bdrv_replace_node_common(from, to, true, errp); + return bdrv_replace_node_common(from, to, true, false, errp); +} + +int bdrv_drop_filter(BlockDriverState *bs, Error **errp) +{ + return bdrv_replace_node_common(bs, bdrv_filter_or_cow_bs(bs), true, true, + errp); } /* @@ -4676,37 +5167,69 @@ int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, * This will modify the BlockDriverState fields, and swap contents * between bs_new and bs_top. Both bs_new and bs_top are modified. * - * bs_new must not be attached to a BlockBackend. + * bs_new must not be attached to a BlockBackend and must not have backing + * child. * * This function does not create any image files. - * - * bdrv_append() takes ownership of a bs_new reference and unrefs it because - * that's what the callers commonly need. bs_new will be referenced by the old - * parents of bs_top after bdrv_append() returns. If the caller needs to keep a - * reference of its own, it must call bdrv_ref(). */ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, Error **errp) { - int ret = bdrv_set_backing_hd(bs_new, bs_top, errp); + int ret; + Transaction *tran = tran_new(); + + assert(!bs_new->backing); + + ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing", + &child_of_bds, bdrv_backing_role(bs_new), + &bs_new->backing, tran, errp); if (ret < 0) { goto out; } - ret = bdrv_replace_node(bs_top, bs_new, errp); + ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); if (ret < 0) { - bdrv_set_backing_hd(bs_new, NULL, &error_abort); goto out; } - ret = 0; - + ret = bdrv_refresh_perms(bs_new, errp); out: - /* - * bs_new is now referenced by its new parents, we don't need the - * additional reference any more. 
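[Editor's note, not part of the patch] The new bdrv_replace_node_common() above, and bdrv_replace_child_bs()/bdrv_append() further below, all follow the same recipe: rewire the graph without touching permissions, recompute permissions once over every affected subtree, and let the transaction roll the whole thing back if that fails. A condensed sketch of that recipe; demo_swap_nodes() is invented, but the helpers it calls are the ones introduced in this patch:

static int demo_swap_nodes(BlockDriverState *from, BlockDriverState *to,
                           Error **errp)
{
    g_autoptr(GHashTable) found = NULL;
    g_autoptr(GSList) refresh_list = NULL;
    Transaction *tran = tran_new();
    int ret;

    /* 1. Move all of @from's parents to @to, permissions untouched. */
    ret = bdrv_replace_node_noperm(from, to, true, tran, errp);
    if (ret < 0) {
        goto out;
    }

    /* 2. Recompute permissions over everything reachable from either node. */
    found = g_hash_table_new(NULL, NULL);
    refresh_list = bdrv_topological_dfs(refresh_list, found, to);
    refresh_list = bdrv_topological_dfs(refresh_list, found, from);
    ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);

out:
    /* 3. Commit on success; on failure undo the rewiring as well. */
    tran_finalize(tran, ret);
    return ret;
}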
- */ - bdrv_unref(bs_new); + tran_finalize(tran, ret); + + bdrv_refresh_limits(bs_top, NULL, NULL); + + return ret; +} + +/* Not for empty child */ +int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, + Error **errp) +{ + int ret; + Transaction *tran = tran_new(); + g_autoptr(GHashTable) found = NULL; + g_autoptr(GSList) refresh_list = NULL; + BlockDriverState *old_bs = child->bs; + + bdrv_ref(old_bs); + bdrv_drained_begin(old_bs); + bdrv_drained_begin(new_bs); + + bdrv_replace_child_tran(&child, new_bs, tran, true); + /* @new_bs must have been non-NULL, so @child must not have been freed */ + assert(child != NULL); + + found = g_hash_table_new(NULL, NULL); + refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); + refresh_list = bdrv_topological_dfs(refresh_list, found, new_bs); + + ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); + + tran_finalize(tran, ret); + + bdrv_drained_end(old_bs); + bdrv_drained_end(new_bs); + bdrv_unref(old_bs); return ret; } @@ -4727,29 +5250,61 @@ static void bdrv_delete(BlockDriverState *bs) g_free(bs); } -BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options, + +/* + * Replace @bs by newly created block node. + * + * @options is a QDict of options to pass to the block drivers, or NULL for an + * empty set of options. The reference to the QDict belongs to the block layer + * after the call (even on failure), so if the caller intends to reuse the + * dictionary, it needs to use qobject_ref() before calling bdrv_open. + */ +BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, int flags, Error **errp) { - BlockDriverState *new_node_bs; - Error *local_err = NULL; + ERRP_GUARD(); + int ret; + BlockDriverState *new_node_bs = NULL; + const char *drvname, *node_name; + BlockDriver *drv; + + drvname = qdict_get_try_str(options, "driver"); + if (!drvname) { + error_setg(errp, "driver is not specified"); + goto fail; + } + + drv = bdrv_find_format(drvname); + if (!drv) { + error_setg(errp, "Unknown driver: '%s'", drvname); + goto fail; + } + + node_name = qdict_get_try_str(options, "node-name"); - new_node_bs = bdrv_open(NULL, NULL, node_options, flags, errp); - if (new_node_bs == NULL) { + new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags, + errp); + options = NULL; /* bdrv_new_open_driver() eats options */ + if (!new_node_bs) { error_prepend(errp, "Could not create node: "); - return NULL; + goto fail; } bdrv_drained_begin(bs); - bdrv_replace_node(bs, new_node_bs, &local_err); + ret = bdrv_replace_node(bs, new_node_bs, errp); bdrv_drained_end(bs); - if (local_err) { - bdrv_unref(new_node_bs); - error_propagate(errp, local_err); - return NULL; + if (ret < 0) { + error_prepend(errp, "Could not replace node: "); + goto fail; } return new_node_bs; + +fail: + qobject_unref(options); + bdrv_unref(new_node_bs); + return NULL; } /* @@ -4782,7 +5337,7 @@ int coroutine_fn bdrv_co_check(BlockDriverState *bs, * -ENOTSUP - format driver doesn't support changing the backing file */ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, - const char *backing_fmt, bool warn) + const char *backing_fmt, bool require) { BlockDriver *drv = bs->drv; int ret; @@ -4796,10 +5351,8 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, return -EINVAL; } - if (warn && backing_file && !backing_fmt) { - warn_report("Deprecated use of backing file without explicit " - "backing format, use of this image requires " - "potentially unsafe format 
probing"); + if (require && backing_file && !backing_fmt) { + return -EINVAL; } if (drv->bdrv_change_backing_file != NULL) { @@ -5002,7 +5555,17 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, updated_children = g_slist_prepend(updated_children, c); } - bdrv_replace_node_common(top, base, false, &local_err); + /* + * It seems correct to pass detach_subchain=true here, but it triggers + * one more yet not fixed bug, when due to nested aio_poll loop we switch to + * another drained section, which modify the graph (for example, removing + * the child, which we keep in updated_children list). So, it's a TODO. + * + * Note, bug triggered if pass detach_subchain=true here and run + * test-bdrv-drain. test_drop_intermediate_poll() test-case will crash. + * That's a FIXME. + */ + bdrv_replace_node_common(top, base, false, false, &local_err); if (local_err) { error_report_err(local_err); goto exit; @@ -5817,6 +6380,9 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, void bdrv_init(void) { +#ifdef CONFIG_BDRV_WHITELIST_TOOLS + use_bdrv_whitelist = 1; +#endif module_call_init(MODULE_INIT_BLOCK); } @@ -5940,6 +6506,7 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs) { BdrvChild *child, *parent; int ret; + uint64_t cumulative_perms, cumulative_shared_perms; if (!bs->drv) { return -ENOMEDIUM; @@ -5970,6 +6537,13 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs) } } + bdrv_get_cumulative_perm(bs, &cumulative_perms, + &cumulative_shared_perms); + if (cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) { + /* Our inactive parents still need write access. Inactivation failed. */ + return -EPERM; + } + bs->open_flags |= BDRV_O_INACTIVE; /* @@ -6277,9 +6851,13 @@ void bdrv_img_create(const char *filename, const char *fmt, } assert(full_backing); - /* backing files always opened read-only */ + /* + * No need to do I/O here, which allows us to open encrypted + * backing images without needing the secret + */ back_flags = flags; back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); + back_flags |= BDRV_O_NO_IO; backing_options = qdict_new(); if (backing_fmt) { @@ -6295,24 +6873,11 @@ void bdrv_img_create(const char *filename, const char *fmt, goto out; } else { if (!backing_fmt) { - warn_report("Deprecated use of backing file without explicit " - "backing format (detected format of %s)", - bs->drv->format_name); - if (bs->drv != &bdrv_raw) { - /* - * A probe of raw deserves the most attention: - * leaving the backing format out of the image - * will ensure bs->probed is set (ensuring we - * don't accidentally commit into the backing - * file), and allow more spots to warn the users - * to fix their toolchain when opening this image - * later. For other images, we can safely record - * the format that we probed. 
- */ - backing_fmt = bs->drv->format_name; - qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, backing_fmt, - NULL); - } + error_setg(&local_err, + "Backing file specified without backing format"); + error_append_hint(&local_err, "Detected format of %s.", + bs->drv->format_name); + goto out; } if (size == -1) { /* Opened BS, have no size */ @@ -6329,9 +6894,9 @@ void bdrv_img_create(const char *filename, const char *fmt, } /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */ } else if (backing_file && !backing_fmt) { - warn_report("Deprecated use of unopened backing file without " - "explicit backing format, use of this image requires " - "potentially unsafe format probing"); + error_setg(&local_err, + "Backing file specified without backing format"); + goto out; } if (size == -1) { @@ -7314,3 +7879,76 @@ BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs) { return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs))); } + +/** + * Check whether [offset, offset + bytes) overlaps with the cached + * block-status data region. + * + * If so, and @pnum is not NULL, set *pnum to `bsc.data_end - offset`, + * which is what bdrv_bsc_is_data()'s interface needs. + * Otherwise, *pnum is not touched. + */ +static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs, + int64_t offset, int64_t bytes, + int64_t *pnum) +{ + BdrvBlockStatusCache *bsc = qatomic_rcu_read(&bs->block_status_cache); + bool overlaps; + + overlaps = + qatomic_read(&bsc->valid) && + ranges_overlap(offset, bytes, bsc->data_start, + bsc->data_end - bsc->data_start); + + if (overlaps && pnum) { + *pnum = bsc->data_end - offset; + } + + return overlaps; +} + +/** + * See block_int.h for this function's documentation. + */ +bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum) +{ + RCU_READ_LOCK_GUARD(); + + return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum); +} + +/** + * See block_int.h for this function's documentation. + */ +void bdrv_bsc_invalidate_range(BlockDriverState *bs, + int64_t offset, int64_t bytes) +{ + RCU_READ_LOCK_GUARD(); + + if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) { + qatomic_set(&bs->block_status_cache->valid, false); + } +} + +/** + * See block_int.h for this function's documentation. + */ +void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes) +{ + BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1); + BdrvBlockStatusCache *old_bsc; + + *new_bsc = (BdrvBlockStatusCache) { + .valid = true, + .data_start = offset, + .data_end = offset + bytes, + }; + + QEMU_LOCK_GUARD(&bs->bsc_modify_lock); + + old_bsc = qatomic_rcu_read(&bs->block_status_cache); + qatomic_rcu_set(&bs->block_status_cache, new_bsc); + if (old_bsc) { + g_free_rcu(old_bsc, rcu); + } +} diff --git a/block/aio_task.c b/block/aio_task.c index 88989fa248c..9bd17ea2c13 100644 --- a/block/aio_task.c +++ b/block/aio_task.c @@ -98,6 +98,8 @@ AioTaskPool *coroutine_fn aio_task_pool_new(int max_busy_tasks) { AioTaskPool *pool = g_new0(AioTaskPool, 1); + assert(max_busy_tasks > 0); + pool->main_co = qemu_coroutine_self(); pool->max_busy_tasks = max_busy_tasks; diff --git a/block/backup-top.c b/block/backup-top.c deleted file mode 100644 index 589e8b651d2..00000000000 --- a/block/backup-top.c +++ /dev/null @@ -1,299 +0,0 @@ -/* - * backup-top filter driver - * - * The driver performs Copy-Before-Write (CBW) operation: it is injected above - * some node, and before each write it copies _old_ data to the target node. - * - * Copyright (c) 2018-2019 Virtuozzo International GmbH. 
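[Editor's note, not part of the patch] Stepping back to the bdrv_bsc_* helpers added at the end of block.c just above: they implement a single-range block-status cache, read under RCU and republished under bsc_modify_lock with g_free_rcu() retiring the old copy. A hypothetical protocol-driver fragment showing how such a cache is meant to be consulted and refilled; demo_co_block_status() and its "slow path" are invented, only the bdrv_bsc_* calls come from the patch:

static int coroutine_fn demo_co_block_status(BlockDriverState *bs,
                                             int64_t offset, int64_t bytes,
                                             int64_t *pnum)
{
    if (bdrv_bsc_is_data(bs, offset, pnum)) {
        /* Fast path: a previous query already reported this range as data. */
        return BDRV_BLOCK_DATA;
    }

    /*
     * Slow path: ask the storage backend (e.g. lseek(SEEK_DATA/SEEK_HOLE)).
     * Here we simply pretend the whole requested range turned out to be data.
     */
    *pnum = bytes;

    /* Publish the result so the next query can take the fast path. */
    bdrv_bsc_fill(bs, offset, bytes);

    return BDRV_BLOCK_DATA;
}

A write path would pair this with bdrv_bsc_invalidate_range() so that ranges cached as data are dropped when they are overwritten or discarded.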
- * - * Author: - * Sementsov-Ogievskiy Vladimir - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "qemu/osdep.h" - -#include "sysemu/block-backend.h" -#include "qemu/cutils.h" -#include "qapi/error.h" -#include "block/block_int.h" -#include "block/qdict.h" -#include "block/block-copy.h" - -#include "block/backup-top.h" - -typedef struct BDRVBackupTopState { - BlockCopyState *bcs; - BdrvChild *target; - bool active; - int64_t cluster_size; -} BDRVBackupTopState; - -static coroutine_fn int backup_top_co_preadv( - BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) -{ - BDRVBackupTopState *s = bs->opaque; - - if (!s->active) { - return -EIO; - } - - return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); -} - -static coroutine_fn int backup_top_cbw(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, BdrvRequestFlags flags) -{ - BDRVBackupTopState *s = bs->opaque; - uint64_t off, end; - - if (!s->active) { - return -EIO; - } - - if (flags & BDRV_REQ_WRITE_UNCHANGED) { - return 0; - } - - off = QEMU_ALIGN_DOWN(offset, s->cluster_size); - end = QEMU_ALIGN_UP(offset + bytes, s->cluster_size); - - return block_copy(s->bcs, off, end - off, true); -} - -static int coroutine_fn backup_top_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) -{ - int ret = backup_top_cbw(bs, offset, bytes, 0); - if (ret < 0) { - return ret; - } - - return bdrv_co_pdiscard(bs->backing, offset, bytes); -} - -static int coroutine_fn backup_top_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, BdrvRequestFlags flags) -{ - int ret = backup_top_cbw(bs, offset, bytes, flags); - if (ret < 0) { - return ret; - } - - return bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags); -} - -static coroutine_fn int backup_top_co_pwritev(BlockDriverState *bs, - uint64_t offset, - uint64_t bytes, - QEMUIOVector *qiov, int flags) -{ - int ret = backup_top_cbw(bs, offset, bytes, flags); - if (ret < 0) { - return ret; - } - - return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags); -} - -static int coroutine_fn backup_top_co_flush(BlockDriverState *bs) -{ - if (!bs->backing) { - return 0; - } - - return bdrv_co_flush(bs->backing->bs); -} - -static void backup_top_refresh_filename(BlockDriverState *bs) -{ - if (bs->backing == NULL) { - /* - * we can be here after failed bdrv_attach_child in - * bdrv_set_backing_hd - */ - return; - } - pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), - bs->backing->bs->filename); -} - -static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c, - BdrvChildRole role, - BlockReopenQueue *reopen_queue, - uint64_t perm, uint64_t shared, - uint64_t *nperm, uint64_t *nshared) -{ - BDRVBackupTopState *s = bs->opaque; - - if (!s->active) { - /* - * The filter node may be in process of bdrv_append(), which firstly do - * bdrv_set_backing_hd() and then bdrv_replace_node(). 
This means that - * we can't unshare BLK_PERM_WRITE during bdrv_append() operation. So, - * let's require nothing during bdrv_append() and refresh permissions - * after it (see bdrv_backup_top_append()). - */ - *nperm = 0; - *nshared = BLK_PERM_ALL; - return; - } - - if (!(role & BDRV_CHILD_FILTERED)) { - /* - * Target child - * - * Share write to target (child_file), to not interfere - * with guest writes to its disk which may be in target backing chain. - * Can't resize during a backup block job because we check the size - * only upfront. - */ - *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE; - *nperm = BLK_PERM_WRITE; - } else { - /* Source child */ - bdrv_default_perms(bs, c, role, reopen_queue, - perm, shared, nperm, nshared); - - if (perm & BLK_PERM_WRITE) { - *nperm = *nperm | BLK_PERM_CONSISTENT_READ; - } - *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); - } -} - -BlockDriver bdrv_backup_top_filter = { - .format_name = "backup-top", - .instance_size = sizeof(BDRVBackupTopState), - - .bdrv_co_preadv = backup_top_co_preadv, - .bdrv_co_pwritev = backup_top_co_pwritev, - .bdrv_co_pwrite_zeroes = backup_top_co_pwrite_zeroes, - .bdrv_co_pdiscard = backup_top_co_pdiscard, - .bdrv_co_flush = backup_top_co_flush, - - .bdrv_refresh_filename = backup_top_refresh_filename, - - .bdrv_child_perm = backup_top_child_perm, - - .is_filter = true, -}; - -BlockDriverState *bdrv_backup_top_append(BlockDriverState *source, - BlockDriverState *target, - const char *filter_node_name, - uint64_t cluster_size, - BackupPerf *perf, - BdrvRequestFlags write_flags, - BlockCopyState **bcs, - Error **errp) -{ - ERRP_GUARD(); - int ret; - BDRVBackupTopState *state; - BlockDriverState *top; - bool appended = false; - - assert(source->total_sectors == target->total_sectors); - - top = bdrv_new_open_driver(&bdrv_backup_top_filter, filter_node_name, - BDRV_O_RDWR, errp); - if (!top) { - return NULL; - } - - state = top->opaque; - top->total_sectors = source->total_sectors; - top->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | - (BDRV_REQ_FUA & source->supported_write_flags); - top->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | - ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & - source->supported_zero_flags); - - bdrv_ref(target); - state->target = bdrv_attach_child(top, target, "target", &child_of_bds, - BDRV_CHILD_DATA, errp); - if (!state->target) { - bdrv_unref(target); - bdrv_unref(top); - return NULL; - } - - bdrv_drained_begin(source); - - bdrv_ref(top); - ret = bdrv_append(top, source, errp); - if (ret < 0) { - error_prepend(errp, "Cannot append backup-top filter: "); - goto fail; - } - appended = true; - - /* - * bdrv_append() finished successfully, now we can require permissions - * we want. 
- */ - state->active = true; - ret = bdrv_child_refresh_perms(top, top->backing, errp); - if (ret < 0) { - error_prepend(errp, "Cannot set permissions for backup-top filter: "); - goto fail; - } - - state->cluster_size = cluster_size; - state->bcs = block_copy_state_new(top->backing, state->target, - cluster_size, perf->use_copy_range, - write_flags, errp); - if (!state->bcs) { - error_prepend(errp, "Cannot create block-copy-state: "); - goto fail; - } - *bcs = state->bcs; - - bdrv_drained_end(source); - - return top; - -fail: - if (appended) { - state->active = false; - bdrv_backup_top_drop(top); - } else { - bdrv_unref(top); - } - - bdrv_drained_end(source); - - return NULL; -} - -void bdrv_backup_top_drop(BlockDriverState *bs) -{ - BDRVBackupTopState *s = bs->opaque; - - bdrv_drained_begin(bs); - - block_copy_state_free(s->bcs); - - s->active = false; - bdrv_child_refresh_perms(bs, bs->backing, &error_abort); - bdrv_replace_node(bs, bs->backing->bs, &error_abort); - bdrv_set_backing_hd(bs, NULL, &error_abort); - - bdrv_drained_end(bs); - - bdrv_unref(bs); -} diff --git a/block/backup-top.h b/block/backup-top.h deleted file mode 100644 index b28b0031c45..00000000000 --- a/block/backup-top.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * backup-top filter driver - * - * The driver performs Copy-Before-Write (CBW) operation: it is injected above - * some node, and before each write it copies _old_ data to the target node. - * - * Copyright (c) 2018-2019 Virtuozzo International GmbH. - * - * Author: - * Sementsov-Ogievskiy Vladimir - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef BACKUP_TOP_H -#define BACKUP_TOP_H - -#include "block/block_int.h" -#include "block/block-copy.h" - -BlockDriverState *bdrv_backup_top_append(BlockDriverState *source, - BlockDriverState *target, - const char *filter_node_name, - uint64_t cluster_size, - BackupPerf *perf, - BdrvRequestFlags write_flags, - BlockCopyState **bcs, - Error **errp); -void bdrv_backup_top_drop(BlockDriverState *bs); - -#endif /* BACKUP_TOP_H */ diff --git a/block/backup.c b/block/backup.c index 6cf2f974aa2..21d5983779e 100644 --- a/block/backup.c +++ b/block/backup.c @@ -27,13 +27,11 @@ #include "qemu/bitmap.h" #include "qemu/error-report.h" -#include "block/backup-top.h" - -#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16) +#include "block/copy-before-write.h" typedef struct BackupBlockJob { BlockJob common; - BlockDriverState *backup_top; + BlockDriverState *cbw; BlockDriverState *source_bs; BlockDriverState *target_bs; @@ -104,7 +102,7 @@ static void backup_clean(Job *job) { BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); block_job_remove_all_bdrv(&s->common); - bdrv_backup_top_drop(s->backup_top); + bdrv_cbw_drop(s->cbw); } void backup_do_checkpoint(BlockJob *job, Error **errp) @@ -235,18 +233,16 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job) BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs); if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) { + bdrv_clear_dirty_bitmap(bcs_bitmap, NULL); ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap, NULL, true); assert(ret); - } else { - if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { - /* - * We can't hog the coroutine to initialize this thoroughly. - * Set a flag and resume work when we are able to yield safely. - */ - block_copy_set_skip_unallocated(job->bcs, true); - } - bdrv_set_dirty_bitmap(bcs_bitmap, 0, job->len); + } else if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { + /* + * We can't hog the coroutine to initialize this thoroughly. + * Set a flag and resume work when we are able to yield safely. + */ + block_copy_set_skip_unallocated(job->bcs, true); } estimate = bdrv_get_dirty_count(bcs_bitmap); @@ -331,11 +327,12 @@ static void coroutine_fn backup_set_speed(BlockJob *job, int64_t speed) } } -static void backup_cancel(Job *job) +static bool backup_cancel(Job *job, bool force) { BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); bdrv_cancel_in_flight(s->target_bs); + return true; } static const BlockJobDriver backup_job_driver = { @@ -354,43 +351,6 @@ static const BlockJobDriver backup_job_driver = { .set_speed = backup_set_speed, }; -static int64_t backup_calculate_cluster_size(BlockDriverState *target, - Error **errp) -{ - int ret; - BlockDriverInfo bdi; - bool target_does_cow = bdrv_backing_chain_next(target); - - /* - * If there is no backing file on the target, we cannot rely on COW if our - * backup cluster size is smaller than the target cluster size. Even for - * targets with a backing file, try to avoid COW if possible. - */ - ret = bdrv_get_info(target, &bdi); - if (ret == -ENOTSUP && !target_does_cow) { - /* Cluster size is not defined */ - warn_report("The target block device doesn't provide " - "information about the block size and it doesn't have a " - "backing file. The default block size of %u bytes is " - "used. 
If the actual block size of the target exceeds " - "this default, the backup may be unusable", - BACKUP_CLUSTER_SIZE_DEFAULT); - return BACKUP_CLUSTER_SIZE_DEFAULT; - } else if (ret < 0 && !target_does_cow) { - error_setg_errno(errp, -ret, - "Couldn't determine the cluster size of the target image, " - "which has no backing file"); - error_append_hint(errp, - "Aborting, since this may create an unusable destination image\n"); - return ret; - } else if (ret < 0 && target_does_cow) { - /* Not fatal; just trudge on ahead. */ - return BACKUP_CLUSTER_SIZE_DEFAULT; - } - - return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); -} - BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, BlockDriverState *target, int64_t speed, MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap, @@ -407,8 +367,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, int64_t len, target_len; BackupBlockJob *job = NULL; int64_t cluster_size; - BdrvRequestFlags write_flags; - BlockDriverState *backup_top = NULL; + BlockDriverState *cbw = NULL; BlockCopyState *bcs = NULL; assert(bs); @@ -449,13 +408,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, return NULL; } - cluster_size = backup_calculate_cluster_size(target, errp); - if (cluster_size < 0) { - goto error; - } - - if (perf->max_workers < 1) { - error_setg(errp, "max-workers must be greater than zero"); + if (perf->max_workers < 1 || perf->max_workers > INT_MAX) { + error_setg(errp, "max-workers must be between 1 and %d", INT_MAX); return NULL; } @@ -465,13 +419,6 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, return NULL; } - if (perf->max_chunk && perf->max_chunk < cluster_size) { - error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup " - "cluster size (%" PRIi64 ")", perf->max_chunk, cluster_size); - return NULL; - } - - if (sync_bitmap) { /* If we need to write to this bitmap, check that we can: */ if (bitmap_mode != BITMAP_SYNC_MODE_NEVER && @@ -504,39 +451,28 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, goto error; } - /* - * If source is in backing chain of target assume that target is going to be - * used for "image fleecing", i.e. it should represent a kind of snapshot of - * source at backup-start point in time. And target is going to be read by - * somebody (for example, used as NBD export) during backup job. - * - * In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid - * intersection of backup writes and third party reads from target, - * otherwise reading from target we may occasionally read already updated by - * guest data. - * - * For more information see commit f8d59dfb40bb and test - * tests/qemu-iotests/222 - */ - write_flags = (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) | - (compress ? 
BDRV_REQ_WRITE_COMPRESSED : 0), + cbw = bdrv_cbw_append(bs, target, filter_node_name, &bcs, errp); + if (!cbw) { + goto error; + } - backup_top = bdrv_backup_top_append(bs, target, filter_node_name, - cluster_size, perf, - write_flags, &bcs, errp); - if (!backup_top) { + cluster_size = block_copy_cluster_size(bcs); + + if (perf->max_chunk && perf->max_chunk < cluster_size) { + error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup " + "cluster size (%" PRIi64 ")", perf->max_chunk, cluster_size); goto error; } /* job->len is fixed, so we can't allow resize */ - job = block_job_create(job_id, &backup_job_driver, txn, backup_top, + job = block_job_create(job_id, &backup_job_driver, txn, cbw, 0, BLK_PERM_ALL, speed, creation_flags, cb, opaque, errp); if (!job) { goto error; } - job->backup_top = backup_top; + job->cbw = cbw; job->source_bs = bs; job->target_bs = target; job->on_source_error = on_source_error; @@ -549,10 +485,11 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, job->len = len; job->perf = *perf; + block_copy_set_copy_opts(bcs, perf->use_copy_range, compress); block_copy_set_progress_meter(bcs, &job->common.job.progress); block_copy_set_speed(bcs, speed); - /* Required permissions are already taken by backup-top target */ + /* Required permissions are taken by copy-before-write filter target */ block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, &error_abort); @@ -562,8 +499,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, if (sync_bitmap) { bdrv_reclaim_dirty_bitmap(sync_bitmap, NULL); } - if (backup_top) { - bdrv_backup_top_drop(backup_top); + if (cbw) { + bdrv_cbw_drop(cbw); } return NULL; diff --git a/block/blkdebug.c b/block/blkdebug.c index 2c0b9b0ee85..bbf29487030 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -38,25 +38,27 @@ #include "qapi/qobject-input-visitor.h" #include "sysemu/qtest.h" +/* All APIs are thread-safe */ + typedef struct BDRVBlkdebugState { - int state; - int new_state; + /* IN: initialized in blkdebug_open() and never changed */ uint64_t align; uint64_t max_transfer; uint64_t opt_write_zero; uint64_t max_write_zero; uint64_t opt_discard; uint64_t max_discard; - + char *config_file; /* For blkdebug_refresh_filename() */ + /* initialized in blkdebug_parse_perms() */ uint64_t take_child_perms; uint64_t unshare_child_perms; - /* For blkdebug_refresh_filename() */ - char *config_file; - + /* State. 
Protected by lock */ + int state; QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX]; QSIMPLEQ_HEAD(, BlkdebugRule) active_rules; QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs; + QemuMutex lock; } BDRVBlkdebugState; typedef struct BlkdebugAIOCB { @@ -65,8 +67,11 @@ typedef struct BlkdebugAIOCB { } BlkdebugAIOCB; typedef struct BlkdebugSuspendedReq { + /* IN: initialized in suspend_request() */ Coroutine *co; char *tag; + + /* List entry protected BDRVBlkdebugState's lock */ QLIST_ENTRY(BlkdebugSuspendedReq) next; } BlkdebugSuspendedReq; @@ -74,9 +79,11 @@ enum { ACTION_INJECT_ERROR, ACTION_SET_STATE, ACTION_SUSPEND, + ACTION__MAX, }; typedef struct BlkdebugRule { + /* IN: initialized in add_rule() or blkdebug_debug_breakpoint() */ BlkdebugEvent event; int action; int state; @@ -95,6 +102,8 @@ typedef struct BlkdebugRule { char *tag; } suspend; } options; + + /* List entries protected BDRVBlkdebugState's lock */ QLIST_ENTRY(BlkdebugRule) next; QSIMPLEQ_ENTRY(BlkdebugRule) active_next; } BlkdebugRule; @@ -244,11 +253,14 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp) }; /* Add the rule */ + qemu_mutex_lock(&s->lock); QLIST_INSERT_HEAD(&s->rules[event], rule, next); + qemu_mutex_unlock(&s->lock); return 0; } +/* Called with lock held or from .bdrv_close */ static void remove_rule(BlkdebugRule *rule) { switch (rule->action) { @@ -467,6 +479,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, int ret; uint64_t align; + qemu_mutex_init(&s->lock); opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); if (!qemu_opts_absorb_qdict(opts, options, errp)) { ret = -EINVAL; @@ -567,6 +580,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, ret = 0; out: if (ret < 0) { + qemu_mutex_destroy(&s->lock); g_free(s->config_file); } qemu_opts_del(opts); @@ -581,6 +595,7 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes, int error; bool immediately; + qemu_mutex_lock(&s->lock); QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { uint64_t inject_offset = rule->options.inject.offset; @@ -594,6 +609,7 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } if (!rule || !rule->options.inject.error) { + qemu_mutex_unlock(&s->lock); return 0; } @@ -605,6 +621,7 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes, remove_rule(rule); } + qemu_mutex_unlock(&s->lock); if (!immediately) { aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self()); qemu_coroutine_yield(); @@ -614,8 +631,8 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } static int coroutine_fn -blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +blkdebug_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { int err; @@ -635,8 +652,8 @@ blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } static int coroutine_fn -blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { int err; @@ -667,7 +684,7 @@ static int blkdebug_co_flush(BlockDriverState *bs) } static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { uint32_t align = 
MAX(bs->bl.request_alignment, @@ -700,7 +717,7 @@ static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs, } static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { uint32_t align = bs->bl.pdiscard_alignment; int err; @@ -770,78 +787,80 @@ static void blkdebug_close(BlockDriverState *bs) } g_free(s->config_file); + qemu_mutex_destroy(&s->lock); } +/* Called with lock held. */ static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule) { BDRVBlkdebugState *s = bs->opaque; - BlkdebugSuspendedReq r; + BlkdebugSuspendedReq *r; - r = (BlkdebugSuspendedReq) { - .co = qemu_coroutine_self(), - .tag = g_strdup(rule->options.suspend.tag), - }; + r = g_new(BlkdebugSuspendedReq, 1); + + r->co = qemu_coroutine_self(); + r->tag = g_strdup(rule->options.suspend.tag); remove_rule(rule); - QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next); + QLIST_INSERT_HEAD(&s->suspended_reqs, r, next); if (!qtest_enabled()) { - printf("blkdebug: Suspended request '%s'\n", r.tag); - } - qemu_coroutine_yield(); - if (!qtest_enabled()) { - printf("blkdebug: Resuming request '%s'\n", r.tag); + printf("blkdebug: Suspended request '%s'\n", r->tag); } - - QLIST_REMOVE(&r, next); - g_free(r.tag); } -static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, - bool injected) +/* Called with lock held. */ +static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, + int *action_count, int *new_state) { BDRVBlkdebugState *s = bs->opaque; /* Only process rules for the current state */ if (rule->state && rule->state != s->state) { - return injected; + return; } /* Take the action */ + action_count[rule->action]++; switch (rule->action) { case ACTION_INJECT_ERROR: - if (!injected) { + if (action_count[ACTION_INJECT_ERROR] == 1) { QSIMPLEQ_INIT(&s->active_rules); - injected = true; } QSIMPLEQ_INSERT_HEAD(&s->active_rules, rule, active_next); break; case ACTION_SET_STATE: - s->new_state = rule->options.set_state.new_state; + *new_state = rule->options.set_state.new_state; break; case ACTION_SUSPEND: suspend_request(bs, rule); break; } - return injected; } static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event) { BDRVBlkdebugState *s = bs->opaque; struct BlkdebugRule *rule, *next; - bool injected; + int new_state; + int actions_count[ACTION__MAX] = { 0 }; assert((int)event >= 0 && event < BLKDBG__MAX); - injected = false; - s->new_state = s->state; - QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) { - injected = process_rule(bs, rule, injected); + WITH_QEMU_LOCK_GUARD(&s->lock) { + new_state = s->state; + QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) { + process_rule(bs, rule, actions_count, &new_state); + } + s->state = new_state; + } + + while (actions_count[ACTION_SUSPEND] > 0) { + qemu_coroutine_yield(); + actions_count[ACTION_SUSPEND]--; } - s->state = s->new_state; } static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event, @@ -864,33 +883,64 @@ static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event, .options.suspend.tag = g_strdup(tag), }; + qemu_mutex_lock(&s->lock); QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next); + qemu_mutex_unlock(&s->lock); return 0; } -static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag) +/* Called with lock held. May temporarily release lock. 
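[Editor's note, not part of the patch] resume_req_by_tag() below, like the reworked blkdebug_debug_event() above, is careful never to enter or yield a coroutine while s->lock is held: decisions are made under the lock, and the lock is dropped (or the list walk restarted) around the coroutine operation. A stripped-down sketch of that discipline; demo_event() is invented, only the lock-guard and coroutine primitives are real:

static void demo_event(BDRVBlkdebugState *s)
{
    int suspend_count = 0;

    WITH_QEMU_LOCK_GUARD(&s->lock) {
        /*
         * Inspect rules and update s->state here; count matching suspend
         * actions instead of yielding while the mutex is held.
         */
        suspend_count = 1;   /* placeholder result */
    }

    /* Yield only after the guard has released the lock. */
    while (suspend_count-- > 0) {
        qemu_coroutine_yield();
    }
}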
*/ +static int resume_req_by_tag(BDRVBlkdebugState *s, const char *tag, bool all) { - BDRVBlkdebugState *s = bs->opaque; - BlkdebugSuspendedReq *r, *next; + BlkdebugSuspendedReq *r; - QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) { +retry: + /* + * No need for _SAFE, since a different coroutine can remove another node + * (not the current one) in this list, and when the current one is removed + * the iteration starts back from beginning anyways. + */ + QLIST_FOREACH(r, &s->suspended_reqs, next) { if (!strcmp(r->tag, tag)) { - qemu_coroutine_enter(r->co); + Coroutine *co = r->co; + + if (!qtest_enabled()) { + printf("blkdebug: Resuming request '%s'\n", r->tag); + } + + QLIST_REMOVE(r, next); + g_free(r->tag); + g_free(r); + + qemu_mutex_unlock(&s->lock); + qemu_coroutine_enter(co); + qemu_mutex_lock(&s->lock); + + if (all) { + goto retry; + } return 0; } } return -ENOENT; } +static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + QEMU_LOCK_GUARD(&s->lock); + return resume_req_by_tag(s, tag, false); +} + static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) { BDRVBlkdebugState *s = bs->opaque; - BlkdebugSuspendedReq *r, *r_next; BlkdebugRule *rule, *next; int i, ret = -ENOENT; + QEMU_LOCK_GUARD(&s->lock); for (i = 0; i < BLKDBG__MAX; i++) { QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) { if (rule->action == ACTION_SUSPEND && @@ -900,11 +950,8 @@ static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs, } } } - QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) { - if (!strcmp(r->tag, tag)) { - qemu_coroutine_enter(r->co); - ret = 0; - } + if (resume_req_by_tag(s, tag, true) == 0) { + ret = 0; } return ret; } @@ -914,6 +961,7 @@ static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag) BDRVBlkdebugState *s = bs->opaque; BlkdebugSuspendedReq *r; + QEMU_LOCK_GUARD(&s->lock); QLIST_FOREACH(r, &s->suspended_reqs, next) { if (!strcmp(r->tag, tag)) { return true; diff --git a/block/blklogwrites.c b/block/blklogwrites.c index b7579370a30..f7a251e91f9 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -301,8 +301,8 @@ static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp) } static int coroutine_fn -blk_log_writes_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +blk_log_writes_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); } @@ -460,16 +460,16 @@ blk_log_writes_co_do_file_pdiscard(BlkLogWritesFileReq *fr) } static int coroutine_fn -blk_log_writes_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +blk_log_writes_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { return blk_log_writes_co_log(bs, offset, bytes, qiov, flags, blk_log_writes_co_do_file_pwritev, 0, false); } static int coroutine_fn -blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, - BdrvRequestFlags flags) +blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int64_t bytes, BdrvRequestFlags flags) { return blk_log_writes_co_log(bs, offset, bytes, NULL, flags, blk_log_writes_co_do_file_pwrite_zeroes, 0, @@ -484,9 +484,9 @@ static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs) } static int coroutine_fn 
-blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) +blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) { - return blk_log_writes_co_log(bs, offset, count, NULL, 0, + return blk_log_writes_co_log(bs, offset, bytes, NULL, 0, blk_log_writes_co_do_file_pdiscard, LOG_DISCARD_FLAG, false); } diff --git a/block/blkreplay.c b/block/blkreplay.c index 4a247752fd8..dcbe780ddbd 100644 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -72,7 +72,7 @@ static void block_request_create(uint64_t reqid, BlockDriverState *bs, } static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { uint64_t reqid = blkreplay_next_id(); int ret = bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); @@ -83,7 +83,7 @@ static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs, } static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { uint64_t reqid = blkreplay_next_id(); int ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); @@ -94,7 +94,7 @@ static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs, } static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, BdrvRequestFlags flags) + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { uint64_t reqid = blkreplay_next_id(); int ret = bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); @@ -105,7 +105,7 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs, } static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { uint64_t reqid = blkreplay_next_id(); int ret = bdrv_co_pdiscard(bs->file, offset, bytes); diff --git a/block/blkverify.c b/block/blkverify.c index 188d7632fae..d1facf5ba90 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -221,8 +221,8 @@ blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset, } static int coroutine_fn -blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +blkverify_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BlkverifyRequest r; QEMUIOVector raw_qiov; @@ -250,8 +250,8 @@ blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } static int coroutine_fn -blkverify_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +blkverify_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BlkverifyRequest r; return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true); diff --git a/block/block-backend.c b/block/block-backend.c index 413af51f3ba..12ef80ea170 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -14,11 +14,11 @@ #include "sysemu/block-backend.h" #include "block/block_int.h" #include "block/blockjob.h" +#include "block/coroutines.h" #include "block/throttle-groups.h" #include "hw/qdev-core.h" #include "sysemu/blockdev.h" #include "sysemu/runstate.h" -#include "sysemu/sysemu.h" #include "sysemu/replay.h" #include "qapi/error.h" #include "qapi/qapi-events-block.h" @@ -142,19 +142,18 @@ static void blk_root_set_aio_ctx(BdrvChild 
*child, AioContext *ctx, static char *blk_root_get_parent_desc(BdrvChild *child) { BlockBackend *blk = child->opaque; - char *dev_id; + g_autofree char *dev_id = NULL; if (blk->name) { - return g_strdup(blk->name); + return g_strdup_printf("block device '%s'", blk->name); } dev_id = blk_get_attached_dev_id(blk); if (*dev_id) { - return dev_id; + return g_strdup_printf("block device '%s'", dev_id); } else { /* TODO Callback into the BB owner for something more detailed */ - g_free(dev_id); - return g_strdup("a block device"); + return g_strdup("an unnamed block device"); } } @@ -298,6 +297,13 @@ static void blk_root_detach(BdrvChild *child) } } +static AioContext *blk_root_get_parent_aio_context(BdrvChild *c) +{ + BlockBackend *blk = c->opaque; + + return blk_get_aio_context(blk); +} + static const BdrvChildClass child_root = { .inherit_options = blk_root_inherit_options, @@ -318,6 +324,8 @@ static const BdrvChildClass child_root = { .can_set_aio_ctx = blk_root_can_set_aio_ctx, .set_aio_ctx = blk_root_set_aio_ctx, + + .get_parent_aio_context = blk_root_get_parent_aio_context, }; /* @@ -398,15 +406,19 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, BlockBackend *blk; BlockDriverState *bs; uint64_t perm = 0; + uint64_t shared = BLK_PERM_ALL; - /* blk_new_open() is mainly used in .bdrv_create implementations and the - * tools where sharing isn't a concern because the BDS stays private, so we - * just request permission according to the flags. + /* + * blk_new_open() is mainly used in .bdrv_create implementations and the + * tools where sharing isn't a major concern because the BDS stays private + * and the file is generally not supposed to be used by a second process, + * so we just request permission according to the flags. * * The exceptions are xen_disk and blockdev_init(); in these cases, the * caller of blk_new_open() doesn't make use of the permissions, but they * shouldn't hurt either. We can still share everything here because the - * guest devices will add their own blockers if they can't share. */ + * guest devices will add their own blockers if they can't share. + */ if ((flags & BDRV_O_NO_IO) == 0) { perm |= BLK_PERM_CONSISTENT_READ; if (flags & BDRV_O_RDWR) { @@ -416,8 +428,11 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, if (flags & BDRV_O_RESIZE) { perm |= BLK_PERM_RESIZE; } + if (flags & BDRV_O_NO_SHARE) { + shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED; + } - blk = blk_new(qemu_get_aio_context(), perm, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), perm, shared); bs = bdrv_open(filename, reference, options, flags, errp); if (!bs) { blk_unref(blk); @@ -426,7 +441,7 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, blk->root = bdrv_root_attach_child(bs, "root", &child_root, BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - blk->ctx, perm, BLK_PERM_ALL, blk, errp); + perm, shared, blk, errp); if (!blk->root) { blk_unref(blk); return NULL; @@ -840,7 +855,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) bdrv_ref(bs); blk->root = bdrv_root_attach_child(bs, "root", &child_root, BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - blk->ctx, blk->perm, blk->shared_perm, + blk->perm, blk->shared_perm, blk, errp); if (blk->root == NULL) { return -EPERM; @@ -855,6 +870,14 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) return 0; } +/* + * Change BlockDriverState associated with @blk. 
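[Editor's note, not part of the patch] One practical consequence of the blk_new_open() change above: a tool can pass BDRV_O_NO_SHARE to keep other writers off the image, since the shared permission mask then collapses to CONSISTENT_READ | WRITE_UNCHANGED. A sketch; demo_open_exclusive() and its arguments are illustrative only:

static BlockBackend *demo_open_exclusive(const char *filename, Error **errp)
{
    /*
     * BDRV_O_NO_SHARE narrows the shared permissions, so a second writer
     * can no longer attach to the same node while this backend exists.
     */
    return blk_new_open(filename, NULL, NULL,
                        BDRV_O_RDWR | BDRV_O_NO_SHARE, errp);
}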
+ */ +int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp) +{ + return bdrv_replace_child_bs(blk->root, new_bs, errp); +} + /* * Sets the permission bitmasks that the user of the BlockBackend needs. */ @@ -1139,11 +1162,11 @@ void blk_set_disable_request_queuing(BlockBackend *blk, bool disable) } static int blk_check_byte_request(BlockBackend *blk, int64_t offset, - size_t size) + int64_t bytes) { int64_t len; - if (size > INT_MAX) { + if (bytes < 0) { return -EIO; } @@ -1161,7 +1184,7 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, return len; } - if (offset > len || len - offset < size) { + if (offset > len || len - offset < bytes) { return -EIO; } } @@ -1182,9 +1205,9 @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) } /* To be called between exactly one pair of blk_inc/dec_in_flight() */ -static int coroutine_fn -blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, - QEMUIOVector *qiov, BdrvRequestFlags flags) +int coroutine_fn +blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { int ret; BlockDriverState *bs; @@ -1214,23 +1237,23 @@ blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, } int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, + int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { int ret; blk_inc_in_flight(blk); - ret = blk_do_preadv(blk, offset, bytes, qiov, flags); + ret = blk_co_do_preadv(blk, offset, bytes, qiov, flags); blk_dec_in_flight(blk); return ret; } /* To be called between exactly one pair of blk_inc/dec_in_flight() */ -static int coroutine_fn -blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes, - QEMUIOVector *qiov, size_t qiov_offset, - BdrvRequestFlags flags) +int coroutine_fn +blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) { int ret; BlockDriverState *bs; @@ -1264,26 +1287,40 @@ blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes, } int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, - unsigned int bytes, + int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags) { int ret; blk_inc_in_flight(blk); - ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); + ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); blk_dec_in_flight(blk); return ret; } int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, + int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags); } +static int coroutine_fn blk_pwritev_part(BlockBackend *blk, int64_t offset, + int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) +{ + int ret; + + blk_inc_in_flight(blk); + ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); + blk_dec_in_flight(blk); + + return ret; +} + typedef struct BlkRwCo { BlockBackend *blk; int64_t offset; @@ -1292,58 +1329,11 @@ typedef struct BlkRwCo { BdrvRequestFlags flags; } BlkRwCo; -static void blk_read_entry(void *opaque) -{ - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - - rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size, - qiov, rwco->flags); - aio_wait_kick(); -} - -static void blk_write_entry(void *opaque) -{ - BlkRwCo *rwco 
= opaque; - QEMUIOVector *qiov = rwco->iobuf; - - rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size, - qiov, 0, rwco->flags); - aio_wait_kick(); -} - -static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, - int64_t bytes, CoroutineEntry co_entry, - BdrvRequestFlags flags) -{ - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); - BlkRwCo rwco = { - .blk = blk, - .offset = offset, - .iobuf = &qiov, - .flags = flags, - .ret = NOT_DONE, - }; - - blk_inc_in_flight(blk); - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - co_entry(&rwco); - } else { - Coroutine *co = qemu_coroutine_create(co_entry, &rwco); - bdrv_coroutine_enter(blk_bs(blk), co); - BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE); - } - blk_dec_in_flight(blk); - - return rwco.ret; -} - int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int bytes, BdrvRequestFlags flags) + int64_t bytes, BdrvRequestFlags flags) { - return blk_prw(blk, offset, NULL, bytes, blk_write_entry, - flags | BDRV_REQ_ZERO_WRITE); + return blk_pwritev_part(blk, offset, bytes, NULL, 0, + flags | BDRV_REQ_ZERO_WRITE); } int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags) @@ -1390,7 +1380,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, typedef struct BlkAioEmAIOCB { BlockAIOCB common; BlkRwCo rwco; - int bytes; + int64_t bytes; bool has_returned; } BlkAioEmAIOCB; @@ -1422,7 +1412,8 @@ static void blk_aio_complete_bh(void *opaque) blk_aio_complete(acb); } -static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, +static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, + int64_t bytes, void *iobuf, CoroutineEntry co_entry, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) @@ -1461,8 +1452,8 @@ static void blk_aio_read_entry(void *opaque) QEMUIOVector *qiov = rwco->iobuf; assert(qiov->size == acb->bytes); - rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, - qiov, rwco->flags); + rwco->ret = blk_co_do_preadv(rwco->blk, rwco->offset, acb->bytes, + qiov, rwco->flags); blk_aio_complete(acb); } @@ -1473,37 +1464,40 @@ static void blk_aio_write_entry(void *opaque) QEMUIOVector *qiov = rwco->iobuf; assert(!qiov || qiov->size == acb->bytes); - rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, - qiov, 0, rwco->flags); + rwco->ret = blk_co_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, + qiov, 0, rwco->flags); blk_aio_complete(acb); } BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int count, BdrvRequestFlags flags, + int64_t bytes, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { - return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry, + return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_write_entry, flags | BDRV_REQ_ZERO_WRITE, cb, opaque); } -int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count) +int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes) { - int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0); - if (ret < 0) { - return ret; - } - return count; + int ret; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + + blk_inc_in_flight(blk); + ret = blk_do_preadv(blk, offset, bytes, &qiov, 0); + blk_dec_in_flight(blk); + + return ret < 0 ? 
ret : bytes; } -int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count, +int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes, BdrvRequestFlags flags) { - int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry, - flags); - if (ret < 0) { - return ret; - } - return count; + int ret; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + + ret = blk_pwritev_part(blk, offset, bytes, &qiov, 0, flags); + + return ret < 0 ? ret : bytes; } int64_t blk_getlength(BlockBackend *blk) @@ -1537,6 +1531,7 @@ BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { + assert((uint64_t)qiov->size <= INT64_MAX); return blk_aio_prwv(blk, offset, qiov->size, qiov, blk_aio_read_entry, flags, cb, opaque); } @@ -1545,6 +1540,7 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { + assert((uint64_t)qiov->size <= INT64_MAX); return blk_aio_prwv(blk, offset, qiov->size, qiov, blk_aio_write_entry, flags, cb, opaque); } @@ -1560,8 +1556,8 @@ void blk_aio_cancel_async(BlockAIOCB *acb) } /* To be called between exactly one pair of blk_inc/dec_in_flight() */ -static int coroutine_fn -blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) +int coroutine_fn +blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) { blk_wait_while_drained(blk); @@ -1572,18 +1568,15 @@ blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) return bdrv_co_ioctl(blk_bs(blk), req, buf); } -static void blk_ioctl_entry(void *opaque) +int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) { - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; + int ret; - rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base); - aio_wait_kick(); -} + blk_inc_in_flight(blk); + ret = blk_do_ioctl(blk, req, buf); + blk_dec_in_flight(blk); -int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) -{ - return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0); + return ret; } static void blk_aio_ioctl_entry(void *opaque) @@ -1591,7 +1584,7 @@ static void blk_aio_ioctl_entry(void *opaque) BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; - rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf); + rwco->ret = blk_co_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf); blk_aio_complete(acb); } @@ -1603,8 +1596,8 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, } /* To be called between exactly one pair of blk_inc/dec_in_flight() */ -static int coroutine_fn -blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes) +int coroutine_fn +blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes) { int ret; @@ -1623,45 +1616,43 @@ static void blk_aio_pdiscard_entry(void *opaque) BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; - rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes); + rwco->ret = blk_co_do_pdiscard(rwco->blk, rwco->offset, acb->bytes); blk_aio_complete(acb); } BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, - int64_t offset, int bytes, + int64_t offset, int64_t bytes, BlockCompletionFunc *cb, void *opaque) { return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, cb, opaque); } -int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) +int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, + int64_t bytes) 
{ int ret; blk_inc_in_flight(blk); - ret = blk_do_pdiscard(blk, offset, bytes); + ret = blk_co_do_pdiscard(blk, offset, bytes); blk_dec_in_flight(blk); return ret; } -static void blk_pdiscard_entry(void *opaque) +int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes) { - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; + int ret; - rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size); - aio_wait_kick(); -} + blk_inc_in_flight(blk); + ret = blk_do_pdiscard(blk, offset, bytes); + blk_dec_in_flight(blk); -int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) -{ - return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); + return ret; } /* To be called between exactly one pair of blk_inc/dec_in_flight() */ -static int coroutine_fn blk_do_flush(BlockBackend *blk) +int coroutine_fn blk_co_do_flush(BlockBackend *blk) { blk_wait_while_drained(blk); @@ -1677,7 +1668,7 @@ static void blk_aio_flush_entry(void *opaque) BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; - rwco->ret = blk_do_flush(rwco->blk); + rwco->ret = blk_co_do_flush(rwco->blk); blk_aio_complete(acb); } @@ -1692,22 +1683,21 @@ int coroutine_fn blk_co_flush(BlockBackend *blk) int ret; blk_inc_in_flight(blk); - ret = blk_do_flush(blk); + ret = blk_co_do_flush(blk); blk_dec_in_flight(blk); return ret; } -static void blk_flush_entry(void *opaque) -{ - BlkRwCo *rwco = opaque; - rwco->ret = blk_do_flush(rwco->blk); - aio_wait_kick(); -} - int blk_flush(BlockBackend *blk) { - return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0); + int ret; + + blk_inc_in_flight(blk); + ret = blk_do_flush(blk); + blk_dec_in_flight(blk); + + return ret; } void blk_drain(BlockBackend *blk) @@ -1837,7 +1827,7 @@ bool blk_supports_write_perm(BlockBackend *blk) if (bs) { return !bdrv_is_read_only(bs); } else { - return !blk->root_state.read_only; + return blk->root_state.open_flags & BDRV_O_RDWR; } } @@ -1939,16 +1929,35 @@ uint32_t blk_get_request_alignment(BlockBackend *blk) return bs ? 
bs->bl.request_alignment : BDRV_SECTOR_SIZE; } +/* Returns the maximum hardware transfer length, in bytes; guaranteed nonzero */ +uint64_t blk_get_max_hw_transfer(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + uint64_t max = INT_MAX; + + if (bs) { + max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer); + max = MIN_NON_ZERO(max, bs->bl.max_transfer); + } + return ROUND_DOWN(max, blk_get_request_alignment(blk)); +} + /* Returns the maximum transfer length, in bytes; guaranteed nonzero */ uint32_t blk_get_max_transfer(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); - uint32_t max = 0; + uint32_t max = INT_MAX; if (bs) { - max = bs->bl.max_transfer; + max = MIN_NON_ZERO(max, bs->bl.max_transfer); } - return MIN_NON_ZERO(max, INT_MAX); + return ROUND_DOWN(max, blk_get_request_alignment(blk)); +} + +int blk_get_max_hw_iov(BlockBackend *blk) +{ + return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov, + blk->root->bs->bl.max_iov); } int blk_get_max_iov(BlockBackend *blk) @@ -2173,17 +2182,18 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, } int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int bytes, BdrvRequestFlags flags) + int64_t bytes, BdrvRequestFlags flags) { return blk_co_pwritev(blk, offset, bytes, NULL, flags | BDRV_REQ_ZERO_WRITE); } int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, - int count) + int64_t bytes) { - return blk_prw(blk, offset, (void *) buf, count, blk_write_entry, - BDRV_REQ_WRITE_COMPRESSED); + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + return blk_pwritev_part(blk, offset, bytes, &qiov, 0, + BDRV_REQ_WRITE_COMPRESSED); } int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, @@ -2254,7 +2264,6 @@ void blk_update_root_state(BlockBackend *blk) assert(blk->root); blk->root_state.open_flags = blk->root->bs->open_flags; - blk->root_state.read_only = blk->root->bs->read_only; blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes; } @@ -2273,12 +2282,7 @@ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) */ int blk_get_open_flags_from_root_state(BlockBackend *blk) { - int bs_flags; - - bs_flags = blk->root_state.read_only ? 
0 : BDRV_O_RDWR; - bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR; - - return bs_flags; + return blk->root_state.open_flags; } BlockBackendRootState *blk_get_root_state(BlockBackend *blk) @@ -2378,8 +2382,13 @@ static void blk_root_drained_begin(BdrvChild *child) static bool blk_root_drained_poll(BdrvChild *child) { BlockBackend *blk = child->opaque; + bool busy = false; assert(blk->quiesce_counter); - return !!blk->in_flight; + + if (blk->dev_ops && blk->dev_ops->drained_poll) { + busy = blk->dev_ops->drained_poll(blk->dev_opaque); + } + return busy || !!blk->in_flight; } static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) @@ -2412,7 +2421,7 @@ void blk_unregister_buf(BlockBackend *blk, void *host) int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, BlockBackend *blk_out, int64_t off_out, - int bytes, BdrvRequestFlags read_flags, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { int r; diff --git a/block/block-copy.c b/block/block-copy.c index 39ae481c8b4..ce116318b57 100644 --- a/block/block-copy.c +++ b/block/block-copy.c @@ -21,17 +21,27 @@ #include "qemu/units.h" #include "qemu/coroutine.h" #include "block/aio_task.h" +#include "qemu/error-report.h" #define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB) #define BLOCK_COPY_MAX_BUFFER (1 * MiB) #define BLOCK_COPY_MAX_MEM (128 * MiB) #define BLOCK_COPY_MAX_WORKERS 64 #define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */ +#define BLOCK_COPY_CLUSTER_SIZE_DEFAULT (1 << 16) + +typedef enum { + COPY_READ_WRITE_CLUSTER, + COPY_READ_WRITE, + COPY_WRITE_ZEROES, + COPY_RANGE_SMALL, + COPY_RANGE_FULL +} BlockCopyMethod; static coroutine_fn int block_copy_task_entry(AioTask *task); typedef struct BlockCopyCallState { - /* IN parameters. Initialized in block_copy_async() and never changed. */ + /* Fields initialized in block_copy_async() and never changed. */ BlockCopyState *s; int64_t offset; int64_t bytes; @@ -40,33 +50,60 @@ typedef struct BlockCopyCallState { bool ignore_ratelimit; BlockCopyAsyncCallbackFunc cb; void *cb_opaque; - /* Coroutine where async block-copy is running */ Coroutine *co; + /* Fields whose state changes throughout the execution */ + bool finished; /* atomic */ + QemuCoSleep sleep; /* TODO: protect API with a lock */ + bool cancelled; /* atomic */ /* To reference all call states from BlockCopyState */ QLIST_ENTRY(BlockCopyCallState) list; - /* State */ - int ret; - bool finished; - QemuCoSleepState *sleep_state; - bool cancelled; - - /* OUT parameters */ + /* + * Fields that report information about return values and erros. + * Protected by lock in BlockCopyState. + */ bool error_is_read; + /* + * @ret is set concurrently by tasks under mutex. Only set once by first + * failed task (and untouched if no task failed). + * After finishing (call_state->finished is true), it is not modified + * anymore and may be safely read without mutex. + */ + int ret; } BlockCopyCallState; typedef struct BlockCopyTask { AioTask task; + /* + * Fields initialized in block_copy_task_create() + * and never changed. + */ BlockCopyState *s; BlockCopyCallState *call_state; int64_t offset; + /* + * @method can also be set again in the while loop of + * block_copy_dirty_clusters(), but it is never accessed concurrently + * because the only other function that reads it is + * block_copy_task_entry() and it is invoked afterwards in the same + * iteration. 
+ */ + BlockCopyMethod method; + + /* + * Fields whose state changes throughout the execution + * Protected by lock in BlockCopyState. + */ + CoQueue wait_queue; /* coroutines blocked on this task */ + /* + * Only protect the case of parallel read while updating @bytes + * value in block_copy_task_shrink(). + */ int64_t bytes; - bool zeroes; QLIST_ENTRY(BlockCopyTask) list; - CoQueue wait_queue; /* coroutines blocked on this task */ } BlockCopyTask; static int64_t task_end(BlockCopyTask *task) @@ -82,17 +119,25 @@ typedef struct BlockCopyState { */ BdrvChild *source; BdrvChild *target; - BdrvDirtyBitmap *copy_bitmap; - int64_t in_flight_bytes; + + /* + * Fields initialized in block_copy_state_new() + * and never changed. + */ int64_t cluster_size; - bool use_copy_range; - int64_t copy_size; + int64_t max_transfer; uint64_t len; - QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */ - QLIST_HEAD(, BlockCopyCallState) calls; - BdrvRequestFlags write_flags; + /* + * Fields whose state changes throughout the execution + * Protected by lock. + */ + CoMutex lock; + int64_t in_flight_bytes; + BlockCopyMethod method; + QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */ + QLIST_HEAD(, BlockCopyCallState) calls; /* * skip_unallocated: * @@ -107,16 +152,15 @@ typedef struct BlockCopyState { * skip unallocated regions, clear them in the copy_bitmap, and invoke * block_copy_reset_unallocated() every time it does. */ - bool skip_unallocated; - + bool skip_unallocated; /* atomic */ + /* State fields that use a thread-safe API */ + BdrvDirtyBitmap *copy_bitmap; ProgressMeter *progress; - SharedResource *mem; - - uint64_t speed; RateLimit rate_limit; } BlockCopyState; +/* Called with lock held */ static BlockCopyTask *find_conflicting_task(BlockCopyState *s, int64_t offset, int64_t bytes) { @@ -134,6 +178,9 @@ static BlockCopyTask *find_conflicting_task(BlockCopyState *s, /* * If there are no intersecting tasks return false. Otherwise, wait for the * first found intersecting tasks to finish and return true. + * + * Called with lock held. May temporary release the lock. + * Return value of 0 proves that lock was NOT released. */ static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset, int64_t bytes) @@ -144,22 +191,43 @@ static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset, return false; } - qemu_co_queue_wait(&task->wait_queue, NULL); + qemu_co_queue_wait(&task->wait_queue, &s->lock); return true; } +/* Called with lock held */ +static int64_t block_copy_chunk_size(BlockCopyState *s) +{ + switch (s->method) { + case COPY_READ_WRITE_CLUSTER: + return s->cluster_size; + case COPY_READ_WRITE: + case COPY_RANGE_SMALL: + return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER), + s->max_transfer); + case COPY_RANGE_FULL: + return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE), + s->max_transfer); + default: + /* Cannot have COPY_WRITE_ZEROES here. */ + abort(); + } +} + /* * Search for the first dirty area in offset/bytes range and create task at * the beginning of it. 
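The block_copy_chunk_size() helper introduced above derives the per-task request size from the current copy method. Below is a rough, standalone illustration of that clamping; only the constants and the switch logic follow the patch, while the struct, helper names and main() are invented for the sketch and are not QEMU code.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define KiB (1024LL)
#define MiB (1024LL * KiB)
#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
#define BLOCK_COPY_MAX_BUFFER     (1 * MiB)

typedef enum {
    COPY_READ_WRITE_CLUSTER,
    COPY_READ_WRITE,
    COPY_WRITE_ZEROES,
    COPY_RANGE_SMALL,
    COPY_RANGE_FULL,
} BlockCopyMethod;

/* Illustrative stand-in for the fields the patch reads from BlockCopyState */
typedef struct {
    int64_t cluster_size;
    int64_t max_transfer;
    BlockCopyMethod method;
} CopyState;

static int64_t min64(int64_t a, int64_t b) { return a < b ? a : b; }
static int64_t max64(int64_t a, int64_t b) { return a > b ? a : b; }

/* Mirrors block_copy_chunk_size(): the chunk grows with the method,
 * but is always capped by the source/target max_transfer. */
static int64_t chunk_size(const CopyState *s)
{
    switch (s->method) {
    case COPY_READ_WRITE_CLUSTER:
        return s->cluster_size;
    case COPY_READ_WRITE:
    case COPY_RANGE_SMALL:
        return min64(max64(s->cluster_size, BLOCK_COPY_MAX_BUFFER),
                     s->max_transfer);
    case COPY_RANGE_FULL:
        return min64(max64(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
                     s->max_transfer);
    default:
        /* COPY_WRITE_ZEROES never asks for a chunk size */
        abort();
    }
}

int main(void)
{
    CopyState s = { .cluster_size = 64 * KiB, .max_transfer = 2 * MiB };

    s.method = COPY_RANGE_SMALL;
    printf("small copy-range chunk: %lld\n", (long long)chunk_size(&s)); /* 1 MiB */
    s.method = COPY_RANGE_FULL;
    printf("full copy-range chunk:  %lld\n", (long long)chunk_size(&s)); /* 2 MiB */
    return 0;
}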
*/ -static BlockCopyTask *block_copy_task_create(BlockCopyState *s, - BlockCopyCallState *call_state, - int64_t offset, int64_t bytes) +static coroutine_fn BlockCopyTask * +block_copy_task_create(BlockCopyState *s, BlockCopyCallState *call_state, + int64_t offset, int64_t bytes) { BlockCopyTask *task; - int64_t max_chunk = MIN_NON_ZERO(s->copy_size, call_state->max_chunk); + int64_t max_chunk; + QEMU_LOCK_GUARD(&s->lock); + max_chunk = MIN_NON_ZERO(block_copy_chunk_size(s), call_state->max_chunk); if (!bdrv_dirty_bitmap_next_dirty_area(s->copy_bitmap, offset, offset + bytes, max_chunk, &offset, &bytes)) @@ -183,6 +251,7 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s, .call_state = call_state, .offset = offset, .bytes = bytes, + .method = s->method, }; qemu_co_queue_init(&task->wait_queue); QLIST_INSERT_HEAD(&s->tasks, task, list); @@ -200,6 +269,7 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s, static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task, int64_t new_bytes) { + QEMU_LOCK_GUARD(&task->s->lock); if (new_bytes == task->bytes) { return; } @@ -216,11 +286,17 @@ static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task, static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret) { + QEMU_LOCK_GUARD(&task->s->lock); task->s->in_flight_bytes -= task->bytes; if (ret < 0) { bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->offset, task->bytes); } QLIST_REMOVE(task, list); + if (task->s->progress) { + progress_set_remaining(task->s->progress, + bdrv_get_dirty_count(task->s->copy_bitmap) + + task->s->in_flight_bytes); + } qemu_co_queue_restart_all(&task->wait_queue); } @@ -230,6 +306,7 @@ void block_copy_state_free(BlockCopyState *s) return; } + ratelimit_destroy(&s->rate_limit); bdrv_release_dirty_bitmap(s->copy_bitmap); shres_destroy(s->mem); g_free(s); @@ -242,12 +319,82 @@ static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target) target->bs->bl.max_transfer)); } +void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range, + bool compress) +{ + /* Keep BDRV_REQ_SERIALISING set (or not set) in block_copy_state_new() */ + s->write_flags = (s->write_flags & BDRV_REQ_SERIALISING) | + (compress ? BDRV_REQ_WRITE_COMPRESSED : 0); + + if (s->max_transfer < s->cluster_size) { + /* + * copy_range does not respect max_transfer. We don't want to bother + * with requests smaller than block-copy cluster size, so fallback to + * buffered copying (read and write respect max_transfer on their + * behalf). + */ + s->method = COPY_READ_WRITE_CLUSTER; + } else if (compress) { + /* Compression supports only cluster-size writes and no copy-range. */ + s->method = COPY_READ_WRITE_CLUSTER; + } else { + /* + * If copy range enabled, start with COPY_RANGE_SMALL, until first + * successful copy_range (look at block_copy_do_copy). + */ + s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE; + } +} + +static int64_t block_copy_calculate_cluster_size(BlockDriverState *target, + Error **errp) +{ + int ret; + BlockDriverInfo bdi; + bool target_does_cow = bdrv_backing_chain_next(target); + + /* + * If there is no backing file on the target, we cannot rely on COW if our + * backup cluster size is smaller than the target cluster size. Even for + * targets with a backing file, try to avoid COW if possible. 
+ */ + ret = bdrv_get_info(target, &bdi); + if (ret == -ENOTSUP && !target_does_cow) { + /* Cluster size is not defined */ + warn_report("The target block device doesn't provide " + "information about the block size and it doesn't have a " + "backing file. The default block size of %u bytes is " + "used. If the actual block size of the target exceeds " + "this default, the backup may be unusable", + BLOCK_COPY_CLUSTER_SIZE_DEFAULT); + return BLOCK_COPY_CLUSTER_SIZE_DEFAULT; + } else if (ret < 0 && !target_does_cow) { + error_setg_errno(errp, -ret, + "Couldn't determine the cluster size of the target image, " + "which has no backing file"); + error_append_hint(errp, + "Aborting, since this may create an unusable destination image\n"); + return ret; + } else if (ret < 0 && target_does_cow) { + /* Not fatal; just trudge on ahead. */ + return BLOCK_COPY_CLUSTER_SIZE_DEFAULT; + } + + return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); +} + BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, - int64_t cluster_size, bool use_copy_range, - BdrvRequestFlags write_flags, Error **errp) + Error **errp) { BlockCopyState *s; + int64_t cluster_size; BdrvDirtyBitmap *copy_bitmap; + bool is_fleecing; + + cluster_size = block_copy_calculate_cluster_size(target->bs, errp); + if (cluster_size < 0) { + return NULL; + } copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL, errp); @@ -256,6 +403,22 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, } bdrv_disable_dirty_bitmap(copy_bitmap); + /* + * If source is in backing chain of target assume that target is going to be + * used for "image fleecing", i.e. it should represent a kind of snapshot of + * source at backup-start point in time. And target is going to be read by + * somebody (for example, used as NBD export) during backup job. + * + * In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid + * intersection of backup writes and third party reads from target, + * otherwise reading from target we may occasionally read already updated by + * guest data. + * + * For more information see commit f8d59dfb40bb and test + * tests/qemu-iotests/222 + */ + is_fleecing = bdrv_chain_contains(target->bs, source->bs); + s = g_new(BlockCopyState, 1); *s = (BlockCopyState) { .source = source, @@ -263,38 +426,24 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, .copy_bitmap = copy_bitmap, .cluster_size = cluster_size, .len = bdrv_dirty_bitmap_size(copy_bitmap), - .write_flags = write_flags, + .write_flags = (is_fleecing ? BDRV_REQ_SERIALISING : 0), .mem = shres_create(BLOCK_COPY_MAX_MEM), + .max_transfer = QEMU_ALIGN_DOWN( + block_copy_max_transfer(source, target), + cluster_size), }; - if (block_copy_max_transfer(source, target) < cluster_size) { - /* - * copy_range does not respect max_transfer. We don't want to bother - * with requests smaller than block-copy cluster size, so fallback to - * buffered copying (read and write respect max_transfer on their - * behalf). - */ - s->use_copy_range = false; - s->copy_size = cluster_size; - } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) { - /* Compression supports only cluster-size writes and no copy-range. */ - s->use_copy_range = false; - s->copy_size = cluster_size; - } else { - /* - * We enable copy-range, but keep small copy_size, until first - * successful copy_range (look at block_copy_do_copy). 
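block_copy_calculate_cluster_size() above chooses the copy granularity from the target's reported cluster size, with a 64 KiB default when the target cannot report one. The following plain-C sketch of the same decision table replaces QEMU's BlockDriverInfo/Error plumbing with simple return codes; the function and parameter names are hypothetical and only the branch structure follows the patch.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define DEFAULT_CLUSTER_SIZE (1 << 16)   /* 64 KiB, as in the patch */

/*
 * info_ret:        return value of the driver's "get info" query (0 or -errno)
 * target_cluster:  cluster size it reported, only meaningful when info_ret == 0
 * target_does_cow: whether the target has a backing file
 *
 * Returns the chosen copy granularity, or a negative errno when the size is
 * unknown and the target does not do COW either (the fatal case in the patch).
 */
static int64_t calculate_cluster_size(int info_ret, int64_t target_cluster,
                                      int target_does_cow)
{
    if (info_ret == -ENOTSUP && !target_does_cow) {
        /* Size simply not defined: warn and use the default */
        fprintf(stderr, "warning: falling back to %d byte clusters\n",
                DEFAULT_CLUSTER_SIZE);
        return DEFAULT_CLUSTER_SIZE;
    } else if (info_ret < 0 && !target_does_cow) {
        return info_ret;                 /* may create an unusable copy */
    } else if (info_ret < 0) {
        return DEFAULT_CLUSTER_SIZE;     /* not fatal: target does COW */
    }

    return target_cluster > DEFAULT_CLUSTER_SIZE ? target_cluster
                                                 : DEFAULT_CLUSTER_SIZE;
}

int main(void)
{
    printf("%lld\n", (long long)calculate_cluster_size(0, 128 * 1024, 0)); /* 131072 */
    printf("%lld\n", (long long)calculate_cluster_size(-ENOTSUP, 0, 0));   /* 65536  */
    return 0;
}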
- */ - s->use_copy_range = use_copy_range; - s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); - } + block_copy_set_copy_opts(s, false, false); + ratelimit_init(&s->rate_limit); + qemu_co_mutex_init(&s->lock); QLIST_INIT(&s->tasks); QLIST_INIT(&s->calls); return s; } +/* Only set before running the job, no need for locking. */ void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm) { s->progress = pm; @@ -340,11 +489,15 @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool, * * No sync here: nor bitmap neighter intersecting requests handling, only copy. * + * @method is an in-out argument, so that copy_range can be either extended to + * a full-size buffer or disabled if the copy_range attempt fails. The output + * value of @method should be used for subsequent tasks. * Returns 0 on success. */ static int coroutine_fn block_copy_do_copy(BlockCopyState *s, int64_t offset, int64_t bytes, - bool zeroes, bool *error_is_read) + BlockCopyMethod *method, + bool *error_is_read) { int ret; int64_t nbytes = MIN(offset + bytes, s->len) - offset; @@ -358,7 +511,8 @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s, offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size)); assert(nbytes < INT_MAX); - if (zeroes) { + switch (*method) { + case COPY_WRITE_ZEROES: ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags & ~BDRV_REQ_WRITE_COMPRESSED); if (ret < 0) { @@ -366,84 +520,86 @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s, *error_is_read = false; } return ret; - } - if (s->use_copy_range) { + case COPY_RANGE_SMALL: + case COPY_RANGE_FULL: ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes, 0, s->write_flags); + if (ret >= 0) { + /* Successful copy-range, increase chunk size. */ + *method = COPY_RANGE_FULL; + return 0; + } + + trace_block_copy_copy_range_fail(s, offset, ret); + *method = COPY_READ_WRITE; + /* Fall through to read+write with allocated buffer */ + + case COPY_READ_WRITE_CLUSTER: + case COPY_READ_WRITE: + /* + * In case of failed copy_range request above, we may proceed with + * buffered request larger than BLOCK_COPY_MAX_BUFFER. + * Still, further requests will be properly limited, so don't care too + * much. Moreover the most likely case (copy_range is unsupported for + * the configuration, so the very first copy_range request fails) + * is handled by setting large copy_size only after first successful + * copy_range. + */ + + bounce_buffer = qemu_blockalign(s->source->bs, nbytes); + + ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0); if (ret < 0) { - trace_block_copy_copy_range_fail(s, offset, ret); - s->use_copy_range = false; - s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); - /* Fallback to read+write with allocated buffer */ - } else { - if (s->use_copy_range) { - /* - * Successful copy-range. Now increase copy_size. copy_range - * does not respect max_transfer (it's a TODO), so we factor - * that in here. - * - * Note: we double-check s->use_copy_range for the case when - * parallel block-copy request unsets it during previous - * bdrv_co_copy_range call. - */ - s->copy_size = - MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE), - QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source, - s->target), - s->cluster_size)); - } + trace_block_copy_read_fail(s, offset, ret); + *error_is_read = true; goto out; } - } - /* - * In case of failed copy_range request above, we may proceed with buffered - * request larger than BLOCK_COPY_MAX_BUFFER. 
Still, further requests will - * be properly limited, so don't care too much. Moreover the most likely - * case (copy_range is unsupported for the configuration, so the very first - * copy_range request fails) is handled by setting large copy_size only - * after first successful copy_range. - */ + ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer, + s->write_flags); + if (ret < 0) { + trace_block_copy_write_fail(s, offset, ret); + *error_is_read = false; + goto out; + } - bounce_buffer = qemu_blockalign(s->source->bs, nbytes); + out: + qemu_vfree(bounce_buffer); + break; - ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0); - if (ret < 0) { - trace_block_copy_read_fail(s, offset, ret); - *error_is_read = true; - goto out; + default: + abort(); } - ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer, - s->write_flags); - if (ret < 0) { - trace_block_copy_write_fail(s, offset, ret); - *error_is_read = false; - goto out; - } - -out: - qemu_vfree(bounce_buffer); - return ret; } static coroutine_fn int block_copy_task_entry(AioTask *task) { BlockCopyTask *t = container_of(task, BlockCopyTask, task); + BlockCopyState *s = t->s; bool error_is_read = false; + BlockCopyMethod method = t->method; int ret; - ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes, - &error_is_read); - if (ret < 0 && !t->call_state->ret) { - t->call_state->ret = ret; - t->call_state->error_is_read = error_is_read; - } else { - progress_work_done(t->s->progress, t->bytes); + ret = block_copy_do_copy(s, t->offset, t->bytes, &method, &error_is_read); + + WITH_QEMU_LOCK_GUARD(&s->lock) { + if (s->method == t->method) { + s->method = method; + } + + if (ret < 0) { + if (!t->call_state->ret) { + t->call_state->ret = ret; + t->call_state->error_is_read = error_is_read; + } + } else if (s->progress) { + progress_work_done(s->progress, t->bytes); + } } - co_put_to_shres(t->s->mem, t->bytes); + co_put_to_shres(s->mem, t->bytes); block_copy_task_end(t, ret); return ret; @@ -456,7 +612,7 @@ static int block_copy_block_status(BlockCopyState *s, int64_t offset, BlockDriverState *base; int ret; - if (s->skip_unallocated) { + if (qatomic_read(&s->skip_unallocated)) { base = bdrv_backing_chain_next(s->source->bs); } else { base = NULL; @@ -543,10 +699,14 @@ int64_t block_copy_reset_unallocated(BlockCopyState *s, bytes = clusters * s->cluster_size; if (!ret) { + qemu_co_mutex_lock(&s->lock); bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes); - progress_set_remaining(s->progress, - bdrv_get_dirty_count(s->copy_bitmap) + - s->in_flight_bytes); + if (s->progress) { + progress_set_remaining(s->progress, + bdrv_get_dirty_count(s->copy_bitmap) + + s->in_flight_bytes); + } + qemu_co_mutex_unlock(&s->lock); } *count = bytes; @@ -582,7 +742,8 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state) assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); assert(QEMU_IS_ALIGNED(bytes, s->cluster_size)); - while (bytes && aio_task_pool_status(aio) == 0 && !call_state->cancelled) { + while (bytes && aio_task_pool_status(aio) == 0 && + !qatomic_read(&call_state->cancelled)) { BlockCopyTask *task; int64_t status_bytes; @@ -604,34 +765,32 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state) if (status_bytes < task->bytes) { block_copy_task_shrink(task, status_bytes); } - if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) { + if (qatomic_read(&s->skip_unallocated) && + !(ret & BDRV_BLOCK_ALLOCATED)) { block_copy_task_end(task, 0); - progress_set_remaining(s->progress, - 
bdrv_get_dirty_count(s->copy_bitmap) + - s->in_flight_bytes); trace_block_copy_skip_range(s, task->offset, task->bytes); offset = task_end(task); bytes = end - offset; g_free(task); continue; } - task->zeroes = ret & BDRV_BLOCK_ZERO; - - if (s->speed) { - if (!call_state->ignore_ratelimit) { - uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0); - if (ns > 0) { - block_copy_task_end(task, -EAGAIN); - g_free(task); - qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, ns, - &call_state->sleep_state); - continue; - } - } + if (ret & BDRV_BLOCK_ZERO) { + task->method = COPY_WRITE_ZEROES; + } - ratelimit_calculate_delay(&s->rate_limit, task->bytes); + if (!call_state->ignore_ratelimit) { + uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0); + if (ns > 0) { + block_copy_task_end(task, -EAGAIN); + g_free(task); + qemu_co_sleep_ns_wakeable(&call_state->sleep, + QEMU_CLOCK_REALTIME, ns); + continue; + } } + ratelimit_calculate_delay(&s->rate_limit, task->bytes); + trace_block_copy_process(s, task->offset); co_get_from_shres(s->mem, task->bytes); @@ -672,9 +831,7 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state) void block_copy_kick(BlockCopyCallState *call_state) { - if (call_state->sleep_state) { - qemu_co_sleep_wake(call_state->sleep_state); - } + qemu_co_sleep_wake(&call_state->sleep); } /* @@ -689,15 +846,40 @@ void block_copy_kick(BlockCopyCallState *call_state) static int coroutine_fn block_copy_common(BlockCopyCallState *call_state) { int ret; + BlockCopyState *s = call_state->s; - QLIST_INSERT_HEAD(&call_state->s->calls, call_state, list); + qemu_co_mutex_lock(&s->lock); + QLIST_INSERT_HEAD(&s->calls, call_state, list); + qemu_co_mutex_unlock(&s->lock); do { ret = block_copy_dirty_clusters(call_state); - if (ret == 0 && !call_state->cancelled) { - ret = block_copy_wait_one(call_state->s, call_state->offset, - call_state->bytes); + if (ret == 0 && !qatomic_read(&call_state->cancelled)) { + WITH_QEMU_LOCK_GUARD(&s->lock) { + /* + * Check that there is no task we still need to + * wait to complete + */ + ret = block_copy_wait_one(s, call_state->offset, + call_state->bytes); + if (ret == 0) { + /* + * No pending tasks, but check again the bitmap in this + * same critical section, since a task might have failed + * between this and the critical section in + * block_copy_dirty_clusters(). + * + * block_copy_wait_one return value 0 also means that it + * didn't release the lock. So, we are still in the same + * critical section, not interrupted by any concurrent + * access to state. + */ + ret = bdrv_dirty_bitmap_next_dirty(s->copy_bitmap, + call_state->offset, + call_state->bytes) >= 0; + } + } } /* @@ -709,15 +891,17 @@ static int coroutine_fn block_copy_common(BlockCopyCallState *call_state) * 2. We have waited for some intersecting block-copy request * It may have failed and produced new dirty bits. 
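The reworked block_copy_do_copy()/block_copy_task_entry() pair above replaces the old use_copy_range/copy_size fields with a per-task method snapshot: a task may upgrade it (successful copy_range grows the chunk) or downgrade it (failed copy_range falls back to buffered read+write), and the result is published back to the shared state only if no other task changed it in the meantime. A compressed, single-threaded sketch of that state machine follows; the names and the try_copy_range() stub are invented for the example.

#include <stdbool.h>
#include <stdio.h>

typedef enum {
    COPY_READ_WRITE_CLUSTER,
    COPY_READ_WRITE,
    COPY_WRITE_ZEROES,
    COPY_RANGE_SMALL,
    COPY_RANGE_FULL,
} Method;

/* Pretend outcome of one copy_range attempt (e.g. unsupported by the driver) */
static bool try_copy_range(void) { return false; }

/*
 * One task: starts from the method it snapshotted at creation time and
 * returns the method it would like future tasks to use.
 */
static Method run_one_task(Method m)
{
    switch (m) {
    case COPY_RANGE_SMALL:
    case COPY_RANGE_FULL:
        if (try_copy_range()) {
            return COPY_RANGE_FULL;     /* success: grow the chunk size */
        }
        /* failure: use buffered read+write from now on */
        return COPY_READ_WRITE;
    default:
        return m;                       /* buffered/zero paths keep the method */
    }
}

int main(void)
{
    Method shared = COPY_RANGE_SMALL;   /* plays the role of s->method  */
    Method snapshot = shared;           /* plays the role of t->method  */
    Method result = run_one_task(snapshot);

    /* As in block_copy_task_entry(): only publish the new method if nobody
     * else changed the shared one while this task was running. */
    if (shared == snapshot) {
        shared = result;
    }
    printf("shared method is now %d\n", (int)shared);
    return 0;
}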
*/ - } while (ret > 0 && !call_state->cancelled); + } while (ret > 0 && !qatomic_read(&call_state->cancelled)); - call_state->finished = true; + qatomic_store_release(&call_state->finished, true); if (call_state->cb) { call_state->cb(call_state->cb_opaque); } + qemu_co_mutex_lock(&s->lock); QLIST_REMOVE(call_state, list); + qemu_co_mutex_unlock(&s->lock); return ret; } @@ -772,44 +956,50 @@ void block_copy_call_free(BlockCopyCallState *call_state) return; } - assert(call_state->finished); + assert(qatomic_read(&call_state->finished)); g_free(call_state); } bool block_copy_call_finished(BlockCopyCallState *call_state) { - return call_state->finished; + return qatomic_read(&call_state->finished); } bool block_copy_call_succeeded(BlockCopyCallState *call_state) { - return call_state->finished && !call_state->cancelled && - call_state->ret == 0; + return qatomic_load_acquire(&call_state->finished) && + !qatomic_read(&call_state->cancelled) && + call_state->ret == 0; } bool block_copy_call_failed(BlockCopyCallState *call_state) { - return call_state->finished && !call_state->cancelled && - call_state->ret < 0; + return qatomic_load_acquire(&call_state->finished) && + !qatomic_read(&call_state->cancelled) && + call_state->ret < 0; } bool block_copy_call_cancelled(BlockCopyCallState *call_state) { - return call_state->cancelled; + return qatomic_read(&call_state->cancelled); } int block_copy_call_status(BlockCopyCallState *call_state, bool *error_is_read) { - assert(call_state->finished); + assert(qatomic_load_acquire(&call_state->finished)); if (error_is_read) { *error_is_read = call_state->error_is_read; } return call_state->ret; } +/* + * Note that cancelling and finishing are racy. + * User can cancel a block-copy that is already finished. + */ void block_copy_call_cancel(BlockCopyCallState *call_state) { - call_state->cancelled = true; + qatomic_set(&call_state->cancelled, true); block_copy_kick(call_state); } @@ -818,17 +1008,19 @@ BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s) return s->copy_bitmap; } +int64_t block_copy_cluster_size(BlockCopyState *s) +{ + return s->cluster_size; +} + void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip) { - s->skip_unallocated = skip; + qatomic_set(&s->skip_unallocated, skip); } void block_copy_set_speed(BlockCopyState *s, uint64_t speed) { - s->speed = speed; - if (speed > 0) { - ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME); - } + ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME); /* * Note: it's good to kick all call states from here, but it should be done diff --git a/block/bochs.c b/block/bochs.c index 2f010ab40a1..4d68658087b 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -238,8 +238,8 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num) } static int coroutine_fn -bochs_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +bochs_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVBochsState *s = bs->opaque; uint64_t sector_num = offset >> BDRV_SECTOR_BITS; diff --git a/block/cloop.c b/block/cloop.c index c99192a57f4..b8c6d0eccdb 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -245,8 +245,8 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num) } static int coroutine_fn -cloop_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +cloop_co_preadv(BlockDriverState *bs, int64_t offset, int64_t 
bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVCloopState *s = bs->opaque; uint64_t sector_num = offset >> BDRV_SECTOR_BITS; diff --git a/block/commit.c b/block/commit.c index dd9ba87349e..10cc5ff4518 100644 --- a/block/commit.c +++ b/block/commit.c @@ -119,24 +119,24 @@ static int coroutine_fn commit_run(Job *job, Error **errp) uint64_t delay_ns = 0; int ret = 0; int64_t n = 0; /* bytes */ - void *buf = NULL; + QEMU_AUTO_VFREE void *buf = NULL; int64_t len, base_len; - ret = len = blk_getlength(s->top); + len = blk_getlength(s->top); if (len < 0) { - goto out; + return len; } job_progress_set_remaining(&s->common.job, len); - ret = base_len = blk_getlength(s->base); + base_len = blk_getlength(s->base); if (base_len < 0) { - goto out; + return base_len; } if (base_len < len) { ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL); if (ret) { - goto out; + return ret; } } @@ -174,7 +174,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) block_job_error_action(&s->common, s->on_error, error_in_source, -ret); if (action == BLOCK_ERROR_ACTION_REPORT) { - goto out; + return ret; } else { n = 0; continue; @@ -190,12 +190,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) } } - ret = 0; - -out: - qemu_vfree(buf); - - return ret; + return 0; } static const BlockJobDriver commit_job_driver = { @@ -212,7 +207,7 @@ static const BlockJobDriver commit_job_driver = { }; static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); } @@ -312,6 +307,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, commit_top_bs->total_sectors = top->total_sectors; ret = bdrv_append(commit_top_bs, top, errp); + bdrv_unref(commit_top_bs); /* referenced by new parents or failed */ if (ret < 0) { commit_top_bs = NULL; goto fail; @@ -434,7 +430,7 @@ int bdrv_commit(BlockDriverState *bs) int ro; int64_t n; int ret = 0; - uint8_t *buf = NULL; + QEMU_AUTO_VFREE uint8_t *buf = NULL; Error *local_err = NULL; if (!drv) @@ -452,7 +448,7 @@ int bdrv_commit(BlockDriverState *bs) return -EBUSY; } - ro = backing_file_bs->read_only; + ro = bdrv_is_read_only(backing_file_bs); if (ro) { if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) { @@ -555,8 +551,6 @@ int bdrv_commit(BlockDriverState *bs) ret = 0; ro_cleanup: - qemu_vfree(buf); - blk_unref(backing); if (bdrv_cow_bs(bs) != backing_file_bs) { bdrv_set_backing_hd(bs, backing_file_bs, &error_abort); diff --git a/block/copy-before-write.c b/block/copy-before-write.c new file mode 100644 index 00000000000..c30a5ff8dea --- /dev/null +++ b/block/copy-before-write.c @@ -0,0 +1,257 @@ +/* + * copy-before-write filter driver + * + * The driver performs Copy-Before-Write (CBW) operation: it is injected above + * some node, and before each write it copies _old_ data to the target node. + * + * Copyright (c) 2018-2021 Virtuozzo International GmbH. + * + * Author: + * Sementsov-Ogievskiy Vladimir + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "qemu/osdep.h" + +#include "sysemu/block-backend.h" +#include "qemu/cutils.h" +#include "qapi/error.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "block/block-copy.h" + +#include "block/copy-before-write.h" + +typedef struct BDRVCopyBeforeWriteState { + BlockCopyState *bcs; + BdrvChild *target; +} BDRVCopyBeforeWriteState; + +static coroutine_fn int cbw_co_preadv( + BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) +{ + return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); +} + +static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, BdrvRequestFlags flags) +{ + BDRVCopyBeforeWriteState *s = bs->opaque; + uint64_t off, end; + int64_t cluster_size = block_copy_cluster_size(s->bcs); + + if (flags & BDRV_REQ_WRITE_UNCHANGED) { + return 0; + } + + off = QEMU_ALIGN_DOWN(offset, cluster_size); + end = QEMU_ALIGN_UP(offset + bytes, cluster_size); + + return block_copy(s->bcs, off, end - off, true); +} + +static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs, + int64_t offset, int64_t bytes) +{ + int ret = cbw_do_copy_before_write(bs, offset, bytes, 0); + if (ret < 0) { + return ret; + } + + return bdrv_co_pdiscard(bs->file, offset, bytes); +} + +static int coroutine_fn cbw_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int64_t bytes, BdrvRequestFlags flags) +{ + int ret = cbw_do_copy_before_write(bs, offset, bytes, flags); + if (ret < 0) { + return ret; + } + + return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); +} + +static coroutine_fn int cbw_co_pwritev(BlockDriverState *bs, + int64_t offset, + int64_t bytes, + QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + int ret = cbw_do_copy_before_write(bs, offset, bytes, flags); + if (ret < 0) { + return ret; + } + + return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); +} + +static int coroutine_fn cbw_co_flush(BlockDriverState *bs) +{ + if (!bs->file) { + return 0; + } + + return bdrv_co_flush(bs->file->bs); +} + +static void cbw_refresh_filename(BlockDriverState *bs) +{ + pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), + bs->file->bs->filename); +} + +static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + if (!(role & BDRV_CHILD_FILTERED)) { + /* + * Target child + * + * Share write to target (child_file), to not interfere + * with guest writes to its disk which may be in target backing chain. + * Can't resize during a backup block job because we check the size + * only upfront. 
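cbw_do_copy_before_write() above widens every guest write to cluster boundaries before triggering the copy to the target. A standalone sketch of that rounding, using plain helpers in place of QEMU_ALIGN_DOWN/QEMU_ALIGN_UP (the function names here are invented for the example):

#include <stdint.h>
#include <stdio.h>

/* Same rounding as QEMU_ALIGN_DOWN / QEMU_ALIGN_UP for power-of-two or not */
static uint64_t align_down(uint64_t x, uint64_t a) { return x / a * a; }
static uint64_t align_up(uint64_t x, uint64_t a)   { return align_down(x + a - 1, a); }

/*
 * Given a guest write [offset, offset + bytes), compute the cluster-aligned
 * region the filter must copy to the target before letting the write through.
 */
static void cbw_region(uint64_t offset, uint64_t bytes, uint64_t cluster,
                       uint64_t *copy_off, uint64_t *copy_bytes)
{
    uint64_t off = align_down(offset, cluster);
    uint64_t end = align_up(offset + bytes, cluster);

    *copy_off = off;
    *copy_bytes = end - off;
}

int main(void)
{
    uint64_t off, len;

    /* An 8 KiB write at offset 60 KiB straddles a 64 KiB cluster boundary,
     * so two whole clusters (128 KiB starting at 0) must be copied first. */
    cbw_region(60 * 1024, 8 * 1024, 64 * 1024, &off, &len);
    printf("copy %llu bytes at offset %llu\n",
           (unsigned long long)len, (unsigned long long)off); /* 131072 at 0 */
    return 0;
}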
+ */ + *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE; + *nperm = BLK_PERM_WRITE; + } else { + /* Source child */ + bdrv_default_perms(bs, c, role, reopen_queue, + perm, shared, nperm, nshared); + + if (!QLIST_EMPTY(&bs->parents)) { + if (perm & BLK_PERM_WRITE) { + *nperm = *nperm | BLK_PERM_CONSISTENT_READ; + } + *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); + } + } +} + +static int cbw_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVCopyBeforeWriteState *s = bs->opaque; + BdrvDirtyBitmap *copy_bitmap; + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + false, errp); + if (!bs->file) { + return -EINVAL; + } + + s->target = bdrv_open_child(NULL, options, "target", bs, &child_of_bds, + BDRV_CHILD_DATA, false, errp); + if (!s->target) { + return -EINVAL; + } + + bs->total_sectors = bs->file->bs->total_sectors; + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & + bs->file->bs->supported_zero_flags); + + s->bcs = block_copy_state_new(bs->file, s->target, errp); + if (!s->bcs) { + error_prepend(errp, "Cannot create block-copy-state: "); + return -EINVAL; + } + + copy_bitmap = block_copy_dirty_bitmap(s->bcs); + bdrv_set_dirty_bitmap(copy_bitmap, 0, bdrv_dirty_bitmap_size(copy_bitmap)); + + return 0; +} + +static void cbw_close(BlockDriverState *bs) +{ + BDRVCopyBeforeWriteState *s = bs->opaque; + + block_copy_state_free(s->bcs); + s->bcs = NULL; +} + +BlockDriver bdrv_cbw_filter = { + .format_name = "copy-before-write", + .instance_size = sizeof(BDRVCopyBeforeWriteState), + + .bdrv_open = cbw_open, + .bdrv_close = cbw_close, + + .bdrv_co_preadv = cbw_co_preadv, + .bdrv_co_pwritev = cbw_co_pwritev, + .bdrv_co_pwrite_zeroes = cbw_co_pwrite_zeroes, + .bdrv_co_pdiscard = cbw_co_pdiscard, + .bdrv_co_flush = cbw_co_flush, + + .bdrv_refresh_filename = cbw_refresh_filename, + + .bdrv_child_perm = cbw_child_perm, + + .is_filter = true, +}; + +BlockDriverState *bdrv_cbw_append(BlockDriverState *source, + BlockDriverState *target, + const char *filter_node_name, + BlockCopyState **bcs, + Error **errp) +{ + ERRP_GUARD(); + BDRVCopyBeforeWriteState *state; + BlockDriverState *top; + QDict *opts; + + assert(source->total_sectors == target->total_sectors); + + opts = qdict_new(); + qdict_put_str(opts, "driver", "copy-before-write"); + if (filter_node_name) { + qdict_put_str(opts, "node-name", filter_node_name); + } + qdict_put_str(opts, "file", bdrv_get_node_name(source)); + qdict_put_str(opts, "target", bdrv_get_node_name(target)); + + top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp); + if (!top) { + return NULL; + } + + state = top->opaque; + *bcs = state->bcs; + + return top; +} + +void bdrv_cbw_drop(BlockDriverState *bs) +{ + bdrv_drop_filter(bs, &error_abort); + bdrv_unref(bs); +} + +static void cbw_init(void) +{ + bdrv_register(&bdrv_cbw_filter); +} + +block_init(cbw_init); diff --git a/block/copy-before-write.h b/block/copy-before-write.h new file mode 100644 index 00000000000..51847e711ac --- /dev/null +++ b/block/copy-before-write.h @@ -0,0 +1,39 @@ +/* + * copy-before-write filter driver + * + * The driver performs Copy-Before-Write (CBW) operation: it is injected above + * some node, and before each write it copies _old_ data to the target node. + * + * Copyright (c) 2018-2021 Virtuozzo International GmbH. 
+ * + * Author: + * Sementsov-Ogievskiy Vladimir + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef COPY_BEFORE_WRITE_H +#define COPY_BEFORE_WRITE_H + +#include "block/block_int.h" +#include "block/block-copy.h" + +BlockDriverState *bdrv_cbw_append(BlockDriverState *source, + BlockDriverState *target, + const char *filter_node_name, + BlockCopyState **bcs, + Error **errp); +void bdrv_cbw_drop(BlockDriverState *bs); + +#endif /* COPY_BEFORE_WRITE_H */ diff --git a/block/copy-on-read.c b/block/copy-on-read.c index 9cad9e1b8c2..1fc7fb3333b 100644 --- a/block/copy-on-read.c +++ b/block/copy-on-read.c @@ -29,7 +29,6 @@ typedef struct BDRVStateCOR { - bool active; BlockDriverState *bottom_bs; bool chain_frozen; } BDRVStateCOR; @@ -89,7 +88,6 @@ static int cor_open(BlockDriverState *bs, QDict *options, int flags, */ bdrv_ref(bottom_bs); } - state->active = true; state->bottom_bs = bottom_bs; /* @@ -112,17 +110,6 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { - BDRVStateCOR *s = bs->opaque; - - if (!s->active) { - /* - * While the filter is being removed - */ - *nperm = 0; - *nshared = BLK_PERM_ALL; - return; - } - *nperm = perm & PERM_PASSTHROUGH; *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED; @@ -141,10 +128,10 @@ static int64_t cor_getlength(BlockDriverState *bs) static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, - int flags) + BdrvRequestFlags flags) { int64_t n; int local_flags; @@ -194,10 +181,11 @@ static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs, static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs, - uint64_t offset, - uint64_t bytes, + int64_t offset, + int64_t bytes, QEMUIOVector *qiov, - size_t qiov_offset, int flags) + size_t qiov_offset, + BdrvRequestFlags flags) { return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset, flags); @@ -205,7 +193,7 @@ static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs, static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); @@ -213,15 +201,15 @@ static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs, static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { return bdrv_co_pdiscard(bs->file, offset, bytes); } static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs, - uint64_t offset, - uint64_t bytes, + int64_t offset, + int64_t bytes, QEMUIOVector *qiov) { return bdrv_co_pwritev(bs->file, offset, bytes, qiov, @@ -280,32 +268,14 @@ static BlockDriver bdrv_copy_on_read = { void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs) { - BdrvChild 
*child; - BlockDriverState *bs; BDRVStateCOR *s = cor_filter_bs->opaque; - child = bdrv_filter_child(cor_filter_bs); - if (!child) { - return; - } - bs = child->bs; - - /* Retain the BDS until we complete the graph change. */ - bdrv_ref(bs); - /* Hold a guest back from writing while permissions are being reset. */ - bdrv_drained_begin(bs); - /* Drop permissions before the graph change. */ - s->active = false; /* unfreeze, as otherwise bdrv_replace_node() will fail */ if (s->chain_frozen) { s->chain_frozen = false; bdrv_unfreeze_backing_chain(cor_filter_bs, s->bottom_bs); } - bdrv_child_refresh_perms(cor_filter_bs, child, &error_abort); - bdrv_replace_node(cor_filter_bs, bs, &error_abort); - - bdrv_drained_end(bs); - bdrv_unref(bs); + bdrv_drop_filter(cor_filter_bs, &error_abort); bdrv_unref(cor_filter_bs); } diff --git a/block/coroutines.h b/block/coroutines.h index 4cfb4946e65..c8c14a29c83 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -27,6 +27,9 @@ #include "block/block_int.h" +/* For blk_bs() in generated block/block-gen.c */ +#include "sysemu/block-backend.h" + int coroutine_fn bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp); @@ -66,4 +69,40 @@ int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs, int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); +int generated_co_wrapper +nbd_do_establish_connection(BlockDriverState *bs, Error **errp); +int coroutine_fn +nbd_co_do_establish_connection(BlockDriverState *bs, Error **errp); + + +int generated_co_wrapper +blk_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); +int coroutine_fn +blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); + + +int generated_co_wrapper +blk_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); +int coroutine_fn +blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); + +int generated_co_wrapper +blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf); +int coroutine_fn +blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf); + +int generated_co_wrapper +blk_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); +int coroutine_fn +blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); + +int generated_co_wrapper blk_do_flush(BlockBackend *blk); +int coroutine_fn blk_co_do_flush(BlockBackend *blk); + #endif /* BLOCK_COROUTINES_INT_H */ diff --git a/block/crypto.c b/block/crypto.c index 1d30fde38e5..c8ba4681e20 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -397,8 +397,8 @@ static int block_crypto_reopen_prepare(BDRVReopenState *state, #define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024) static coroutine_fn int -block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +block_crypto_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BlockCrypto *crypto = bs->opaque; uint64_t cur_bytes; /* number of bytes in current iteration */ @@ -460,8 +460,8 @@ block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, static coroutine_fn int -block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, 
uint64_t bytes, - QEMUIOVector *qiov, int flags) +block_crypto_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BlockCrypto *crypto = bs->opaque; uint64_t cur_bytes; /* number of bytes in current iteration */ diff --git a/block/curl.c b/block/curl.c index 50e741a0d7a..4a8ae2b2698 100644 --- a/block/curl.c +++ b/block/curl.c @@ -896,7 +896,8 @@ static void curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb) } static int coroutine_fn curl_co_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { CURLAIOCB acb = { .co = qemu_coroutine_self(), diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 68d295d6e3e..0ef46163e3e 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -193,7 +193,7 @@ int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, uint32_t flags, error_setg(errp, "Bitmap '%s' is inconsistent and cannot be used", bitmap->name); error_append_hint(errp, "Try block-dirty-bitmap-remove to delete" - " this bitmap from disk"); + " this bitmap from disk\n"); return -1; } diff --git a/block/dmg.c b/block/dmg.c index ef35a505f26..447901fbb87 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -689,8 +689,8 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num) } static int coroutine_fn -dmg_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +dmg_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVDMGState *s = bs->opaque; uint64_t sector_num = offset >> BDRV_SECTOR_BITS; diff --git a/block/export/export.c b/block/export/export.c index fec7d9f7382..6d3b9964c8d 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -111,6 +111,7 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) if (export->has_iothread) { IOThread *iothread; AioContext *new_ctx; + Error **set_context_errp; iothread = iothread_by_id(export->iothread); if (!iothread) { @@ -120,7 +121,9 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) new_ctx = iothread_get_aio_context(iothread); - ret = bdrv_try_set_aio_context(bs, new_ctx, errp); + /* Ignore errors with fixed-iothread=false */ + set_context_errp = fixed_iothread ? 
errp : NULL; + ret = bdrv_try_set_aio_context(bs, new_ctx, set_context_errp); if (ret == 0) { aio_context_release(ctx); aio_context_acquire(new_ctx); diff --git a/block/export/fuse.c b/block/export/fuse.c index 208c3a0ff51..63f9ec9726a 100644 --- a/block/export/fuse.c +++ b/block/export/fuse.c @@ -35,6 +35,9 @@ #include #endif +#ifdef __linux__ +#include +#endif /* Prevent overly long bounce buffer allocations */ #define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024)) @@ -50,6 +53,12 @@ typedef struct FuseExport { char *mountpoint; bool writable; bool growable; + /* Whether allow_other was used as a mount option or not */ + bool allow_other; + + mode_t st_mode; + uid_t st_uid; + gid_t st_gid; } FuseExport; static GHashTable *exports; @@ -61,7 +70,7 @@ static void fuse_export_delete(BlockExport *exp); static void init_exports_table(void); static int setup_fuse_export(FuseExport *exp, const char *mountpoint, - Error **errp); + bool allow_other, Error **errp); static void read_from_fuse_export(void *opaque); static bool is_regular_file(const char *path, Error **errp); @@ -122,7 +131,29 @@ static int fuse_export_create(BlockExport *blk_exp, exp->writable = blk_exp_args->writable; exp->growable = args->growable; - ret = setup_fuse_export(exp, args->mountpoint, errp); + /* set default */ + if (!args->has_allow_other) { + args->allow_other = FUSE_EXPORT_ALLOW_OTHER_AUTO; + } + + exp->st_mode = S_IFREG | S_IRUSR; + if (exp->writable) { + exp->st_mode |= S_IWUSR; + } + exp->st_uid = getuid(); + exp->st_gid = getgid(); + + if (args->allow_other == FUSE_EXPORT_ALLOW_OTHER_AUTO) { + /* Ignore errors on our first attempt */ + ret = setup_fuse_export(exp, args->mountpoint, true, NULL); + exp->allow_other = ret == 0; + if (ret < 0) { + ret = setup_fuse_export(exp, args->mountpoint, false, errp); + } + } else { + exp->allow_other = args->allow_other == FUSE_EXPORT_ALLOW_OTHER_ON; + ret = setup_fuse_export(exp, args->mountpoint, exp->allow_other, errp); + } if (ret < 0) { goto fail; } @@ -150,15 +181,20 @@ static void init_exports_table(void) * Create exp->fuse_session and mount it. */ static int setup_fuse_export(FuseExport *exp, const char *mountpoint, - Error **errp) + bool allow_other, Error **errp) { const char *fuse_argv[4]; char *mount_opts; struct fuse_args fuse_args; int ret; - /* Needs to match what fuse_init() sets. Only max_read must be supplied. */ - mount_opts = g_strdup_printf("max_read=%zu", FUSE_MAX_BOUNCE_BYTES); + /* + * max_read needs to match what fuse_init() sets. + * max_write need not be supplied. + */ + mount_opts = g_strdup_printf("max_read=%zu,default_permissions%s", + FUSE_MAX_BOUNCE_BYTES, + allow_other ? 
",allow_other" : ""); fuse_argv[0] = ""; /* Dummy program name */ fuse_argv[1] = "-o"; @@ -320,7 +356,6 @@ static void fuse_getattr(fuse_req_t req, fuse_ino_t inode, int64_t length, allocated_blocks; time_t now = time(NULL); FuseExport *exp = fuse_req_userdata(req); - mode_t mode; length = blk_getlength(exp->common.blk); if (length < 0) { @@ -335,17 +370,12 @@ static void fuse_getattr(fuse_req_t req, fuse_ino_t inode, allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512); } - mode = S_IFREG | S_IRUSR; - if (exp->writable) { - mode |= S_IWUSR; - } - statbuf = (struct stat) { .st_ino = inode, - .st_mode = mode, + .st_mode = exp->st_mode, .st_nlink = 1, - .st_uid = getuid(), - .st_gid = getgid(), + .st_uid = exp->st_uid, + .st_gid = exp->st_gid, .st_size = length, .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment, .st_blocks = allocated_blocks, @@ -391,28 +421,76 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size, } /** - * Let clients set file attributes. Only resizing is supported. + * Let clients set file attributes. Only resizing and changing + * permissions (st_mode, st_uid, st_gid) is allowed. + * Changing permissions is only allowed as far as it will actually + * permit access: Read-only exports cannot be given +w, and exports + * without allow_other cannot be given a different UID or GID, and + * they cannot be given non-owner access. */ static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf, int to_set, struct fuse_file_info *fi) { FuseExport *exp = fuse_req_userdata(req); + int supported_attrs; int ret; - if (!exp->writable) { - fuse_reply_err(req, EACCES); - return; + supported_attrs = FUSE_SET_ATTR_SIZE | FUSE_SET_ATTR_MODE; + if (exp->allow_other) { + supported_attrs |= FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID; } - if (to_set & ~FUSE_SET_ATTR_SIZE) { + if (to_set & ~supported_attrs) { fuse_reply_err(req, ENOTSUP); return; } - ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF); - if (ret < 0) { - fuse_reply_err(req, -ret); - return; + /* Do some argument checks first before committing to anything */ + if (to_set & FUSE_SET_ATTR_MODE) { + /* + * Without allow_other, non-owners can never access the export, so do + * not allow setting permissions for them + */ + if (!exp->allow_other && + (statbuf->st_mode & (S_IRWXG | S_IRWXO)) != 0) + { + fuse_reply_err(req, EPERM); + return; + } + + /* +w for read-only exports makes no sense, disallow it */ + if (!exp->writable && + (statbuf->st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0) + { + fuse_reply_err(req, EROFS); + return; + } + } + + if (to_set & FUSE_SET_ATTR_SIZE) { + if (!exp->writable) { + fuse_reply_err(req, EACCES); + return; + } + + ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF); + if (ret < 0) { + fuse_reply_err(req, -ret); + return; + } + } + + if (to_set & FUSE_SET_ATTR_MODE) { + /* Ignore FUSE-supplied file type, only change the mode */ + exp->st_mode = (statbuf->st_mode & 07777) | S_IFREG; + } + + if (to_set & FUSE_SET_ATTR_UID) { + exp->st_uid = statbuf->st_uid; + } + + if (to_set & FUSE_SET_ATTR_GID) { + exp->st_gid = statbuf->st_gid; } fuse_getattr(req, inode, fi); diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index fa06996d37c..18625633366 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -70,9 +70,16 @@ static void vu_blk_req_complete(VuBlkReq *req) static bool vu_blk_sect_range_ok(VuBlkExport *vexp, uint64_t sector, size_t size) { - uint64_t 
nb_sectors = size >> BDRV_SECTOR_BITS; + uint64_t nb_sectors; uint64_t total_sectors; + if (size % VIRTIO_BLK_SECTOR_SIZE) { + return false; + } + + nb_sectors = size >> VIRTIO_BLK_SECTOR_BITS; + + QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != VIRTIO_BLK_SECTOR_SIZE); if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) { return false; } diff --git a/block/file-posix.c b/block/file-posix.c index 20e14f8e96b..b283093e5b7 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -42,8 +42,11 @@ #include "scsi/constants.h" #if defined(__APPLE__) && (__MACH__) +#include +#if defined(HAVE_HOST_BLOCK_DEVICE) #include #include +#include #include #include #include @@ -52,6 +55,7 @@ //#include #include #include +#endif /* defined(HAVE_HOST_BLOCK_DEVICE) */ #endif #ifdef __sun__ @@ -106,8 +110,6 @@ #include #endif -#include "trace.h" - /* OS X does not have O_DSYNC */ #ifndef O_DSYNC #ifdef O_SYNC @@ -148,6 +150,8 @@ typedef struct BDRVRawState { uint64_t locked_perm; uint64_t locked_shared_perm; + uint64_t aio_max_batch; + int perm_change_fd; int perm_change_flags; BDRVReopenState *reopen_state; @@ -160,9 +164,10 @@ typedef struct BDRVRawState { bool discard_zeroes:1; bool use_linux_aio:1; bool use_linux_io_uring:1; - bool page_cache_inconsistent:1; + int page_cache_inconsistent; /* errno from fdatasync failure */ bool has_fallocate; bool needs_alignment; + bool force_alignment; bool drop_cache; bool check_cache_dropped; struct { @@ -175,13 +180,22 @@ typedef struct BDRVRawState { } BDRVRawState; typedef struct BDRVRawReopenState { - int fd; int open_flags; bool drop_cache; bool check_cache_dropped; } BDRVRawReopenState; -static int fd_open(BlockDriverState *bs); +static int fd_open(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + /* this is just to ensure s->fd is sane (its called by io ops) */ + if (s->fd >= 0) { + return 0; + } + return -EIO; +} + static int64_t raw_getlength(BlockDriverState *bs); typedef struct RawPosixAIOData { @@ -338,6 +352,17 @@ static bool dio_byte_aligned(int fd) return false; } +static bool raw_needs_alignment(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { + return true; + } + + return s->force_alignment; +} + /* Check if read is allowed with given memory buffer and length. * * This function is used to check O_DIRECT memory buffer and request alignment. 
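The check above only decides whether alignment is needed at all; the rule O_DIRECT actually imposes is that the file offset, the request length and the memory buffer address must each be a multiple of the probed alignment. A minimal stand-alone sketch of that rule (illustrative only; the fixed 4096-byte value is an assumption, the real code probes the alignment per file):

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#define ASSUMED_DIO_ALIGN 4096   /* assumed worst case, not probed */

/* All three components of the request must be aligned for O_DIRECT. */
static bool dio_request_ok(uint64_t offset, uint64_t bytes, const void *buf)
{
    return offset % ASSUMED_DIO_ALIGN == 0 &&
           bytes  % ASSUMED_DIO_ALIGN == 0 &&
           (uintptr_t)buf % ASSUMED_DIO_ALIGN == 0;
}

int main(void)
{
    void *buf = NULL;

    /* posix_memalign() yields a buffer address usable with O_DIRECT */
    if (posix_memalign(&buf, ASSUMED_DIO_ALIGN, 2 * ASSUMED_DIO_ALIGN)) {
        return 1;
    }
    return dio_request_ok(4096, 8192, buf) ? 0 : 1;
}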
@@ -519,6 +544,11 @@ static QemuOptsList raw_runtime_opts = { .type = QEMU_OPT_STRING, .help = "host AIO implementation (threads, native, io_uring)", }, + { + .name = "aio-max-batch", + .type = QEMU_OPT_NUMBER, + .help = "AIO max batch size (0 = auto handled by AIO backend, default: 0)", + }, { .name = "locking", .type = QEMU_OPT_STRING, @@ -598,6 +628,8 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, s->use_linux_io_uring = (aio == BLOCKDEV_AIO_OPTIONS_IO_URING); #endif + s->aio_max_batch = qemu_opt_get_number(opts, "aio-max-batch", 0); + locking = qapi_enum_parse(&OnOffAuto_lookup, qemu_opt_get(opts, "locking"), ON_OFF_AUTO_AUTO, &local_err); @@ -708,9 +740,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, s->has_discard = true; s->has_write_zeroes = true; - if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { - s->needs_alignment = true; - } if (fstat(s->fd, &st) < 0) { ret = -errno; @@ -764,9 +793,10 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, * so QEMU makes sure all IO operations on the device are aligned * to sector size, or else FreeBSD will reject them with EINVAL. */ - s->needs_alignment = true; + s->force_alignment = true; } #endif + s->needs_alignment = raw_needs_alignment(bs); #ifdef CONFIG_XFS if (platform_test_xfs_fd(s->fd)) { @@ -1075,7 +1105,6 @@ static int raw_reopen_prepare(BDRVReopenState *state, BDRVRawReopenState *rs; QemuOpts *opts; int ret; - Error *local_err = NULL; assert(state != NULL); assert(state->bs != NULL); @@ -1101,32 +1130,18 @@ static int raw_reopen_prepare(BDRVReopenState *state, * bdrv_reopen_prepare() will detect changes and complain. */ qemu_opts_to_qdict(opts, state->options); - rs->fd = raw_reconfigure_getfd(state->bs, state->flags, &rs->open_flags, - state->perm, true, &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -1; - goto out; - } - - /* Fail already reopen_prepare() if we can't get a working O_DIRECT - * alignment with the new fd. */ - if (rs->fd != -1) { - raw_probe_alignment(state->bs, rs->fd, &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto out_fd; - } - } + /* + * As part of reopen prepare we also want to create new fd by + * raw_reconfigure_getfd(). But it wants updated "perm", when in + * bdrv_reopen_multiple() .bdrv_reopen_prepare() callback called prior to + * permission update. Happily, permission update is always a part (a seprate + * stage) of bdrv_reopen_multiple() so we can rely on this fact and + * reconfigure fd in raw_check_perm(). 
+ */ s->reopen_state = state; ret = 0; -out_fd: - if (ret < 0) { - qemu_close(rs->fd); - rs->fd = -1; - } + out: qemu_opts_del(opts); return ret; @@ -1140,10 +1155,6 @@ static void raw_reopen_commit(BDRVReopenState *state) s->drop_cache = rs->drop_cache; s->check_cache_dropped = rs->check_cache_dropped; s->open_flags = rs->open_flags; - - qemu_close(s->fd); - s->fd = rs->fd; - g_free(state->opaque); state->opaque = NULL; @@ -1162,10 +1173,6 @@ static void raw_reopen_abort(BDRVReopenState *state) return; } - if (rs->fd >= 0) { - qemu_close(rs->fd); - rs->fd = -1; - } g_free(state->opaque); state->opaque = NULL; @@ -1173,22 +1180,27 @@ static void raw_reopen_abort(BDRVReopenState *state) s->reopen_state = NULL; } -static int sg_get_max_transfer_length(int fd) +static int hdev_get_max_hw_transfer(int fd, struct stat *st) { #ifdef BLKSECTGET - int max_bytes = 0; - - if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) { - return max_bytes; + if (S_ISBLK(st->st_mode)) { + unsigned short max_sectors = 0; + if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) { + return max_sectors * 512; + } } else { - return -errno; + int max_bytes = 0; + if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) { + return max_bytes; + } } + return -errno; #else return -ENOSYS; #endif } -static int sg_get_max_segments(int fd) +static int hdev_get_max_segments(int fd, struct stat *st) { #ifdef CONFIG_LINUX char buf[32]; @@ -1197,15 +1209,20 @@ static int sg_get_max_segments(int fd) int ret; int sysfd = -1; long max_segments; - struct stat st; - if (fstat(fd, &st)) { - ret = -errno; - goto out; + if (S_ISCHR(st->st_mode)) { + if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) { + return ret; + } + return -ENOTSUP; + } + + if (!S_ISBLK(st->st_mode)) { + return -ENOTSUP; } sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments", - major(st.st_rdev), minor(st.st_rdev)); + major(st->st_rdev), minor(st->st_rdev)); sysfd = open(sysfspath, O_RDONLY); if (sysfd == -1) { ret = -errno; @@ -1242,24 +1259,44 @@ static int sg_get_max_segments(int fd) static void raw_refresh_limits(BlockDriverState *bs, Error **errp) { BDRVRawState *s = bs->opaque; + struct stat st; + + s->needs_alignment = raw_needs_alignment(bs); + raw_probe_alignment(bs, s->fd, errp); - if (bs->sg) { - int ret = sg_get_max_transfer_length(s->fd); + bs->bl.min_mem_alignment = s->buf_align; + bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size); + + /* + * Maximum transfers are best effort, so it is okay to ignore any + * errors. That said, based on the man page errors in fstat would be + * very much unexpected; the only possible case seems to be ENOMEM. 
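For the new hdev_get_max_hw_transfer() helper, the interesting detail is the one the code above encodes: BLKSECTGET reports a sector count (through an unsigned short) on block devices, but a byte count on SG character devices. A rough user-space sketch that mirrors the same distinction, assuming Linux headers and a device path on the command line (error handling trimmed):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <linux/fs.h>          /* BLKSECTGET */

/* Maximum transfer length in bytes, or -1 on error. */
static long max_hw_transfer(int fd)
{
    struct stat st;

    if (fstat(fd, &st) < 0) {
        return -1;
    }
    if (S_ISBLK(st.st_mode)) {
        unsigned short max_sectors = 0;    /* block device: sector count */
        if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
            return (long)max_sectors * 512;
        }
    } else {
        int max_bytes = 0;                 /* SG char device: byte count */
        if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
            return max_bytes;
        }
    }
    return -1;
}

int main(int argc, char **argv)
{
    int fd = open(argc > 1 ? argv[1] : "/dev/null", O_RDONLY);

    if (fd < 0) {
        return 1;
    }
    printf("max transfer: %ld bytes\n", max_hw_transfer(fd));
    close(fd);
    return 0;
}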
+ */ + if (fstat(s->fd, &st)) { + return; + } + +#if defined(__APPLE__) && (__MACH__) + struct statfs buf; + + if (!fstatfs(s->fd, &buf)) { + bs->bl.opt_transfer = buf.f_iosize; + bs->bl.pdiscard_alignment = buf.f_bsize; + } +#endif + + if (bs->sg || S_ISBLK(st.st_mode)) { + int ret = hdev_get_max_hw_transfer(s->fd, &st); if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) { - bs->bl.max_transfer = pow2floor(ret); + bs->bl.max_hw_transfer = ret; } - ret = sg_get_max_segments(s->fd); + ret = hdev_get_max_segments(s->fd, &st); if (ret > 0) { - bs->bl.max_transfer = MIN(bs->bl.max_transfer, - ret * qemu_real_host_page_size); + bs->bl.max_hw_iov = ret; } } - - raw_probe_alignment(bs, s->fd, errp); - bs->bl.min_mem_alignment = s->buf_align; - bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size); } static int check_for_dasd(int fd) @@ -1341,7 +1378,9 @@ static int handle_aiocb_ioctl(void *opaque) RawPosixAIOData *aiocb = opaque; int ret; - ret = ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd, aiocb->ioctl.buf); + do { + ret = ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd, aiocb->ioctl.buf); + } while (ret == -1 && errno == EINTR); if (ret == -1) { return -errno; } @@ -1357,11 +1396,13 @@ static int handle_aiocb_flush(void *opaque) int ret; if (s->page_cache_inconsistent) { - return -EIO; + return -s->page_cache_inconsistent; } ret = qemu_fdatasync(aiocb->aio_fildes); if (ret == -1) { + trace_file_flush_fdatasync_failed(errno); + /* There is no clear definition of the semantics of a failing fsync(), * so we may have to assume the worst. The sad truth is that this * assumption is correct for Linux. Some pages are now probably marked @@ -1376,7 +1417,7 @@ static int handle_aiocb_flush(void *opaque) * Obviously, this doesn't affect O_DIRECT, which bypasses the page * cache. */ if ((s->open_flags & O_DIRECT) == 0) { - s->page_cache_inconsistent = true; + s->page_cache_inconsistent = errno; } return -errno; } @@ -1581,6 +1622,7 @@ static int handle_aiocb_rw(void *opaque) } } +#if defined(CONFIG_FALLOCATE) || defined(BLKZEROOUT) || defined(BLKDISCARD) static int translate_err(int err) { if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP || @@ -1589,6 +1631,7 @@ static int translate_err(int err) } return err; } +#endif #ifdef CONFIG_FALLOCATE static int do_fallocate(int fd, int mode, off_t offset, off_t len) @@ -1649,17 +1692,17 @@ static int handle_aiocb_write_zeroes(void *opaque) if (s->has_write_zeroes) { int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE, aiocb->aio_offset, aiocb->aio_nbytes); - if (ret == -EINVAL) { - /* - * Allow falling back to pwrite for file systems that - * do not support fallocate() for an unaligned byte range. - */ - return -ENOTSUP; - } - if (ret == 0 || ret != -ENOTSUP) { + if (ret == -ENOTSUP) { + s->has_write_zeroes = false; + } else if (ret == 0 || ret != -EINVAL) { return ret; } - s->has_write_zeroes = false; + /* + * Note: Some file systems do not like unaligned byte ranges, and + * return EINVAL in such a case, though they should not do it according + * to the man-page of fallocate(). Thus we simply ignore this return + * value and try the other fallbacks instead. + */ } #endif @@ -1674,6 +1717,17 @@ static int handle_aiocb_write_zeroes(void *opaque) return ret; } s->has_fallocate = false; + } else if (ret == -EINVAL) { + /* + * Some file systems like older versions of GPFS do not like un- + * aligned byte ranges, and return EINVAL in such a case, though + * they should not do it according to the man-page of fallocate(). 
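Stripped of the driver state that remembers which features already failed, the zero-write path above is a straightforward fallback chain. A hedged sketch of that chain, assuming Linux and <linux/falloc.h> (the real code also honours BDRV_REQ_NO_FALLBACK and caches ENOTSUP results in has_write_zeroes/has_fallocate):

#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <linux/falloc.h>   /* FALLOC_FL_ZERO_RANGE, FALLOC_FL_PUNCH_HOLE */

/* Zero [offset, offset + len): cheap file-system paths first, then write. */
static int zero_range(int fd, off_t offset, off_t len)
{
    /* 1. Let the file system zero the range in place. */
    if (fallocate(fd, FALLOC_FL_ZERO_RANGE, offset, len) == 0) {
        return 0;
    }

    /* 2. Punch a hole (reads back as zeroes), then re-reserve the range. */
    if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                  offset, len) == 0 &&
        fallocate(fd, 0, offset, len) == 0) {
        return 0;
    }

    /* 3. Last resort: write explicit zero bytes. */
    char buf[4096];
    memset(buf, 0, sizeof(buf));
    while (len > 0) {
        size_t chunk = len < (off_t)sizeof(buf) ? (size_t)len : sizeof(buf);
        ssize_t n = pwrite(fd, buf, chunk, offset);
        if (n < 0) {
            return -errno;
        }
        offset += n;
        len -= n;
    }
    return 0;
}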
+ * Warn about the bad filesystem and try the final fallback instead. + */ + warn_report_once("Your file system is misbehaving: " + "fallocate(FALLOC_FL_PUNCH_HOLE) returned EINVAL. " + "Please report this bug to your file system " + "vendor."); } else if (ret != -ENOTSUP) { return ret; } else { @@ -1774,7 +1828,7 @@ static int handle_aiocb_copy_range(void *opaque) static int handle_aiocb_discard(void *opaque) { RawPosixAIOData *aiocb = opaque; - int ret = -EOPNOTSUPP; + int ret = -ENOTSUP; BDRVRawState *s = aiocb->bs->opaque; if (!s->has_discard) { @@ -1790,16 +1844,27 @@ static int handle_aiocb_discard(void *opaque) } } while (errno == EINTR); - ret = -errno; + ret = translate_err(-errno); #endif } else { #ifdef CONFIG_FALLOCATE_PUNCH_HOLE ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, aiocb->aio_offset, aiocb->aio_nbytes); + ret = translate_err(ret); +#elif defined(__APPLE__) && (__MACH__) + fpunchhole_t fpunchhole; + fpunchhole.fp_flags = 0; + fpunchhole.reserved = 0; + fpunchhole.fp_offset = aiocb->aio_offset; + fpunchhole.fp_length = aiocb->aio_nbytes; + if (fcntl(s->fd, F_PUNCHHOLE, &fpunchhole) == -1) { + ret = errno == ENODEV ? -ENOTSUP : -errno; + } else { + ret = 0; + } #endif } - ret = translate_err(ret); if (ret == -ENOTSUP) { s->has_discard = false; } @@ -2013,7 +2078,8 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, } else if (s->use_linux_aio) { LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); assert(qiov->size == bytes); - return laio_co_submit(bs, aio, s->fd, offset, qiov, type); + return laio_co_submit(bs, aio, s->fd, offset, qiov, type, + s->aio_max_batch); #endif } @@ -2033,16 +2099,16 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, return raw_thread_pool_submit(bs, handle_aiocb_rw, &acb); } -static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, - int flags) +static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ); } -static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, - int flags) +static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { assert(flags == 0); return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE); @@ -2071,7 +2137,7 @@ static void raw_aio_unplug(BlockDriverState *bs) #ifdef CONFIG_LINUX_AIO if (s->use_linux_aio) { LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); - laio_io_unplug(bs, aio); + laio_io_unplug(bs, aio, s->aio_max_batch); } #endif #ifdef CONFIG_LINUX_IO_URING @@ -2308,39 +2374,37 @@ static int64_t raw_getlength(BlockDriverState *bs) again: #endif if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) { + size = 0; #ifdef DIOCGMEDIASIZE - if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size)) -#elif defined(DIOCGPART) - { - struct partinfo pi; - if (ioctl(fd, DIOCGPART, &pi) == 0) - size = pi.media_size; - else - size = 0; + if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size)) { + size = 0; } - if (size == 0) #endif -#if defined(__APPLE__) && defined(__MACH__) - { +#ifdef DIOCGPART + if (size == 0) { + struct partinfo pi; + if (ioctl(fd, DIOCGPART, &pi) == 0) { + size = pi.media_size; + } + } +#endif +#if defined(DKIOCGETBLOCKCOUNT) && defined(DKIOCGETBLOCKSIZE) + if (size == 0) { uint64_t sectors = 0; 
uint32_t sector_size = 0; if (ioctl(fd, DKIOCGETBLOCKCOUNT, &sectors) == 0 && ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) == 0) { size = sectors * sector_size; - } else { - size = lseek(fd, 0LL, SEEK_END); - if (size < 0) { - return -errno; - } } } -#else - size = lseek(fd, 0LL, SEEK_END); +#endif + if (size == 0) { + size = lseek(fd, 0LL, SEEK_END); + } if (size < 0) { return -errno; } -#endif #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) switch(s->type) { case FTYPE_CD: @@ -2702,7 +2766,8 @@ static int find_allocation(BlockDriverState *bs, off_t start, * the specified offset) that are known to be in the same * allocated/unallocated state. * - * 'bytes' is the max value 'pnum' should be set to. + * 'bytes' is a soft cap for 'pnum'. If the information is free, 'pnum' may + * well exceed it. */ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, bool want_zero, @@ -2740,7 +2805,7 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, } else if (data == offset) { /* On a data extent, compute bytes to the end of the extent, * possibly including a partial sector at EOF. */ - *pnum = MIN(bytes, hole - offset); + *pnum = hole - offset; /* * We are not allowed to return partial sectors, though, so @@ -2759,7 +2824,7 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, } else { /* On a hole, compute bytes to the beginning of the next extent. */ assert(hole == offset); - *pnum = MIN(bytes, data - offset); + *pnum = data - offset; ret = BDRV_BLOCK_ZERO; } *map = offset; @@ -2899,7 +2964,8 @@ static void raw_account_discard(BDRVRawState *s, uint64_t nbytes, int ret) } static coroutine_fn int -raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, bool blkdev) +raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes, + bool blkdev) { BDRVRawState *s = bs->opaque; RawPosixAIOData acb; @@ -2923,13 +2989,13 @@ raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, bool blkdev) } static coroutine_fn int -raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) +raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) { return raw_do_pdiscard(bs, offset, bytes, false); } static int coroutine_fn -raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, +raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, BdrvRequestFlags flags, bool blkdev) { BDRVRawState *s = bs->opaque; @@ -2997,7 +3063,7 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, static int coroutine_fn raw_co_pwrite_zeroes( BlockDriverState *bs, int64_t offset, - int bytes, BdrvRequestFlags flags) + int64_t bytes, BdrvRequestFlags flags) { return raw_do_pwrite_zeroes(bs, offset, bytes, flags, false); } @@ -3027,6 +3093,7 @@ static BlockStatsSpecific *raw_get_specific_stats(BlockDriverState *bs) return stats; } +#if defined(HAVE_HOST_BLOCK_DEVICE) static BlockStatsSpecific *hdev_get_specific_stats(BlockDriverState *bs) { BlockStatsSpecific *stats = g_new(BlockStatsSpecific, 1); @@ -3036,6 +3103,7 @@ static BlockStatsSpecific *hdev_get_specific_stats(BlockDriverState *bs) return stats; } +#endif /* HAVE_HOST_BLOCK_DEVICE */ static QemuOptsList raw_create_opts = { .name = "raw-create-opts", @@ -3073,39 +3141,30 @@ static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared, Error **errp) { BDRVRawState *s = bs->opaque; - BDRVRawReopenState *rs = NULL; + int input_flags = s->reopen_state ?
s->reopen_state->flags : bs->open_flags; int open_flags; int ret; - if (s->perm_change_fd) { + /* We may need a new fd if auto-read-only switches the mode */ + ret = raw_reconfigure_getfd(bs, input_flags, &open_flags, perm, + false, errp); + if (ret < 0) { + return ret; + } else if (ret != s->fd) { + Error *local_err = NULL; + /* - * In the context of reopen, this function may be called several times - * (directly and recursively while change permissions of the parent). - * This is even true for children that don't inherit from the original - * reopen node, so s->reopen_state is not set. - * - * Ignore all but the first call. + * Fail already check_perm() if we can't get a working O_DIRECT + * alignment with the new fd. */ - return 0; - } - - if (s->reopen_state) { - /* We already have a new file descriptor to set permissions for */ - assert(s->reopen_state->perm == perm); - assert(s->reopen_state->shared_perm == shared); - rs = s->reopen_state->opaque; - s->perm_change_fd = rs->fd; - s->perm_change_flags = rs->open_flags; - } else { - /* We may need a new fd if auto-read-only switches the mode */ - ret = raw_reconfigure_getfd(bs, bs->open_flags, &open_flags, perm, - false, errp); - if (ret < 0) { - return ret; - } else if (ret != s->fd) { - s->perm_change_fd = ret; - s->perm_change_flags = open_flags; + raw_probe_alignment(bs, ret, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; } + + s->perm_change_fd = ret; + s->perm_change_flags = open_flags; } /* Prepare permissions on old fd to avoid conflicts between old and new, @@ -3127,7 +3186,7 @@ static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared, return 0; fail: - if (s->perm_change_fd && !s->reopen_state) { + if (s->perm_change_fd) { qemu_close(s->perm_change_fd); } s->perm_change_fd = 0; @@ -3158,7 +3217,7 @@ static void raw_abort_perm_update(BlockDriverState *bs) /* For reopen, .bdrv_reopen_abort is called afterwards and will close * the file descriptor. 
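Reduced to its essentials, the new ownership rule is a staged file-descriptor hand-over: raw_check_perm() stages a replacement fd, and the commit or abort path then decides which of the two descriptors survives. A schematic sketch with invented names (not the QEMU API):

#include <unistd.h>

/* Illustrative staged-fd swap: stage in check, adopt in commit, drop in abort. */
typedef struct StagedFd {
    int fd;          /* descriptor currently in use */
    int staged_fd;   /* descriptor opened with the new flags, or -1 */
} StagedFd;

static void fd_stage(StagedFd *s, int new_fd)
{
    s->staged_fd = new_fd;           /* check phase: keep both around */
}

static void fd_commit(StagedFd *s)
{
    if (s->staged_fd >= 0) {
        close(s->fd);                /* success: the staged fd takes over */
        s->fd = s->staged_fd;
        s->staged_fd = -1;
    }
}

static void fd_abort(StagedFd *s)
{
    if (s->staged_fd >= 0) {
        close(s->staged_fd);         /* failure: keep the old fd untouched */
        s->staged_fd = -1;
    }
}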
*/ - if (s->perm_change_fd && !s->reopen_state) { + if (s->perm_change_fd) { qemu_close(s->perm_change_fd); } s->perm_change_fd = 0; @@ -3167,8 +3226,8 @@ static void raw_abort_perm_update(BlockDriverState *bs) } static int coroutine_fn raw_co_copy_range_from( - BlockDriverState *bs, BdrvChild *src, uint64_t src_offset, - BdrvChild *dst, uint64_t dst_offset, uint64_t bytes, + BlockDriverState *bs, BdrvChild *src, int64_t src_offset, + BdrvChild *dst, int64_t dst_offset, int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, @@ -3177,10 +3236,10 @@ static int coroutine_fn raw_co_copy_range_from( static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, BdrvChild *src, - uint64_t src_offset, + int64_t src_offset, BdrvChild *dst, - uint64_t dst_offset, - uint64_t bytes, + int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { @@ -3260,6 +3319,8 @@ BlockDriver bdrv_file = { /***********************************************/ /* host device */ +#if defined(HAVE_HOST_BLOCK_DEVICE) + #if defined(__APPLE__) && defined(__MACH__) static kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize, int flags); @@ -3552,18 +3613,8 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) } #endif /* linux */ -static int fd_open(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - - /* this is just to ensure s->fd is sane (its called by io ops) */ - if (s->fd >= 0) - return 0; - return -EIO; -} - static coroutine_fn int -hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) +hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) { BDRVRawState *s = bs->opaque; int ret; @@ -3577,7 +3628,7 @@ hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) } static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, BdrvRequestFlags flags) + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { int rc; @@ -3885,6 +3936,8 @@ static BlockDriver bdrv_host_cdrom = { }; #endif /* __FreeBSD__ */ +#endif /* HAVE_HOST_BLOCK_DEVICE */ + static void bdrv_file_init(void) { /* @@ -3892,6 +3945,7 @@ static void bdrv_file_init(void) * registered last will get probed first. */ bdrv_register(&bdrv_file); +#if defined(HAVE_HOST_BLOCK_DEVICE) bdrv_register(&bdrv_host_device); #ifdef __linux__ bdrv_register(&bdrv_host_cdrom); @@ -3899,6 +3953,7 @@ static void bdrv_file_init(void) #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) bdrv_register(&bdrv_host_cdrom); #endif +#endif /* HAVE_HOST_BLOCK_DEVICE */ } block_init(bdrv_file_init); diff --git a/block/file-win32.c b/block/file-win32.c index 2642088bd6e..ec9d64d0e4e 100644 --- a/block/file-win32.c +++ b/block/file-win32.c @@ -58,6 +58,10 @@ typedef struct BDRVRawState { QEMUWin32AIOState *aio; } BDRVRawState; +typedef struct BDRVRawReopenState { + HANDLE hfile; +} BDRVRawReopenState; + /* * Read/writes the data to/from a given linear buffer. 
* @@ -392,7 +396,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, } s->hfile = CreateFile(filename, access_flags, - FILE_SHARE_READ, NULL, + FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, overlapped, NULL); if (s->hfile == INVALID_HANDLE_VALUE) { int err = GetLastError(); @@ -436,8 +440,8 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, } static BlockAIOCB *raw_aio_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; @@ -451,8 +455,8 @@ static BlockAIOCB *raw_aio_preadv(BlockDriverState *bs, } static BlockAIOCB *raw_aio_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; @@ -634,6 +638,97 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv, return raw_co_create(&options, errp); } +static int raw_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + BDRVRawState *s = state->bs->opaque; + BDRVRawReopenState *rs; + int access_flags; + DWORD overlapped; + int ret = 0; + + if (s->type != FTYPE_FILE) { + error_setg(errp, "Can only reopen files"); + return -EINVAL; + } + + rs = g_new0(BDRVRawReopenState, 1); + + /* + * We do not support changing any options (only flags). By leaving + * all options in state->options, we tell the generic reopen code + * that we do not support changing any of them, so it will verify + * that their values did not change. + */ + + raw_parse_flags(state->flags, s->aio != NULL, &access_flags, &overlapped); + rs->hfile = CreateFile(state->bs->filename, access_flags, + FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, + OPEN_EXISTING, overlapped, NULL); + + if (rs->hfile == INVALID_HANDLE_VALUE) { + int err = GetLastError(); + + error_setg_win32(errp, err, "Could not reopen '%s'", + state->bs->filename); + if (err == ERROR_ACCESS_DENIED) { + ret = -EACCES; + } else { + ret = -EINVAL; + } + goto fail; + } + + if (s->aio) { + ret = win32_aio_attach(s->aio, rs->hfile); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not enable AIO"); + CloseHandle(rs->hfile); + goto fail; + } + } + + state->opaque = rs; + + return 0; + +fail: + g_free(rs); + state->opaque = NULL; + + return ret; +} + +static void raw_reopen_commit(BDRVReopenState *state) +{ + BDRVRawState *s = state->bs->opaque; + BDRVRawReopenState *rs = state->opaque; + + assert(rs != NULL); + + CloseHandle(s->hfile); + s->hfile = rs->hfile; + + g_free(rs); + state->opaque = NULL; +} + +static void raw_reopen_abort(BDRVReopenState *state) +{ + BDRVRawReopenState *rs = state->opaque; + + if (!rs) { + return; + } + + if (rs->hfile != INVALID_HANDLE_VALUE) { + CloseHandle(rs->hfile); + } + + g_free(rs); + state->opaque = NULL; +} + static QemuOptsList raw_create_opts = { .name = "raw-create-opts", .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head), @@ -659,6 +754,10 @@ BlockDriver bdrv_file = { .bdrv_co_create_opts = raw_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, + .bdrv_aio_preadv = raw_aio_preadv, .bdrv_aio_pwritev = raw_aio_pwritev, .bdrv_aio_flush = raw_aio_flush, diff --git 
a/block/filter-compress.c b/block/filter-compress.c index 5136371bf8b..d5be538619a 100644 --- a/block/filter-compress.c +++ b/block/filter-compress.c @@ -63,10 +63,10 @@ static int64_t compress_getlength(BlockDriverState *bs) static int coroutine_fn compress_co_preadv_part(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, - int flags) + BdrvRequestFlags flags) { return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset, flags); @@ -74,10 +74,11 @@ static int coroutine_fn compress_co_preadv_part(BlockDriverState *bs, static int coroutine_fn compress_co_pwritev_part(BlockDriverState *bs, - uint64_t offset, - uint64_t bytes, + int64_t offset, + int64_t bytes, QEMUIOVector *qiov, - size_t qiov_offset, int flags) + size_t qiov_offset, + BdrvRequestFlags flags) { return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset, flags | BDRV_REQ_WRITE_COMPRESSED); @@ -85,7 +86,7 @@ static int coroutine_fn compress_co_pwritev_part(BlockDriverState *bs, static int coroutine_fn compress_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); @@ -93,7 +94,7 @@ static int coroutine_fn compress_co_pwrite_zeroes(BlockDriverState *bs, static int coroutine_fn compress_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { return bdrv_co_pdiscard(bs->file, offset, bytes); } diff --git a/block/gluster.c b/block/gluster.c index e8ee14c8e9b..398976bc66d 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -891,6 +891,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp) { bs->bl.max_transfer = GLUSTER_MAX_TRANSFER; + bs->bl.max_pdiscard = SIZE_MAX; } static int qemu_gluster_reopen_prepare(BDRVReopenState *state, @@ -1003,19 +1004,19 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state) #ifdef CONFIG_GLUSTERFS_ZEROFILL static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int size, + int64_t bytes, BdrvRequestFlags flags) { int ret; GlusterAIOCB acb; BDRVGlusterState *s = bs->opaque; - acb.size = size; + acb.size = bytes; acb.ret = 0; acb.coroutine = qemu_coroutine_self(); acb.aio_context = bdrv_get_aio_context(bs); - ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb); + ret = glfs_zerofill_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb); if (ret < 0) { return -errno; } @@ -1297,18 +1298,20 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs) #ifdef CONFIG_GLUSTERFS_DISCARD static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs, - int64_t offset, int size) + int64_t offset, int64_t bytes) { int ret; GlusterAIOCB acb; BDRVGlusterState *s = bs->opaque; + assert(bytes <= SIZE_MAX); /* rely on max_pdiscard */ + acb.size = 0; acb.ret = 0; acb.coroutine = qemu_coroutine_self(); acb.aio_context = bdrv_get_aio_context(bs); - ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb); + ret = glfs_discard_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb); if (ret < 0) { return -errno; } @@ -1461,7 +1464,8 @@ static int find_allocation(BlockDriverState *bs, off_t start, * the specified offset) that are known to be in the same * allocated/unallocated state. * - * 'bytes' is the max value 'pnum' should be set to. 
+ * 'bytes' is a soft cap for 'pnum'. If the information is free, 'pnum' may + * well exceed it. * * (Based on raw_co_block_status() from file-posix.c.) */ @@ -1477,6 +1481,8 @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, off_t data = 0, hole = 0; int ret = -EINVAL; + assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment)); + if (!s->fd) { return ret; } @@ -1500,12 +1506,26 @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, } else if (data == offset) { /* On a data extent, compute bytes to the end of the extent, * possibly including a partial sector at EOF. */ - *pnum = MIN(bytes, hole - offset); + *pnum = hole - offset; + + /* + * We are not allowed to return partial sectors, though, so + * round up if necessary. + */ + if (!QEMU_IS_ALIGNED(*pnum, bs->bl.request_alignment)) { + int64_t file_length = qemu_gluster_getlength(bs); + if (file_length > 0) { + /* Ignore errors, this is just a safeguard */ + assert(hole == file_length); + } + *pnum = ROUND_UP(*pnum, bs->bl.request_alignment); + } + ret = BDRV_BLOCK_DATA; } else { /* On a hole, compute bytes to the beginning of the next extent. */ assert(hole == offset); - *pnum = MIN(bytes, data - offset); + *pnum = data - offset; ret = BDRV_BLOCK_ZERO; } diff --git a/block/io.c b/block/io.c index ca2dca30070..bb0a254def1 100644 --- a/block/io.c +++ b/block/io.c @@ -30,6 +30,7 @@ #include "block/blockjob_int.h" #include "block/block_int.h" #include "block/coroutines.h" +#include "block/write-threshold.h" #include "qemu/cutils.h" #include "qapi/error.h" #include "qemu/error-report.h" @@ -124,22 +125,54 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) { + dst->pdiscard_alignment = MAX(dst->pdiscard_alignment, + src->pdiscard_alignment); dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer); dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer); + dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer, + src->max_hw_transfer); dst->opt_mem_alignment = MAX(dst->opt_mem_alignment, src->opt_mem_alignment); dst->min_mem_alignment = MAX(dst->min_mem_alignment, src->min_mem_alignment); dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov); + dst->max_hw_iov = MIN_NON_ZERO(dst->max_hw_iov, src->max_hw_iov); } -void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) +typedef struct BdrvRefreshLimitsState { + BlockDriverState *bs; + BlockLimits old_bl; +} BdrvRefreshLimitsState; + +static void bdrv_refresh_limits_abort(void *opaque) +{ + BdrvRefreshLimitsState *s = opaque; + + s->bs->bl = s->old_bl; +} + +static TransactionActionDrv bdrv_refresh_limits_drv = { + .abort = bdrv_refresh_limits_abort, + .clean = g_free, +}; + +/* @tran is allowed to be NULL, in this case no rollback is possible. 
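The rollback hook above follows the generic transaction pattern: each participant registers an action whose abort callback undoes its tentative change (here, restoring the old bs->bl) and whose clean callback releases the bookkeeping. A self-contained sketch of the same idea with invented names, independent of the QEMU headers:

#include <stdio.h>
#include <stdlib.h>

typedef struct Action {
    void (*abort)(void *opaque);     /* undo the tentative change */
    void *opaque;
    struct Action *next;
} Action;

typedef struct Tx { Action *head; } Tx;

static void tx_add(Tx *tx, void (*abort)(void *), void *opaque)
{
    Action *a = malloc(sizeof(*a));
    *a = (Action){ .abort = abort, .opaque = opaque, .next = tx->head };
    tx->head = a;                    /* LIFO: undo in reverse order */
}

static void tx_finish(Tx *tx, int commit)
{
    for (Action *a = tx->head, *next; a; a = next) {
        next = a->next;
        if (!commit) {
            a->abort(a->opaque);
        }
        free(a);
    }
    tx->head = NULL;
}

/* Participant: remember the old limit, restore it if the whole update fails. */
typedef struct { int *limit; int old; } SavedLimit;

static void restore_limit(void *opaque)
{
    SavedLimit *s = opaque;
    *s->limit = s->old;
}

int main(void)
{
    int limit = 512;
    Tx tx = { 0 };
    SavedLimit saved = { .limit = &limit, .old = limit };

    tx_add(&tx, restore_limit, &saved);
    limit = 4096;                    /* tentative new value */
    tx_finish(&tx, 0);               /* abort path: limit is 512 again */
    printf("limit after abort: %d\n", limit);
    return 0;
}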
*/ +void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp) { ERRP_GUARD(); BlockDriver *drv = bs->drv; BdrvChild *c; bool have_limits; + if (tran) { + BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1); + *s = (BdrvRefreshLimitsState) { + .bs = bs, + .old_bl = bs->bl, + }; + tran_add(tran, &bdrv_refresh_limits_drv, s); + } + memset(&bs->bl, 0, sizeof(bs->bl)); if (!drv) { @@ -156,7 +189,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) QLIST_FOREACH(c, &bs->children, next) { if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW)) { - bdrv_refresh_limits(c->bs, errp); + bdrv_refresh_limits(c->bs, tran, errp); if (*errp) { return; } @@ -924,9 +957,9 @@ bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req, return waited; } -static int bdrv_check_qiov_request(int64_t offset, int64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, - Error **errp) +int bdrv_check_qiov_request(int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + Error **errp) { /* * Check generic offset/bytes correctness @@ -1198,7 +1231,8 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, QEMUIOVector *qiov, - size_t qiov_offset, int flags) + size_t qiov_offset, + BdrvRequestFlags flags) { BlockDriver *drv = bs->drv; int64_t sector_num; @@ -1809,7 +1843,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, NULL); if (ret < 0) { - return ret; + goto fail; } tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); @@ -1817,10 +1851,11 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, bs->bl.request_alignment, qiov, qiov_offset, flags); tracked_request_end(&req); - bdrv_dec_in_flight(bs); - bdrv_padding_destroy(&pad); +fail: + bdrv_dec_in_flight(bs); + return ret; } @@ -1835,7 +1870,8 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int head = 0; int tail = 0; - int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX); + int64_t max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, + INT64_MAX); int alignment = MAX(bs->bl.pwrite_zeroes_alignment, bs->bl.request_alignment); int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER); @@ -1850,6 +1886,9 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, return -ENOTSUP; } + /* Invalidate the cached block-status data range if this write overlaps */ + bdrv_bsc_invalidate_range(bs, offset, bytes); + assert(alignment % bs->bl.request_alignment == 0); head = offset % alignment; tail = (offset + bytes) % alignment; @@ -1945,7 +1984,7 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, bdrv_check_request(offset, bytes, &error_abort); - if (bs->read_only) { + if (bdrv_is_read_only(bs)) { return -EPERM; } @@ -1981,8 +2020,8 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, } else { assert(child->perm & BLK_PERM_WRITE); } - return notifier_with_return_list_notify(&bs->before_write_notifiers, - req); + bdrv_write_threshold_check_write(bs, offset, bytes); + return 0; case BDRV_TRACKED_TRUNCATE: assert(child->perm & BLK_PERM_RESIZE); return 0; @@ -2037,7 +2076,8 @@ bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes, */ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, int64_t 
bytes, - int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags) + int64_t align, QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) { BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; @@ -2210,7 +2250,11 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, return -ENOMEDIUM; } - ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset); + if (flags & BDRV_REQ_ZERO_WRITE) { + ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL); + } else { + ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset); + } if (ret < 0) { return ret; } @@ -2414,9 +2458,65 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset; if (bs->drv->bdrv_co_block_status) { - ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset, - aligned_bytes, pnum, &local_map, - &local_file); + /* + * Use the block-status cache only for protocol nodes: Format + * drivers are generally quick to inquire the status, but protocol + * drivers often need to get information from outside of qemu, so + * we do not have control over the actual implementation. There + * have been cases where inquiring the status took an unreasonably + * long time, and we can do nothing in qemu to fix it. + * This is especially problematic for images with large data areas, + * because finding the few holes in them and giving them special + * treatment does not gain much performance. Therefore, we try to + * cache the last-identified data region. + * + * Second, limiting ourselves to protocol nodes allows us to assume + * the block status for data regions to be DATA | OFFSET_VALID, and + * that the host offset is the same as the guest offset. + * + * Note that it is possible that external writers zero parts of + * the cached regions without the cache being invalidated, and so + * we may report zeroes as data. This is not catastrophic, + * however, because reporting zeroes as data is fine. + */ + if (QLIST_EMPTY(&bs->children) && + bdrv_bsc_is_data(bs, aligned_offset, pnum)) + { + ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; + local_file = bs; + local_map = aligned_offset; + } else { + ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset, + aligned_bytes, pnum, &local_map, + &local_file); + + /* + * Note that checking QLIST_EMPTY(&bs->children) is also done when + * the cache is queried above. Technically, we do not need to check + * it here; the worst that can happen is that we fill the cache for + * non-protocol nodes, and then it is never used. However, filling + * the cache requires an RCU update, so double check here to avoid + * such an update if possible. + */ + if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && + QLIST_EMPTY(&bs->children)) + { + /* + * When a protocol driver reports BLOCK_OFFSET_VALID, the + * returned local_map value must be the same as the offset we + * have passed (aligned_offset), and local_bs must be the node + * itself. + * Assert this, because we follow this rule when reading from + * the cache (see the `local_file = bs` and + * `local_map = aligned_offset` assignments above), and the + * result the cache delivers must be the same as the driver + * would deliver. 
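A hypothetical, stripped-down version of such a cache, reduced to a single remembered data range and without the RCU protection the comment refers to (all names below are made up for illustration, they are not the functions used in this patch):

#include <stdbool.h>
#include <stdint.h>

/* One cached "this range is data" entry; invalid while bytes == 0. */
typedef struct BlockStatusCache {
    int64_t offset;
    int64_t bytes;
} BlockStatusCache;

/* Query: does the cache already say that 'offset' lies inside data? */
static bool bsc_is_data(const BlockStatusCache *c, int64_t offset,
                        int64_t *pnum)
{
    if (c->bytes > 0 && offset >= c->offset &&
        offset < c->offset + c->bytes) {
        *pnum = c->offset + c->bytes - offset;
        return true;
    }
    return false;
}

/* Fill after an expensive driver query reported a data range. */
static void bsc_fill(BlockStatusCache *c, int64_t offset, int64_t bytes)
{
    c->offset = offset;
    c->bytes = bytes;
}

/* Drop the entry whenever a write, zero-write or discard overlaps it. */
static void bsc_invalidate(BlockStatusCache *c, int64_t offset, int64_t bytes)
{
    if (offset < c->offset + c->bytes && c->offset < offset + bytes) {
        c->bytes = 0;
    }
}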
+ */ + assert(local_file == bs); + assert(local_map == aligned_offset); + bdrv_bsc_fill(bs, aligned_offset, *pnum); + } + } } else { /* Default code for filters */ @@ -2718,7 +2818,12 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { BlockDriver *drv = bs->drv; BlockDriverState *child_bs = bdrv_primary_bs(bs); - int ret = -ENOTSUP; + int ret; + + ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL); + if (ret < 0) { + return ret; + } if (!drv) { return -ENOMEDIUM; @@ -2730,6 +2835,8 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) ret = drv->bdrv_load_vmstate(bs, qiov, pos); } else if (child_bs) { ret = bdrv_co_readv_vmstate(child_bs, qiov, pos); + } else { + ret = -ENOTSUP; } bdrv_dec_in_flight(bs); @@ -2742,7 +2849,12 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { BlockDriver *drv = bs->drv; BlockDriverState *child_bs = bdrv_primary_bs(bs); - int ret = -ENOTSUP; + int ret; + + ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL); + if (ret < 0) { + return ret; + } if (!drv) { return -ENOMEDIUM; @@ -2754,6 +2866,8 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) ret = drv->bdrv_save_vmstate(bs, qiov, pos); } else if (child_bs) { ret = bdrv_co_writev_vmstate(child_bs, qiov, pos); + } else { + ret = -ENOTSUP; } bdrv_dec_in_flight(bs); @@ -2943,7 +3057,8 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes) { BdrvTrackedRequest req; - int max_pdiscard, ret; + int ret; + int64_t max_pdiscard; int head, tail, align; BlockDriverState *bs = child->bs; @@ -2969,6 +3084,9 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, return 0; } + /* Invalidate the cached block-status data range if this discard overlaps */ + bdrv_bsc_invalidate_range(bs, offset, bytes); + /* Discard is advisory, but some devices track and coalesce * unaligned requests, so we must pass everything down rather than * round here. 
Still, most devices will just silently ignore @@ -2987,7 +3105,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, goto out; } - max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX), + max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT64_MAX), align); assert(max_pdiscard >= bs->bl.request_alignment); @@ -3137,12 +3255,6 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) return true; } -void bdrv_add_before_write_notifier(BlockDriverState *bs, - NotifierWithReturn *notifier) -{ - notifier_with_return_list_add(&bs->before_write_notifiers, notifier); -} - void bdrv_io_plug(BlockDriverState *bs) { BdrvChild *child; @@ -3368,6 +3480,11 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, return old_size; } + if (bdrv_is_read_only(bs)) { + error_setg(errp, "Image is read-only"); + return -EACCES; + } + if (offset > old_size) { new_bytes = offset - old_size; } else { @@ -3384,11 +3501,6 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, if (new_bytes) { bdrv_make_request_serialising(&req, 1); } - if (bs->read_only) { - error_setg(errp, "Image is read-only"); - ret = -EACCES; - goto out; - } ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req, 0); if (ret < 0) { diff --git a/block/io_uring.c b/block/io_uring.c index 00a3ee9fb85..dfa475cc874 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -165,7 +165,21 @@ static void luring_process_completions(LuringState *s) total_bytes = ret + luringcb->total_read; if (ret < 0) { - if (ret == -EINTR) { + /* + * Only writev/readv/fsync requests on regular files or host block + * devices are submitted. Therefore -EAGAIN is not expected but it's + * known to happen sometimes with Linux SCSI. Submit again and hope + * the request completes successfully. + * + * For more information, see: + * https://lore.kernel.org/io-uring/20210727165811.284510-3-axboe@kernel.dk/T/#u + * + * If the code is changed to submit other types of requests in the + * future, then this workaround may need to be extended to deal with + * genuine -EAGAIN results that should not be resubmitted + * immediately. 
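Outside of QEMU, the same defensive resubmission with liburing looks roughly like the sketch below; it assumes a single readv request in flight and that retrying is always safe, which the comment above restricts to regular files and host block devices:

#include <errno.h>
#include <sys/uio.h>
#include <liburing.h>

/* Wait for one completion; requeue the readv if the kernel said -EAGAIN. */
static int wait_one(struct io_uring *ring, int fd, struct iovec *iov,
                    unsigned nr, off_t off)
{
    struct io_uring_cqe *cqe;

    for (;;) {
        int ret = io_uring_wait_cqe(ring, &cqe);
        if (ret < 0) {
            return ret;
        }
        ret = cqe->res;
        io_uring_cqe_seen(ring, cqe);

        if (ret == -EAGAIN || ret == -EINTR) {
            /* Transient: submit the same request again and keep waiting. */
            struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
            io_uring_prep_readv(sqe, fd, iov, nr, off);
            io_uring_submit(ring);
            continue;
        }
        return ret;                  /* bytes transferred, or a hard error */
    }
}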
+ */ + if (ret == -EINTR || ret == -EAGAIN) { luring_resubmit(s, luringcb); continue; } diff --git a/block/iscsi-opts.c b/block/iscsi-opts.c index afaf8837d6c..4f2da405e64 100644 --- a/block/iscsi-opts.c +++ b/block/iscsi-opts.c @@ -68,3 +68,4 @@ static void iscsi_block_opts_init(void) } block_init(iscsi_block_opts_init); +module_opts("iscsi"); diff --git a/block/iscsi.c b/block/iscsi.c index 4d2a416ce77..57aa07a40d7 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -427,14 +427,14 @@ static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun) return sector * BDRV_SECTOR_SIZE / iscsilun->block_size; } -static bool is_byte_request_lun_aligned(int64_t offset, int count, +static bool is_byte_request_lun_aligned(int64_t offset, int64_t bytes, IscsiLun *iscsilun) { - if (offset % iscsilun->block_size || count % iscsilun->block_size) { + if (offset % iscsilun->block_size || bytes % iscsilun->block_size) { error_report("iSCSI misaligned request: " "iscsilun->block_size %u, offset %" PRIi64 - ", count %d", - iscsilun->block_size, offset, count); + ", bytes %" PRIi64, + iscsilun->block_size, offset, bytes); return false; } return true; @@ -781,9 +781,6 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, iscsi_allocmap_set_allocated(iscsilun, offset, *pnum); } - if (*pnum > bytes) { - *pnum = bytes; - } out_unlock: qemu_mutex_unlock(&iscsilun->mutex); g_free(iTask.err_str); @@ -1141,7 +1138,8 @@ iscsi_getlength(BlockDriverState *bs) } static int -coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) +coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, + int64_t bytes) { IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; @@ -1157,6 +1155,12 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) return 0; } + /* + * We don't want to overflow list.num which is uint32_t. + * We rely on our max_pdiscard. + */ + assert(bytes / iscsilun->block_size <= UINT32_MAX); + list.lba = offset / iscsilun->block_size; list.num = bytes / iscsilun->block_size; @@ -1205,12 +1209,12 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) static int coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int bytes, BdrvRequestFlags flags) + int64_t bytes, BdrvRequestFlags flags) { IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; uint64_t lba; - uint32_t nb_blocks; + uint64_t nb_blocks; bool use_16_for_ws = iscsilun->use_16_for_rw; int r = 0; @@ -1250,11 +1254,21 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, iscsi_co_init_iscsitask(iscsilun, &iTask); retry: if (use_16_for_ws) { + /* + * iscsi_writesame16_task num_blocks argument is uint32_t. We rely here + * on our max_pwrite_zeroes limit. + */ + assert(nb_blocks <= UINT32_MAX); iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba, iscsilun->zeroblock, iscsilun->block_size, nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP), 0, 0, iscsi_co_generic_cb, &iTask); } else { + /* + * iscsi_writesame10_task num_blocks argument is uint16_t. We rely here + * on our max_pwrite_zeroes limit. 
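The asserts are backed by the max_pdiscard/max_pwrite_zeroes limits set in iscsi_refresh_limits(); translated into bytes they are quite generous. A quick back-of-the-envelope check, assuming a 512-byte block size purely for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t block_size = 512;       /* assumed only for this example */

    /* WRITE SAME(10): 16-bit block count -> just under 32 MiB per request */
    printf("WS10 cap: %llu bytes\n",
           (unsigned long long)(UINT16_MAX * block_size));

    /* WRITE SAME(16): 32-bit block count -> roughly 2 TiB per request */
    printf("WS16 cap: %llu bytes\n",
           (unsigned long long)(UINT32_MAX * block_size));
    return 0;
}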
+ */ + assert(nb_blocks <= UINT16_MAX); iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba, iscsilun->zeroblock, iscsilun->block_size, nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP), @@ -2064,20 +2078,19 @@ static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp) } if (iscsilun->lbp.lbpu) { - if (iscsilun->bl.max_unmap < 0xffffffff / block_size) { - bs->bl.max_pdiscard = - iscsilun->bl.max_unmap * iscsilun->block_size; - } + bs->bl.max_pdiscard = + MIN_NON_ZERO(iscsilun->bl.max_unmap * iscsilun->block_size, + (uint64_t)UINT32_MAX * iscsilun->block_size); bs->bl.pdiscard_alignment = iscsilun->bl.opt_unmap_gran * iscsilun->block_size; } else { bs->bl.pdiscard_alignment = iscsilun->block_size; } - if (iscsilun->bl.max_ws_len < 0xffffffff / block_size) { - bs->bl.max_pwrite_zeroes = - iscsilun->bl.max_ws_len * iscsilun->block_size; - } + bs->bl.max_pwrite_zeroes = + MIN_NON_ZERO(iscsilun->bl.max_ws_len * iscsilun->block_size, + max_xfer_len * iscsilun->block_size); + if (iscsilun->lbp.lbpws) { bs->bl.pwrite_zeroes_alignment = iscsilun->bl.opt_unmap_gran * iscsilun->block_size; @@ -2172,10 +2185,10 @@ static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs, static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs, BdrvChild *src, - uint64_t src_offset, + int64_t src_offset, BdrvChild *dst, - uint64_t dst_offset, - uint64_t bytes, + int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { @@ -2313,10 +2326,10 @@ static void iscsi_xcopy_data(struct iscsi_data *data, static int coroutine_fn iscsi_co_copy_range_to(BlockDriverState *bs, BdrvChild *src, - uint64_t src_offset, + int64_t src_offset, BdrvChild *dst, - uint64_t dst_offset, - uint64_t bytes, + int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { diff --git a/block/linux-aio.c b/block/linux-aio.c index 3c0527c2bf8..f53ae72e21f 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -28,6 +28,9 @@ */ #define MAX_EVENTS 1024 +/* Maximum number of requests in a batch. (default value) */ +#define DEFAULT_MAX_BATCH 32 + struct qemu_laiocb { Coroutine *co; LinuxAioState *ctx; @@ -331,22 +334,41 @@ static void ioq_submit(LinuxAioState *s) } } +static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch) +{ + uint64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH; + + /* + * AIO context can be shared between multiple block devices, so + * `dev_max_batch` allows reducing the batch size for latency-sensitive + * devices. 
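Worked through with the constants from this file (DEFAULT_MAX_BATCH 32, MAX_EVENTS 1024), the clamping order becomes obvious; MIN_NON_ZERO is redefined locally below only to keep the sketch standalone:

#include <stdint.h>
#include <stdio.h>

#define MAX_EVENTS        1024
#define DEFAULT_MAX_BATCH 32
#define MIN_NON_ZERO(a, b) \
    ((a) == 0 ? (b) : ((b) == 0 || (a) < (b) ? (a) : (b)))

static uint64_t max_batch(uint64_t ctx_max_batch, uint64_t dev_max_batch,
                          unsigned in_flight)
{
    uint64_t m = ctx_max_batch ? ctx_max_batch : DEFAULT_MAX_BATCH;

    m = MIN_NON_ZERO(dev_max_batch, m);          /* per-device override */
    m = MIN_NON_ZERO(MAX_EVENTS - in_flight, m); /* room left for events */
    return m;
}

int main(void)
{
    printf("%llu\n", (unsigned long long)max_batch(0, 0, 0));    /* 32 */
    printf("%llu\n", (unsigned long long)max_batch(0, 8, 0));    /*  8 */
    printf("%llu\n", (unsigned long long)max_batch(0, 8, 1020)); /*  4 */
    return 0;
}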
+ */ + max_batch = MIN_NON_ZERO(dev_max_batch, max_batch); + + /* limit the batch with the number of available events */ + max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch); + + return max_batch; +} + void laio_io_plug(BlockDriverState *bs, LinuxAioState *s) { s->io_q.plugged++; } -void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s) +void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, + uint64_t dev_max_batch) { assert(s->io_q.plugged); - if (--s->io_q.plugged == 0 && - !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || + (--s->io_q.plugged == 0 && + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { ioq_submit(s); } } static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, - int type) + int type, uint64_t dev_max_batch) { LinuxAioState *s = laiocb->ctx; struct iocb *iocbs = &laiocb->iocb; @@ -371,7 +393,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, s->io_q.in_queue++; if (!s->io_q.blocked && (!s->io_q.plugged || - s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) { + s->io_q.in_queue >= laio_max_batch(s, dev_max_batch))) { ioq_submit(s); } @@ -379,7 +401,8 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, } int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, - uint64_t offset, QEMUIOVector *qiov, int type) + uint64_t offset, QEMUIOVector *qiov, int type, + uint64_t dev_max_batch) { int ret; struct qemu_laiocb laiocb = { @@ -391,7 +414,7 @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, .qiov = qiov, }; - ret = laio_do_submit(fd, &laiocb, offset, type); + ret = laio_do_submit(fd, &laiocb, offset, type, dev_max_batch); if (ret < 0) { return ret; } diff --git a/block/meson.build b/block/meson.build index d21990ec95a..deb73ca389f 100644 --- a/block/meson.build +++ b/block/meson.build @@ -4,7 +4,7 @@ block_ss.add(files( 'aio_task.c', 'amend.c', 'backup.c', - 'backup-top.c', + 'copy-before-write.c', 'blkdebug.c', 'blklogwrites.c', 'blkverify.c', @@ -13,6 +13,7 @@ block_ss.add(files( 'commit.c', 'copy-on-read.c', 'preallocate.c', + 'progress_meter.c', 'create.c', 'crypto.c', 'dirty-bitmap.c', @@ -64,27 +65,26 @@ block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit block_ss.add(when: libiscsi, if_true: files('iscsi-opts.c')) block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c')) block_ss.add(when: 'CONFIG_REPLICATION', if_true: files('replication.c')) -block_ss.add(when: 'CONFIG_SHEEPDOG', if_true: files('sheepdog.c')) -block_ss.add(when: ['CONFIG_LINUX_AIO', libaio], if_true: files('linux-aio.c')) -block_ss.add(when: ['CONFIG_LINUX_IO_URING', linux_io_uring], if_true: files('io_uring.c')) +block_ss.add(when: libaio, if_true: files('linux-aio.c')) +block_ss.add(when: linux_io_uring, if_true: files('io_uring.c')) block_modules = {} modsrc = [] foreach m : [ - [curl, 'curl', [curl, glib], 'curl.c'], - [glusterfs, 'gluster', glusterfs, 'gluster.c'], - [libiscsi, 'iscsi', libiscsi, 'iscsi.c'], - [libnfs, 'nfs', libnfs, 'nfs.c'], - [libssh, 'ssh', libssh, 'ssh.c'], - [rbd, 'rbd', rbd, 'rbd.c'], + [curl, 'curl', files('curl.c')], + [glusterfs, 'gluster', files('gluster.c')], + [libiscsi, 'iscsi', [files('iscsi.c'), libm]], + [libnfs, 'nfs', files('nfs.c')], + [libssh, 'ssh', files('ssh.c')], + [rbd, 'rbd', files('rbd.c')], ] if m[0].found() + module_ss = ss.source_set() + module_ss.add(when: m[0], if_true: m[2]) if 
enable_modules - modsrc += files(m[3]) + modsrc += module_ss.all_sources() endif - module_ss = ss.source_set() - module_ss.add(when: m[2], if_true: files(m[3])) block_modules += {m[1] : module_ss} endif endforeach diff --git a/block/mirror.c b/block/mirror.c index 5a71bd8bbcb..efec2c7674b 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -56,7 +56,6 @@ typedef struct MirrorBlockJob { bool zero_target; MirrorCopyMode copy_mode; BlockdevOnError on_source_error, on_target_error; - bool synced; /* Set when the target is synced (dirty bitmap is clean, nothing * in flight) and the job is running in active mode */ bool actively_synced; @@ -107,6 +106,7 @@ struct MirrorOp { bool is_in_flight; CoQueue waiting_requests; Coroutine *co; + MirrorOp *waiting_for_op; QTAILQ_ENTRY(MirrorOp) next; }; @@ -120,7 +120,6 @@ typedef enum MirrorMethod { static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, int error) { - s->synced = false; s->actively_synced = false; if (read) { return block_job_error_action(&s->common, s->on_source_error, @@ -159,7 +158,25 @@ static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self, if (ranges_overlap(self_start_chunk, self_nb_chunks, op_start_chunk, op_nb_chunks)) { + if (self) { + /* + * If the operation is already (indirectly) waiting for us, + * or will wait for us as soon as it wakes up, then just go + * on (instead of producing a deadlock in the former case). + */ + if (op->waiting_for_op) { + continue; + } + + self->waiting_for_op = op; + } + qemu_co_queue_wait(&op->waiting_requests, NULL); + + if (self) { + self->waiting_for_op = NULL; + } + break; } } @@ -925,12 +942,10 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) if (s->bdev_length == 0) { /* Transition to the READY state and wait for complete. */ job_transition_to_ready(&s->common.job); - s->synced = true; s->actively_synced = true; - while (!job_is_cancelled(&s->common.job) && !s->should_complete) { + while (!job_cancel_requested(&s->common.job) && !s->should_complete) { job_yield(&s->common.job); } - s->common.job.cancelled = false; goto immediate_exit; } @@ -991,6 +1006,11 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) job_pause_point(&s->common.job); + if (job_is_cancelled(&s->common.job)) { + ret = 0; + goto immediate_exit; + } + cnt = bdrv_get_dirty_count(s->dirty_bitmap); /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is * the number of bytes currently being processed; together those are @@ -1017,7 +1037,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) should_complete = false; if (s->in_flight == 0 && cnt == 0) { trace_mirror_before_flush(s); - if (!s->synced) { + if (!job_is_ready(&s->common.job)) { if (mirror_flush(s) < 0) { /* Go check s->ret. */ continue; @@ -1028,14 +1048,13 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) * the target in a consistent state. */ job_transition_to_ready(&s->common.job); - s->synced = true; if (s->copy_mode != MIRROR_COPY_MODE_BACKGROUND) { s->actively_synced = true; } } should_complete = s->should_complete || - job_is_cancelled(&s->common.job); + job_cancel_requested(&s->common.job); cnt = bdrv_get_dirty_count(s->dirty_bitmap); } @@ -1065,24 +1084,17 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) * completion. 
*/ assert(QLIST_EMPTY(&bs->tracked_requests)); - s->common.job.cancelled = false; need_drain = false; break; } - ret = 0; - - if (s->synced && !should_complete) { + if (job_is_ready(&s->common.job) && !should_complete) { delay_ns = (s->in_flight == 0 && cnt == 0 ? BLOCK_JOB_SLICE_TIME : 0); } - trace_mirror_before_sleep(s, cnt, s->synced, delay_ns); + trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job), + delay_ns); job_sleep_ns(&s->common.job, delay_ns); - if (job_is_cancelled(&s->common.job) && - (!s->synced || s->common.job.force_cancel)) - { - break; - } s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); } @@ -1092,8 +1104,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) * or it was cancelled prematurely so that we do not guarantee that * the target is a copy of the source. */ - assert(ret < 0 || ((s->common.job.force_cancel || !s->synced) && - job_is_cancelled(&s->common.job))); + assert(ret < 0 || job_is_cancelled(&s->common.job)); assert(need_drain); mirror_wait_for_all_io(s); } @@ -1116,7 +1127,7 @@ static void mirror_complete(Job *job, Error **errp) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); - if (!s->synced) { + if (!job_is_ready(job)) { error_setg(errp, "The active block job '%s' cannot be completed", job->id); return; @@ -1171,19 +1182,34 @@ static bool mirror_drained_poll(BlockJob *job) * from one of our own drain sections, to avoid a deadlock waiting for * ourselves. */ - if (!s->common.job.paused && !s->common.job.cancelled && !s->in_drain) { + if (!s->common.job.paused && !job_is_cancelled(&job->job) && !s->in_drain) { return true; } return !!s->in_flight; } -static void mirror_cancel(Job *job) +static bool mirror_cancel(Job *job, bool force) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); BlockDriverState *target = blk_bs(s->target); - bdrv_cancel_in_flight(target); + /* + * Before the job is READY, we treat any cancellation like a + * force-cancellation. 
+ */ + force = force || !job_is_ready(job); + + if (force) { + bdrv_cancel_in_flight(target); + } + return force; +} + +static bool commit_active_cancel(Job *job, bool force) +{ + /* Same as above in mirror_cancel() */ + return force || !job_is_ready(job); } static const BlockJobDriver mirror_job_driver = { @@ -1213,6 +1239,7 @@ static const BlockJobDriver commit_active_job_driver = { .abort = mirror_abort, .pause = mirror_pause, .complete = mirror_complete, + .cancel = commit_active_cancel, }, .drained_poll = mirror_drained_poll, }; @@ -1341,6 +1368,7 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, .bytes = bytes, .is_active_write = true, .is_in_flight = true, + .co = qemu_coroutine_self(), }; qemu_co_queue_init(&op->waiting_requests); QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); @@ -1380,7 +1408,7 @@ static void coroutine_fn active_write_settle(MirrorOp *op) } static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); } @@ -1395,6 +1423,7 @@ static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs, bool copy_to_target; copy_to_target = s->job->ret >= 0 && + !job_is_cancelled(&s->job->common.job) && s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; if (copy_to_target) { @@ -1434,7 +1463,7 @@ static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs, } static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { MirrorBDSOpaque *s = bs->opaque; QEMUIOVector bounce_qiov; @@ -1443,6 +1472,7 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, bool copy_to_target; copy_to_target = s->job->ret >= 0 && + !job_is_cancelled(&s->job->common.job) && s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; if (copy_to_target) { @@ -1479,14 +1509,14 @@ static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) } static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, BdrvRequestFlags flags) + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, offset, bytes, NULL, flags); } static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, offset, bytes, NULL, 0); @@ -1630,9 +1660,6 @@ static BlockJob *mirror_start_job( bs_opaque->is_commit = target_is_backing; - /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep - * it alive until block_job_create() succeeds even if bs has no parent. 
*/ - bdrv_ref(mirror_top_bs); bdrv_drained_begin(bs); ret = bdrv_append(mirror_top_bs, bs, errp); bdrv_drained_end(bs); diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c index ebf1033f318..2ac4aedfff0 100644 --- a/block/monitor/block-hmp-cmds.c +++ b/block/monitor/block-hmp-cmds.c @@ -251,10 +251,10 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict) if (!filename) { error_setg(&err, QERR_MISSING_PARAMETER, "target"); - hmp_handle_error(mon, err); - return; + goto end; } qmp_drive_mirror(&mirror, &err); +end: hmp_handle_error(mon, err); } @@ -281,11 +281,11 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict) if (!filename) { error_setg(&err, QERR_MISSING_PARAMETER, "target"); - hmp_handle_error(mon, err); - return; + goto end; } qmp_drive_backup(&backup, &err); +end: hmp_handle_error(mon, err); } @@ -356,8 +356,7 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) * will be taken internally. Today it's actually required. */ error_setg(&err, QERR_MISSING_PARAMETER, "snapshot-file"); - hmp_handle_error(mon, err); - return; + goto end; } mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS; @@ -365,6 +364,7 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) filename, false, NULL, !!format, format, true, mode, &err); +end: hmp_handle_error(mon, err); } @@ -557,8 +557,10 @@ void hmp_eject(Monitor *mon, const QDict *qdict) void hmp_qemu_io(Monitor *mon, const QDict *qdict) { - BlockBackend *blk; + BlockBackend *blk = NULL; + BlockDriverState *bs = NULL; BlockBackend *local_blk = NULL; + AioContext *ctx = NULL; bool qdev = qdict_get_try_bool(qdict, "qdev", false); const char *device = qdict_get_str(qdict, "device"); const char *command = qdict_get_str(qdict, "command"); @@ -573,20 +575,24 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) } else { blk = blk_by_name(device); if (!blk) { - BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); - if (bs) { - blk = local_blk = blk_new(bdrv_get_aio_context(bs), - 0, BLK_PERM_ALL); - ret = blk_insert_bs(blk, bs, &err); - if (ret < 0) { - goto fail; - } - } else { + bs = bdrv_lookup_bs(NULL, device, &err); + if (!bs) { goto fail; } } } + ctx = blk ? blk_get_aio_context(blk) : bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + + if (bs) { + blk = local_blk = blk_new(bdrv_get_aio_context(bs), 0, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, &err); + if (ret < 0) { + goto fail; + } + } + /* * Notably absent: Proper permission management. This is sad, but it seems * almost impossible to achieve without changing the semantics and thereby @@ -616,6 +622,11 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) fail: blk_unref(local_blk); + + if (ctx) { + aio_context_release(ctx); + } + hmp_handle_error(mon, err); } diff --git a/block/nbd.c b/block/nbd.c index 1d4668d42d9..5ef462db1b7 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -44,6 +44,7 @@ #include "block/qdict.h" #include "block/nbd.h" #include "block/block_int.h" +#include "block/coroutines.h" #include "qemu/yank.h" @@ -56,7 +57,8 @@ typedef struct { Coroutine *coroutine; uint64_t offset; /* original offset of the request */ - bool receiving; /* waiting for connection_co? 
*/ + bool receiving; /* sleeping in the yield in nbd_receive_replies */ + bool reply_possible; /* reply header not yet received */ } NBDClientRequest; typedef enum NBDClientState { @@ -66,64 +68,16 @@ typedef enum NBDClientState { NBD_CLIENT_QUIT } NBDClientState; -typedef enum NBDConnectThreadState { - /* No thread, no pending results */ - CONNECT_THREAD_NONE, - - /* Thread is running, no results for now */ - CONNECT_THREAD_RUNNING, - - /* - * Thread is running, but requestor exited. Thread should close - * the new socket and free the connect state on exit. - */ - CONNECT_THREAD_RUNNING_DETACHED, - - /* Thread finished, results are stored in a state */ - CONNECT_THREAD_FAIL, - CONNECT_THREAD_SUCCESS -} NBDConnectThreadState; - -typedef struct NBDConnectThread { - /* Initialization constants */ - SocketAddress *saddr; /* address to connect to */ - /* - * Bottom half to schedule on completion. Scheduled only if bh_ctx is not - * NULL - */ - QEMUBHFunc *bh_func; - void *bh_opaque; - - /* - * Result of last attempt. Valid in FAIL and SUCCESS states. - * If you want to steal error, don't forget to set pointer to NULL. - */ - QIOChannelSocket *sioc; - Error *err; - - /* state and bh_ctx are protected by mutex */ - QemuMutex mutex; - NBDConnectThreadState state; /* current state of the thread */ - AioContext *bh_ctx; /* where to schedule bh (NULL means don't schedule) */ -} NBDConnectThread; - typedef struct BDRVNBDState { - QIOChannelSocket *sioc; /* The master data channel */ - QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ + QIOChannel *ioc; /* The current I/O channel */ NBDExportInfo info; CoMutex send_mutex; CoQueue free_sema; - Coroutine *connection_co; - Coroutine *teardown_co; - QemuCoSleepState *connection_co_sleep_ns_state; - bool drained; - bool wait_drained_end; + + CoMutex receive_mutex; int in_flight; NBDClientState state; - int connect_status; - Error *connect_err; - bool wait_in_flight; QEMUTimer *reconnect_delay_timer; @@ -140,20 +94,20 @@ typedef struct BDRVNBDState { char *x_dirty_bitmap; bool alloc_depth; - bool wait_connect; - NBDConnectThread *connect_thread; + NBDClientConnection *conn; } BDRVNBDState; -static int nbd_establish_connection(BlockDriverState *bs, SocketAddress *saddr, - Error **errp); -static int nbd_co_establish_connection(BlockDriverState *bs, Error **errp); -static void nbd_co_establish_connection_cancel(BlockDriverState *bs, - bool detach); -static int nbd_client_handshake(BlockDriverState *bs, Error **errp); static void nbd_yank(void *opaque); -static void nbd_clear_bdrvstate(BDRVNBDState *s) +static void nbd_clear_bdrvstate(BlockDriverState *bs) { + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + + nbd_client_connection_release(s->conn); + s->conn = NULL; + + yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name)); + object_unref(OBJECT(s->tlscreds)); qapi_free_SocketAddress(s->saddr); s->saddr = NULL; @@ -165,32 +119,49 @@ static void nbd_clear_bdrvstate(BDRVNBDState *s) s->x_dirty_bitmap = NULL; } -static void nbd_channel_error(BDRVNBDState *s, int ret) +static bool nbd_client_connected(BDRVNBDState *s) { - if (ret == -EIO) { - if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTED) { - s->state = s->reconnect_delay ? 
NBD_CLIENT_CONNECTING_WAIT : - NBD_CLIENT_CONNECTING_NOWAIT; - } - } else { - if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTED) { - qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); - } - s->state = NBD_CLIENT_QUIT; + return qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTED; +} + +static bool nbd_recv_coroutine_wake_one(NBDClientRequest *req) +{ + if (req->receiving) { + req->receiving = false; + aio_co_wake(req->coroutine); + return true; } + + return false; } -static void nbd_recv_coroutines_wake_all(BDRVNBDState *s) +static void nbd_recv_coroutines_wake(BDRVNBDState *s, bool all) { int i; for (i = 0; i < MAX_NBD_REQUESTS; i++) { - NBDClientRequest *req = &s->requests[i]; + if (nbd_recv_coroutine_wake_one(&s->requests[i]) && !all) { + return; + } + } +} + +static void nbd_channel_error(BDRVNBDState *s, int ret) +{ + if (nbd_client_connected(s)) { + qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + } - if (req->coroutine && req->receiving) { - aio_co_wake(req->coroutine); + if (ret == -EIO) { + if (nbd_client_connected(s)) { + s->state = s->reconnect_delay ? NBD_CLIENT_CONNECTING_WAIT : + NBD_CLIENT_CONNECTING_NOWAIT; } + } else { + s->state = NBD_CLIENT_QUIT; } + + nbd_recv_coroutines_wake(s, true); } static void reconnect_delay_timer_del(BDRVNBDState *s) @@ -207,6 +178,7 @@ static void reconnect_delay_timer_cb(void *opaque) if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT) { s->state = NBD_CLIENT_CONNECTING_NOWAIT; + nbd_co_establish_connection_cancel(s->conn); while (qemu_co_enter_next(&s->free_sema, NULL)) { /* Resume all queued requests */ } @@ -229,121 +201,21 @@ static void reconnect_delay_timer_init(BDRVNBDState *s, uint64_t expire_time_ns) timer_mod(s->reconnect_delay_timer, expire_time_ns); } -static void nbd_client_detach_aio_context(BlockDriverState *bs) -{ - BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - - /* Timer is deleted in nbd_client_co_drain_begin() */ - assert(!s->reconnect_delay_timer); - /* - * If reconnect is in progress we may have no ->ioc. It will be - * re-instantiated in the proper aio context once the connection is - * reestablished. - */ - if (s->ioc) { - qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc)); - } -} - -static void nbd_client_attach_aio_context_bh(void *opaque) -{ - BlockDriverState *bs = opaque; - BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - - if (s->connection_co) { - /* - * The node is still drained, so we know the coroutine has yielded in - * nbd_read_eof(), the only place where bs->in_flight can reach 0, or - * it is entered for the first time. Both places are safe for entering - * the coroutine. - */ - qemu_aio_coroutine_enter(bs->aio_context, s->connection_co); - } - bdrv_dec_in_flight(bs); -} - -static void nbd_client_attach_aio_context(BlockDriverState *bs, - AioContext *new_context) -{ - BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - - /* - * s->connection_co is either yielded from nbd_receive_reply or from - * nbd_co_reconnect_loop() - */ - if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTED) { - qio_channel_attach_aio_context(QIO_CHANNEL(s->ioc), new_context); - } - - bdrv_inc_in_flight(bs); - - /* - * Need to wait here for the BH to run because the BH must run while the - * node is still drained. 
- */ - aio_wait_bh_oneshot(new_context, nbd_client_attach_aio_context_bh, bs); -} - -static void coroutine_fn nbd_client_co_drain_begin(BlockDriverState *bs) -{ - BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - - s->drained = true; - if (s->connection_co_sleep_ns_state) { - qemu_co_sleep_wake(s->connection_co_sleep_ns_state); - } - - nbd_co_establish_connection_cancel(bs, false); - - reconnect_delay_timer_del(s); - - if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT) { - s->state = NBD_CLIENT_CONNECTING_NOWAIT; - qemu_co_queue_restart_all(&s->free_sema); - } -} - -static void coroutine_fn nbd_client_co_drain_end(BlockDriverState *bs) -{ - BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - - s->drained = false; - if (s->wait_drained_end) { - s->wait_drained_end = false; - aio_co_wake(s->connection_co); - } -} - - static void nbd_teardown_connection(BlockDriverState *bs) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + assert(!s->in_flight); + if (s->ioc) { - /* finish any pending coroutines */ qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); - } else if (s->sioc) { - /* abort negotiation */ - qio_channel_shutdown(QIO_CHANNEL(s->sioc), QIO_CHANNEL_SHUTDOWN_BOTH, - NULL); + yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name), + nbd_yank, s->bs); + object_unref(OBJECT(s->ioc)); + s->ioc = NULL; } s->state = NBD_CLIENT_QUIT; - if (s->connection_co) { - if (s->connection_co_sleep_ns_state) { - qemu_co_sleep_wake(s->connection_co_sleep_ns_state); - } - nbd_co_establish_connection_cancel(bs, true); - } - if (qemu_in_coroutine()) { - s->teardown_co = qemu_coroutine_self(); - /* connection_co resumes us when it terminates */ - qemu_coroutine_yield(); - s->teardown_co = NULL; - } else { - BDRV_POLL_WHILE(bs, s->connection_co); - } - assert(!s->connection_co); } static bool nbd_client_connecting(BDRVNBDState *s) @@ -358,260 +230,112 @@ static bool nbd_client_connecting_wait(BDRVNBDState *s) return qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT; } -static void connect_bh(void *opaque) -{ - BDRVNBDState *state = opaque; - - assert(state->wait_connect); - state->wait_connect = false; - aio_co_wake(state->connection_co); -} - -static void nbd_init_connect_thread(BDRVNBDState *s) -{ - s->connect_thread = g_new(NBDConnectThread, 1); - - *s->connect_thread = (NBDConnectThread) { - .saddr = QAPI_CLONE(SocketAddress, s->saddr), - .state = CONNECT_THREAD_NONE, - .bh_func = connect_bh, - .bh_opaque = s, - }; - - qemu_mutex_init(&s->connect_thread->mutex); -} - -static void nbd_free_connect_thread(NBDConnectThread *thr) -{ - if (thr->sioc) { - qio_channel_close(QIO_CHANNEL(thr->sioc), NULL); - } - error_free(thr->err); - qapi_free_SocketAddress(thr->saddr); - g_free(thr); -} - -static void *connect_thread_func(void *opaque) +/* + * Update @bs with information learned during a completed negotiation process. + * Return failure if the server's advertised options are incompatible with the + * client's needs. 
+ */ +static int nbd_handle_updated_info(BlockDriverState *bs, Error **errp) { - NBDConnectThread *thr = opaque; + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; int ret; - bool do_free = false; - thr->sioc = qio_channel_socket_new(); - - error_free(thr->err); - thr->err = NULL; - ret = qio_channel_socket_connect_sync(thr->sioc, thr->saddr, &thr->err); - if (ret < 0) { - object_unref(OBJECT(thr->sioc)); - thr->sioc = NULL; + if (s->x_dirty_bitmap) { + if (!s->info.base_allocation) { + error_setg(errp, "requested x-dirty-bitmap %s not found", + s->x_dirty_bitmap); + return -EINVAL; + } + if (strcmp(s->x_dirty_bitmap, "qemu:allocation-depth") == 0) { + s->alloc_depth = true; + } } - qemu_mutex_lock(&thr->mutex); - - switch (thr->state) { - case CONNECT_THREAD_RUNNING: - thr->state = ret < 0 ? CONNECT_THREAD_FAIL : CONNECT_THREAD_SUCCESS; - if (thr->bh_ctx) { - aio_bh_schedule_oneshot(thr->bh_ctx, thr->bh_func, thr->bh_opaque); - - /* play safe, don't reuse bh_ctx on further connection attempts */ - thr->bh_ctx = NULL; + if (s->info.flags & NBD_FLAG_READ_ONLY) { + ret = bdrv_apply_auto_read_only(bs, "NBD export is read-only", errp); + if (ret < 0) { + return ret; } - break; - case CONNECT_THREAD_RUNNING_DETACHED: - do_free = true; - break; - default: - abort(); } - qemu_mutex_unlock(&thr->mutex); + if (s->info.flags & NBD_FLAG_SEND_FUA) { + bs->supported_write_flags = BDRV_REQ_FUA; + bs->supported_zero_flags |= BDRV_REQ_FUA; + } - if (do_free) { - nbd_free_connect_thread(thr); + if (s->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) { + bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP; + if (s->info.flags & NBD_FLAG_SEND_FAST_ZERO) { + bs->supported_zero_flags |= BDRV_REQ_NO_FALLBACK; + } } - return NULL; + trace_nbd_client_handshake_success(s->export); + + return 0; } -static int coroutine_fn -nbd_co_establish_connection(BlockDriverState *bs, Error **errp) +int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs, + Error **errp) { + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; int ret; - QemuThread thread; - BDRVNBDState *s = bs->opaque; - NBDConnectThread *thr = s->connect_thread; - - if (!thr) { - /* detached */ - return -1; - } - - qemu_mutex_lock(&thr->mutex); - - switch (thr->state) { - case CONNECT_THREAD_FAIL: - case CONNECT_THREAD_NONE: - error_free(thr->err); - thr->err = NULL; - thr->state = CONNECT_THREAD_RUNNING; - qemu_thread_create(&thread, "nbd-connect", - connect_thread_func, thr, QEMU_THREAD_DETACHED); - break; - case CONNECT_THREAD_SUCCESS: - /* Previous attempt finally succeeded in background */ - thr->state = CONNECT_THREAD_NONE; - s->sioc = thr->sioc; - thr->sioc = NULL; - yank_register_function(BLOCKDEV_YANK_INSTANCE(bs->node_name), - nbd_yank, bs); - qemu_mutex_unlock(&thr->mutex); - return 0; - case CONNECT_THREAD_RUNNING: - /* Already running, will wait */ - break; - default: - abort(); - } + bool blocking = nbd_client_connecting_wait(s); - thr->bh_ctx = qemu_get_current_aio_context(); - - qemu_mutex_unlock(&thr->mutex); + assert(!s->ioc); + s->ioc = nbd_co_establish_connection(s->conn, &s->info, blocking, errp); + if (!s->ioc) { + return -ECONNREFUSED; + } - /* - * We are going to wait for connect-thread finish, but - * nbd_client_co_drain_begin() can interrupt. - * - * Note that wait_connect variable is not visible for connect-thread. It - * doesn't need mutex protection, it used only inside home aio context of - * bs. 
- */ - s->wait_connect = true; - qemu_coroutine_yield(); - - if (!s->connect_thread) { - /* detached */ - return -1; - } - assert(thr == s->connect_thread); - - qemu_mutex_lock(&thr->mutex); - - switch (thr->state) { - case CONNECT_THREAD_SUCCESS: - case CONNECT_THREAD_FAIL: - thr->state = CONNECT_THREAD_NONE; - error_propagate(errp, thr->err); - thr->err = NULL; - s->sioc = thr->sioc; - thr->sioc = NULL; - if (s->sioc) { - yank_register_function(BLOCKDEV_YANK_INSTANCE(bs->node_name), - nbd_yank, bs); - } - ret = (s->sioc ? 0 : -1); - break; - case CONNECT_THREAD_RUNNING: - case CONNECT_THREAD_RUNNING_DETACHED: - /* - * Obviously, drained section wants to start. Report the attempt as - * failed. Still connect thread is executing in background, and its - * result may be used for next connection attempt. - */ - ret = -1; - error_setg(errp, "Connection attempt cancelled by other operation"); - break; + yank_register_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name), nbd_yank, + bs); - case CONNECT_THREAD_NONE: + ret = nbd_handle_updated_info(s->bs, NULL); + if (ret < 0) { /* - * Impossible. We've seen this thread running. So it should be - * running or at least give some results. + * We have connected, but must fail for other reasons. + * Send NBD_CMD_DISC as a courtesy to the server. */ - abort(); - - default: - abort(); - } + NBDRequest request = { .type = NBD_CMD_DISC }; - qemu_mutex_unlock(&thr->mutex); + nbd_send_request(s->ioc, &request); - return ret; -} + yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name), + nbd_yank, bs); + object_unref(OBJECT(s->ioc)); + s->ioc = NULL; -/* - * nbd_co_establish_connection_cancel - * Cancel nbd_co_establish_connection asynchronously: it will finish soon, to - * allow drained section to begin. - * - * If detach is true, also cleanup the state (or if thread is running, move it - * to CONNECT_THREAD_RUNNING_DETACHED state). s->connect_thread becomes NULL if - * detach is true. 
- */ -static void nbd_co_establish_connection_cancel(BlockDriverState *bs, - bool detach) -{ - BDRVNBDState *s = bs->opaque; - NBDConnectThread *thr = s->connect_thread; - bool wake = false; - bool do_free = false; - - qemu_mutex_lock(&thr->mutex); - - if (thr->state == CONNECT_THREAD_RUNNING) { - /* We can cancel only in running state, when bh is not yet scheduled */ - thr->bh_ctx = NULL; - if (s->wait_connect) { - s->wait_connect = false; - wake = true; - } - if (detach) { - thr->state = CONNECT_THREAD_RUNNING_DETACHED; - s->connect_thread = NULL; - } - } else if (detach) { - do_free = true; + return ret; } - qemu_mutex_unlock(&thr->mutex); + qio_channel_set_blocking(s->ioc, false, NULL); + qio_channel_attach_aio_context(s->ioc, bdrv_get_aio_context(bs)); - if (do_free) { - nbd_free_connect_thread(thr); - s->connect_thread = NULL; - } + /* successfully connected */ + s->state = NBD_CLIENT_CONNECTED; + qemu_co_queue_restart_all(&s->free_sema); - if (wake) { - aio_co_wake(s->connection_co); - } + return 0; } +/* called under s->send_mutex */ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) { - int ret; - Error *local_err = NULL; - - if (!nbd_client_connecting(s)) { - return; - } - - /* Wait for completion of all in-flight requests */ - - qemu_co_mutex_lock(&s->send_mutex); - - while (s->in_flight > 0) { - qemu_co_mutex_unlock(&s->send_mutex); - nbd_recv_coroutines_wake_all(s); - s->wait_in_flight = true; - qemu_coroutine_yield(); - s->wait_in_flight = false; - qemu_co_mutex_lock(&s->send_mutex); - } - - qemu_co_mutex_unlock(&s->send_mutex); + assert(nbd_client_connecting(s)); + assert(s->in_flight == 0); - if (!nbd_client_connecting(s)) { - return; + if (nbd_client_connecting_wait(s) && s->reconnect_delay && + !s->reconnect_delay_timer) + { + /* + * It's first reconnect attempt after switching to + * NBD_CLIENT_CONNECTING_WAIT + */ + reconnect_delay_timer_init(s, + qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + + s->reconnect_delay * NANOSECONDS_PER_SECOND); } /* @@ -624,176 +348,80 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc)); yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name), nbd_yank, s->bs); - object_unref(OBJECT(s->sioc)); - s->sioc = NULL; object_unref(OBJECT(s->ioc)); s->ioc = NULL; } - if (nbd_co_establish_connection(s->bs, &local_err) < 0) { - ret = -ECONNREFUSED; - goto out; - } - - bdrv_dec_in_flight(s->bs); - - ret = nbd_client_handshake(s->bs, &local_err); - - if (s->drained) { - s->wait_drained_end = true; - while (s->drained) { - /* - * We may be entered once from nbd_client_attach_aio_context_bh - * and then from nbd_client_co_drain_end. So here is a loop. 
- */ - qemu_coroutine_yield(); - } - } - bdrv_inc_in_flight(s->bs); - -out: - s->connect_status = ret; - error_free(s->connect_err); - s->connect_err = NULL; - error_propagate(&s->connect_err, local_err); - - if (ret >= 0) { - /* successfully connected */ - s->state = NBD_CLIENT_CONNECTED; - qemu_co_queue_restart_all(&s->free_sema); - } + nbd_co_do_establish_connection(s->bs, NULL); } -static coroutine_fn void nbd_co_reconnect_loop(BDRVNBDState *s) +static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle) { - uint64_t timeout = 1 * NANOSECONDS_PER_SECOND; - uint64_t max_timeout = 16 * NANOSECONDS_PER_SECOND; + int ret; + uint64_t ind = HANDLE_TO_INDEX(s, handle), ind2; + QEMU_LOCK_GUARD(&s->receive_mutex); - if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT) { - reconnect_delay_timer_init(s, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + - s->reconnect_delay * NANOSECONDS_PER_SECOND); - } - - nbd_reconnect_attempt(s); - - while (nbd_client_connecting(s)) { - if (s->drained) { - bdrv_dec_in_flight(s->bs); - s->wait_drained_end = true; - while (s->drained) { - /* - * We may be entered once from nbd_client_attach_aio_context_bh - * and then from nbd_client_co_drain_end. So here is a loop. - */ - qemu_coroutine_yield(); - } - bdrv_inc_in_flight(s->bs); - } else { - qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, timeout, - &s->connection_co_sleep_ns_state); - if (s->drained) { - continue; - } - if (timeout < max_timeout) { - timeout *= 2; - } + while (true) { + if (s->reply.handle == handle) { + /* We are done */ + return 0; } - nbd_reconnect_attempt(s); - } - - reconnect_delay_timer_del(s); -} + if (!nbd_client_connected(s)) { + return -EIO; + } -static coroutine_fn void nbd_connection_entry(void *opaque) -{ - BDRVNBDState *s = opaque; - uint64_t i; - int ret = 0; - Error *local_err = NULL; + if (s->reply.handle != 0) { + /* + * Some other request is being handled now. It should already be + * woken by whoever set s->reply.handle (or never wait in this + * yield). So, we should not wake it here. + */ + ind2 = HANDLE_TO_INDEX(s, s->reply.handle); + assert(!s->requests[ind2].receiving); - while (qatomic_load_acquire(&s->state) != NBD_CLIENT_QUIT) { - /* - * The NBD client can only really be considered idle when it has - * yielded from qio_channel_readv_all_eof(), waiting for data. This is - * the point where the additional scheduled coroutine entry happens - * after nbd_client_attach_aio_context(). - * - * Therefore we keep an additional in_flight reference all the time and - * only drop it temporarily here. - */ + s->requests[ind].receiving = true; + qemu_co_mutex_unlock(&s->receive_mutex); - if (nbd_client_connecting(s)) { - nbd_co_reconnect_loop(s); - } + qemu_coroutine_yield(); + /* + * We may be woken for 3 reasons: + * 1. From this function, executing in parallel coroutine, when our + * handle is received. + * 2. From nbd_channel_error(), when connection is lost. + * 3. From nbd_co_receive_one_chunk(), when previous request is + * finished and s->reply.handle set to 0. + * Anyway, it's OK to lock the mutex and go to the next iteration. + */ - if (qatomic_load_acquire(&s->state) != NBD_CLIENT_CONNECTED) { + qemu_co_mutex_lock(&s->receive_mutex); + assert(!s->requests[ind].receiving); continue; } + /* We are under mutex and handle is 0. We have to do the dirty work. 
*/ assert(s->reply.handle == 0); - ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, &local_err); - - if (local_err) { - trace_nbd_read_reply_entry_fail(ret, error_get_pretty(local_err)); - error_free(local_err); - local_err = NULL; - } + ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, NULL); if (ret <= 0) { - nbd_channel_error(s, ret ? ret : -EIO); - continue; + ret = ret ? ret : -EIO; + nbd_channel_error(s, ret); + return ret; } - - /* - * There's no need for a mutex on the receive side, because the - * handler acts as a synchronization point and ensures that only - * one coroutine is called until the reply finishes. - */ - i = HANDLE_TO_INDEX(s, s->reply.handle); - if (i >= MAX_NBD_REQUESTS || - !s->requests[i].coroutine || - !s->requests[i].receiving || - (nbd_reply_is_structured(&s->reply) && !s->info.structured_reply)) - { + if (nbd_reply_is_structured(&s->reply) && !s->info.structured_reply) { nbd_channel_error(s, -EINVAL); - continue; + return -EINVAL; } - - /* - * We're woken up again by the request itself. Note that there - * is no race between yielding and reentering connection_co. This - * is because: - * - * - if the request runs on the same AioContext, it is only - * entered after we yield - * - * - if the request runs on a different AioContext, reentering - * connection_co happens through a bottom half, which can only - * run after we yield. - */ - aio_co_wake(s->requests[i].coroutine); - qemu_coroutine_yield(); - } - - qemu_co_queue_restart_all(&s->free_sema); - nbd_recv_coroutines_wake_all(s); - bdrv_dec_in_flight(s->bs); - - s->connection_co = NULL; - if (s->ioc) { - qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc)); - yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name), - nbd_yank, s->bs); - object_unref(OBJECT(s->sioc)); - s->sioc = NULL; - object_unref(OBJECT(s->ioc)); - s->ioc = NULL; - } - - if (s->teardown_co) { - aio_co_wake(s->teardown_co); + if (s->reply.handle == handle) { + /* We are done */ + return 0; + } + ind2 = HANDLE_TO_INDEX(s, s->reply.handle); + if (ind2 >= MAX_NBD_REQUESTS || !s->requests[ind2].reply_possible) { + nbd_channel_error(s, -EINVAL); + return -EINVAL; + } + nbd_recv_coroutine_wake_one(&s->requests[ind2]); } - aio_wait_kick(); } static int nbd_co_send_request(BlockDriverState *bs, @@ -804,11 +432,18 @@ static int nbd_co_send_request(BlockDriverState *bs, int rc, i = -1; qemu_co_mutex_lock(&s->send_mutex); - while (s->in_flight == MAX_NBD_REQUESTS || nbd_client_connecting_wait(s)) { + + while (s->in_flight == MAX_NBD_REQUESTS || + (!nbd_client_connected(s) && s->in_flight > 0)) + { qemu_co_queue_wait(&s->free_sema, &s->send_mutex); } - if (qatomic_load_acquire(&s->state) != NBD_CLIENT_CONNECTED) { + if (nbd_client_connecting(s)) { + nbd_reconnect_attempt(s); + } + + if (!nbd_client_connected(s)) { rc = -EIO; goto err; } @@ -827,6 +462,7 @@ static int nbd_co_send_request(BlockDriverState *bs, s->requests[i].coroutine = qemu_coroutine_self(); s->requests[i].offset = request->from; s->requests[i].receiving = false; + s->requests[i].reply_possible = true; request->handle = INDEX_TO_HANDLE(s, i); @@ -835,8 +471,7 @@ static int nbd_co_send_request(BlockDriverState *bs, if (qiov) { qio_channel_set_cork(s->ioc, true); rc = nbd_send_request(s->ioc, request); - if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTED && - rc >= 0) { + if (nbd_client_connected(s) && rc >= 0) { if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov, NULL) < 0) { rc = -EIO; @@ -855,10 +490,6 @@ static int nbd_co_send_request(BlockDriverState *bs, if 
(i != -1) { s->requests[i].coroutine = NULL; s->in_flight--; - } - if (s->in_flight == 0 && s->wait_in_flight) { - aio_co_wake(s->connection_co); - } else { qemu_co_queue_next(&s->free_sema); } } @@ -1157,11 +788,8 @@ static coroutine_fn int nbd_co_do_receive_one_chunk( } *request_ret = 0; - /* Wait until we're woken up by nbd_connection_entry. */ - s->requests[i].receiving = true; - qemu_coroutine_yield(); - s->requests[i].receiving = false; - if (qatomic_load_acquire(&s->state) != NBD_CLIENT_CONNECTED) { + nbd_receive_replies(s, handle); + if (!nbd_client_connected(s)) { error_setg(errp, "Connection closed"); return -EIO; } @@ -1253,14 +881,7 @@ static coroutine_fn int nbd_co_receive_one_chunk( } s->reply.handle = 0; - if (s->connection_co && !s->wait_in_flight) { - /* - * We must check s->wait_in_flight, because we may entered by - * nbd_recv_coroutines_wake_all(), in this case we should not - * wake connection_co here, it will woken by last request. - */ - aio_co_wake(s->connection_co); - } + nbd_recv_coroutines_wake(s, false); return ret; } @@ -1320,7 +941,7 @@ static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s, NBDReply local_reply; NBDStructuredReplyChunk *chunk; Error *local_err = NULL; - if (qatomic_load_acquire(&s->state) != NBD_CLIENT_CONNECTED) { + if (!nbd_client_connected(s)) { error_setg(&local_err, "Connection closed"); nbd_iter_channel_error(iter, -EIO, &local_err); goto break_loop; @@ -1345,8 +966,7 @@ static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s, } /* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */ - if (nbd_reply_is_simple(reply) || - qatomic_load_acquire(&s->state) != NBD_CLIENT_CONNECTED) { + if (nbd_reply_is_simple(reply) || !nbd_client_connected(s)) { goto break_loop; } @@ -1372,11 +992,7 @@ static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s, qemu_co_mutex_lock(&s->send_mutex); s->in_flight--; - if (s->in_flight == 0 && s->wait_in_flight) { - aio_co_wake(s->connection_co); - } else { - qemu_co_queue_next(&s->free_sema); - } + qemu_co_queue_next(&s->free_sema); qemu_co_mutex_unlock(&s->send_mutex); return false; @@ -1545,8 +1161,9 @@ static int nbd_co_request(BlockDriverState *bs, NBDRequest *request, return ret ? ret : request_ret; } -static int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, int flags) +static int nbd_client_co_preadv(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { int ret, request_ret; Error *local_err = NULL; @@ -1603,8 +1220,9 @@ static int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, return ret ? 
ret : request_ret; } -static int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, int flags) +static int nbd_client_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; NBDRequest request = { @@ -1628,15 +1246,17 @@ static int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, } static int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int bytes, BdrvRequestFlags flags) + int64_t bytes, BdrvRequestFlags flags) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; NBDRequest request = { .type = NBD_CMD_WRITE_ZEROES, .from = offset, - .len = bytes, + .len = bytes, /* .len is uint32_t actually */ }; + assert(bytes <= UINT32_MAX); /* rely on max_pwrite_zeroes */ + assert(!(s->info.flags & NBD_FLAG_READ_ONLY)); if (!(s->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) { return -ENOTSUP; @@ -1676,15 +1296,17 @@ static int nbd_client_co_flush(BlockDriverState *bs) } static int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, - int bytes) + int64_t bytes) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; NBDRequest request = { .type = NBD_CMD_TRIM, .from = offset, - .len = bytes, + .len = bytes, /* len is uint32_t */ }; + assert(bytes <= UINT32_MAX); /* rely on max_pdiscard */ + assert(!(s->info.flags & NBD_FLAG_READ_ONLY)); if (!(s->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) { return 0; @@ -1784,7 +1406,7 @@ static void nbd_yank(void *opaque) BDRVNBDState *s = (BDRVNBDState *)bs->opaque; qatomic_store_release(&s->state, NBD_CLIENT_QUIT); - qio_channel_shutdown(QIO_CHANNEL(s->sioc), QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + qio_channel_shutdown(QIO_CHANNEL(s->ioc), QIO_CHANNEL_SHUTDOWN_BOTH, NULL); } static void nbd_client_close(BlockDriverState *bs) @@ -1799,111 +1421,6 @@ static void nbd_client_close(BlockDriverState *bs) nbd_teardown_connection(bs); } -static int nbd_establish_connection(BlockDriverState *bs, - SocketAddress *saddr, - Error **errp) -{ - ERRP_GUARD(); - BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - - s->sioc = qio_channel_socket_new(); - qio_channel_set_name(QIO_CHANNEL(s->sioc), "nbd-client"); - - qio_channel_socket_connect_sync(s->sioc, saddr, errp); - if (*errp) { - object_unref(OBJECT(s->sioc)); - s->sioc = NULL; - return -1; - } - - yank_register_function(BLOCKDEV_YANK_INSTANCE(bs->node_name), nbd_yank, bs); - qio_channel_set_delay(QIO_CHANNEL(s->sioc), false); - - return 0; -} - -/* nbd_client_handshake takes ownership on s->sioc. On failure it's unref'ed. 
*/ -static int nbd_client_handshake(BlockDriverState *bs, Error **errp) -{ - BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - AioContext *aio_context = bdrv_get_aio_context(bs); - int ret; - - trace_nbd_client_handshake(s->export); - qio_channel_set_blocking(QIO_CHANNEL(s->sioc), false, NULL); - qio_channel_attach_aio_context(QIO_CHANNEL(s->sioc), aio_context); - - s->info.request_sizes = true; - s->info.structured_reply = true; - s->info.base_allocation = true; - s->info.x_dirty_bitmap = g_strdup(s->x_dirty_bitmap); - s->info.name = g_strdup(s->export ?: ""); - ret = nbd_receive_negotiate(aio_context, QIO_CHANNEL(s->sioc), s->tlscreds, - s->hostname, &s->ioc, &s->info, errp); - g_free(s->info.x_dirty_bitmap); - g_free(s->info.name); - if (ret < 0) { - yank_unregister_function(BLOCKDEV_YANK_INSTANCE(bs->node_name), - nbd_yank, bs); - object_unref(OBJECT(s->sioc)); - s->sioc = NULL; - return ret; - } - if (s->x_dirty_bitmap) { - if (!s->info.base_allocation) { - error_setg(errp, "requested x-dirty-bitmap %s not found", - s->x_dirty_bitmap); - ret = -EINVAL; - goto fail; - } - if (strcmp(s->x_dirty_bitmap, "qemu:allocation-depth") == 0) { - s->alloc_depth = true; - } - } - if (s->info.flags & NBD_FLAG_READ_ONLY) { - ret = bdrv_apply_auto_read_only(bs, "NBD export is read-only", errp); - if (ret < 0) { - goto fail; - } - } - if (s->info.flags & NBD_FLAG_SEND_FUA) { - bs->supported_write_flags = BDRV_REQ_FUA; - bs->supported_zero_flags |= BDRV_REQ_FUA; - } - if (s->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) { - bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP; - if (s->info.flags & NBD_FLAG_SEND_FAST_ZERO) { - bs->supported_zero_flags |= BDRV_REQ_NO_FALLBACK; - } - } - - if (!s->ioc) { - s->ioc = QIO_CHANNEL(s->sioc); - object_ref(OBJECT(s->ioc)); - } - - trace_nbd_client_handshake_success(s->export); - - return 0; - - fail: - /* - * We have connected, but must fail for other reasons. - * Send NBD_CMD_DISC as a courtesy to the server. 
- */ - { - NBDRequest request = { .type = NBD_CMD_DISC }; - - nbd_send_request(s->ioc ?: QIO_CHANNEL(s->sioc), &request); - - yank_unregister_function(BLOCKDEV_YANK_INSTANCE(bs->node_name), - nbd_yank, bs); - object_unref(OBJECT(s->sioc)); - s->sioc = NULL; - - return ret; - } -} /* * Parse nbd_open options @@ -2137,6 +1654,12 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, goto done; } + if (socket_address_parse_named_fd(saddr, errp) < 0) { + qapi_free_SocketAddress(saddr); + saddr = NULL; + goto done; + } + done: qobject_unref(addr); visit_free(iv); @@ -2163,9 +1686,9 @@ static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp) return NULL; } - if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) { - error_setg(errp, - "Expecting TLS credentials with a client endpoint"); + if (!qcrypto_tls_creds_check_endpoint(creds, + QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT, + errp)) { return NULL; } object_ref(obj); @@ -2278,9 +1801,6 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options, ret = 0; error: - if (ret < 0) { - nbd_clear_bdrvstate(s); - } qemu_opts_del(opts); return ret; } @@ -2291,44 +1811,37 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, int ret; BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - ret = nbd_process_options(bs, options, errp); - if (ret < 0) { - return ret; - } - s->bs = bs; qemu_co_mutex_init(&s->send_mutex); qemu_co_queue_init(&s->free_sema); + qemu_co_mutex_init(&s->receive_mutex); if (!yank_register_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name), errp)) { return -EEXIST; } - /* - * establish TCP connection, return error if it fails - * TODO: Configurable retry-until-timeout behaviour. - */ - if (nbd_establish_connection(bs, s->saddr, errp) < 0) { - yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name)); - return -ECONNREFUSED; + ret = nbd_process_options(bs, options, errp); + if (ret < 0) { + goto fail; } - ret = nbd_client_handshake(bs, errp); + s->conn = nbd_client_connection_new(s->saddr, true, s->export, + s->x_dirty_bitmap, s->tlscreds); + + /* TODO: Configurable retry-until-timeout behaviour. 
*/ + s->state = NBD_CLIENT_CONNECTING_WAIT; + ret = nbd_do_establish_connection(bs, errp); if (ret < 0) { - yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name)); - nbd_clear_bdrvstate(s); - return ret; + goto fail; } - /* successfully connected */ - s->state = NBD_CLIENT_CONNECTED; - - nbd_init_connect_thread(s); - s->connection_co = qemu_coroutine_create(nbd_connection_entry, s); - bdrv_inc_in_flight(bs); - aio_co_schedule(bdrv_get_aio_context(bs), s->connection_co); + nbd_client_connection_enable_retry(s->conn); return 0; + +fail: + nbd_clear_bdrvstate(bs); + return ret; } static int nbd_co_flush(BlockDriverState *bs) @@ -2372,11 +1885,8 @@ static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) static void nbd_close(BlockDriverState *bs) { - BDRVNBDState *s = bs->opaque; - nbd_client_close(bs); - yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name)); - nbd_clear_bdrvstate(s); + nbd_clear_bdrvstate(bs); } /* @@ -2479,6 +1989,8 @@ static void nbd_cancel_in_flight(BlockDriverState *bs) s->state = NBD_CLIENT_CONNECTING_NOWAIT; qemu_co_queue_restart_all(&s->free_sema); } + + nbd_co_establish_connection_cancel(s->conn); } static BlockDriver bdrv_nbd = { @@ -2499,10 +2011,6 @@ static BlockDriver bdrv_nbd = { .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_co_truncate = nbd_co_truncate, .bdrv_getlength = nbd_getlength, - .bdrv_detach_aio_context = nbd_client_detach_aio_context, - .bdrv_attach_aio_context = nbd_client_attach_aio_context, - .bdrv_co_drain_begin = nbd_client_co_drain_begin, - .bdrv_co_drain_end = nbd_client_co_drain_end, .bdrv_refresh_filename = nbd_refresh_filename, .bdrv_co_block_status = nbd_client_co_block_status, .bdrv_dirname = nbd_dirname, @@ -2528,10 +2036,6 @@ static BlockDriver bdrv_nbd_tcp = { .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_co_truncate = nbd_co_truncate, .bdrv_getlength = nbd_getlength, - .bdrv_detach_aio_context = nbd_client_detach_aio_context, - .bdrv_attach_aio_context = nbd_client_attach_aio_context, - .bdrv_co_drain_begin = nbd_client_co_drain_begin, - .bdrv_co_drain_end = nbd_client_co_drain_end, .bdrv_refresh_filename = nbd_refresh_filename, .bdrv_co_block_status = nbd_client_co_block_status, .bdrv_dirname = nbd_dirname, @@ -2557,10 +2061,6 @@ static BlockDriver bdrv_nbd_unix = { .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_co_truncate = nbd_co_truncate, .bdrv_getlength = nbd_getlength, - .bdrv_detach_aio_context = nbd_client_detach_aio_context, - .bdrv_attach_aio_context = nbd_client_attach_aio_context, - .bdrv_co_drain_begin = nbd_client_co_drain_begin, - .bdrv_co_drain_end = nbd_client_co_drain_end, .bdrv_refresh_filename = nbd_refresh_filename, .bdrv_co_block_status = nbd_client_co_block_status, .bdrv_dirname = nbd_dirname, diff --git a/block/nfs.c b/block/nfs.c index 8c1968bb415..577aea1d222 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -39,7 +39,6 @@ #include "qemu/option.h" #include "qemu/uri.h" #include "qemu/cutils.h" -#include "sysemu/sysemu.h" #include "sysemu/replay.h" #include "qapi/qapi-visit-block-core.h" #include "qapi/qmp/qdict.h" @@ -148,9 +147,7 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp) if (qp) { query_params_free(qp); } - if (uri) { - uri_free(uri); - } + uri_free(uri); return ret; } @@ -265,9 +262,9 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data, nfs_co_generic_bh_cb, task); } -static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *iov, - int flags) +static int coroutine_fn 
nfs_co_preadv(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *iov, + BdrvRequestFlags flags) { NFSClient *client = bs->opaque; NFSRPC task; @@ -299,9 +296,9 @@ static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset, return 0; } -static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *iov, - int flags) +static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *iov, + BdrvRequestFlags flags) { NFSClient *client = bs->opaque; NFSRPC task; diff --git a/block/null.c b/block/null.c index cc9b1d4ea72..75f7d0db40c 100644 --- a/block/null.c +++ b/block/null.c @@ -116,8 +116,9 @@ static coroutine_fn int null_co_common(BlockDriverState *bs) } static coroutine_fn int null_co_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { BDRVNullState *s = bs->opaque; @@ -129,8 +130,9 @@ static coroutine_fn int null_co_preadv(BlockDriverState *bs, } static coroutine_fn int null_co_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { return null_co_common(bs); } @@ -187,8 +189,8 @@ static inline BlockAIOCB *null_aio_common(BlockDriverState *bs, } static BlockAIOCB *null_aio_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { @@ -202,8 +204,8 @@ static BlockAIOCB *null_aio_preadv(BlockDriverState *bs, } static BlockAIOCB *null_aio_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { diff --git a/block/nvme.c b/block/nvme.c index 2b5421e7aa6..e4f336d79c2 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -176,23 +176,27 @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q, return false; } memset(q->queue, 0, bytes); - r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova); + r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova, errp); if (r) { - error_setg(errp, "Cannot map queue"); - return false; + error_prepend(errp, "Cannot map queue: "); } - return true; + return r == 0; +} + +static void nvme_free_queue(NVMeQueue *q) +{ + qemu_vfree(q->queue); } static void nvme_free_queue_pair(NVMeQueuePair *q) { - trace_nvme_free_queue_pair(q->index, q); + trace_nvme_free_queue_pair(q->index, q, &q->cq, &q->sq); if (q->completion_bh) { qemu_bh_delete(q->completion_bh); } + nvme_free_queue(&q->sq); + nvme_free_queue(&q->cq); qemu_vfree(q->prp_list_pages); - qemu_vfree(q->sq.queue); - qemu_vfree(q->cq.queue); qemu_mutex_destroy(&q->lock); g_free(q); } @@ -220,6 +224,7 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, q = g_try_new0(NVMeQueuePair, 1); if (!q) { + error_setg(errp, "Cannot allocate queue pair"); return NULL; } trace_nvme_create_queue_pair(idx, q, size, aio_context, @@ -228,6 +233,7 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, qemu_real_host_page_size); q->prp_list_pages = qemu_try_memalign(qemu_real_host_page_size, bytes); if (!q->prp_list_pages) { + error_setg(errp, "Cannot allocate PRP page list"); goto fail; } 
memset(q->prp_list_pages, 0, bytes); @@ -237,8 +243,9 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, qemu_co_queue_init(&q->free_req_queue); q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, q); r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, bytes, - false, &prp_list_iova); + false, &prp_list_iova, errp); if (r) { + error_prepend(errp, "Cannot map buffer for DMA: "); goto fail; } q->free_req_head = -1; @@ -512,10 +519,10 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) { BDRVNVMeState *s = bs->opaque; bool ret = false; - union { + QEMU_AUTO_VFREE union { NvmeIdCtrl ctrl; NvmeIdNs ns; - } *id; + } *id = NULL; NvmeLBAF *lbaf; uint16_t oncs; int r; @@ -531,9 +538,9 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) error_setg(errp, "Cannot allocate buffer for identify response"); goto out; } - r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova); + r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova, errp); if (r) { - error_setg(errp, "Cannot map buffer for DMA"); + error_prepend(errp, "Cannot map buffer for DMA: "); goto out; } @@ -593,7 +600,6 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) s->blkshift = lbaf->ds; out: qemu_vfio_dma_unmap(s->vfio, id); - qemu_vfree(id); return ret; } @@ -1017,6 +1023,7 @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd, uint64_t *pagelist = req->prp_list_page; int i, j, r; int entries = 0; + Error *local_err = NULL, **errp = NULL; assert(qiov->size); assert(QEMU_IS_ALIGNED(qiov->size, s->page_size)); @@ -1029,8 +1036,30 @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd, try_map: r = qemu_vfio_dma_map(s->vfio, qiov->iov[i].iov_base, - len, true, &iova); + len, true, &iova, errp); + if (r == -ENOSPC) { + /* + * In addition to the -ENOMEM error, the VFIO_IOMMU_MAP_DMA + * ioctl returns -ENOSPC to signal the user exhausted the DMA + * mappings available for a container since Linux kernel commit + * 492855939bdb ("vfio/type1: Limit DMA mappings per container", + * April 2019, see CVE-2019-3882). + * + * This block driver already handles this error path by checking + * for the -ENOMEM error, so we directly replace -ENOSPC by + * -ENOMEM. Beside, -ENOSPC has a specific meaning for blockdev + * coroutines: it triggers BLOCKDEV_ON_ERROR_ENOSPC and + * BLOCK_ERROR_ACTION_STOP which stops the VM, asking the operator + * to add more storage to the blockdev. Not something we can do + * easily with an IOMMU :) + */ + r = -ENOMEM; + } if (r == -ENOMEM && retry) { + /* + * We exhausted the DMA mappings available for our container: + * recycle the volatile IOVA mappings. + */ retry = false; trace_nvme_dma_flush_queue_wait(s); if (s->dma_map_count) { @@ -1042,6 +1071,8 @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd, goto fail; } } + errp = &local_err; + goto try_map; } if (r) { @@ -1085,6 +1116,9 @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd, * because they are already mapped before calling this function; for * temporary mappings, a later nvme_cmd_(un)map_qiov will reclaim by * calling qemu_vfio_dma_reset_temporary when necessary. 
*/ + if (local_err) { + error_reportf_err(local_err, "Cannot map buffer for DMA: "); + } return r; } @@ -1189,7 +1223,7 @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes, { BDRVNVMeState *s = bs->opaque; int r; - uint8_t *buf = NULL; + QEMU_AUTO_VFREE uint8_t *buf = NULL; QEMUIOVector local_qiov; size_t len = QEMU_ALIGN_UP(bytes, qemu_real_host_page_size); assert(QEMU_IS_ALIGNED(offset, s->page_size)); @@ -1216,20 +1250,21 @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes, if (!r && !is_write) { qemu_iovec_from_buf(qiov, 0, buf, bytes); } - qemu_vfree(buf); return r; } static coroutine_fn int nvme_co_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { return nvme_co_prw(bs, offset, bytes, qiov, false, flags); } static coroutine_fn int nvme_co_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { return nvme_co_prw(bs, offset, bytes, qiov, true, flags); } @@ -1264,19 +1299,29 @@ static coroutine_fn int nvme_co_flush(BlockDriverState *bs) static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int bytes, + int64_t bytes, BdrvRequestFlags flags) { BDRVNVMeState *s = bs->opaque; NVMeQueuePair *ioq = s->queues[INDEX_IO(0)]; NVMeRequest *req; - - uint32_t cdw12 = ((bytes >> s->blkshift) - 1) & 0xFFFF; + uint32_t cdw12; if (!s->supports_write_zeroes) { return -ENOTSUP; } + if (bytes == 0) { + return 0; + } + + cdw12 = ((bytes >> s->blkshift) - 1) & 0xFFFF; + /* + * We should not lose information. pwrite_zeroes_alignment and + * max_pwrite_zeroes guarantees it. + */ + assert(((cdw12 + 1) << s->blkshift) == bytes); + NvmeCmd cmd = { .opcode = NVME_CMD_WRITE_ZEROES, .nsid = cpu_to_le32(s->nsid), @@ -1318,12 +1363,12 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs, static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, int64_t offset, - int bytes) + int64_t bytes) { BDRVNVMeState *s = bs->opaque; NVMeQueuePair *ioq = s->queues[INDEX_IO(0)]; NVMeRequest *req; - NvmeDsmRange *buf; + QEMU_AUTO_VFREE NvmeDsmRange *buf = NULL; QEMUIOVector local_qiov; int ret; @@ -1345,6 +1390,14 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, assert(s->queue_count > 1); + /* + * Filling the @buf requires @offset and @bytes to satisfy restrictions + * defined in nvme_refresh_limits(). 
+ */ + assert(QEMU_IS_ALIGNED(bytes, 1UL << s->blkshift)); + assert(QEMU_IS_ALIGNED(offset, 1UL << s->blkshift)); + assert((bytes >> s->blkshift) <= UINT32_MAX); + buf = qemu_try_memalign(s->page_size, s->page_size); if (!buf) { return -ENOMEM; @@ -1390,7 +1443,6 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, trace_nvme_dsm_done(s, offset, bytes, ret); out: qemu_iovec_destroy(&local_qiov); - qemu_vfree(buf); return ret; } @@ -1440,6 +1492,18 @@ static void nvme_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.opt_mem_alignment = s->page_size; bs->bl.request_alignment = s->page_size; bs->bl.max_transfer = s->max_transfer; + + /* + * Look at nvme_co_pwrite_zeroes: after shift and decrement we should get + * at most 0xFFFF + */ + bs->bl.max_pwrite_zeroes = 1ULL << (s->blkshift + 16); + bs->bl.pwrite_zeroes_alignment = MAX(bs->bl.request_alignment, + 1UL << s->blkshift); + + bs->bl.max_pdiscard = (uint64_t)UINT32_MAX << s->blkshift; + bs->bl.pdiscard_alignment = MAX(bs->bl.request_alignment, + 1UL << s->blkshift); } static void nvme_detach_aio_context(BlockDriverState *bs) @@ -1499,14 +1563,15 @@ static void nvme_aio_unplug(BlockDriverState *bs) static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size) { int ret; + Error *local_err = NULL; BDRVNVMeState *s = bs->opaque; - ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL); + ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL, &local_err); if (ret) { /* FIXME: we may run out of IOVA addresses after repeated * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap * doesn't reclaim addresses for fixed mappings. */ - error_report("nvme_register_buf failed: %s", strerror(-ret)); + error_reportf_err(local_err, "nvme_register_buf failed: "); } } diff --git a/block/preallocate.c b/block/preallocate.c index b6192063046..1d4233f7300 100644 --- a/block/preallocate.c +++ b/block/preallocate.c @@ -227,15 +227,15 @@ static void preallocate_reopen_abort(BDRVReopenState *state) } static coroutine_fn int preallocate_co_preadv_part( - BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, int flags) + BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags) { return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset, flags); } static int coroutine_fn preallocate_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { return bdrv_co_pdiscard(bs->file, offset, bytes); } @@ -337,7 +337,7 @@ static bool coroutine_fn handle_write(BlockDriverState *bs, int64_t offset, } static int coroutine_fn preallocate_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, BdrvRequestFlags flags) + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { bool want_merge_zero = !(flags & ~(BDRV_REQ_ZERO_WRITE | BDRV_REQ_NO_FALLBACK)); @@ -349,11 +349,11 @@ static int coroutine_fn preallocate_co_pwrite_zeroes(BlockDriverState *bs, } static coroutine_fn int preallocate_co_pwritev_part(BlockDriverState *bs, - uint64_t offset, - uint64_t bytes, + int64_t offset, + int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, - int flags) + BdrvRequestFlags flags) { handle_write(bs, offset, bytes, false); diff --git a/block/progress_meter.c b/block/progress_meter.c new file mode 100644 index 00000000000..aa2e60248c0 --- /dev/null +++ b/block/progress_meter.c @@ -0,0 +1,64 @@ +/* + * Helper functionality for some process progress tracking. 
+ * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012, 2018 Red Hat, Inc. + * Copyright (c) 2020 Virtuozzo International GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "qemu/progress_meter.h" + +void progress_init(ProgressMeter *pm) +{ + qemu_mutex_init(&pm->lock); +} + +void progress_destroy(ProgressMeter *pm) +{ + qemu_mutex_destroy(&pm->lock); +} + +void progress_get_snapshot(ProgressMeter *pm, uint64_t *current, + uint64_t *total) +{ + QEMU_LOCK_GUARD(&pm->lock); + + *current = pm->current; + *total = pm->total; +} + +void progress_work_done(ProgressMeter *pm, uint64_t done) +{ + QEMU_LOCK_GUARD(&pm->lock); + pm->current += done; +} + +void progress_set_remaining(ProgressMeter *pm, uint64_t remaining) +{ + QEMU_LOCK_GUARD(&pm->lock); + pm->total = pm->current + remaining; +} + +void progress_increase_remaining(ProgressMeter *pm, uint64_t delta) +{ + QEMU_LOCK_GUARD(&pm->lock); + pm->total += delta; +} diff --git a/block/qapi.c b/block/qapi.c index 943e7b15ad2..cf557e3aea7 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -59,7 +59,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, info = g_malloc0(sizeof(*info)); info->file = g_strdup(bs->filename); - info->ro = bs->read_only; + info->ro = bdrv_is_read_only(bs); info->drv = g_strdup(bs->drv->format_name); info->encrypted = bs->encrypted; @@ -663,10 +663,8 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, void bdrv_snapshot_dump(QEMUSnapshotInfo *sn) { - char date_buf[128], clock_buf[128]; + char clock_buf[128]; char icount_buf[128] = {0}; - struct tm tm; - time_t ti; int64_t secs; char *sizing = NULL; @@ -674,10 +672,9 @@ void bdrv_snapshot_dump(QEMUSnapshotInfo *sn) qemu_printf("%-10s%-17s%8s%20s%13s%11s", "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK", "ICOUNT"); } else { - ti = sn->date_sec; - localtime_r(&ti, &tm); - strftime(date_buf, sizeof(date_buf), - "%Y-%m-%d %H:%M:%S", &tm); + g_autoptr(GDateTime) date = g_date_time_new_from_unix_local(sn->date_sec); + g_autofree char *date_buf = g_date_time_format(date, "%Y-%m-%d %H:%M:%S"); + secs = sn->vm_clock_nsec / 1000000000; snprintf(clock_buf, sizeof(clock_buf), "%02d:%02d:%02d.%03d", diff --git a/block/qcow.c b/block/qcow.c index f8919a44d19..c39940f33eb 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -617,9 +617,9 @@ static void qcow_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.request_alignment = BDRV_SECTOR_SIZE; } -static coroutine_fn int qcow_co_preadv(BlockDriverState 
*bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, - int flags) +static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { BDRVQcowState *s = bs->opaque; int offset_in_cluster; @@ -714,9 +714,9 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset, return ret; } -static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, - int flags) +static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { BDRVQcowState *s = bs->opaque; int offset_in_cluster; @@ -1047,8 +1047,8 @@ static int qcow_make_empty(BlockDriverState *bs) /* XXX: put compressed sectors first, then all the cluster aligned tables to avoid losing bytes in alignment */ static coroutine_fn int -qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov) +qcow_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov) { BDRVQcowState *s = bs->opaque; z_stream strm; diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index bd0597842f3..21884a1ab9a 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -505,7 +505,20 @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, return -ENOMEDIUM; } - /* Call .bdrv_co_readv() directly instead of using the public block-layer + /* + * We never deal with requests that don't satisfy + * bdrv_check_qiov_request(), and aligning requests to clusters never + * breaks this condition. So, do some assertions before calling + * bs->drv->bdrv_co_preadv_part() which has int64_t arguments. + */ + assert(src_cluster_offset <= INT64_MAX); + assert(src_cluster_offset + offset_in_cluster <= INT64_MAX); + /* Cast qiov->size to uint64_t to silence a compiler warning on -m32 */ + assert((uint64_t)qiov->size <= INT64_MAX); + bdrv_check_qiov_request(src_cluster_offset + offset_in_cluster, qiov->size, + qiov, 0, &error_abort); + /* + * Call .bdrv_co_readv() directly instead of using the public block-layer * interface. This avoids double I/O throttling and request tracking, * which can lead to deadlock when block layer copy-on-read is enabled. */ @@ -556,8 +569,7 @@ static int coroutine_fn do_perform_cow_write(BlockDriverState *bs, * offset needs to be aligned to a cluster boundary. * * If the cluster is unallocated then *host_offset will be 0. - * If the cluster is compressed then *host_offset will contain the - * complete compressed cluster descriptor. + * If the cluster is compressed then *host_offset will contain the l2 entry. * * On entry, *bytes is the maximum number of contiguous bytes starting at * offset that we are interested in. @@ -660,7 +672,7 @@ int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset, ret = -EIO; goto fail; } - *host_offset = l2_entry & L2E_COMPRESSED_OFFSET_SIZE_MASK; + *host_offset = l2_entry; break; case QCOW2_SUBCLUSTER_ZERO_PLAIN: case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: @@ -1400,29 +1412,47 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, if (end <= old_start || start >= old_end) { /* No intersection */ + continue; + } + + if (old_alloc->keep_old_clusters && + (end <= l2meta_cow_start(old_alloc) || + start >= l2meta_cow_end(old_alloc))) + { + /* + * Clusters intersect but COW areas don't. And cluster itself is + * already allocated. So, there is no actual conflict. 
+ */ + continue; + } + + /* Conflict */ + + if (start < old_start) { + /* Stop at the start of a running allocation */ + bytes = old_start - start; } else { - if (start < old_start) { - /* Stop at the start of a running allocation */ - bytes = old_start - start; - } else { - bytes = 0; - } + bytes = 0; + } - /* Stop if already an l2meta exists. After yielding, it wouldn't - * be valid any more, so we'd have to clean up the old L2Metas - * and deal with requests depending on them before starting to - * gather new ones. Not worth the trouble. */ - if (bytes == 0 && *m) { - *cur_bytes = 0; - return 0; - } + /* + * Stop if an l2meta already exists. After yielding, it wouldn't + * be valid any more, so we'd have to clean up the old L2Metas + * and deal with requests depending on them before starting to + * gather new ones. Not worth the trouble. + */ + if (bytes == 0 && *m) { + *cur_bytes = 0; + return 0; + } - if (bytes == 0) { - /* Wait for the dependency to complete. We need to recheck - * the free/allocated clusters when we continue. */ - qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock); - return -EAGAIN; - } + if (bytes == 0) { + /* + * Wait for the dependency to complete. We need to recheck + * the free/allocated clusters when we continue. + */ + qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock); + return -EAGAIN; } } @@ -2463,3 +2493,18 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs, g_free(l1_table); return ret; } + +void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry, + uint64_t *coffset, int *csize) +{ + BDRVQcow2State *s = bs->opaque; + int nb_csectors; + + assert(qcow2_get_cluster_type(bs, l2_entry) == QCOW2_CLUSTER_COMPRESSED); + + *coffset = l2_entry & s->cluster_offset_mask; + + nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1; + *csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE - + (*coffset & (QCOW2_COMPRESSED_SECTOR_SIZE - 1)); +} diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 8e649b008e8..46145722527 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1177,11 +1177,11 @@ void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry, switch (ctype) { case QCOW2_CLUSTER_COMPRESSED: { - int64_t offset = (l2_entry & s->cluster_offset_mask) - & QCOW2_COMPRESSED_SECTOR_MASK; - int size = QCOW2_COMPRESSED_SECTOR_SIZE * - (((l2_entry >> s->csize_shift) & s->csize_mask) + 1); - qcow2_free_clusters(bs, offset, size, type); + uint64_t coffset; + int csize; + + qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize); + qcow2_free_clusters(bs, coffset, csize, type); } break; case QCOW2_CLUSTER_NORMAL: @@ -1247,7 +1247,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, bool l1_allocated = false; int64_t old_entry, old_l2_offset; unsigned slice, slice_size2, n_slices; - int i, j, l1_modified = 0, nb_csectors; + int i, j, l1_modified = 0; int ret; assert(addend >= -1 && addend <= 1); @@ -1318,14 +1318,14 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, switch (qcow2_get_cluster_type(bs, entry)) { case QCOW2_CLUSTER_COMPRESSED: - nb_csectors = ((entry >> s->csize_shift) & - s->csize_mask) + 1; if (addend != 0) { - uint64_t coffset = (entry & s->cluster_offset_mask) - & QCOW2_COMPRESSED_SECTOR_MASK; + uint64_t coffset; + int csize; + + qcow2_parse_compressed_l2_entry(bs, entry, + &coffset, &csize); ret = update_refcount( - bs, coffset, - nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE, + bs, coffset, csize, abs(addend), addend < 0, 
QCOW2_DISCARD_SNAPSHOT); if (ret < 0) { @@ -1587,6 +1587,66 @@ enum { CHECK_FRAG_INFO = 0x2, /* update BlockFragInfo counters */ }; +/* + * Fix L2 entry by making it QCOW2_CLUSTER_ZERO_PLAIN (or making all its present + * subclusters QCOW2_SUBCLUSTER_ZERO_PLAIN). + * + * This function decrements res->corruptions on success, so the caller is + * responsible to increment res->corruptions prior to the call. + * + * On failure in-memory @l2_table may be modified. + */ +static int fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res, + uint64_t l2_offset, + uint64_t *l2_table, int l2_index, bool active, + bool *metadata_overlap) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + int idx = l2_index * (l2_entry_size(s) / sizeof(uint64_t)); + uint64_t l2e_offset = l2_offset + (uint64_t)l2_index * l2_entry_size(s); + int ign = active ? QCOW2_OL_ACTIVE_L2 : QCOW2_OL_INACTIVE_L2; + + if (has_subclusters(s)) { + uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, l2_index); + + /* Allocated subclusters become zero */ + l2_bitmap |= l2_bitmap << 32; + l2_bitmap &= QCOW_L2_BITMAP_ALL_ZEROES; + + set_l2_bitmap(s, l2_table, l2_index, l2_bitmap); + set_l2_entry(s, l2_table, l2_index, 0); + } else { + set_l2_entry(s, l2_table, l2_index, QCOW_OFLAG_ZERO); + } + + ret = qcow2_pre_write_overlap_check(bs, ign, l2e_offset, l2_entry_size(s), + false); + if (metadata_overlap) { + *metadata_overlap = ret < 0; + } + if (ret < 0) { + fprintf(stderr, "ERROR: Overlap check failed\n"); + goto fail; + } + + ret = bdrv_pwrite_sync(bs->file, l2e_offset, &l2_table[idx], + l2_entry_size(s)); + if (ret < 0) { + fprintf(stderr, "ERROR: Failed to overwrite L2 " + "table entry: %s\n", strerror(-ret)); + goto fail; + } + + res->corruptions--; + res->corruptions_fixed++; + return 0; + +fail: + res->check_errors++; + return ret; +} + /* * Increases the refcount in the given refcount table for the all clusters * referenced in the L2 table. 
While doing so, performs some checks on L2 @@ -1601,26 +1661,41 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, int flags, BdrvCheckMode fix, bool active) { BDRVQcow2State *s = bs->opaque; - uint64_t *l2_table, l2_entry; + uint64_t l2_entry, l2_bitmap; uint64_t next_contiguous_offset = 0; - int i, l2_size, nb_csectors, ret; + int i, ret; + size_t l2_size_bytes = s->l2_size * l2_entry_size(s); + g_autofree uint64_t *l2_table = g_malloc(l2_size_bytes); + bool metadata_overlap; /* Read L2 table from disk */ - l2_size = s->l2_size * l2_entry_size(s); - l2_table = g_malloc(l2_size); - - ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size); + ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size_bytes); if (ret < 0) { fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n"); res->check_errors++; - goto fail; + return ret; } /* Do the actual checks */ - for(i = 0; i < s->l2_size; i++) { + for (i = 0; i < s->l2_size; i++) { + uint64_t coffset; + int csize; + QCow2ClusterType type; + l2_entry = get_l2_entry(s, l2_table, i); + l2_bitmap = get_l2_bitmap(s, l2_table, i); + type = qcow2_get_cluster_type(bs, l2_entry); + + if (type != QCOW2_CLUSTER_COMPRESSED) { + /* Check reserved bits of Standard Cluster Descriptor */ + if (l2_entry & L2E_STD_RESERVED_MASK) { + fprintf(stderr, "ERROR found l2 entry with reserved bits set: " + "%" PRIx64 "\n", l2_entry); + res->corruptions++; + } + } - switch (qcow2_get_cluster_type(bs, l2_entry)) { + switch (type) { case QCOW2_CLUSTER_COMPRESSED: /* Compressed clusters don't have QCOW_OFLAG_COPIED */ if (l2_entry & QCOW_OFLAG_COPIED) { @@ -1638,23 +1713,28 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, break; } + if (l2_bitmap) { + fprintf(stderr, "ERROR compressed cluster %d with non-zero " + "subcluster allocation bitmap, entry=0x%" PRIx64 "\n", + i, l2_entry); + res->corruptions++; + break; + } + /* Mark cluster as used */ - nb_csectors = ((l2_entry >> s->csize_shift) & - s->csize_mask) + 1; - l2_entry &= s->cluster_offset_mask; + qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize); ret = qcow2_inc_refcounts_imrt( - bs, res, refcount_table, refcount_table_size, - l2_entry & QCOW2_COMPRESSED_SECTOR_MASK, - nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE); + bs, res, refcount_table, refcount_table_size, coffset, csize); if (ret < 0) { - goto fail; + return ret; } if (flags & CHECK_FRAG_INFO) { res->bfi.allocated_clusters++; res->bfi.compressed_clusters++; - /* Compressed clusters are fragmented by nature. Since they + /* + * Compressed clusters are fragmented by nature. Since they * take up sub-sector space but we only have sector granularity * I/O we need to re-read the same sectors even for adjacent * compressed clusters. @@ -1668,13 +1748,19 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, { uint64_t offset = l2_entry & L2E_OFFSET_MASK; + if ((l2_bitmap >> 32) & l2_bitmap) { + res->corruptions++; + fprintf(stderr, "ERROR offset=%" PRIx64 ": Allocated " + "cluster has corrupted subcluster allocation bitmap\n", + offset); + } + /* Correct offsets are cluster aligned */ if (offset_into_cluster(s, offset)) { bool contains_data; res->corruptions++; if (has_subclusters(s)) { - uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, i); contains_data = (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC); } else { contains_data = !(l2_entry & QCOW_OFLAG_ZERO); @@ -1687,40 +1773,30 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, fix & BDRV_FIX_ERRORS ? 
"Repairing" : "ERROR", offset); if (fix & BDRV_FIX_ERRORS) { - int idx = i * (l2_entry_size(s) / sizeof(uint64_t)); - uint64_t l2e_offset = - l2_offset + (uint64_t)i * l2_entry_size(s); - int ign = active ? QCOW2_OL_ACTIVE_L2 : - QCOW2_OL_INACTIVE_L2; - - l2_entry = has_subclusters(s) ? 0 : QCOW_OFLAG_ZERO; - set_l2_entry(s, l2_table, i, l2_entry); - ret = qcow2_pre_write_overlap_check(bs, ign, - l2e_offset, l2_entry_size(s), false); - if (ret < 0) { - fprintf(stderr, "ERROR: Overlap check failed\n"); - res->check_errors++; - /* Something is seriously wrong, so abort checking - * this L2 table */ - goto fail; + ret = fix_l2_entry_by_zero(bs, res, l2_offset, + l2_table, i, active, + &metadata_overlap); + if (metadata_overlap) { + /* + * Something is seriously wrong, so abort checking + * this L2 table. + */ + return ret; } - ret = bdrv_pwrite_sync(bs->file, l2e_offset, - &l2_table[idx], - l2_entry_size(s)); - if (ret < 0) { - fprintf(stderr, "ERROR: Failed to overwrite L2 " - "table entry: %s\n", strerror(-ret)); - res->check_errors++; - /* Do not abort, continue checking the rest of this - * L2 table's entries */ - } else { - res->corruptions--; - res->corruptions_fixed++; - /* Skip marking the cluster as used - * (it is unused now) */ + if (ret == 0) { + /* + * Skip marking the cluster as used + * (it is unused now). + */ continue; } + + /* + * Failed to fix. + * Do not abort, continue checking the rest of this + * L2 table's entries. + */ } } else { fprintf(stderr, "ERROR offset=%" PRIx64 ": Data cluster is " @@ -1743,14 +1819,23 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, refcount_table_size, offset, s->cluster_size); if (ret < 0) { - goto fail; + return ret; } } break; } case QCOW2_CLUSTER_ZERO_PLAIN: + /* Impossible when image has subclusters */ + assert(!l2_bitmap); + break; + case QCOW2_CLUSTER_UNALLOCATED: + if (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC) { + res->corruptions++; + fprintf(stderr, "ERROR: Unallocated " + "cluster has non-zero subcluster allocation map\n"); + } break; default: @@ -1758,12 +1843,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, } } - g_free(l2_table); return 0; - -fail: - g_free(l2_table); - return ret; } /* @@ -1782,71 +1862,79 @@ static int check_refcounts_l1(BlockDriverState *bs, int flags, BdrvCheckMode fix, bool active) { BDRVQcow2State *s = bs->opaque; - uint64_t *l1_table = NULL, l2_offset, l1_size2; + size_t l1_size_bytes = l1_size * L1E_SIZE; + g_autofree uint64_t *l1_table = NULL; + uint64_t l2_offset; int i, ret; - l1_size2 = l1_size * L1E_SIZE; + if (!l1_size) { + return 0; + } /* Mark L1 table as used */ ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size, - l1_table_offset, l1_size2); + l1_table_offset, l1_size_bytes); if (ret < 0) { - goto fail; + return ret; + } + + l1_table = g_try_malloc(l1_size_bytes); + if (l1_table == NULL) { + res->check_errors++; + return -ENOMEM; } /* Read L1 table entries from disk */ - if (l1_size2 > 0) { - l1_table = g_try_malloc(l1_size2); - if (l1_table == NULL) { - ret = -ENOMEM; - res->check_errors++; - goto fail; - } - ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2); - if (ret < 0) { - fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n"); - res->check_errors++; - goto fail; - } - for(i = 0;i < l1_size; i++) - be64_to_cpus(&l1_table[i]); + ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size_bytes); + if (ret < 0) { + fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n"); + 
res->check_errors++; + return ret; + } + + for (i = 0; i < l1_size; i++) { + be64_to_cpus(&l1_table[i]); } /* Do the actual checks */ - for(i = 0; i < l1_size; i++) { - l2_offset = l1_table[i]; - if (l2_offset) { - /* Mark L2 table as used */ - l2_offset &= L1E_OFFSET_MASK; - ret = qcow2_inc_refcounts_imrt(bs, res, - refcount_table, refcount_table_size, - l2_offset, s->cluster_size); - if (ret < 0) { - goto fail; - } + for (i = 0; i < l1_size; i++) { + if (!l1_table[i]) { + continue; + } - /* L2 tables are cluster aligned */ - if (offset_into_cluster(s, l2_offset)) { - fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not " - "cluster aligned; L1 entry corrupted\n", l2_offset); - res->corruptions++; - } + if (l1_table[i] & L1E_RESERVED_MASK) { + fprintf(stderr, "ERROR found L1 entry with reserved bits set: " + "%" PRIx64 "\n", l1_table[i]); + res->corruptions++; + } - /* Process and check L2 entries */ - ret = check_refcounts_l2(bs, res, refcount_table, - refcount_table_size, l2_offset, flags, - fix, active); - if (ret < 0) { - goto fail; - } + l2_offset = l1_table[i] & L1E_OFFSET_MASK; + + /* Mark L2 table as used */ + ret = qcow2_inc_refcounts_imrt(bs, res, + refcount_table, refcount_table_size, + l2_offset, s->cluster_size); + if (ret < 0) { + return ret; + } + + /* L2 tables are cluster aligned */ + if (offset_into_cluster(s, l2_offset)) { + fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not " + "cluster aligned; L1 entry corrupted\n", l2_offset); + res->corruptions++; + } + + /* Process and check L2 entries */ + ret = check_refcounts_l2(bs, res, refcount_table, + refcount_table_size, l2_offset, flags, + fix, active); + if (ret < 0) { + return ret; } } - g_free(l1_table); - return 0; -fail: - g_free(l1_table); - return ret; + return 0; } /* @@ -2001,9 +2089,17 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, for(i = 0; i < s->refcount_table_size; i++) { uint64_t offset, cluster; - offset = s->refcount_table[i]; + offset = s->refcount_table[i] & REFT_OFFSET_MASK; cluster = offset >> s->cluster_bits; + if (s->refcount_table[i] & REFT_RESERVED_MASK) { + fprintf(stderr, "ERROR refcount table entry %" PRId64 " has " + "reserved bits set\n", i); + res->corruptions++; + *rebuild = true; + continue; + } + /* Refcount blocks are cluster aligned */ if (offset_into_cluster(s, offset)) { fprintf(stderr, "ERROR refcount block %" PRId64 " is not " diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 2e98c7f4b62..71ddb08c212 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -1026,7 +1026,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, int new_l1_bytes; int ret; - assert(bs->read_only); + assert(bdrv_is_read_only(bs)); /* Search the snapshot */ snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name); diff --git a/block/qcow2.c b/block/qcow2.c index 9727ae8fe34..d5090167569 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -74,7 +74,7 @@ typedef struct { static int coroutine_fn qcow2_co_preadv_compressed(BlockDriverState *bs, - uint64_t cluster_descriptor, + uint64_t l2_entry, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, @@ -1723,8 +1723,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, /* Clear unknown autoclear feature bits */ update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK; - update_header = - update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE); + update_header = update_header && bdrv_is_writable(bs); if (update_header) { s->autoclear_features &= 
QCOW2_AUTOCLEAR_MASK; } @@ -1811,7 +1810,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; /* Repair image if dirty */ - if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && + if (!(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) && (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { BdrvCheckResult result = {0}; @@ -1927,6 +1926,7 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) static int qcow2_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { + BDRVQcow2State *s = state->bs->opaque; Qcow2ReopenState *r; int ret; @@ -1957,6 +1957,16 @@ static int qcow2_reopen_prepare(BDRVReopenState *state, } } + /* + * Without an external data file, s->data_file points to the same BdrvChild + * as bs->file. It needs to be resynced after reopen because bs->file may + * be changed. We can't use it in the meantime. + */ + if (!has_data_file(state->bs)) { + assert(s->data_file == state->bs->file); + s->data_file = NULL; + } + return 0; fail: @@ -1967,7 +1977,16 @@ static int qcow2_reopen_prepare(BDRVReopenState *state, static void qcow2_reopen_commit(BDRVReopenState *state) { + BDRVQcow2State *s = state->bs->opaque; + qcow2_update_options_commit(state->bs, state->opaque); + if (!s->data_file) { + /* + * If we don't have an external data file, s->data_file was cleared by + * qcow2_reopen_prepare() and needs to be updated. + */ + s->data_file = state->bs->file; + } g_free(state->opaque); } @@ -1991,6 +2010,15 @@ static void qcow2_reopen_commit_post(BDRVReopenState *state) static void qcow2_reopen_abort(BDRVReopenState *state) { + BDRVQcow2State *s = state->bs->opaque; + + if (!s->data_file) { + /* + * If we don't have an external data file, s->data_file was cleared by + * qcow2_reopen_prepare() and needs to be restored. 
+ */ + s->data_file = state->bs->file; + } qcow2_update_options_abort(state->bs, state->opaque); g_free(state->opaque); } @@ -2177,7 +2205,7 @@ typedef struct Qcow2AioTask { BlockDriverState *bs; QCow2SubclusterType subcluster_type; /* only for read */ - uint64_t host_offset; /* or full descriptor in compressed clusters */ + uint64_t host_offset; /* or l2_entry for compressed read */ uint64_t offset; uint64_t bytes; QEMUIOVector *qiov; @@ -2282,9 +2310,10 @@ static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task) } static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, - size_t qiov_offset, int flags) + size_t qiov_offset, + BdrvRequestFlags flags) { BDRVQcow2State *s = bs->opaque; int ret = 0; @@ -2568,8 +2597,8 @@ static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task) } static coroutine_fn int qcow2_co_pwritev_part( - BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, int flags) + BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags) { BDRVQcow2State *s = bs->opaque; int offset_in_cluster; @@ -3912,7 +3941,7 @@ static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes) } static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, BdrvRequestFlags flags) + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { int ret; BDRVQcow2State *s = bs->opaque; @@ -3967,7 +3996,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, } static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { int ret; BDRVQcow2State *s = bs->opaque; @@ -3997,9 +4026,9 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, static int coroutine_fn qcow2_co_copy_range_from(BlockDriverState *bs, - BdrvChild *src, uint64_t src_offset, - BdrvChild *dst, uint64_t dst_offset, - uint64_t bytes, BdrvRequestFlags read_flags, + BdrvChild *src, int64_t src_offset, + BdrvChild *dst, int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { BDRVQcow2State *s = bs->opaque; @@ -4080,9 +4109,9 @@ qcow2_co_copy_range_from(BlockDriverState *bs, static int coroutine_fn qcow2_co_copy_range_to(BlockDriverState *bs, - BdrvChild *src, uint64_t src_offset, - BdrvChild *dst, uint64_t dst_offset, - uint64_t bytes, BdrvRequestFlags read_flags, + BdrvChild *src, int64_t src_offset, + BdrvChild *dst, int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { BDRVQcow2State *s = bs->opaque; @@ -4602,7 +4631,7 @@ static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task) */ static coroutine_fn int qcow2_co_pwritev_compressed_part(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset) { BDRVQcow2State *s = bs->opaque; @@ -4665,22 +4694,19 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs, static int coroutine_fn qcow2_co_preadv_compressed(BlockDriverState *bs, - uint64_t cluster_descriptor, + uint64_t l2_entry, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, size_t qiov_offset) { BDRVQcow2State *s = bs->opaque; - int ret = 0, csize, nb_csectors; + int ret = 0, csize; uint64_t coffset; uint8_t *buf, *out_buf; int offset_in_cluster = offset_into_cluster(s, offset); - coffset = 
cluster_descriptor & s->cluster_offset_mask; - nb_csectors = ((cluster_descriptor >> s->csize_shift) & s->csize_mask) + 1; - csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE - - (coffset & ~QCOW2_COMPRESSED_SECTOR_MASK); + qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize); buf = g_try_malloc(csize); if (!buf) { @@ -5089,6 +5115,7 @@ static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) BDRVQcow2State *s = bs->opaque; bdi->cluster_size = s->cluster_size; bdi->vm_state_offset = qcow2_vm_state_offset(s); + bdi->is_dirty = s->incompatible_features & QCOW2_INCOMPAT_DIRTY; return 0; } @@ -5201,24 +5228,55 @@ static int qcow2_has_zero_init(BlockDriverState *bs) } } +/* + * Check the request to vmstate. On success return + * qcow2_vm_state_offset(bs) + @pos + */ +static int64_t qcow2_check_vmstate_request(BlockDriverState *bs, + QEMUIOVector *qiov, int64_t pos) +{ + BDRVQcow2State *s = bs->opaque; + int64_t vmstate_offset = qcow2_vm_state_offset(s); + int ret; + + /* Incoming requests must be OK */ + bdrv_check_qiov_request(pos, qiov->size, qiov, 0, &error_abort); + + if (INT64_MAX - pos < vmstate_offset) { + return -EIO; + } + + pos += vmstate_offset; + ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL); + if (ret < 0) { + return ret; + } + + return pos; +} + static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { - BDRVQcow2State *s = bs->opaque; + int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos); + if (offset < 0) { + return offset; + } BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); - return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos, - qiov->size, qiov, 0, 0); + return bs->drv->bdrv_co_pwritev_part(bs, offset, qiov->size, qiov, 0, 0); } static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { - BDRVQcow2State *s = bs->opaque; + int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos); + if (offset < 0) { + return offset; + } BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); - return bs->drv->bdrv_co_preadv_part(bs, qcow2_vm_state_offset(s) + pos, - qiov->size, qiov, 0, 0); + return bs->drv->bdrv_co_preadv_part(bs, offset, qiov->size, qiov, 0, 0); } /* @@ -5620,15 +5678,10 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, if (backing_file || backing_format) { if (g_strcmp0(backing_file, s->image_backing_file) || g_strcmp0(backing_format, s->image_backing_format)) { - warn_report("Deprecated use of amend to alter the backing file; " - "use qemu-img rebase instead"); - } - ret = qcow2_change_backing_file(bs, - backing_file ?: s->image_backing_file, - backing_format ?: s->image_backing_format); - if (ret < 0) { - error_setg_errno(errp, -ret, "Failed to change the backing file"); - return ret; + error_setg(errp, "Cannot amend the backing file"); + error_append_hint(errp, + "You can use 'qemu-img rebase' instead.\n"); + return -EINVAL; } } diff --git a/block/qcow2.h b/block/qcow2.h index 0fe5f74ed3e..fd48a89d452 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -110,7 +110,6 @@ /* Defined in the qcow2 spec (compressed cluster descriptor) */ #define QCOW2_COMPRESSED_SECTOR_SIZE 512U -#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1ULL)) /* Must be at least 2 to cover COW */ #define MIN_L2_CACHE_SIZE 2 /* cache entries */ @@ -587,10 +586,12 @@ typedef enum QCow2MetadataOverlap { (QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2) #define L1E_OFFSET_MASK 0x00fffffffffffe00ULL +#define L1E_RESERVED_MASK 0x7f000000000001ffULL #define 
L2E_OFFSET_MASK 0x00fffffffffffe00ULL -#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL +#define L2E_STD_RESERVED_MASK 0x3f000000000001feULL #define REFT_OFFSET_MASK 0xfffffffffffffe00ULL +#define REFT_RESERVED_MASK 0x1ffULL #define INV_OFFSET (-1ULL) @@ -914,6 +915,8 @@ int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset, int compressed_size, uint64_t *host_offset); +void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry, + uint64_t *coffset, int *csize); int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m); diff --git a/block/qed.c b/block/qed.c index f45c640513f..558d3646c4b 100644 --- a/block/qed.c +++ b/block/qed.c @@ -582,6 +582,7 @@ static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp) BDRVQEDState *s = bs->opaque; bs->bl.pwrite_zeroes_alignment = s->header.cluster_size; + bs->bl.max_pwrite_zeroes = QEMU_ALIGN_DOWN(INT_MAX, s->header.cluster_size); } /* We have nothing to do for QED reopen, stubs just return @@ -1397,7 +1398,7 @@ static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs, static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int bytes, + int64_t bytes, BdrvRequestFlags flags) { BDRVQEDState *s = bs->opaque; @@ -1408,6 +1409,12 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, */ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes); + /* + * QED is not prepared for 63bit write-zero requests, so rely on + * max_pwrite_zeroes. + */ + assert(bytes <= INT_MAX); + /* Fall back if the request is not aligned */ if (qed_offset_into_cluster(s, offset) || qed_offset_into_cluster(s, bytes)) { diff --git a/block/quorum.c b/block/quorum.c index cfc1436abb5..c28dda7baac 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -663,8 +663,8 @@ static int read_fifo_child(QuorumAIOCB *acb) return ret; } -static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, int flags) +static int quorum_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVQuorumState *s = bs->opaque; QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags); @@ -714,8 +714,9 @@ static void write_quorum_entry(void *opaque) } } -static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, int flags) +static int quorum_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { BDRVQuorumState *s = bs->opaque; QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags); @@ -745,7 +746,7 @@ static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset, } static int quorum_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int bytes, BdrvRequestFlags flags) + int64_t bytes, BdrvRequestFlags flags) { return quorum_co_pwritev(bs, offset, bytes, NULL, @@ -1279,7 +1280,7 @@ static BlockDriver bdrv_quorum = { .bdrv_dirname = quorum_dirname, .bdrv_co_block_status = quorum_co_block_status, - .bdrv_co_flush_to_disk = quorum_co_flush, + .bdrv_co_flush = quorum_co_flush, .bdrv_getlength = quorum_getlength, diff --git a/block/raw-format.c b/block/raw-format.c index 7717578ed6a..bda757fd195 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -181,8 +181,8 @@ static void raw_reopen_abort(BDRVReopenState *state) } /* Check and adjust the offset, against 'offset' and 
'size' options. */ -static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset, - uint64_t bytes, bool is_write) +static inline int raw_adjust_offset(BlockDriverState *bs, int64_t *offset, + int64_t bytes, bool is_write) { BDRVRawState *s = bs->opaque; @@ -201,9 +201,9 @@ static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset, return 0; } -static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, - int flags) +static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { int ret; @@ -216,9 +216,9 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); } -static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, - int flags) +static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { void *buf = NULL; BlockDriver *drv; @@ -289,12 +289,12 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, } static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { int ret; - ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true); + ret = raw_adjust_offset(bs, &offset, bytes, true); if (ret) { return ret; } @@ -302,11 +302,11 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, } static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { int ret; - ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true); + ret = raw_adjust_offset(bs, &offset, bytes, true); if (ret) { return ret; } @@ -532,10 +532,10 @@ static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo) static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs, BdrvChild *src, - uint64_t src_offset, + int64_t src_offset, BdrvChild *dst, - uint64_t dst_offset, - uint64_t bytes, + int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { @@ -551,10 +551,10 @@ static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs, static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, BdrvChild *src, - uint64_t src_offset, + int64_t src_offset, BdrvChild *dst, - uint64_t dst_offset, - uint64_t bytes, + int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { @@ -580,6 +580,25 @@ static void raw_cancel_in_flight(BlockDriverState *bs) bdrv_cancel_in_flight(bs->file->bs); } +static void raw_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t parent_perm, uint64_t parent_shared, + uint64_t *nperm, uint64_t *nshared) +{ + bdrv_default_perms(bs, c, role, reopen_queue, parent_perm, + parent_shared, nperm, nshared); + + /* + * bdrv_default_perms() may add WRITE and/or RESIZE (see comment in + * bdrv_default_perms_for_storage() for an explanation) but we only need + * them if they are in parent_perm. Drop WRITE and RESIZE whenever possible + * to avoid permission conflicts. 
+ */ + *nperm &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); + *nperm |= parent_perm & (BLK_PERM_WRITE | BLK_PERM_RESIZE); +} + BlockDriver bdrv_raw = { .format_name = "raw", .instance_size = sizeof(BDRVRawState), @@ -588,7 +607,7 @@ BlockDriver bdrv_raw = { .bdrv_reopen_commit = &raw_reopen_commit, .bdrv_reopen_abort = &raw_reopen_abort, .bdrv_open = &raw_open, - .bdrv_child_perm = bdrv_default_perms, + .bdrv_child_perm = raw_child_perm, .bdrv_co_create_opts = &raw_co_create_opts, .bdrv_co_preadv = &raw_co_preadv, .bdrv_co_pwritev = &raw_co_pwritev, diff --git a/block/rbd.c b/block/rbd.c index f098a89c7bc..def96292e0e 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -55,49 +55,30 @@ * leading "\". */ -/* rbd_aio_discard added in 0.1.2 */ -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2) -#define LIBRBD_SUPPORTS_DISCARD -#else -#undef LIBRBD_SUPPORTS_DISCARD -#endif - #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER) #define RBD_MAX_SNAPS 100 -/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */ -#ifdef LIBRBD_SUPPORTS_IOVEC -#define LIBRBD_USE_IOVEC 1 -#else -#define LIBRBD_USE_IOVEC 0 -#endif +#define RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN 8 + +static const char rbd_luks_header_verification[ + RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = { + 'L', 'U', 'K', 'S', 0xBA, 0xBE, 0, 1 +}; + +static const char rbd_luks2_header_verification[ + RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = { + 'L', 'U', 'K', 'S', 0xBA, 0xBE, 0, 2 +}; typedef enum { RBD_AIO_READ, RBD_AIO_WRITE, RBD_AIO_DISCARD, - RBD_AIO_FLUSH + RBD_AIO_FLUSH, + RBD_AIO_WRITE_ZEROES } RBDAIOCmd; -typedef struct RBDAIOCB { - BlockAIOCB common; - int64_t ret; - QEMUIOVector *qiov; - char *bounce; - RBDAIOCmd cmd; - int error; - struct BDRVRBDState *s; -} RBDAIOCB; - -typedef struct RADOSCB { - RBDAIOCB *acb; - struct BDRVRBDState *s; - int64_t size; - char *buf; - int64_t ret; -} RADOSCB; - typedef struct BDRVRBDState { rados_t cluster; rados_ioctx_t io_ctx; @@ -106,28 +87,52 @@ typedef struct BDRVRBDState { char *snap; char *namespace; uint64_t image_size; + uint64_t object_size; } BDRVRBDState; +typedef struct RBDTask { + BlockDriverState *bs; + Coroutine *co; + bool complete; + int64_t ret; +} RBDTask; + +typedef struct RBDDiffIterateReq { + uint64_t offs; + uint64_t bytes; + bool exists; +} RBDDiffIterateReq; + static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, BlockdevOptionsRbd *opts, bool cache, const char *keypairs, const char *secretid, Error **errp); +static char *qemu_rbd_strchr(char *src, char delim) +{ + char *p; + + for (p = src; *p; ++p) { + if (*p == delim) { + return p; + } + if (*p == '\\' && p[1] != '\0') { + ++p; + } + } + + return NULL; +} + + static char *qemu_rbd_next_tok(char *src, char delim, char **p) { char *end; *p = NULL; - for (end = src; *end; ++end) { - if (*end == delim) { - break; - } - if (*end == '\\' && end[1] != '\0') { - end++; - } - } - if (*end == delim) { + end = qemu_rbd_strchr(src, delim); + if (end) { *p = end + 1; *end = '\0'; } @@ -171,7 +176,7 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, qemu_rbd_unescape(found_str); qdict_put_str(options, "pool", found_str); - if (strchr(p, '@')) { + if (qemu_rbd_strchr(p, '@')) { image_name = qemu_rbd_next_tok(p, '@', &p); found_str = qemu_rbd_next_tok(p, ':', &p); @@ -181,7 +186,7 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, image_name = qemu_rbd_next_tok(p, ':', &p); } /* Check for namespace in the image_name */ - if (strchr(image_name, '/')) { + if 
(qemu_rbd_strchr(image_name, '/')) { found_str = qemu_rbd_next_tok(image_name, '/', &image_name); qemu_rbd_unescape(found_str); qdict_put_str(options, "namespace", found_str); @@ -241,14 +246,6 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, return; } - -static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp) -{ - /* XXX Does RBD support AIO on less than 512-byte alignment? */ - bs->bl.request_alignment = 512; -} - - static int qemu_rbd_set_auth(rados_t cluster, BlockdevOptionsRbd *opts, Error **errp) { @@ -330,17 +327,203 @@ static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json, return ret; } -static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs) +#ifdef LIBRBD_SUPPORTS_ENCRYPTION +static int qemu_rbd_convert_luks_options( + RbdEncryptionOptionsLUKSBase *luks_opts, + char **passphrase, + size_t *passphrase_len, + Error **errp) { - if (LIBRBD_USE_IOVEC) { - RBDAIOCB *acb = rcb->acb; - iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0, - acb->qiov->size - offs); + return qcrypto_secret_lookup(luks_opts->key_secret, (uint8_t **)passphrase, + passphrase_len, errp); +} + +static int qemu_rbd_convert_luks_create_options( + RbdEncryptionCreateOptionsLUKSBase *luks_opts, + rbd_encryption_algorithm_t *alg, + char **passphrase, + size_t *passphrase_len, + Error **errp) +{ + int r = 0; + + r = qemu_rbd_convert_luks_options( + qapi_RbdEncryptionCreateOptionsLUKSBase_base(luks_opts), + passphrase, passphrase_len, errp); + if (r < 0) { + return r; + } + + if (luks_opts->has_cipher_alg) { + switch (luks_opts->cipher_alg) { + case QCRYPTO_CIPHER_ALG_AES_128: { + *alg = RBD_ENCRYPTION_ALGORITHM_AES128; + break; + } + case QCRYPTO_CIPHER_ALG_AES_256: { + *alg = RBD_ENCRYPTION_ALGORITHM_AES256; + break; + } + default: { + r = -ENOTSUP; + error_setg_errno(errp, -r, "unknown encryption algorithm: %u", + luks_opts->cipher_alg); + return r; + } + } } else { - memset(rcb->buf + offs, 0, rcb->size - offs); + /* default alg */ + *alg = RBD_ENCRYPTION_ALGORITHM_AES256; } + + return 0; } +static int qemu_rbd_encryption_format(rbd_image_t image, + RbdEncryptionCreateOptions *encrypt, + Error **errp) +{ + int r = 0; + g_autofree char *passphrase = NULL; + size_t passphrase_len; + rbd_encryption_format_t format; + rbd_encryption_options_t opts; + rbd_encryption_luks1_format_options_t luks_opts; + rbd_encryption_luks2_format_options_t luks2_opts; + size_t opts_size; + uint64_t raw_size, effective_size; + + r = rbd_get_size(image, &raw_size); + if (r < 0) { + error_setg_errno(errp, -r, "cannot get raw image size"); + return r; + } + + switch (encrypt->format) { + case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS: { + memset(&luks_opts, 0, sizeof(luks_opts)); + format = RBD_ENCRYPTION_FORMAT_LUKS1; + opts = &luks_opts; + opts_size = sizeof(luks_opts); + r = qemu_rbd_convert_luks_create_options( + qapi_RbdEncryptionCreateOptionsLUKS_base(&encrypt->u.luks), + &luks_opts.alg, &passphrase, &passphrase_len, errp); + if (r < 0) { + return r; + } + luks_opts.passphrase = passphrase; + luks_opts.passphrase_size = passphrase_len; + break; + } + case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2: { + memset(&luks2_opts, 0, sizeof(luks2_opts)); + format = RBD_ENCRYPTION_FORMAT_LUKS2; + opts = &luks2_opts; + opts_size = sizeof(luks2_opts); + r = qemu_rbd_convert_luks_create_options( + qapi_RbdEncryptionCreateOptionsLUKS2_base( + &encrypt->u.luks2), + &luks2_opts.alg, &passphrase, &passphrase_len, errp); + if (r < 0) { + return r; + } + luks2_opts.passphrase = passphrase; + 
luks2_opts.passphrase_size = passphrase_len; + break; + } + default: { + r = -ENOTSUP; + error_setg_errno( + errp, -r, "unknown image encryption format: %u", + encrypt->format); + return r; + } + } + + r = rbd_encryption_format(image, format, opts, opts_size); + if (r < 0) { + error_setg_errno(errp, -r, "encryption format fail"); + return r; + } + + r = rbd_get_size(image, &effective_size); + if (r < 0) { + error_setg_errno(errp, -r, "cannot get effective image size"); + return r; + } + + r = rbd_resize(image, raw_size + (raw_size - effective_size)); + if (r < 0) { + error_setg_errno(errp, -r, "cannot resize image after format"); + return r; + } + + return 0; +} + +static int qemu_rbd_encryption_load(rbd_image_t image, + RbdEncryptionOptions *encrypt, + Error **errp) +{ + int r = 0; + g_autofree char *passphrase = NULL; + size_t passphrase_len; + rbd_encryption_luks1_format_options_t luks_opts; + rbd_encryption_luks2_format_options_t luks2_opts; + rbd_encryption_format_t format; + rbd_encryption_options_t opts; + size_t opts_size; + + switch (encrypt->format) { + case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS: { + memset(&luks_opts, 0, sizeof(luks_opts)); + format = RBD_ENCRYPTION_FORMAT_LUKS1; + opts = &luks_opts; + opts_size = sizeof(luks_opts); + r = qemu_rbd_convert_luks_options( + qapi_RbdEncryptionOptionsLUKS_base(&encrypt->u.luks), + &passphrase, &passphrase_len, errp); + if (r < 0) { + return r; + } + luks_opts.passphrase = passphrase; + luks_opts.passphrase_size = passphrase_len; + break; + } + case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2: { + memset(&luks2_opts, 0, sizeof(luks2_opts)); + format = RBD_ENCRYPTION_FORMAT_LUKS2; + opts = &luks2_opts; + opts_size = sizeof(luks2_opts); + r = qemu_rbd_convert_luks_options( + qapi_RbdEncryptionOptionsLUKS2_base(&encrypt->u.luks2), + &passphrase, &passphrase_len, errp); + if (r < 0) { + return r; + } + luks2_opts.passphrase = passphrase; + luks2_opts.passphrase_size = passphrase_len; + break; + } + default: { + r = -ENOTSUP; + error_setg_errno( + errp, -r, "unknown image encryption format: %u", + encrypt->format); + return r; + } + } + + r = rbd_encryption_load(image, format, opts, opts_size); + if (r < 0) { + error_setg_errno(errp, -r, "encryption load fail"); + return r; + } + + return 0; +} +#endif + /* FIXME Deprecate and remove keypairs or make it available in QMP. */ static int qemu_rbd_do_create(BlockdevCreateOptions *options, const char *keypairs, const char *password_secret, @@ -358,6 +541,13 @@ static int qemu_rbd_do_create(BlockdevCreateOptions *options, return -EINVAL; } +#ifndef LIBRBD_SUPPORTS_ENCRYPTION + if (opts->has_encrypt) { + error_setg(errp, "RBD library does not support image encryption"); + return -ENOTSUP; + } +#endif + if (opts->has_cluster_size) { int64_t objsize = opts->cluster_size; if ((objsize - 1) & objsize) { /* not a power of 2? 
*/ @@ -383,6 +573,28 @@ static int qemu_rbd_do_create(BlockdevCreateOptions *options, goto out; } +#ifdef LIBRBD_SUPPORTS_ENCRYPTION + if (opts->has_encrypt) { + rbd_image_t image; + + ret = rbd_open(io_ctx, opts->location->image, &image, NULL); + if (ret < 0) { + error_setg_errno(errp, -ret, + "error opening image '%s' for encryption format", + opts->location->image); + goto out; + } + + ret = qemu_rbd_encryption_format(image, opts->encrypt, errp); + rbd_close(image); + if (ret < 0) { + /* encryption format fail, try removing the image */ + rbd_remove(io_ctx, opts->location->image); + goto out; + } + } +#endif + ret = 0; out: rados_ioctx_destroy(io_ctx); @@ -395,6 +607,43 @@ static int qemu_rbd_co_create(BlockdevCreateOptions *options, Error **errp) return qemu_rbd_do_create(options, NULL, NULL, errp); } +static int qemu_rbd_extract_encryption_create_options( + QemuOpts *opts, + RbdEncryptionCreateOptions **spec, + Error **errp) +{ + QDict *opts_qdict; + QDict *encrypt_qdict; + Visitor *v; + int ret = 0; + + opts_qdict = qemu_opts_to_qdict(opts, NULL); + qdict_extract_subqdict(opts_qdict, &encrypt_qdict, "encrypt."); + qobject_unref(opts_qdict); + if (!qdict_size(encrypt_qdict)) { + *spec = NULL; + goto exit; + } + + /* Convert options into a QAPI object */ + v = qobject_input_visitor_new_flat_confused(encrypt_qdict, errp); + if (!v) { + ret = -EINVAL; + goto exit; + } + + visit_type_RbdEncryptionCreateOptions(v, NULL, spec, errp); + visit_free(v); + if (!*spec) { + ret = -EINVAL; + goto exit; + } + +exit: + qobject_unref(encrypt_qdict); + return ret; +} + static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, @@ -403,6 +652,7 @@ static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv, BlockdevCreateOptions *create_options; BlockdevCreateOptionsRbd *rbd_opts; BlockdevOptionsRbd *loc; + RbdEncryptionCreateOptions *encrypt = NULL; Error *local_err = NULL; const char *keypairs, *password_secret; QDict *options = NULL; @@ -431,6 +681,13 @@ static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv, goto exit; } + ret = qemu_rbd_extract_encryption_create_options(opts, &encrypt, errp); + if (ret < 0) { + goto exit; + } + rbd_opts->encrypt = encrypt; + rbd_opts->has_encrypt = !!encrypt; + /* * Caution: while qdict_get_try_str() is fine, getting non-string * types would require more care. When @options come from -blockdev @@ -459,53 +716,6 @@ static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv, return ret; } -/* - * This aio completion is being called from rbd_finish_bh() and runs in qemu - * BH context. - */ -static void qemu_rbd_complete_aio(RADOSCB *rcb) -{ - RBDAIOCB *acb = rcb->acb; - int64_t r; - - r = rcb->ret; - - if (acb->cmd != RBD_AIO_READ) { - if (r < 0) { - acb->ret = r; - acb->error = 1; - } else if (!acb->error) { - acb->ret = rcb->size; - } - } else { - if (r < 0) { - qemu_rbd_memset(rcb, 0); - acb->ret = r; - acb->error = 1; - } else if (r < rcb->size) { - qemu_rbd_memset(rcb, r); - if (!acb->error) { - acb->ret = rcb->size; - } - } else if (!acb->error) { - acb->ret = r; - } - } - - g_free(rcb); - - if (!LIBRBD_USE_IOVEC) { - if (acb->cmd == RBD_AIO_READ) { - qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); - } - qemu_vfree(acb->bounce); - } - - acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 
0 : acb->ret)); - - qemu_aio_unref(acb); -} - static char *qemu_rbd_mon_host(BlockdevOptionsRbd *opts, Error **errp) { const char **vals; @@ -692,6 +902,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, const QDictEntry *e; Error *local_err = NULL; char *keypairs, *secretid; + rbd_image_info_t info; int r; keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs")); @@ -756,30 +967,49 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, goto failed_open; } - r = rbd_get_size(s->image, &s->image_size); + if (opts->has_encrypt) { +#ifdef LIBRBD_SUPPORTS_ENCRYPTION + r = qemu_rbd_encryption_load(s->image, opts->encrypt, errp); + if (r < 0) { + goto failed_post_open; + } +#else + r = -ENOTSUP; + error_setg(errp, "RBD library does not support image encryption"); + goto failed_post_open; +#endif + } + + r = rbd_stat(s->image, &info, sizeof(info)); if (r < 0) { - error_setg_errno(errp, -r, "error getting image size from %s", + error_setg_errno(errp, -r, "error getting image info from %s", s->image_name); - rbd_close(s->image); - goto failed_open; + goto failed_post_open; } + s->image_size = info.size; + s->object_size = info.obj_size; /* If we are using an rbd snapshot, we must be r/o, otherwise * leave as-is */ if (s->snap != NULL) { r = bdrv_apply_auto_read_only(bs, "rbd snapshots are read-only", errp); if (r < 0) { - rbd_close(s->image); - goto failed_open; + goto failed_post_open; } } +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; +#endif + /* When extending regular files, we get zeros from the OS */ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; r = 0; goto out; +failed_post_open: + rbd_close(s->image); failed_open: rados_ioctx_destroy(s->io_ctx); g_free(s->snap); @@ -839,229 +1069,318 @@ static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size) return 0; } -static const AIOCBInfo rbd_aiocb_info = { - .aiocb_size = sizeof(RBDAIOCB), -}; - -static void rbd_finish_bh(void *opaque) +static void qemu_rbd_finish_bh(void *opaque) { - RADOSCB *rcb = opaque; - qemu_rbd_complete_aio(rcb); + RBDTask *task = opaque; + task->complete = true; + aio_co_wake(task->co); } /* - * This is the callback function for rbd_aio_read and _write + * This is the completion callback function for all rbd aio calls + * started from qemu_rbd_start_co(). * * Note: this function is being called from a non qemu thread so * we need to be careful about what we do here. Generally we only * schedule a BH, and do the rest of the io completion handling - * from rbd_finish_bh() which runs in a qemu context. + * from qemu_rbd_finish_bh() which runs in a qemu context. 
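[Illustrative aside, not part of this patch: a condensed sketch of the wait/wake pattern described above, assuming QEMU's aio_bh_schedule_oneshot(), aio_co_wake() and qemu_coroutine_yield(); the FakeTask/fake_* names are invented here and the snippet only builds inside the QEMU tree.]

    #include "qemu/osdep.h"
    #include "block/aio.h"
    #include "qemu/coroutine.h"

    /* FakeTask is an invented stand-in for the patch's RBDTask. */
    typedef struct {
        Coroutine *co;      /* coroutine that issued the request */
        bool complete;      /* set by the BH in the qemu AioContext */
        int64_t ret;
    } FakeTask;

    /* Runs in the qemu AioContext: mark the task done, wake the coroutine. */
    static void fake_finish_bh(void *opaque)
    {
        FakeTask *t = opaque;
        t->complete = true;
        aio_co_wake(t->co);
    }

    /* Runs in a librbd thread: record the result, schedule a BH, nothing else. */
    static void fake_completion_cb(AioContext *ctx, FakeTask *t, int64_t ret)
    {
        t->ret = ret;
        aio_bh_schedule_oneshot(ctx, fake_finish_bh, t);
    }

    /* Coroutine side: after submitting the request, yield until woken. */
    static int coroutine_fn fake_wait(FakeTask *t)
    {
        while (!t->complete) {
            qemu_coroutine_yield();
        }
        return t->ret < 0 ? (int)t->ret : 0;
    }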
*/ -static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb) +static void qemu_rbd_completion_cb(rbd_completion_t c, RBDTask *task) { - RBDAIOCB *acb = rcb->acb; - - rcb->ret = rbd_aio_get_return_value(c); + task->ret = rbd_aio_get_return_value(c); rbd_aio_release(c); - - replay_bh_schedule_oneshot_event(bdrv_get_aio_context(acb->common.bs), - rbd_finish_bh, rcb); -} - -static int rbd_aio_discard_wrapper(rbd_image_t image, - uint64_t off, - uint64_t len, - rbd_completion_t comp) -{ -#ifdef LIBRBD_SUPPORTS_DISCARD - return rbd_aio_discard(image, off, len, comp); -#else - return -ENOTSUP; -#endif -} - -static int rbd_aio_flush_wrapper(rbd_image_t image, - rbd_completion_t comp) -{ -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH - return rbd_aio_flush(image, comp); -#else - return -ENOTSUP; -#endif + aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), + qemu_rbd_finish_bh, task); } -static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, - int64_t off, - QEMUIOVector *qiov, - int64_t size, - BlockCompletionFunc *cb, - void *opaque, - RBDAIOCmd cmd) +static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov, + int flags, + RBDAIOCmd cmd) { - RBDAIOCB *acb; - RADOSCB *rcb = NULL; + BDRVRBDState *s = bs->opaque; + RBDTask task = { .bs = bs, .co = qemu_coroutine_self() }; rbd_completion_t c; int r; - BDRVRBDState *s = bs->opaque; - - acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque); - acb->cmd = cmd; - acb->qiov = qiov; - assert(!qiov || qiov->size == size); - - rcb = g_new(RADOSCB, 1); + assert(!qiov || qiov->size == bytes); - if (!LIBRBD_USE_IOVEC) { - if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) { - acb->bounce = NULL; - } else { - acb->bounce = qemu_try_blockalign(bs, qiov->size); - if (acb->bounce == NULL) { - goto failed; - } - } - if (cmd == RBD_AIO_WRITE) { - qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); - } - rcb->buf = acb->bounce; - } - - acb->ret = 0; - acb->error = 0; - acb->s = s; - - rcb->acb = acb; - rcb->s = acb->s; - rcb->size = size; - r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); + r = rbd_aio_create_completion(&task, + (rbd_callback_t) qemu_rbd_completion_cb, &c); if (r < 0) { - goto failed; + return r; } switch (cmd) { - case RBD_AIO_WRITE: { - /* - * RBD APIs don't allow us to write more than actual size, so in order - * to support growing images, we resize the image before write - * operations that exceed the current size. 
- */ - if (off + size > s->image_size) { - r = qemu_rbd_resize(bs, off + size); - if (r < 0) { - goto failed_completion; - } - } -#ifdef LIBRBD_SUPPORTS_IOVEC - r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); -#else - r = rbd_aio_write(s->image, off, size, rcb->buf, c); -#endif - break; - } case RBD_AIO_READ: -#ifdef LIBRBD_SUPPORTS_IOVEC - r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); -#else - r = rbd_aio_read(s->image, off, size, rcb->buf, c); -#endif + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, offset, c); + break; + case RBD_AIO_WRITE: + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, offset, c); break; case RBD_AIO_DISCARD: - r = rbd_aio_discard_wrapper(s->image, off, size, c); + r = rbd_aio_discard(s->image, offset, bytes, c); break; case RBD_AIO_FLUSH: - r = rbd_aio_flush_wrapper(s->image, c); + r = rbd_aio_flush(s->image, c); break; +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES + case RBD_AIO_WRITE_ZEROES: { + int zero_flags = 0; +#ifdef RBD_WRITE_ZEROES_FLAG_THICK_PROVISION + if (!(flags & BDRV_REQ_MAY_UNMAP)) { + zero_flags = RBD_WRITE_ZEROES_FLAG_THICK_PROVISION; + } +#endif + r = rbd_aio_write_zeroes(s->image, offset, bytes, c, zero_flags, 0); + break; + } +#endif default: r = -EINVAL; } if (r < 0) { - goto failed_completion; + error_report("rbd request failed early: cmd %d offset %" PRIu64 + " bytes %" PRIu64 " flags %d r %d (%s)", cmd, offset, + bytes, flags, r, strerror(-r)); + rbd_aio_release(c); + return r; } - return &acb->common; -failed_completion: - rbd_aio_release(c); -failed: - g_free(rcb); - if (!LIBRBD_USE_IOVEC) { - qemu_vfree(acb->bounce); + while (!task.complete) { + qemu_coroutine_yield(); } - qemu_aio_unref(acb); - return NULL; + if (task.ret < 0) { + error_report("rbd request failed: cmd %d offset %" PRIu64 " bytes %" + PRIu64 " flags %d task.ret %" PRIi64 " (%s)", cmd, offset, + bytes, flags, task.ret, strerror(-task.ret)); + return task.ret; + } + + /* zero pad short reads */ + if (cmd == RBD_AIO_READ && task.ret < qiov->size) { + qemu_iovec_memset(qiov, task.ret, 0, qiov->size - task.ret); + } + + return 0; } -static BlockAIOCB *qemu_rbd_aio_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags, - BlockCompletionFunc *cb, - void *opaque) +static int +coroutine_fn qemu_rbd_co_preadv(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { - return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque, - RBD_AIO_READ); + return qemu_rbd_start_co(bs, offset, bytes, qiov, flags, RBD_AIO_READ); } -static BlockAIOCB *qemu_rbd_aio_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags, - BlockCompletionFunc *cb, - void *opaque) +static int +coroutine_fn qemu_rbd_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { - return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque, - RBD_AIO_WRITE); + BDRVRBDState *s = bs->opaque; + /* + * RBD APIs don't allow us to write more than actual size, so in order + * to support growing images, we resize the image before write + * operations that exceed the current size. 
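[Illustrative aside, not part of this patch: a stand-alone sketch of the two size adjustments used by this driver, i.e. growing the image to offset + bytes before a write past the current end, and, on the create path earlier in this patch, padding a freshly formatted image by raw_size - effective_size so the requested raw_size stays usable. All numbers below are made up.]

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* (a) write past the current end: resize to offset + bytes first */
        uint64_t image_size = 1048576, offset = 1040384, bytes = 65536;
        if (offset + bytes > image_size) {
            image_size = offset + bytes;   /* what qemu_rbd_resize() is asked for */
        }
        printf("image grown to %" PRIu64 " bytes\n", image_size);

        /* (b) after formatting, the LUKS header consumes capacity, so the
         * create path grows the image by the difference (numbers invented) */
        uint64_t raw_size = 10485760;
        uint64_t effective_size = 6291456;   /* as reported by rbd_get_size() */
        uint64_t padded = raw_size + (raw_size - effective_size);
        printf("resize %" PRIu64 " -> %" PRIu64 "\n", raw_size, padded);
        return 0;
    }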
+ */ + if (offset + bytes > s->image_size) { + int r = qemu_rbd_resize(bs, offset + bytes); + if (r < 0) { + return r; + } + } + return qemu_rbd_start_co(bs, offset, bytes, qiov, flags, RBD_AIO_WRITE); } -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH -static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs, - BlockCompletionFunc *cb, - void *opaque) +static int coroutine_fn qemu_rbd_co_flush(BlockDriverState *bs) { - return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH); + return qemu_rbd_start_co(bs, 0, 0, NULL, 0, RBD_AIO_FLUSH); } -#else +static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs, + int64_t offset, int64_t bytes) +{ + return qemu_rbd_start_co(bs, offset, bytes, NULL, 0, RBD_AIO_DISCARD); +} -static int qemu_rbd_co_flush(BlockDriverState *bs) +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES +static int +coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int64_t bytes, BdrvRequestFlags flags) { -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1) - /* rbd_flush added in 0.1.1 */ - BDRVRBDState *s = bs->opaque; - return rbd_flush(s->image); -#else - return 0; -#endif + return qemu_rbd_start_co(bs, offset, bytes, NULL, flags, + RBD_AIO_WRITE_ZEROES); } #endif static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi) { BDRVRBDState *s = bs->opaque; - rbd_image_info_t info; + bdi->cluster_size = s->object_size; + return 0; +} + +static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs, + Error **errp) +{ + BDRVRBDState *s = bs->opaque; + ImageInfoSpecific *spec_info; + char buf[RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = {0}; int r; - r = rbd_stat(s->image, &info, sizeof(info)); - if (r < 0) { - return r; + if (s->image_size >= RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) { + r = rbd_read(s->image, 0, + RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN, buf); + if (r < 0) { + error_setg_errno(errp, -r, "cannot read image start for probe"); + return NULL; + } + } + + spec_info = g_new(ImageInfoSpecific, 1); + *spec_info = (ImageInfoSpecific){ + .type = IMAGE_INFO_SPECIFIC_KIND_RBD, + .u.rbd.data = g_new0(ImageInfoSpecificRbd, 1), + }; + + if (memcmp(buf, rbd_luks_header_verification, + RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) { + spec_info->u.rbd.data->encryption_format = + RBD_IMAGE_ENCRYPTION_FORMAT_LUKS; + spec_info->u.rbd.data->has_encryption_format = true; + } else if (memcmp(buf, rbd_luks2_header_verification, + RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) { + spec_info->u.rbd.data->encryption_format = + RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2; + spec_info->u.rbd.data->has_encryption_format = true; + } else { + spec_info->u.rbd.data->has_encryption_format = false; + } + + return spec_info; +} + +/* + * rbd_diff_iterate2 allows to interrupt the exection by returning a negative + * value in the callback routine. Choose a value that does not conflict with + * an existing exitcode and return it if we want to prematurely stop the + * execution because we detected a change in the allocation status. + */ +#define QEMU_RBD_EXIT_DIFF_ITERATE2 -9000 + +static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, + int exists, void *opaque) +{ + RBDDiffIterateReq *req = opaque; + + assert(req->offs + req->bytes <= offs); + /* + * we do not diff against a snapshot so we should never receive a callback + * for a hole. + */ + assert(exists); + + if (!req->exists && offs > req->offs) { + /* + * we started in an unallocated area and hit the first allocated + * block. 
req->bytes must be set to the length of the unallocated area + * before the allocated area. stop further processing. + */ + req->bytes = offs - req->offs; + return QEMU_RBD_EXIT_DIFF_ITERATE2; + } + + if (req->exists && offs > req->offs + req->bytes) { + /* + * we started in an allocated area and jumped over an unallocated area, + * req->bytes contains the length of the allocated area before the + * unallocated area. stop further processing. + */ + return QEMU_RBD_EXIT_DIFF_ITERATE2; } - bdi->cluster_size = info.obj_size; + req->bytes += len; + req->exists = true; + return 0; } +static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, + int64_t bytes, int64_t *pnum, + int64_t *map, + BlockDriverState **file) +{ + BDRVRBDState *s = bs->opaque; + int status, r; + RBDDiffIterateReq req = { .offs = offset }; + uint64_t features, flags; + + assert(offset + bytes <= s->image_size); + + /* default to all sectors allocated */ + status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; + *map = offset; + *file = bs; + *pnum = bytes; + + /* check if RBD image supports fast-diff */ + r = rbd_get_features(s->image, &features); + if (r < 0) { + return status; + } + if (!(features & RBD_FEATURE_FAST_DIFF)) { + return status; + } + + /* check if RBD fast-diff result is valid */ + r = rbd_get_flags(s->image, &flags); + if (r < 0) { + return status; + } + if (flags & RBD_FLAG_FAST_DIFF_INVALID) { + return status; + } + + r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, + qemu_rbd_diff_iterate_cb, &req); + if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) { + return status; + } + assert(req.bytes <= bytes); + if (!req.exists) { + if (r == 0) { + /* + * rbd_diff_iterate2 does not invoke callbacks for unallocated + * areas. This here catches the case where no callback was + * invoked at all (req.bytes == 0). 
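[Illustrative aside, not part of this patch: a stand-alone re-creation of the accumulation rules above, fed with two synthetic allocated extents; the Req type and the extent values are invented for illustration.]

    #include <assert.h>
    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EXIT_DIFF_ITERATE2 (-9000)

    /* Req mirrors the patch's RBDDiffIterateReq; it is re-declared here only
     * so the sketch builds on its own. */
    typedef struct {
        uint64_t offs;   /* start of the queried range */
        uint64_t bytes;  /* length of the run found so far */
        bool exists;     /* true once an allocated extent was seen */
    } Req;

    static int diff_cb(uint64_t offs, uint64_t len, int exists, Req *req)
    {
        assert(req->offs + req->bytes <= offs);
        assert(exists);
        if (!req->exists && offs > req->offs) {
            /* query began in a hole: report only the hole and stop */
            req->bytes = offs - req->offs;
            return EXIT_DIFF_ITERATE2;
        }
        if (req->exists && offs > req->offs + req->bytes) {
            /* allocated run ended at a gap: stop, keep the run length */
            return EXIT_DIFF_ITERATE2;
        }
        req->bytes += len;
        req->exists = true;
        return 0;
    }

    int main(void)
    {
        Req req = { .offs = 0 };
        diff_cb(0, 4096, 1, &req);       /* allocated extent [0, 4096) */
        diff_cb(4096, 4096, 1, &req);    /* adjacent extent [4096, 8192) */
        printf("allocated: %d, run length: %" PRIu64 "\n",
               req.exists, req.bytes);    /* prints: allocated: 1, run length: 8192 */
        return 0;
    }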
+ */ + assert(req.bytes == 0); + req.bytes = bytes; + } + status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID; + } + + *pnum = req.bytes; + return status; +} + static int64_t qemu_rbd_getlength(BlockDriverState *bs) { BDRVRBDState *s = bs->opaque; - rbd_image_info_t info; int r; - r = rbd_stat(s->image, &info, sizeof(info)); + r = rbd_get_size(s->image, &s->image_size); if (r < 0) { return r; } - return info.size; + return s->image_size; } static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, @@ -1200,19 +1519,6 @@ static int qemu_rbd_snap_list(BlockDriverState *bs, return snap_count; } -#ifdef LIBRBD_SUPPORTS_DISCARD -static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs, - int64_t offset, - int bytes, - BlockCompletionFunc *cb, - void *opaque) -{ - return rbd_start_aio(bs, offset, NULL, bytes, cb, opaque, - RBD_AIO_DISCARD); -} -#endif - -#ifdef LIBRBD_SUPPORTS_INVALIDATE static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs, Error **errp) { @@ -1222,7 +1528,6 @@ static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs, error_setg_errno(errp, -r, "Failed to invalidate the cache"); } } -#endif static QemuOptsList qemu_rbd_create_opts = { .name = "rbd-create-opts", @@ -1243,6 +1548,22 @@ static QemuOptsList qemu_rbd_create_opts = { .type = QEMU_OPT_STRING, .help = "ID of secret providing the password", }, + { + .name = "encrypt.format", + .type = QEMU_OPT_STRING, + .help = "Encrypt the image, format choices: 'luks', 'luks2'", + }, + { + .name = "encrypt.cipher-alg", + .type = QEMU_OPT_STRING, + .help = "Name of encryption cipher algorithm" + " (allowed values: aes-128, aes-256)", + }, + { + .name = "encrypt.key-secret", + .type = QEMU_OPT_STRING, + .help = "ID of secret providing LUKS passphrase", + }, { /* end of list */ } } }; @@ -1264,7 +1585,6 @@ static BlockDriver bdrv_rbd = { .format_name = "rbd", .instance_size = sizeof(BDRVRBDState), .bdrv_parse_filename = qemu_rbd_parse_filename, - .bdrv_refresh_limits = qemu_rbd_refresh_limits, .bdrv_file_open = qemu_rbd_open, .bdrv_close = qemu_rbd_close, .bdrv_reopen_prepare = qemu_rbd_reopen_prepare, @@ -1272,31 +1592,26 @@ static BlockDriver bdrv_rbd = { .bdrv_co_create_opts = qemu_rbd_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_get_info = qemu_rbd_getinfo, + .bdrv_get_specific_info = qemu_rbd_get_specific_info, .create_opts = &qemu_rbd_create_opts, .bdrv_getlength = qemu_rbd_getlength, .bdrv_co_truncate = qemu_rbd_co_truncate, .protocol_name = "rbd", - .bdrv_aio_preadv = qemu_rbd_aio_preadv, - .bdrv_aio_pwritev = qemu_rbd_aio_pwritev, - -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH - .bdrv_aio_flush = qemu_rbd_aio_flush, -#else + .bdrv_co_preadv = qemu_rbd_co_preadv, + .bdrv_co_pwritev = qemu_rbd_co_pwritev, .bdrv_co_flush_to_disk = qemu_rbd_co_flush, + .bdrv_co_pdiscard = qemu_rbd_co_pdiscard, +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES + .bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes, #endif - -#ifdef LIBRBD_SUPPORTS_DISCARD - .bdrv_aio_pdiscard = qemu_rbd_aio_pdiscard, -#endif + .bdrv_co_block_status = qemu_rbd_co_block_status, .bdrv_snapshot_create = qemu_rbd_snap_create, .bdrv_snapshot_delete = qemu_rbd_snap_remove, .bdrv_snapshot_list = qemu_rbd_snap_list, .bdrv_snapshot_goto = qemu_rbd_snap_rollback, -#ifdef LIBRBD_SUPPORTS_INVALIDATE .bdrv_co_invalidate_cache = qemu_rbd_co_invalidate_cache, -#endif .strong_runtime_opts = qemu_rbd_strong_runtime_opts, }; diff --git a/block/replication.c b/block/replication.c index 97be7ef4de5..55c8f894aa3 100644 --- 
a/block/replication.c +++ b/block/replication.c @@ -22,7 +22,7 @@ #include "sysemu/block-backend.h" #include "qapi/error.h" #include "qapi/qmp/qdict.h" -#include "replication.h" +#include "block/replication.h" typedef enum { BLOCK_REPLICATION_NONE, /* block replication is not started */ @@ -35,7 +35,6 @@ typedef enum { typedef struct BDRVReplicationState { ReplicationMode mode; ReplicationStage stage; - BdrvChild *active_disk; BlockJob *commit_job; BdrvChild *hidden_disk; BdrvChild *secondary_disk; @@ -150,7 +149,7 @@ static void replication_close(BlockDriverState *bs) if (s->stage == BLOCK_REPLICATION_FAILOVER) { commit_job = &s->commit_job->job; assert(commit_job->aio_context == qemu_get_current_aio_context()); - job_cancel_sync(commit_job); + job_cancel_sync(commit_job, false); } if (s->mode == REPLICATION_MODE_SECONDARY) { @@ -166,7 +165,12 @@ static void replication_child_perm(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { - *nperm = BLK_PERM_CONSISTENT_READ; + if (role & BDRV_CHILD_PRIMARY) { + *nperm = BLK_PERM_CONSISTENT_READ; + } else { + *nperm = 0; + } + if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) { *nperm |= BLK_PERM_WRITE; } @@ -307,8 +311,10 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs, return ret; } -static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp) +static void secondary_do_checkpoint(BlockDriverState *bs, Error **errp) { + BDRVReplicationState *s = bs->opaque; + BdrvChild *active_disk = bs->file; Error *local_err = NULL; int ret; @@ -323,13 +329,13 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp) return; } - if (!s->active_disk->bs->drv) { + if (!active_disk->bs->drv) { error_setg(errp, "Active disk %s is ejected", - s->active_disk->bs->node_name); + active_disk->bs->node_name); return; } - ret = bdrv_make_empty(s->active_disk, errp); + ret = bdrv_make_empty(active_disk, errp); if (ret < 0) { return; } @@ -340,17 +346,7 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp) return; } - BlockBackend *blk = blk_new(qemu_get_current_aio_context(), - BLK_PERM_WRITE, BLK_PERM_ALL); - blk_insert_bs(blk, s->hidden_disk->bs, &local_err); - if (local_err) { - error_propagate(errp, local_err); - blk_unref(blk); - return; - } - - ret = blk_make_empty(blk, errp); - blk_unref(blk); + ret = bdrv_make_empty(s->hidden_disk, errp); if (ret < 0) { return; } @@ -365,36 +361,51 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, Error **errp) { BDRVReplicationState *s = bs->opaque; + BdrvChild *hidden_disk, *secondary_disk; BlockReopenQueue *reopen_queue = NULL; + /* + * s->hidden_disk and s->secondary_disk may not be set yet, as they will + * only be set after the children are writable. 
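[Illustrative aside, not part of this patch: the chain walked below is active disk (bs->file) -> hidden disk (its backing) -> secondary disk (the hidden disk's backing). A minimal stand-alone model of that walk, using simplified stand-in types rather than QEMU's BlockDriverState/BdrvChild:]

    #include <stdio.h>

    /* Simplified stand-ins; QEMU's real types are BlockDriverState/BdrvChild. */
    typedef struct Node Node;
    typedef struct { Node *bs; } Child;
    struct Node {
        const char *name;
        Child *file;      /* replication node -> active disk */
        Child *backing;   /* active -> hidden, hidden -> secondary */
    };

    int main(void)
    {
        Node secondary = { "secondary", NULL, NULL };
        Child c_sec = { &secondary };
        Node hidden = { "hidden", NULL, &c_sec };
        Child c_hid = { &hidden };
        Node active = { "active", NULL, &c_hid };
        Child c_act = { &active };
        Node repl = { "replication", &c_act, NULL };

        Child *hidden_disk = repl.file->bs->backing;        /* the hidden disk */
        Child *secondary_disk = hidden_disk->bs->backing;   /* the secondary disk */
        printf("%s / %s\n", hidden_disk->bs->name, secondary_disk->bs->name);
        return 0;
    }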
+ */ + hidden_disk = bs->file->bs->backing; + secondary_disk = hidden_disk->bs->backing; + if (writable) { - s->orig_hidden_read_only = bdrv_is_read_only(s->hidden_disk->bs); - s->orig_secondary_read_only = bdrv_is_read_only(s->secondary_disk->bs); + s->orig_hidden_read_only = bdrv_is_read_only(hidden_disk->bs); + s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs); } - bdrv_subtree_drained_begin(s->hidden_disk->bs); - bdrv_subtree_drained_begin(s->secondary_disk->bs); + bdrv_subtree_drained_begin(hidden_disk->bs); + bdrv_subtree_drained_begin(secondary_disk->bs); if (s->orig_hidden_read_only) { QDict *opts = qdict_new(); qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); - reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, + reopen_queue = bdrv_reopen_queue(reopen_queue, hidden_disk->bs, opts, true); } if (s->orig_secondary_read_only) { QDict *opts = qdict_new(); qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); - reopen_queue = bdrv_reopen_queue(reopen_queue, s->secondary_disk->bs, + reopen_queue = bdrv_reopen_queue(reopen_queue, secondary_disk->bs, opts, true); } if (reopen_queue) { + AioContext *ctx = bdrv_get_aio_context(bs); + if (ctx != qemu_get_aio_context()) { + aio_context_release(ctx); + } bdrv_reopen_multiple(reopen_queue, errp); + if (ctx != qemu_get_aio_context()) { + aio_context_acquire(ctx); + } } - bdrv_subtree_drained_end(s->hidden_disk->bs); - bdrv_subtree_drained_end(s->secondary_disk->bs); + bdrv_subtree_drained_end(hidden_disk->bs); + bdrv_subtree_drained_end(secondary_disk->bs); } static void backup_job_cleanup(BlockDriverState *bs) @@ -451,6 +462,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, BlockDriverState *bs = rs->opaque; BDRVReplicationState *s; BlockDriverState *top_bs; + BdrvChild *active_disk, *hidden_disk, *secondary_disk; int64_t active_length, hidden_length, disk_length; AioContext *aio_context; Error *local_err = NULL; @@ -488,32 +500,31 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, case REPLICATION_MODE_PRIMARY: break; case REPLICATION_MODE_SECONDARY: - s->active_disk = bs->file; - if (!s->active_disk || !s->active_disk->bs || - !s->active_disk->bs->backing) { + active_disk = bs->file; + if (!active_disk || !active_disk->bs || !active_disk->bs->backing) { error_setg(errp, "Active disk doesn't have backing file"); aio_context_release(aio_context); return; } - s->hidden_disk = s->active_disk->bs->backing; - if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) { + hidden_disk = active_disk->bs->backing; + if (!hidden_disk->bs || !hidden_disk->bs->backing) { error_setg(errp, "Hidden disk doesn't have backing file"); aio_context_release(aio_context); return; } - s->secondary_disk = s->hidden_disk->bs->backing; - if (!s->secondary_disk->bs || !bdrv_has_blk(s->secondary_disk->bs)) { + secondary_disk = hidden_disk->bs->backing; + if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) { error_setg(errp, "The secondary disk doesn't have block backend"); aio_context_release(aio_context); return; } /* verify the length */ - active_length = bdrv_getlength(s->active_disk->bs); - hidden_length = bdrv_getlength(s->hidden_disk->bs); - disk_length = bdrv_getlength(s->secondary_disk->bs); + active_length = bdrv_getlength(active_disk->bs); + hidden_length = bdrv_getlength(hidden_disk->bs); + disk_length = bdrv_getlength(secondary_disk->bs); if (active_length < 0 || hidden_length < 0 || disk_length < 0 || active_length != hidden_length || hidden_length != 
disk_length) { error_setg(errp, "Active disk, hidden disk, secondary disk's length" @@ -523,10 +534,10 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, } /* Must be true, or the bdrv_getlength() calls would have failed */ - assert(s->active_disk->bs->drv && s->hidden_disk->bs->drv); + assert(active_disk->bs->drv && hidden_disk->bs->drv); - if (!s->active_disk->bs->drv->bdrv_make_empty || - !s->hidden_disk->bs->drv->bdrv_make_empty) { + if (!active_disk->bs->drv->bdrv_make_empty || + !hidden_disk->bs->drv->bdrv_make_empty) { error_setg(errp, "Active disk or hidden disk doesn't support make_empty"); aio_context_release(aio_context); @@ -541,6 +552,26 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, return; } + bdrv_ref(hidden_disk->bs); + s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk", + &child_of_bds, BDRV_CHILD_DATA, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + aio_context_release(aio_context); + return; + } + + bdrv_ref(secondary_disk->bs); + s->secondary_disk = bdrv_attach_child(bs, secondary_disk->bs, + "secondary disk", &child_of_bds, + BDRV_CHILD_DATA, &local_err); + if (local_err) { + error_propagate(errp, local_err); + aio_context_release(aio_context); + return; + } + /* start backup job now */ error_setg(&s->blocker, "Block device is in use by internal backup job"); @@ -579,7 +610,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, s->stage = BLOCK_REPLICATION_RUNNING; if (s->mode == REPLICATION_MODE_SECONDARY) { - secondary_do_checkpoint(s, errp); + secondary_do_checkpoint(bs, errp); } s->error = 0; @@ -608,7 +639,7 @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp) } if (s->mode == REPLICATION_MODE_SECONDARY) { - secondary_do_checkpoint(s, errp); + secondary_do_checkpoint(bs, errp); } aio_context_release(aio_context); } @@ -645,8 +676,9 @@ static void replication_done(void *opaque, int ret) if (ret == 0) { s->stage = BLOCK_REPLICATION_DONE; - s->active_disk = NULL; + bdrv_unref_child(bs, s->secondary_disk); s->secondary_disk = NULL; + bdrv_unref_child(bs, s->hidden_disk); s->hidden_disk = NULL; s->error = 0; } else { @@ -694,11 +726,11 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) * disk, secondary disk in backup_job_completed(). */ if (s->backup_job) { - job_cancel_sync(&s->backup_job->job); + job_cancel_sync(&s->backup_job->job, true); } if (!failover) { - secondary_do_checkpoint(s, errp); + secondary_do_checkpoint(bs, errp); s->stage = BLOCK_REPLICATION_DONE; aio_context_release(aio_context); return; @@ -706,7 +738,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) s->stage = BLOCK_REPLICATION_FAILOVER; s->commit_job = commit_active_start( - NULL, s->active_disk->bs, s->secondary_disk->bs, + NULL, bs->file->bs, s->secondary_disk->bs, JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT, NULL, replication_done, bs, true, errp); break; diff --git a/block/sheepdog.c b/block/sheepdog.c deleted file mode 100644 index a45c73826d4..00000000000 --- a/block/sheepdog.c +++ /dev/null @@ -1,3356 +0,0 @@ -/* - * Copyright (C) 2009-2010 Nippon Telegraph and Telephone Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License - * along with this program. 
If not, see . - * - * Contributions after 2012-01-13 are licensed under the terms of the - * GNU GPL, version 2 or (at your option) any later version. - */ - -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "qapi/error.h" -#include "qapi/qapi-visit-sockets.h" -#include "qapi/qapi-visit-block-core.h" -#include "qapi/qmp/qdict.h" -#include "qapi/qobject-input-visitor.h" -#include "qapi/qobject-output-visitor.h" -#include "qemu/uri.h" -#include "qemu/error-report.h" -#include "qemu/main-loop.h" -#include "qemu/module.h" -#include "qemu/option.h" -#include "qemu/sockets.h" -#include "block/block_int.h" -#include "block/qdict.h" -#include "sysemu/block-backend.h" -#include "qemu/bitops.h" -#include "qemu/cutils.h" -#include "trace.h" - -#define SD_PROTO_VER 0x01 - -#define SD_DEFAULT_ADDR "localhost" -#define SD_DEFAULT_PORT 7000 - -#define SD_OP_CREATE_AND_WRITE_OBJ 0x01 -#define SD_OP_READ_OBJ 0x02 -#define SD_OP_WRITE_OBJ 0x03 -/* 0x04 is used internally by Sheepdog */ - -#define SD_OP_NEW_VDI 0x11 -#define SD_OP_LOCK_VDI 0x12 -#define SD_OP_RELEASE_VDI 0x13 -#define SD_OP_GET_VDI_INFO 0x14 -#define SD_OP_READ_VDIS 0x15 -#define SD_OP_FLUSH_VDI 0x16 -#define SD_OP_DEL_VDI 0x17 -#define SD_OP_GET_CLUSTER_DEFAULT 0x18 - -#define SD_FLAG_CMD_WRITE 0x01 -#define SD_FLAG_CMD_COW 0x02 -#define SD_FLAG_CMD_CACHE 0x04 /* Writeback mode for cache */ -#define SD_FLAG_CMD_DIRECT 0x08 /* Don't use cache */ - -#define SD_RES_SUCCESS 0x00 /* Success */ -#define SD_RES_UNKNOWN 0x01 /* Unknown error */ -#define SD_RES_NO_OBJ 0x02 /* No object found */ -#define SD_RES_EIO 0x03 /* I/O error */ -#define SD_RES_VDI_EXIST 0x04 /* Vdi exists already */ -#define SD_RES_INVALID_PARMS 0x05 /* Invalid parameters */ -#define SD_RES_SYSTEM_ERROR 0x06 /* System error */ -#define SD_RES_VDI_LOCKED 0x07 /* Vdi is locked */ -#define SD_RES_NO_VDI 0x08 /* No vdi found */ -#define SD_RES_NO_BASE_VDI 0x09 /* No base vdi found */ -#define SD_RES_VDI_READ 0x0A /* Cannot read requested vdi */ -#define SD_RES_VDI_WRITE 0x0B /* Cannot write requested vdi */ -#define SD_RES_BASE_VDI_READ 0x0C /* Cannot read base vdi */ -#define SD_RES_BASE_VDI_WRITE 0x0D /* Cannot write base vdi */ -#define SD_RES_NO_TAG 0x0E /* Requested tag is not found */ -#define SD_RES_STARTUP 0x0F /* Sheepdog is on starting up */ -#define SD_RES_VDI_NOT_LOCKED 0x10 /* Vdi is not locked */ -#define SD_RES_SHUTDOWN 0x11 /* Sheepdog is shutting down */ -#define SD_RES_NO_MEM 0x12 /* Cannot allocate memory */ -#define SD_RES_FULL_VDI 0x13 /* we already have the maximum vdis */ -#define SD_RES_VER_MISMATCH 0x14 /* Protocol version mismatch */ -#define SD_RES_NO_SPACE 0x15 /* Server has no room for new objects */ -#define SD_RES_WAIT_FOR_FORMAT 0x16 /* Waiting for a format operation */ -#define SD_RES_WAIT_FOR_JOIN 0x17 /* Waiting for other nodes joining */ -#define SD_RES_JOIN_FAILED 0x18 /* Target node had failed to join sheepdog */ -#define SD_RES_HALT 0x19 /* Sheepdog is stopped serving IO request */ -#define SD_RES_READONLY 0x1A /* Object is read-only */ - -/* - * Object ID rules - * - * 0 - 19 (20 bits): data object space - * 20 - 31 (12 bits): reserved data object space - * 32 - 55 (24 bits): vdi object space - * 56 - 59 ( 4 bits): reserved vdi object space - * 60 - 63 ( 4 bits): object type identifier space - */ - -#define VDI_SPACE_SHIFT 32 -#define VDI_BIT (UINT64_C(1) << 63) -#define VMSTATE_BIT (UINT64_C(1) << 62) -#define MAX_DATA_OBJS (UINT64_C(1) << 20) -#define MAX_CHILDREN 1024 -#define SD_MAX_VDI_LEN 256 -#define SD_MAX_VDI_TAG_LEN 256 
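[Illustrative aside, not part of this patch: the object-ID layout documented above, shown packing and unpacking one data-object ID; the values are made up and the expressions mirror the removed vid_to_data_oid()/data_oid_to_idx()/oid_to_vid() helpers.]

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define VDI_SPACE_SHIFT 32
    #define VDI_BIT         (UINT64_C(1) << 63)
    #define MAX_DATA_OBJS   (UINT64_C(1) << 20)

    int main(void)
    {
        uint32_t vid = 0xabcd;   /* made-up VDI id */
        uint64_t idx = 7;        /* made-up data object index */

        /* vid_to_data_oid(): vdi id in bits 32..55, index in the low 20 bits */
        uint64_t data_oid = ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
        /* vid_to_vdi_oid(): same vdi id, with type bit 63 set */
        uint64_t vdi_oid = VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT);

        printf("data oid 0x%" PRIx64 ": idx %" PRIu64 ", vid 0x%" PRIx32 "\n",
               data_oid,
               data_oid & (MAX_DATA_OBJS - 1),                        /* data_oid_to_idx() */
               (uint32_t)((data_oid & ~VDI_BIT) >> VDI_SPACE_SHIFT)); /* oid_to_vid() */
        printf("vdi  oid 0x%" PRIx64 "\n", vdi_oid);
        return 0;
    }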
-#define SD_NR_VDIS (1U << 24) -#define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22) -#define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS) -#define SD_DEFAULT_BLOCK_SIZE_SHIFT 22 -/* - * For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and - * (SD_EC_MAX_STRIP - 1) for parity strips - * - * SD_MAX_COPIES is sum of number of data strips and parity strips. - */ -#define SD_EC_MAX_STRIP 16 -#define SD_MAX_COPIES (SD_EC_MAX_STRIP * 2 - 1) - -#define SD_INODE_SIZE (sizeof(SheepdogInode)) -#define CURRENT_VDI_ID 0 - -#define LOCK_TYPE_NORMAL 0 -#define LOCK_TYPE_SHARED 1 /* for iSCSI multipath */ - -typedef struct SheepdogReq { - uint8_t proto_ver; - uint8_t opcode; - uint16_t flags; - uint32_t epoch; - uint32_t id; - uint32_t data_length; - uint32_t opcode_specific[8]; -} SheepdogReq; - -typedef struct SheepdogRsp { - uint8_t proto_ver; - uint8_t opcode; - uint16_t flags; - uint32_t epoch; - uint32_t id; - uint32_t data_length; - uint32_t result; - uint32_t opcode_specific[7]; -} SheepdogRsp; - -typedef struct SheepdogObjReq { - uint8_t proto_ver; - uint8_t opcode; - uint16_t flags; - uint32_t epoch; - uint32_t id; - uint32_t data_length; - uint64_t oid; - uint64_t cow_oid; - uint8_t copies; - uint8_t copy_policy; - uint8_t reserved[6]; - uint64_t offset; -} SheepdogObjReq; - -typedef struct SheepdogObjRsp { - uint8_t proto_ver; - uint8_t opcode; - uint16_t flags; - uint32_t epoch; - uint32_t id; - uint32_t data_length; - uint32_t result; - uint8_t copies; - uint8_t copy_policy; - uint8_t reserved[2]; - uint32_t pad[6]; -} SheepdogObjRsp; - -typedef struct SheepdogVdiReq { - uint8_t proto_ver; - uint8_t opcode; - uint16_t flags; - uint32_t epoch; - uint32_t id; - uint32_t data_length; - uint64_t vdi_size; - uint32_t base_vdi_id; - uint8_t copies; - uint8_t copy_policy; - uint8_t store_policy; - uint8_t block_size_shift; - uint32_t snapid; - uint32_t type; - uint32_t pad[2]; -} SheepdogVdiReq; - -typedef struct SheepdogVdiRsp { - uint8_t proto_ver; - uint8_t opcode; - uint16_t flags; - uint32_t epoch; - uint32_t id; - uint32_t data_length; - uint32_t result; - uint32_t rsvd; - uint32_t vdi_id; - uint32_t pad[5]; -} SheepdogVdiRsp; - -typedef struct SheepdogClusterRsp { - uint8_t proto_ver; - uint8_t opcode; - uint16_t flags; - uint32_t epoch; - uint32_t id; - uint32_t data_length; - uint32_t result; - uint8_t nr_copies; - uint8_t copy_policy; - uint8_t block_size_shift; - uint8_t __pad1; - uint32_t __pad2[6]; -} SheepdogClusterRsp; - -typedef struct SheepdogInode { - char name[SD_MAX_VDI_LEN]; - char tag[SD_MAX_VDI_TAG_LEN]; - uint64_t ctime; - uint64_t snap_ctime; - uint64_t vm_clock_nsec; - uint64_t vdi_size; - uint64_t vm_state_size; - uint16_t copy_policy; - uint8_t nr_copies; - uint8_t block_size_shift; - uint32_t snap_id; - uint32_t vdi_id; - uint32_t parent_vdi_id; - uint32_t child_vdi_id[MAX_CHILDREN]; - uint32_t data_vdi_id[MAX_DATA_OBJS]; -} SheepdogInode; - -#define SD_INODE_HEADER_SIZE offsetof(SheepdogInode, data_vdi_id) - -/* - * 64 bit FNV-1a non-zero initial basis - */ -#define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL) - -static void deprecation_warning(void) -{ - static bool warned; - - if (!warned) { - warn_report("the sheepdog block driver is deprecated"); - warned = true; - } -} - -/* - * 64 bit Fowler/Noll/Vo FNV-1a hash code - */ -static inline uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval) -{ - unsigned char *bp = buf; - unsigned char *be = bp + len; - while (bp < be) { - hval ^= (uint64_t) *bp++; - hval += (hval << 1) + (hval << 4) + 
(hval << 5) + - (hval << 7) + (hval << 8) + (hval << 40); - } - return hval; -} - -static inline bool is_data_obj_writable(SheepdogInode *inode, unsigned int idx) -{ - return inode->vdi_id == inode->data_vdi_id[idx]; -} - -static inline bool is_data_obj(uint64_t oid) -{ - return !(VDI_BIT & oid); -} - -static inline uint64_t data_oid_to_idx(uint64_t oid) -{ - return oid & (MAX_DATA_OBJS - 1); -} - -static inline uint32_t oid_to_vid(uint64_t oid) -{ - return (oid & ~VDI_BIT) >> VDI_SPACE_SHIFT; -} - -static inline uint64_t vid_to_vdi_oid(uint32_t vid) -{ - return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT); -} - -static inline uint64_t vid_to_vmstate_oid(uint32_t vid, uint32_t idx) -{ - return VMSTATE_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT) | idx; -} - -static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx) -{ - return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx; -} - -static inline bool is_snapshot(struct SheepdogInode *inode) -{ - return !!inode->snap_ctime; -} - -static inline size_t count_data_objs(const struct SheepdogInode *inode) -{ - return DIV_ROUND_UP(inode->vdi_size, - (1UL << inode->block_size_shift)); -} - -typedef struct SheepdogAIOCB SheepdogAIOCB; -typedef struct BDRVSheepdogState BDRVSheepdogState; - -typedef struct AIOReq { - SheepdogAIOCB *aiocb; - unsigned int iov_offset; - - uint64_t oid; - uint64_t base_oid; - uint64_t offset; - unsigned int data_len; - uint8_t flags; - uint32_t id; - bool create; - - QLIST_ENTRY(AIOReq) aio_siblings; -} AIOReq; - -enum AIOCBState { - AIOCB_WRITE_UDATA, - AIOCB_READ_UDATA, - AIOCB_FLUSH_CACHE, - AIOCB_DISCARD_OBJ, -}; - -#define AIOCBOverlapping(x, y) \ - (!(x->max_affect_data_idx < y->min_affect_data_idx \ - || y->max_affect_data_idx < x->min_affect_data_idx)) - -struct SheepdogAIOCB { - BDRVSheepdogState *s; - - QEMUIOVector *qiov; - - int64_t sector_num; - int nb_sectors; - - int ret; - enum AIOCBState aiocb_type; - - Coroutine *coroutine; - int nr_pending; - - uint32_t min_affect_data_idx; - uint32_t max_affect_data_idx; - - /* - * The difference between affect_data_idx and dirty_data_idx: - * affect_data_idx represents range of index of all request types. - * dirty_data_idx represents range of index updated by COW requests. - * dirty_data_idx is used for updating an inode object. - */ - uint32_t min_dirty_data_idx; - uint32_t max_dirty_data_idx; - - QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings; -}; - -struct BDRVSheepdogState { - BlockDriverState *bs; - AioContext *aio_context; - - SheepdogInode inode; - - char name[SD_MAX_VDI_LEN]; - bool is_snapshot; - uint32_t cache_flags; - bool discard_supported; - - SocketAddress *addr; - int fd; - - CoMutex lock; - Coroutine *co_send; - Coroutine *co_recv; - - uint32_t aioreq_seq_num; - - /* Every aio request must be linked to either of these queues. 
*/ - QLIST_HEAD(, AIOReq) inflight_aio_head; - QLIST_HEAD(, AIOReq) failed_aio_head; - - CoMutex queue_lock; - CoQueue overlapping_queue; - QLIST_HEAD(, SheepdogAIOCB) inflight_aiocb_head; -}; - -typedef struct BDRVSheepdogReopenState { - int fd; - int cache_flags; -} BDRVSheepdogReopenState; - -static const char *sd_strerror(int err) -{ - int i; - - static const struct { - int err; - const char *desc; - } errors[] = { - {SD_RES_SUCCESS, "Success"}, - {SD_RES_UNKNOWN, "Unknown error"}, - {SD_RES_NO_OBJ, "No object found"}, - {SD_RES_EIO, "I/O error"}, - {SD_RES_VDI_EXIST, "VDI exists already"}, - {SD_RES_INVALID_PARMS, "Invalid parameters"}, - {SD_RES_SYSTEM_ERROR, "System error"}, - {SD_RES_VDI_LOCKED, "VDI is already locked"}, - {SD_RES_NO_VDI, "No vdi found"}, - {SD_RES_NO_BASE_VDI, "No base VDI found"}, - {SD_RES_VDI_READ, "Failed read the requested VDI"}, - {SD_RES_VDI_WRITE, "Failed to write the requested VDI"}, - {SD_RES_BASE_VDI_READ, "Failed to read the base VDI"}, - {SD_RES_BASE_VDI_WRITE, "Failed to write the base VDI"}, - {SD_RES_NO_TAG, "Failed to find the requested tag"}, - {SD_RES_STARTUP, "The system is still booting"}, - {SD_RES_VDI_NOT_LOCKED, "VDI isn't locked"}, - {SD_RES_SHUTDOWN, "The system is shutting down"}, - {SD_RES_NO_MEM, "Out of memory on the server"}, - {SD_RES_FULL_VDI, "We already have the maximum vdis"}, - {SD_RES_VER_MISMATCH, "Protocol version mismatch"}, - {SD_RES_NO_SPACE, "Server has no space for new objects"}, - {SD_RES_WAIT_FOR_FORMAT, "Sheepdog is waiting for a format operation"}, - {SD_RES_WAIT_FOR_JOIN, "Sheepdog is waiting for other nodes joining"}, - {SD_RES_JOIN_FAILED, "Target node had failed to join sheepdog"}, - {SD_RES_HALT, "Sheepdog is stopped serving IO request"}, - {SD_RES_READONLY, "Object is read-only"}, - }; - - for (i = 0; i < ARRAY_SIZE(errors); ++i) { - if (errors[i].err == err) { - return errors[i].desc; - } - } - - return "Invalid error code"; -} - -/* - * Sheepdog I/O handling: - * - * 1. In sd_co_rw_vector, we send the I/O requests to the server and - * link the requests to the inflight_list in the - * BDRVSheepdogState. The function yields while waiting for - * receiving the response. - * - * 2. We receive the response in aio_read_response, the fd handler to - * the sheepdog connection. We switch back to sd_co_readv/sd_writev - * after all the requests belonging to the AIOCB are finished. If - * needed, sd_co_writev will send another requests for the vdi object. 
- */ - -static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb, - uint64_t oid, unsigned int data_len, - uint64_t offset, uint8_t flags, bool create, - uint64_t base_oid, unsigned int iov_offset) -{ - AIOReq *aio_req; - - aio_req = g_malloc(sizeof(*aio_req)); - aio_req->aiocb = acb; - aio_req->iov_offset = iov_offset; - aio_req->oid = oid; - aio_req->base_oid = base_oid; - aio_req->offset = offset; - aio_req->data_len = data_len; - aio_req->flags = flags; - aio_req->id = s->aioreq_seq_num++; - aio_req->create = create; - - acb->nr_pending++; - return aio_req; -} - -static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb) -{ - SheepdogAIOCB *cb; - -retry: - QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) { - if (AIOCBOverlapping(acb, cb)) { - qemu_co_queue_wait(&s->overlapping_queue, &s->queue_lock); - goto retry; - } - } -} - -static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s, - QEMUIOVector *qiov, int64_t sector_num, int nb_sectors, - int type) -{ - uint32_t object_size; - - object_size = (UINT32_C(1) << s->inode.block_size_shift); - - acb->s = s; - - acb->qiov = qiov; - - acb->sector_num = sector_num; - acb->nb_sectors = nb_sectors; - - acb->coroutine = qemu_coroutine_self(); - acb->ret = 0; - acb->nr_pending = 0; - - acb->min_affect_data_idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size; - acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE + - acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size; - - acb->min_dirty_data_idx = UINT32_MAX; - acb->max_dirty_data_idx = 0; - acb->aiocb_type = type; - - if (type == AIOCB_FLUSH_CACHE) { - return; - } - - qemu_co_mutex_lock(&s->queue_lock); - wait_for_overlapping_aiocb(s, acb); - QLIST_INSERT_HEAD(&s->inflight_aiocb_head, acb, aiocb_siblings); - qemu_co_mutex_unlock(&s->queue_lock); -} - -static SocketAddress *sd_server_config(QDict *options, Error **errp) -{ - QDict *server = NULL; - Visitor *iv = NULL; - SocketAddress *saddr = NULL; - - qdict_extract_subqdict(options, &server, "server."); - - iv = qobject_input_visitor_new_flat_confused(server, errp); - if (!iv) { - goto done; - } - - if (!visit_type_SocketAddress(iv, NULL, &saddr, errp)) { - goto done; - } - -done: - visit_free(iv); - qobject_unref(server); - return saddr; -} - -/* Return -EIO in case of error, file descriptor on success */ -static int connect_to_sdog(BDRVSheepdogState *s, Error **errp) -{ - int fd; - - fd = socket_connect(s->addr, errp); - - if (s->addr->type == SOCKET_ADDRESS_TYPE_INET && fd >= 0) { - int ret = socket_set_nodelay(fd); - if (ret < 0) { - warn_report("can't set TCP_NODELAY: %s", strerror(errno)); - } - } - - if (fd >= 0) { - qemu_set_nonblock(fd); - } else { - fd = -EIO; - } - - return fd; -} - -/* Return 0 on success and -errno in case of error */ -static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data, - unsigned int *wlen) -{ - int ret; - - ret = qemu_co_send(sockfd, hdr, sizeof(*hdr)); - if (ret != sizeof(*hdr)) { - error_report("failed to send a req, %s", strerror(errno)); - return -errno; - } - - ret = qemu_co_send(sockfd, data, *wlen); - if (ret != *wlen) { - error_report("failed to send a req, %s", strerror(errno)); - return -errno; - } - - return ret; -} - -typedef struct SheepdogReqCo { - int sockfd; - BlockDriverState *bs; - AioContext *aio_context; - SheepdogReq *hdr; - void *data; - unsigned int *wlen; - unsigned int *rlen; - int ret; - bool finished; - Coroutine *co; -} SheepdogReqCo; - -static void restart_co_req(void *opaque) -{ - 
SheepdogReqCo *srco = opaque; - - aio_co_wake(srco->co); -} - -static coroutine_fn void do_co_req(void *opaque) -{ - int ret; - SheepdogReqCo *srco = opaque; - int sockfd = srco->sockfd; - SheepdogReq *hdr = srco->hdr; - void *data = srco->data; - unsigned int *wlen = srco->wlen; - unsigned int *rlen = srco->rlen; - - srco->co = qemu_coroutine_self(); - aio_set_fd_handler(srco->aio_context, sockfd, false, - NULL, restart_co_req, NULL, srco); - - ret = send_co_req(sockfd, hdr, data, wlen); - if (ret < 0) { - goto out; - } - - aio_set_fd_handler(srco->aio_context, sockfd, false, - restart_co_req, NULL, NULL, srco); - - ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr)); - if (ret != sizeof(*hdr)) { - error_report("failed to get a rsp, %s", strerror(errno)); - ret = -errno; - goto out; - } - - if (*rlen > hdr->data_length) { - *rlen = hdr->data_length; - } - - if (*rlen) { - ret = qemu_co_recv(sockfd, data, *rlen); - if (ret != *rlen) { - error_report("failed to get the data, %s", strerror(errno)); - ret = -errno; - goto out; - } - } - ret = 0; -out: - /* there is at most one request for this sockfd, so it is safe to - * set each handler to NULL. */ - aio_set_fd_handler(srco->aio_context, sockfd, false, - NULL, NULL, NULL, NULL); - - srco->co = NULL; - srco->ret = ret; - /* Set srco->finished before reading bs->wakeup. */ - qatomic_mb_set(&srco->finished, true); - if (srco->bs) { - bdrv_wakeup(srco->bs); - } -} - -/* - * Send the request to the sheep in a synchronous manner. - * - * Return 0 on success, -errno in case of error. - */ -static int do_req(int sockfd, BlockDriverState *bs, SheepdogReq *hdr, - void *data, unsigned int *wlen, unsigned int *rlen) -{ - Coroutine *co; - SheepdogReqCo srco = { - .sockfd = sockfd, - .aio_context = bs ? bdrv_get_aio_context(bs) : qemu_get_aio_context(), - .bs = bs, - .hdr = hdr, - .data = data, - .wlen = wlen, - .rlen = rlen, - .ret = 0, - .finished = false, - }; - - if (qemu_in_coroutine()) { - do_co_req(&srco); - } else { - co = qemu_coroutine_create(do_co_req, &srco); - if (bs) { - bdrv_coroutine_enter(bs, co); - BDRV_POLL_WHILE(bs, !srco.finished); - } else { - qemu_coroutine_enter(co); - while (!srco.finished) { - aio_poll(qemu_get_aio_context(), true); - } - } - } - - return srco.ret; -} - -static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, - struct iovec *iov, int niov, - enum AIOCBState aiocb_type); -static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req); -static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag); -static int get_sheep_fd(BDRVSheepdogState *s, Error **errp); -static void co_write_request(void *opaque); - -static coroutine_fn void reconnect_to_sdog(void *opaque) -{ - BDRVSheepdogState *s = opaque; - AIOReq *aio_req, *next; - - aio_set_fd_handler(s->aio_context, s->fd, false, NULL, - NULL, NULL, NULL); - close(s->fd); - s->fd = -1; - - /* Wait for outstanding write requests to be completed. */ - while (s->co_send != NULL) { - co_write_request(opaque); - } - - /* Try to reconnect the sheepdog server every one second. */ - while (s->fd < 0) { - Error *local_err = NULL; - s->fd = get_sheep_fd(s, &local_err); - if (s->fd < 0) { - trace_sheepdog_reconnect_to_sdog(); - error_report_err(local_err); - qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, NANOSECONDS_PER_SECOND); - } - }; - - /* - * Now we have to resend all the request in the inflight queue. 
However, - * resend_aioreq() can yield and newly created requests can be added to the - * inflight queue before the coroutine is resumed. To avoid mixing them, we - * have to move all the inflight requests to the failed queue before - * resend_aioreq() is called. - */ - qemu_co_mutex_lock(&s->queue_lock); - QLIST_FOREACH_SAFE(aio_req, &s->inflight_aio_head, aio_siblings, next) { - QLIST_REMOVE(aio_req, aio_siblings); - QLIST_INSERT_HEAD(&s->failed_aio_head, aio_req, aio_siblings); - } - - /* Resend all the failed aio requests. */ - while (!QLIST_EMPTY(&s->failed_aio_head)) { - aio_req = QLIST_FIRST(&s->failed_aio_head); - QLIST_REMOVE(aio_req, aio_siblings); - qemu_co_mutex_unlock(&s->queue_lock); - resend_aioreq(s, aio_req); - qemu_co_mutex_lock(&s->queue_lock); - } - qemu_co_mutex_unlock(&s->queue_lock); -} - -/* - * Receive responses of the I/O requests. - * - * This function is registered as a fd handler, and called from the - * main loop when s->fd is ready for reading responses. - */ -static void coroutine_fn aio_read_response(void *opaque) -{ - SheepdogObjRsp rsp; - BDRVSheepdogState *s = opaque; - int fd = s->fd; - int ret; - AIOReq *aio_req = NULL; - SheepdogAIOCB *acb; - uint64_t idx; - - /* read a header */ - ret = qemu_co_recv(fd, &rsp, sizeof(rsp)); - if (ret != sizeof(rsp)) { - error_report("failed to get the header, %s", strerror(errno)); - goto err; - } - - /* find the right aio_req from the inflight aio list */ - QLIST_FOREACH(aio_req, &s->inflight_aio_head, aio_siblings) { - if (aio_req->id == rsp.id) { - break; - } - } - if (!aio_req) { - error_report("cannot find aio_req %x", rsp.id); - goto err; - } - - acb = aio_req->aiocb; - - switch (acb->aiocb_type) { - case AIOCB_WRITE_UDATA: - if (!is_data_obj(aio_req->oid)) { - break; - } - idx = data_oid_to_idx(aio_req->oid); - - if (aio_req->create) { - /* - * If the object is newly created one, we need to update - * the vdi object (metadata object). min_dirty_data_idx - * and max_dirty_data_idx are changed to include updated - * index between them. - */ - if (rsp.result == SD_RES_SUCCESS) { - s->inode.data_vdi_id[idx] = s->inode.vdi_id; - acb->max_dirty_data_idx = MAX(idx, acb->max_dirty_data_idx); - acb->min_dirty_data_idx = MIN(idx, acb->min_dirty_data_idx); - } - } - break; - case AIOCB_READ_UDATA: - ret = qemu_co_recvv(fd, acb->qiov->iov, acb->qiov->niov, - aio_req->iov_offset, rsp.data_length); - if (ret != rsp.data_length) { - error_report("failed to get the data, %s", strerror(errno)); - goto err; - } - break; - case AIOCB_FLUSH_CACHE: - if (rsp.result == SD_RES_INVALID_PARMS) { - trace_sheepdog_aio_read_response(); - s->cache_flags = SD_FLAG_CMD_DIRECT; - rsp.result = SD_RES_SUCCESS; - } - break; - case AIOCB_DISCARD_OBJ: - switch (rsp.result) { - case SD_RES_INVALID_PARMS: - error_report("server doesn't support discard command"); - rsp.result = SD_RES_SUCCESS; - s->discard_supported = false; - break; - default: - break; - } - } - - /* No more data for this aio_req (reload_inode below uses its own file - * descriptor handler which doesn't use co_recv). 
- */ - s->co_recv = NULL; - - qemu_co_mutex_lock(&s->queue_lock); - QLIST_REMOVE(aio_req, aio_siblings); - qemu_co_mutex_unlock(&s->queue_lock); - - switch (rsp.result) { - case SD_RES_SUCCESS: - break; - case SD_RES_READONLY: - if (s->inode.vdi_id == oid_to_vid(aio_req->oid)) { - ret = reload_inode(s, 0, ""); - if (ret < 0) { - goto err; - } - } - if (is_data_obj(aio_req->oid)) { - aio_req->oid = vid_to_data_oid(s->inode.vdi_id, - data_oid_to_idx(aio_req->oid)); - } else { - aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id); - } - resend_aioreq(s, aio_req); - return; - default: - acb->ret = -EIO; - error_report("%s", sd_strerror(rsp.result)); - break; - } - - g_free(aio_req); - - if (!--acb->nr_pending) { - /* - * We've finished all requests which belong to the AIOCB, so - * we can switch back to sd_co_readv/writev now. - */ - aio_co_wake(acb->coroutine); - } - - return; - -err: - reconnect_to_sdog(opaque); -} - -static void co_read_response(void *opaque) -{ - BDRVSheepdogState *s = opaque; - - if (!s->co_recv) { - s->co_recv = qemu_coroutine_create(aio_read_response, opaque); - } - - aio_co_enter(s->aio_context, s->co_recv); -} - -static void co_write_request(void *opaque) -{ - BDRVSheepdogState *s = opaque; - - aio_co_wake(s->co_send); -} - -/* - * Return a socket descriptor to read/write objects. - * - * We cannot use this descriptor for other operations because - * the block driver may be on waiting response from the server. - */ -static int get_sheep_fd(BDRVSheepdogState *s, Error **errp) -{ - int fd; - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - return fd; - } - - aio_set_fd_handler(s->aio_context, fd, false, - co_read_response, NULL, NULL, s); - return fd; -} - -/* - * Parse numeric snapshot ID in @str - * If @str can't be parsed as number, return false. - * Else, if the number is zero or too large, set *@snapid to zero and - * return true. - * Else, set *@snapid to the number and return true. 
- */ -static bool sd_parse_snapid(const char *str, uint32_t *snapid) -{ - unsigned long ul; - int ret; - - ret = qemu_strtoul(str, NULL, 10, &ul); - if (ret == -ERANGE) { - ul = ret = 0; - } - if (ret) { - return false; - } - if (ul > UINT32_MAX) { - ul = 0; - } - - *snapid = ul; - return true; -} - -static bool sd_parse_snapid_or_tag(const char *str, - uint32_t *snapid, char tag[]) -{ - if (!sd_parse_snapid(str, snapid)) { - *snapid = 0; - if (g_strlcpy(tag, str, SD_MAX_VDI_TAG_LEN) >= SD_MAX_VDI_TAG_LEN) { - return false; - } - } else if (!*snapid) { - return false; - } else { - tag[0] = 0; - } - return true; -} - -typedef struct { - const char *path; /* non-null iff transport is tcp */ - const char *host; /* valid when transport is tcp */ - int port; /* valid when transport is tcp */ - char vdi[SD_MAX_VDI_LEN]; - char tag[SD_MAX_VDI_TAG_LEN]; - uint32_t snap_id; - /* Remainder is only for sd_config_done() */ - URI *uri; - QueryParams *qp; -} SheepdogConfig; - -static void sd_config_done(SheepdogConfig *cfg) -{ - if (cfg->qp) { - query_params_free(cfg->qp); - } - uri_free(cfg->uri); -} - -static void sd_parse_uri(SheepdogConfig *cfg, const char *filename, - Error **errp) -{ - Error *err = NULL; - QueryParams *qp = NULL; - bool is_unix; - URI *uri; - - memset(cfg, 0, sizeof(*cfg)); - - cfg->uri = uri = uri_parse(filename); - if (!uri) { - error_setg(&err, "invalid URI '%s'", filename); - goto out; - } - - /* transport */ - if (!g_strcmp0(uri->scheme, "sheepdog")) { - is_unix = false; - } else if (!g_strcmp0(uri->scheme, "sheepdog+tcp")) { - is_unix = false; - } else if (!g_strcmp0(uri->scheme, "sheepdog+unix")) { - is_unix = true; - } else { - error_setg(&err, "URI scheme must be 'sheepdog', 'sheepdog+tcp'," - " or 'sheepdog+unix'"); - goto out; - } - - if (uri->path == NULL || !strcmp(uri->path, "/")) { - error_setg(&err, "missing file path in URI"); - goto out; - } - if (g_strlcpy(cfg->vdi, uri->path + 1, SD_MAX_VDI_LEN) - >= SD_MAX_VDI_LEN) { - error_setg(&err, "VDI name is too long"); - goto out; - } - - cfg->qp = qp = query_params_parse(uri->query); - - if (is_unix) { - /* sheepdog+unix:///vdiname?socket=path */ - if (uri->server || uri->port) { - error_setg(&err, "URI scheme %s doesn't accept a server address", - uri->scheme); - goto out; - } - if (!qp->n) { - error_setg(&err, - "URI scheme %s requires query parameter 'socket'", - uri->scheme); - goto out; - } - if (qp->n != 1 || strcmp(qp->p[0].name, "socket")) { - error_setg(&err, "unexpected query parameters"); - goto out; - } - cfg->path = qp->p[0].value; - } else { - /* sheepdog[+tcp]://[host:port]/vdiname */ - if (qp->n) { - error_setg(&err, "unexpected query parameters"); - goto out; - } - cfg->host = uri->server; - cfg->port = uri->port; - } - - /* snapshot tag */ - if (uri->fragment) { - if (!sd_parse_snapid_or_tag(uri->fragment, - &cfg->snap_id, cfg->tag)) { - error_setg(&err, "'%s' is not a valid snapshot ID", - uri->fragment); - goto out; - } - } else { - cfg->snap_id = CURRENT_VDI_ID; /* search current vdi */ - } - -out: - if (err) { - error_propagate(errp, err); - sd_config_done(cfg); - } -} - -/* - * Parse a filename (old syntax) - * - * filename must be one of the following formats: - * 1. [vdiname] - * 2. [vdiname]:[snapid] - * 3. [vdiname]:[tag] - * 4. [hostname]:[port]:[vdiname] - * 5. [hostname]:[port]:[vdiname]:[snapid] - * 6. [hostname]:[port]:[vdiname]:[tag] - * - * You can boot from the snapshot images by specifying `snapid` or - * `tag'. 
- * - * You can run VMs outside the Sheepdog cluster by specifying - * `hostname' and `port' (experimental). - */ -static void parse_vdiname(SheepdogConfig *cfg, const char *filename, - Error **errp) -{ - Error *err = NULL; - char *p, *q, *uri; - const char *host_spec, *vdi_spec; - int nr_sep; - - strstart(filename, "sheepdog:", &filename); - p = q = g_strdup(filename); - - /* count the number of separators */ - nr_sep = 0; - while (*p) { - if (*p == ':') { - nr_sep++; - } - p++; - } - p = q; - - /* use the first two tokens as host_spec. */ - if (nr_sep >= 2) { - host_spec = p; - p = strchr(p, ':'); - p++; - p = strchr(p, ':'); - *p++ = '\0'; - } else { - host_spec = ""; - } - - vdi_spec = p; - - p = strchr(vdi_spec, ':'); - if (p) { - *p++ = '#'; - } - - uri = g_strdup_printf("sheepdog://%s/%s", host_spec, vdi_spec); - - /* - * FIXME We to escape URI meta-characters, e.g. "x?y=z" - * produces "sheepdog://x?y=z". Because of that ... - */ - sd_parse_uri(cfg, uri, &err); - if (err) { - /* - * ... this can fail, but the error message is misleading. - * Replace it by the traditional useless one until the - * escaping is fixed. - */ - error_free(err); - error_setg(errp, "Can't parse filename"); - } - - g_free(q); - g_free(uri); -} - -static void sd_parse_filename(const char *filename, QDict *options, - Error **errp) -{ - Error *err = NULL; - SheepdogConfig cfg; - char buf[32]; - - if (strstr(filename, "://")) { - sd_parse_uri(&cfg, filename, &err); - } else { - parse_vdiname(&cfg, filename, &err); - } - if (err) { - error_propagate(errp, err); - return; - } - - if (cfg.path) { - qdict_set_default_str(options, "server.path", cfg.path); - qdict_set_default_str(options, "server.type", "unix"); - } else { - qdict_set_default_str(options, "server.type", "inet"); - qdict_set_default_str(options, "server.host", - cfg.host ?: SD_DEFAULT_ADDR); - snprintf(buf, sizeof(buf), "%d", cfg.port ?: SD_DEFAULT_PORT); - qdict_set_default_str(options, "server.port", buf); - } - qdict_set_default_str(options, "vdi", cfg.vdi); - qdict_set_default_str(options, "tag", cfg.tag); - if (cfg.snap_id) { - snprintf(buf, sizeof(buf), "%d", cfg.snap_id); - qdict_set_default_str(options, "snap-id", buf); - } - - sd_config_done(&cfg); -} - -static int find_vdi_name(BDRVSheepdogState *s, const char *filename, - uint32_t snapid, const char *tag, uint32_t *vid, - bool lock, Error **errp) -{ - int ret, fd; - SheepdogVdiReq hdr; - SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; - unsigned int wlen, rlen = 0; - char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN] QEMU_NONSTRING; - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - return fd; - } - - /* This pair of strncpy calls ensures that the buffer is zero-filled, - * which is desirable since we'll soon be sending those bytes, and - * don't want the send_req to read uninitialized data. 
- */ - strncpy(buf, filename, SD_MAX_VDI_LEN); - strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN); - - memset(&hdr, 0, sizeof(hdr)); - if (lock) { - hdr.opcode = SD_OP_LOCK_VDI; - hdr.type = LOCK_TYPE_NORMAL; - } else { - hdr.opcode = SD_OP_GET_VDI_INFO; - } - wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN; - hdr.proto_ver = SD_PROTO_VER; - hdr.data_length = wlen; - hdr.snapid = snapid; - hdr.flags = SD_FLAG_CMD_WRITE; - - ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, buf, &wlen, &rlen); - if (ret) { - error_setg_errno(errp, -ret, "cannot get vdi info"); - goto out; - } - - if (rsp->result != SD_RES_SUCCESS) { - error_setg(errp, "cannot get vdi info, %s, %s %" PRIu32 " %s", - sd_strerror(rsp->result), filename, snapid, tag); - if (rsp->result == SD_RES_NO_VDI) { - ret = -ENOENT; - } else if (rsp->result == SD_RES_VDI_LOCKED) { - ret = -EBUSY; - } else { - ret = -EIO; - } - goto out; - } - *vid = rsp->vdi_id; - - ret = 0; -out: - closesocket(fd); - return ret; -} - -static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, - struct iovec *iov, int niov, - enum AIOCBState aiocb_type) -{ - int nr_copies = s->inode.nr_copies; - SheepdogObjReq hdr; - unsigned int wlen = 0; - int ret; - uint64_t oid = aio_req->oid; - unsigned int datalen = aio_req->data_len; - uint64_t offset = aio_req->offset; - uint8_t flags = aio_req->flags; - uint64_t old_oid = aio_req->base_oid; - bool create = aio_req->create; - - qemu_co_mutex_lock(&s->queue_lock); - QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - qemu_co_mutex_unlock(&s->queue_lock); - - if (!nr_copies) { - error_report("bug"); - } - - memset(&hdr, 0, sizeof(hdr)); - - switch (aiocb_type) { - case AIOCB_FLUSH_CACHE: - hdr.opcode = SD_OP_FLUSH_VDI; - break; - case AIOCB_READ_UDATA: - hdr.opcode = SD_OP_READ_OBJ; - hdr.flags = flags; - break; - case AIOCB_WRITE_UDATA: - if (create) { - hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; - } else { - hdr.opcode = SD_OP_WRITE_OBJ; - } - wlen = datalen; - hdr.flags = SD_FLAG_CMD_WRITE | flags; - break; - case AIOCB_DISCARD_OBJ: - hdr.opcode = SD_OP_WRITE_OBJ; - hdr.flags = SD_FLAG_CMD_WRITE | flags; - s->inode.data_vdi_id[data_oid_to_idx(oid)] = 0; - offset = offsetof(SheepdogInode, - data_vdi_id[data_oid_to_idx(oid)]); - oid = vid_to_vdi_oid(s->inode.vdi_id); - wlen = datalen = sizeof(uint32_t); - break; - } - - if (s->cache_flags) { - hdr.flags |= s->cache_flags; - } - - hdr.oid = oid; - hdr.cow_oid = old_oid; - hdr.copies = s->inode.nr_copies; - - hdr.data_length = datalen; - hdr.offset = offset; - - hdr.id = aio_req->id; - - qemu_co_mutex_lock(&s->lock); - s->co_send = qemu_coroutine_self(); - aio_set_fd_handler(s->aio_context, s->fd, false, - co_read_response, co_write_request, NULL, s); - socket_set_cork(s->fd, 1); - - /* send a header */ - ret = qemu_co_send(s->fd, &hdr, sizeof(hdr)); - if (ret != sizeof(hdr)) { - error_report("failed to send a req, %s", strerror(errno)); - goto out; - } - - if (wlen) { - ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen); - if (ret != wlen) { - error_report("failed to send a data, %s", strerror(errno)); - } - } -out: - socket_set_cork(s->fd, 0); - aio_set_fd_handler(s->aio_context, s->fd, false, - co_read_response, NULL, NULL, s); - s->co_send = NULL; - qemu_co_mutex_unlock(&s->lock); -} - -static int read_write_object(int fd, BlockDriverState *bs, char *buf, - uint64_t oid, uint8_t copies, - unsigned int datalen, uint64_t offset, - bool write, bool create, uint32_t cache_flags) -{ - SheepdogObjReq hdr; - SheepdogObjRsp 
*rsp = (SheepdogObjRsp *)&hdr; - unsigned int wlen, rlen; - int ret; - - memset(&hdr, 0, sizeof(hdr)); - - if (write) { - wlen = datalen; - rlen = 0; - hdr.flags = SD_FLAG_CMD_WRITE; - if (create) { - hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; - } else { - hdr.opcode = SD_OP_WRITE_OBJ; - } - } else { - wlen = 0; - rlen = datalen; - hdr.opcode = SD_OP_READ_OBJ; - } - - hdr.flags |= cache_flags; - - hdr.oid = oid; - hdr.data_length = datalen; - hdr.offset = offset; - hdr.copies = copies; - - ret = do_req(fd, bs, (SheepdogReq *)&hdr, buf, &wlen, &rlen); - if (ret) { - error_report("failed to send a request to the sheep"); - return ret; - } - - switch (rsp->result) { - case SD_RES_SUCCESS: - return 0; - default: - error_report("%s", sd_strerror(rsp->result)); - return -EIO; - } -} - -static int read_object(int fd, BlockDriverState *bs, char *buf, - uint64_t oid, uint8_t copies, - unsigned int datalen, uint64_t offset, - uint32_t cache_flags) -{ - return read_write_object(fd, bs, buf, oid, copies, - datalen, offset, false, - false, cache_flags); -} - -static int write_object(int fd, BlockDriverState *bs, char *buf, - uint64_t oid, uint8_t copies, - unsigned int datalen, uint64_t offset, bool create, - uint32_t cache_flags) -{ - return read_write_object(fd, bs, buf, oid, copies, - datalen, offset, true, - create, cache_flags); -} - -/* update inode with the latest state */ -static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag) -{ - Error *local_err = NULL; - SheepdogInode *inode; - int ret = 0, fd; - uint32_t vid = 0; - - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - return -EIO; - } - - inode = g_malloc(SD_INODE_HEADER_SIZE); - - ret = find_vdi_name(s, s->name, snapid, tag, &vid, false, &local_err); - if (ret) { - error_report_err(local_err); - goto out; - } - - ret = read_object(fd, s->bs, (char *)inode, vid_to_vdi_oid(vid), - s->inode.nr_copies, SD_INODE_HEADER_SIZE, 0, - s->cache_flags); - if (ret < 0) { - goto out; - } - - if (inode->vdi_id != s->inode.vdi_id) { - memcpy(&s->inode, inode, SD_INODE_HEADER_SIZE); - } - -out: - g_free(inode); - closesocket(fd); - - return ret; -} - -static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) -{ - SheepdogAIOCB *acb = aio_req->aiocb; - - aio_req->create = false; - - /* check whether this request becomes a CoW one */ - if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) { - int idx = data_oid_to_idx(aio_req->oid); - - if (is_data_obj_writable(&s->inode, idx)) { - goto out; - } - - if (s->inode.data_vdi_id[idx]) { - aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx); - aio_req->flags |= SD_FLAG_CMD_COW; - } - aio_req->create = true; - } -out: - if (is_data_obj(aio_req->oid)) { - add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, - acb->aiocb_type); - } else { - struct iovec iov; - iov.iov_base = &s->inode; - iov.iov_len = sizeof(s->inode); - add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA); - } -} - -static void sd_detach_aio_context(BlockDriverState *bs) -{ - BDRVSheepdogState *s = bs->opaque; - - aio_set_fd_handler(s->aio_context, s->fd, false, NULL, - NULL, NULL, NULL); -} - -static void sd_attach_aio_context(BlockDriverState *bs, - AioContext *new_context) -{ - BDRVSheepdogState *s = bs->opaque; - - s->aio_context = new_context; - aio_set_fd_handler(new_context, s->fd, false, - co_read_response, NULL, NULL, s); -} - -static QemuOptsList runtime_opts = { - .name = "sheepdog", - .head = 
QTAILQ_HEAD_INITIALIZER(runtime_opts.head), - .desc = { - { - .name = "vdi", - .type = QEMU_OPT_STRING, - }, - { - .name = "snap-id", - .type = QEMU_OPT_NUMBER, - }, - { - .name = "tag", - .type = QEMU_OPT_STRING, - }, - { /* end of list */ } - }, -}; - -static int sd_open(BlockDriverState *bs, QDict *options, int flags, - Error **errp) -{ - int ret, fd; - uint32_t vid = 0; - BDRVSheepdogState *s = bs->opaque; - const char *vdi, *snap_id_str, *tag; - uint64_t snap_id; - char *buf = NULL; - QemuOpts *opts; - - deprecation_warning(); - - s->bs = bs; - s->aio_context = bdrv_get_aio_context(bs); - - opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); - if (!qemu_opts_absorb_qdict(opts, options, errp)) { - ret = -EINVAL; - goto err_no_fd; - } - - s->addr = sd_server_config(options, errp); - if (!s->addr) { - ret = -EINVAL; - goto err_no_fd; - } - - vdi = qemu_opt_get(opts, "vdi"); - snap_id_str = qemu_opt_get(opts, "snap-id"); - snap_id = qemu_opt_get_number(opts, "snap-id", CURRENT_VDI_ID); - tag = qemu_opt_get(opts, "tag"); - - if (!vdi) { - error_setg(errp, "parameter 'vdi' is missing"); - ret = -EINVAL; - goto err_no_fd; - } - if (strlen(vdi) >= SD_MAX_VDI_LEN) { - error_setg(errp, "value of parameter 'vdi' is too long"); - ret = -EINVAL; - goto err_no_fd; - } - - if (snap_id > UINT32_MAX) { - snap_id = 0; - } - if (snap_id_str && !snap_id) { - error_setg(errp, "'snap-id=%s' is not a valid snapshot ID", - snap_id_str); - ret = -EINVAL; - goto err_no_fd; - } - - if (!tag) { - tag = ""; - } - if (strlen(tag) >= SD_MAX_VDI_TAG_LEN) { - error_setg(errp, "value of parameter 'tag' is too long"); - ret = -EINVAL; - goto err_no_fd; - } - - QLIST_INIT(&s->inflight_aio_head); - QLIST_INIT(&s->failed_aio_head); - QLIST_INIT(&s->inflight_aiocb_head); - - s->fd = get_sheep_fd(s, errp); - if (s->fd < 0) { - ret = s->fd; - goto err_no_fd; - } - - ret = find_vdi_name(s, vdi, (uint32_t)snap_id, tag, &vid, true, errp); - if (ret) { - goto err; - } - - /* - * QEMU block layer emulates writethrough cache as 'writeback + flush', so - * we always set SD_FLAG_CMD_CACHE (writeback cache) as default. 
- */ - s->cache_flags = SD_FLAG_CMD_CACHE; - if (flags & BDRV_O_NOCACHE) { - s->cache_flags = SD_FLAG_CMD_DIRECT; - } - s->discard_supported = true; - - if (snap_id || tag[0]) { - trace_sheepdog_open(vid); - s->is_snapshot = true; - } - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - ret = fd; - goto err; - } - - buf = g_malloc(SD_INODE_SIZE); - ret = read_object(fd, s->bs, buf, vid_to_vdi_oid(vid), - 0, SD_INODE_SIZE, 0, s->cache_flags); - - closesocket(fd); - - if (ret) { - error_setg(errp, "Can't read snapshot inode"); - goto err; - } - - memcpy(&s->inode, buf, sizeof(s->inode)); - - bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE; - bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; - pstrcpy(s->name, sizeof(s->name), vdi); - qemu_co_mutex_init(&s->lock); - qemu_co_mutex_init(&s->queue_lock); - qemu_co_queue_init(&s->overlapping_queue); - qemu_opts_del(opts); - g_free(buf); - return 0; - -err: - aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, - false, NULL, NULL, NULL, NULL); - closesocket(s->fd); -err_no_fd: - qemu_opts_del(opts); - g_free(buf); - return ret; -} - -static int sd_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, - Error **errp) -{ - BDRVSheepdogState *s = state->bs->opaque; - BDRVSheepdogReopenState *re_s; - int ret = 0; - - re_s = state->opaque = g_new0(BDRVSheepdogReopenState, 1); - - re_s->cache_flags = SD_FLAG_CMD_CACHE; - if (state->flags & BDRV_O_NOCACHE) { - re_s->cache_flags = SD_FLAG_CMD_DIRECT; - } - - re_s->fd = get_sheep_fd(s, errp); - if (re_s->fd < 0) { - ret = re_s->fd; - return ret; - } - - return ret; -} - -static void sd_reopen_commit(BDRVReopenState *state) -{ - BDRVSheepdogReopenState *re_s = state->opaque; - BDRVSheepdogState *s = state->bs->opaque; - - if (s->fd) { - aio_set_fd_handler(s->aio_context, s->fd, false, - NULL, NULL, NULL, NULL); - closesocket(s->fd); - } - - s->fd = re_s->fd; - s->cache_flags = re_s->cache_flags; - - g_free(state->opaque); - state->opaque = NULL; - - return; -} - -static void sd_reopen_abort(BDRVReopenState *state) -{ - BDRVSheepdogReopenState *re_s = state->opaque; - BDRVSheepdogState *s = state->bs->opaque; - - if (re_s == NULL) { - return; - } - - if (re_s->fd) { - aio_set_fd_handler(s->aio_context, re_s->fd, false, - NULL, NULL, NULL, NULL); - closesocket(re_s->fd); - } - - g_free(state->opaque); - state->opaque = NULL; - - return; -} - -static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, - Error **errp) -{ - SheepdogVdiReq hdr; - SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; - int fd, ret; - unsigned int wlen, rlen = 0; - char buf[SD_MAX_VDI_LEN]; - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - return fd; - } - - /* FIXME: would it be better to fail (e.g., return -EIO) when filename - * does not fit in buf? For now, just truncate and avoid buffer overrun. 
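Both this FIXME and the strncpy comment in find_vdi_name() come down to the same C pitfall: strncpy() zero-fills the destination but does not guarantee NUL termination, whereas QEMU's pstrcpy() always terminates and silently truncates. A minimal illustration (my_pstrcpy() is a local stand-in, not QEMU's implementation):

    #include <stdio.h>
    #include <string.h>

    static void my_pstrcpy(char *dest, size_t dest_len, const char *src)
    {
        if (dest_len == 0) {
            return;
        }
        strncpy(dest, src, dest_len - 1);
        dest[dest_len - 1] = '\0';          /* truncate, but always terminate */
    }

    int main(void)
    {
        char a[4], b[4];

        strncpy(a, "abcdef", sizeof(a));    /* a = "abcd", no terminating NUL */
        my_pstrcpy(b, sizeof(b), "abcdef"); /* b = "abc" plus NUL */
        printf("%.4s / %s\n", a, b);
        return 0;
    }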
- */ - memset(buf, 0, sizeof(buf)); - pstrcpy(buf, sizeof(buf), s->name); - - memset(&hdr, 0, sizeof(hdr)); - hdr.opcode = SD_OP_NEW_VDI; - hdr.base_vdi_id = s->inode.vdi_id; - - wlen = SD_MAX_VDI_LEN; - - hdr.flags = SD_FLAG_CMD_WRITE; - hdr.snapid = snapshot; - - hdr.data_length = wlen; - hdr.vdi_size = s->inode.vdi_size; - hdr.copy_policy = s->inode.copy_policy; - hdr.copies = s->inode.nr_copies; - hdr.block_size_shift = s->inode.block_size_shift; - - ret = do_req(fd, NULL, (SheepdogReq *)&hdr, buf, &wlen, &rlen); - - closesocket(fd); - - if (ret) { - error_setg_errno(errp, -ret, "create failed"); - return ret; - } - - if (rsp->result != SD_RES_SUCCESS) { - error_setg(errp, "%s, %s", sd_strerror(rsp->result), s->inode.name); - return -EIO; - } - - if (vdi_id) { - *vdi_id = rsp->vdi_id; - } - - return 0; -} - -static int sd_prealloc(BlockDriverState *bs, int64_t old_size, int64_t new_size, - Error **errp) -{ - BlockBackend *blk = NULL; - BDRVSheepdogState *base = bs->opaque; - unsigned long buf_size; - uint32_t idx, max_idx; - uint32_t object_size; - void *buf = NULL; - int ret; - - blk = blk_new_with_bs(bs, - BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, - BLK_PERM_ALL, errp); - - if (!blk) { - ret = -EPERM; - goto out_with_err_set; - } - - blk_set_allow_write_beyond_eof(blk, true); - - object_size = (UINT32_C(1) << base->inode.block_size_shift); - buf_size = MIN(object_size, SD_DATA_OBJ_SIZE); - buf = g_malloc0(buf_size); - - max_idx = DIV_ROUND_UP(new_size, buf_size); - - for (idx = old_size / buf_size; idx < max_idx; idx++) { - /* - * The created image can be a cloned image, so we need to read - * a data from the source image. - */ - ret = blk_pread(blk, idx * buf_size, buf, buf_size); - if (ret < 0) { - goto out; - } - ret = blk_pwrite(blk, idx * buf_size, buf, buf_size, 0); - if (ret < 0) { - goto out; - } - } - - ret = 0; -out: - if (ret < 0) { - error_setg_errno(errp, -ret, "Can't pre-allocate"); - } -out_with_err_set: - blk_unref(blk); - g_free(buf); - - return ret; -} - -static int sd_create_prealloc(BlockdevOptionsSheepdog *location, int64_t size, - Error **errp) -{ - BlockDriverState *bs; - Visitor *v; - QObject *obj = NULL; - QDict *qdict; - int ret; - - v = qobject_output_visitor_new(&obj); - visit_type_BlockdevOptionsSheepdog(v, NULL, &location, &error_abort); - visit_free(v); - - qdict = qobject_to(QDict, obj); - qdict_flatten(qdict); - - qdict_put_str(qdict, "driver", "sheepdog"); - - bs = bdrv_open(NULL, NULL, qdict, BDRV_O_PROTOCOL | BDRV_O_RDWR, errp); - if (bs == NULL) { - ret = -EIO; - goto fail; - } - - ret = sd_prealloc(bs, 0, size, errp); -fail: - bdrv_unref(bs); - qobject_unref(qdict); - return ret; -} - -static int parse_redundancy(BDRVSheepdogState *s, SheepdogRedundancy *opt) -{ - struct SheepdogInode *inode = &s->inode; - - switch (opt->type) { - case SHEEPDOG_REDUNDANCY_TYPE_FULL: - if (opt->u.full.copies > SD_MAX_COPIES || opt->u.full.copies < 1) { - return -EINVAL; - } - inode->copy_policy = 0; - inode->nr_copies = opt->u.full.copies; - return 0; - - case SHEEPDOG_REDUNDANCY_TYPE_ERASURE_CODED: - { - int64_t copy = opt->u.erasure_coded.data_strips; - int64_t parity = opt->u.erasure_coded.parity_strips; - - if (copy != 2 && copy != 4 && copy != 8 && copy != 16) { - return -EINVAL; - } - - if (parity >= SD_EC_MAX_STRIP || parity < 1) { - return -EINVAL; - } - - /* - * 4 bits for parity and 4 bits for data. 
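A worked example of the copy_policy packing described above and computed just below: the data-strip count (always a power of two between 2 and 16) is halved so it fits in the upper four bits, and the parity count occupies the lower four. The decode step here is hypothetical; the driver itself only ever encodes.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int data_strips = 8, parity_strips = 3;        /* "-o redundancy=8:3" */
        uint8_t copy_policy = ((data_strips / 2) << 4) + parity_strips;

        /* decode again, e.g. for display purposes */
        int decoded_data = (copy_policy >> 4) * 2;
        int decoded_parity = copy_policy & 0x0f;

        printf("copy_policy=0x%02x data=%d parity=%d copies=%d\n",
               copy_policy, decoded_data, decoded_parity,
               decoded_data + decoded_parity);         /* 0x43, 8, 3, 11 */
        return 0;
    }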
- * We have to compress upper data bits because it can't represent 16 - */ - inode->copy_policy = ((copy / 2) << 4) + parity; - inode->nr_copies = copy + parity; - return 0; - } - - default: - g_assert_not_reached(); - } - - return -EINVAL; -} - -/* - * Sheepdog support two kinds of redundancy, full replication and erasure - * coding. - * - * # create a fully replicated vdi with x copies - * -o redundancy=x (1 <= x <= SD_MAX_COPIES) - * - * # create a erasure coded vdi with x data strips and y parity strips - * -o redundancy=x:y (x must be one of {2,4,8,16} and 1 <= y < SD_EC_MAX_STRIP) - */ -static SheepdogRedundancy *parse_redundancy_str(const char *opt) -{ - SheepdogRedundancy *redundancy; - const char *n1, *n2; - long copy, parity; - char p[10]; - int ret; - - pstrcpy(p, sizeof(p), opt); - n1 = strtok(p, ":"); - n2 = strtok(NULL, ":"); - - if (!n1) { - return NULL; - } - - ret = qemu_strtol(n1, NULL, 10, ©); - if (ret < 0) { - return NULL; - } - - redundancy = g_new0(SheepdogRedundancy, 1); - if (!n2) { - *redundancy = (SheepdogRedundancy) { - .type = SHEEPDOG_REDUNDANCY_TYPE_FULL, - .u.full.copies = copy, - }; - } else { - ret = qemu_strtol(n2, NULL, 10, &parity); - if (ret < 0) { - g_free(redundancy); - return NULL; - } - - *redundancy = (SheepdogRedundancy) { - .type = SHEEPDOG_REDUNDANCY_TYPE_ERASURE_CODED, - .u.erasure_coded = { - .data_strips = copy, - .parity_strips = parity, - }, - }; - } - - return redundancy; -} - -static int parse_block_size_shift(BDRVSheepdogState *s, - BlockdevCreateOptionsSheepdog *opts) -{ - struct SheepdogInode *inode = &s->inode; - uint64_t object_size; - int obj_order; - - if (opts->has_object_size) { - object_size = opts->object_size; - - if ((object_size - 1) & object_size) { /* not a power of 2? */ - return -EINVAL; - } - obj_order = ctz32(object_size); - if (obj_order < 20 || obj_order > 31) { - return -EINVAL; - } - inode->block_size_shift = (uint8_t)obj_order; - } - - return 0; -} - -static int sd_co_create(BlockdevCreateOptions *options, Error **errp) -{ - BlockdevCreateOptionsSheepdog *opts = &options->u.sheepdog; - int ret = 0; - uint32_t vid = 0; - char *backing_file = NULL; - char *buf = NULL; - BDRVSheepdogState *s; - uint64_t max_vdi_size; - bool prealloc = false; - - assert(options->driver == BLOCKDEV_DRIVER_SHEEPDOG); - - deprecation_warning(); - - s = g_new0(BDRVSheepdogState, 1); - - /* Steal SocketAddress from QAPI, set NULL to prevent double free */ - s->addr = opts->location->server; - opts->location->server = NULL; - - if (strlen(opts->location->vdi) >= sizeof(s->name)) { - error_setg(errp, "'vdi' string too long"); - ret = -EINVAL; - goto out; - } - pstrcpy(s->name, sizeof(s->name), opts->location->vdi); - - s->inode.vdi_size = opts->size; - backing_file = opts->backing_file; - - if (!opts->has_preallocation) { - opts->preallocation = PREALLOC_MODE_OFF; - } - switch (opts->preallocation) { - case PREALLOC_MODE_OFF: - prealloc = false; - break; - case PREALLOC_MODE_FULL: - prealloc = true; - break; - default: - error_setg(errp, "Preallocation mode not supported for Sheepdog"); - ret = -EINVAL; - goto out; - } - - if (opts->has_redundancy) { - ret = parse_redundancy(s, opts->redundancy); - if (ret < 0) { - error_setg(errp, "Invalid redundancy mode"); - goto out; - } - } - ret = parse_block_size_shift(s, opts); - if (ret < 0) { - error_setg(errp, "Invalid object_size." 
- " obect_size needs to be power of 2" - " and be limited from 2^20 to 2^31"); - goto out; - } - - if (opts->has_backing_file) { - BlockBackend *blk; - BDRVSheepdogState *base; - BlockDriver *drv; - - /* Currently, only Sheepdog backing image is supported. */ - drv = bdrv_find_protocol(opts->backing_file, true, NULL); - if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) { - error_setg(errp, "backing_file must be a sheepdog image"); - ret = -EINVAL; - goto out; - } - - blk = blk_new_open(opts->backing_file, NULL, NULL, - BDRV_O_PROTOCOL, errp); - if (blk == NULL) { - ret = -EIO; - goto out; - } - - base = blk_bs(blk)->opaque; - - if (!is_snapshot(&base->inode)) { - error_setg(errp, "cannot clone from a non snapshot vdi"); - blk_unref(blk); - ret = -EINVAL; - goto out; - } - s->inode.vdi_id = base->inode.vdi_id; - blk_unref(blk); - } - - s->aio_context = qemu_get_aio_context(); - - /* if block_size_shift is not specified, get cluster default value */ - if (s->inode.block_size_shift == 0) { - SheepdogVdiReq hdr; - SheepdogClusterRsp *rsp = (SheepdogClusterRsp *)&hdr; - int fd; - unsigned int wlen = 0, rlen = 0; - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - ret = fd; - goto out; - } - - memset(&hdr, 0, sizeof(hdr)); - hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT; - hdr.proto_ver = SD_PROTO_VER; - - ret = do_req(fd, NULL, (SheepdogReq *)&hdr, - NULL, &wlen, &rlen); - closesocket(fd); - if (ret) { - error_setg_errno(errp, -ret, "failed to get cluster default"); - goto out; - } - if (rsp->result == SD_RES_SUCCESS) { - s->inode.block_size_shift = rsp->block_size_shift; - } else { - s->inode.block_size_shift = SD_DEFAULT_BLOCK_SIZE_SHIFT; - } - } - - max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS; - - if (s->inode.vdi_size > max_vdi_size) { - error_setg(errp, "An image is too large." 
- " The maximum image size is %"PRIu64 "GB", - max_vdi_size / 1024 / 1024 / 1024); - ret = -EINVAL; - goto out; - } - - ret = do_sd_create(s, &vid, 0, errp); - if (ret) { - goto out; - } - - if (prealloc) { - ret = sd_create_prealloc(opts->location, opts->size, errp); - } -out: - g_free(backing_file); - g_free(buf); - g_free(s->addr); - g_free(s); - return ret; -} - -static int coroutine_fn sd_co_create_opts(BlockDriver *drv, - const char *filename, - QemuOpts *opts, - Error **errp) -{ - BlockdevCreateOptions *create_options = NULL; - QDict *qdict = NULL, *location_qdict; - Visitor *v; - char *redundancy = NULL; - Error *local_err = NULL; - int ret; - char *backing_fmt = NULL; - - redundancy = qemu_opt_get_del(opts, BLOCK_OPT_REDUNDANCY); - backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT); - - if (backing_fmt && strcmp(backing_fmt, "sheepdog") != 0) { - error_setg(errp, "backing_file must be a sheepdog image"); - ret = -EINVAL; - goto fail; - } - - qdict = qemu_opts_to_qdict(opts, NULL); - qdict_put_str(qdict, "driver", "sheepdog"); - - location_qdict = qdict_new(); - qdict_put(qdict, "location", location_qdict); - - sd_parse_filename(filename, location_qdict, &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto fail; - } - - qdict_flatten(qdict); - - /* Change legacy command line options into QMP ones */ - static const QDictRenames opt_renames[] = { - { BLOCK_OPT_BACKING_FILE, "backing-file" }, - { BLOCK_OPT_OBJECT_SIZE, "object-size" }, - { NULL, NULL }, - }; - - if (!qdict_rename_keys(qdict, opt_renames, errp)) { - ret = -EINVAL; - goto fail; - } - - /* Get the QAPI object */ - v = qobject_input_visitor_new_flat_confused(qdict, errp); - if (!v) { - ret = -EINVAL; - goto fail; - } - - visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp); - visit_free(v); - if (!create_options) { - ret = -EINVAL; - goto fail; - } - - assert(create_options->driver == BLOCKDEV_DRIVER_SHEEPDOG); - create_options->u.sheepdog.size = - ROUND_UP(create_options->u.sheepdog.size, BDRV_SECTOR_SIZE); - - if (redundancy) { - create_options->u.sheepdog.has_redundancy = true; - create_options->u.sheepdog.redundancy = - parse_redundancy_str(redundancy); - if (create_options->u.sheepdog.redundancy == NULL) { - error_setg(errp, "Invalid redundancy mode"); - ret = -EINVAL; - goto fail; - } - } - - ret = sd_co_create(create_options, errp); -fail: - qapi_free_BlockdevCreateOptions(create_options); - qobject_unref(qdict); - g_free(redundancy); - g_free(backing_fmt); - return ret; -} - -static void sd_close(BlockDriverState *bs) -{ - Error *local_err = NULL; - BDRVSheepdogState *s = bs->opaque; - SheepdogVdiReq hdr; - SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; - unsigned int wlen, rlen = 0; - int fd, ret; - - trace_sheepdog_close(s->name); - - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - return; - } - - memset(&hdr, 0, sizeof(hdr)); - - hdr.opcode = SD_OP_RELEASE_VDI; - hdr.type = LOCK_TYPE_NORMAL; - hdr.base_vdi_id = s->inode.vdi_id; - wlen = strlen(s->name) + 1; - hdr.data_length = wlen; - hdr.flags = SD_FLAG_CMD_WRITE; - - ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, - s->name, &wlen, &rlen); - - closesocket(fd); - - if (!ret && rsp->result != SD_RES_SUCCESS && - rsp->result != SD_RES_VDI_NOT_LOCKED) { - error_report("%s, %s", sd_strerror(rsp->result), s->name); - } - - aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, - false, NULL, NULL, NULL, NULL); - closesocket(s->fd); - qapi_free_SocketAddress(s->addr); -} - 
-static int64_t sd_getlength(BlockDriverState *bs) -{ - BDRVSheepdogState *s = bs->opaque; - - return s->inode.vdi_size; -} - -static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset, - bool exact, PreallocMode prealloc, - BdrvRequestFlags flags, Error **errp) -{ - BDRVSheepdogState *s = bs->opaque; - int ret, fd; - unsigned int datalen; - uint64_t max_vdi_size; - int64_t old_size = s->inode.vdi_size; - - if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_FULL) { - error_setg(errp, "Unsupported preallocation mode '%s'", - PreallocMode_str(prealloc)); - return -ENOTSUP; - } - - max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS; - if (offset < old_size) { - error_setg(errp, "shrinking is not supported"); - return -EINVAL; - } else if (offset > max_vdi_size) { - error_setg(errp, "too big image size"); - return -EINVAL; - } - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - return fd; - } - - /* we don't need to update entire object */ - datalen = SD_INODE_HEADER_SIZE; - s->inode.vdi_size = offset; - ret = write_object(fd, s->bs, (char *)&s->inode, - vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies, - datalen, 0, false, s->cache_flags); - close(fd); - - if (ret < 0) { - error_setg_errno(errp, -ret, "failed to update an inode"); - return ret; - } - - if (prealloc == PREALLOC_MODE_FULL) { - ret = sd_prealloc(bs, old_size, offset, errp); - if (ret < 0) { - return ret; - } - } - - return 0; -} - -/* - * This function is called after writing data objects. If we need to - * update metadata, this sends a write request to the vdi object. - */ -static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) -{ - BDRVSheepdogState *s = acb->s; - struct iovec iov; - AIOReq *aio_req; - uint32_t offset, data_len, mn, mx; - - mn = acb->min_dirty_data_idx; - mx = acb->max_dirty_data_idx; - if (mn <= mx) { - /* we need to update the vdi object. 
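The update window that sd_write_done() computes just below covers only the dirty slice of the data_vdi_id[] array, which sits at the end of the inode, so the write-back to the vdi object stays small. A toy inode makes the arithmetic concrete (the struct layout is illustrative, not SheepdogInode's real one):

    #include <stdint.h>
    #include <stdio.h>

    struct toy_inode {
        char     name[8];
        uint64_t vdi_size;
        uint32_t data_vdi_id[16];       /* last member, as in SheepdogInode */
    };

    int main(void)
    {
        uint32_t mn = 3, mx = 6;        /* dirty index range left by a write */
        size_t offset = sizeof(struct toy_inode)
                        - sizeof(((struct toy_inode *)0)->data_vdi_id)
                        + mn * sizeof(uint32_t);
        size_t data_len = (mx - mn + 1) * sizeof(uint32_t);

        /* only bytes [offset, offset + data_len) of the inode are rewritten */
        printf("offset=%zu data_len=%zu\n", offset, data_len);
        return 0;
    }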
*/ - ++acb->nr_pending; - offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) + - mn * sizeof(s->inode.data_vdi_id[0]); - data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]); - - acb->min_dirty_data_idx = UINT32_MAX; - acb->max_dirty_data_idx = 0; - - iov.iov_base = &s->inode; - iov.iov_len = sizeof(s->inode); - aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), - data_len, offset, 0, false, 0, offset); - add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA); - if (--acb->nr_pending) { - qemu_coroutine_yield(); - } - } -} - -/* Delete current working VDI on the snapshot chain */ -static bool sd_delete(BDRVSheepdogState *s) -{ - Error *local_err = NULL; - unsigned int wlen = SD_MAX_VDI_LEN, rlen = 0; - SheepdogVdiReq hdr = { - .opcode = SD_OP_DEL_VDI, - .base_vdi_id = s->inode.vdi_id, - .data_length = wlen, - .flags = SD_FLAG_CMD_WRITE, - }; - SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; - int fd, ret; - - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - return false; - } - - ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, - s->name, &wlen, &rlen); - closesocket(fd); - if (ret) { - return false; - } - switch (rsp->result) { - case SD_RES_NO_VDI: - error_report("%s was already deleted", s->name); - /* fall through */ - case SD_RES_SUCCESS: - break; - default: - error_report("%s, %s", sd_strerror(rsp->result), s->name); - return false; - } - - return true; -} - -/* - * Create a writable VDI from a snapshot - */ -static int sd_create_branch(BDRVSheepdogState *s) -{ - Error *local_err = NULL; - int ret, fd; - uint32_t vid; - char *buf; - bool deleted; - - trace_sheepdog_create_branch_snapshot(s->inode.vdi_id); - - buf = g_malloc(SD_INODE_SIZE); - - /* - * Even If deletion fails, we will just create extra snapshot based on - * the working VDI which was supposed to be deleted. So no need to - * false bail out. - */ - deleted = sd_delete(s); - ret = do_sd_create(s, &vid, !deleted, &local_err); - if (ret) { - error_report_err(local_err); - goto out; - } - - trace_sheepdog_create_branch_created(vid); - - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - ret = fd; - goto out; - } - - ret = read_object(fd, s->bs, buf, vid_to_vdi_oid(vid), - s->inode.nr_copies, SD_INODE_SIZE, 0, s->cache_flags); - - closesocket(fd); - - if (ret < 0) { - goto out; - } - - memcpy(&s->inode, buf, sizeof(s->inode)); - - s->is_snapshot = false; - ret = 0; - trace_sheepdog_create_branch_new(s->inode.vdi_id); - -out: - g_free(buf); - - return ret; -} - -/* - * Send I/O requests to the server. - * - * This function sends requests to the server, links the requests to - * the inflight_list in BDRVSheepdogState, and exits without - * waiting the response. The responses are received in the - * `aio_read_response' function which is called from the main loop as - * a fd handler. - * - * Returns 1 when we need to wait a response, 0 when there is no sent - * request and -errno in error cases. - */ -static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb) -{ - int ret = 0; - unsigned long len, done = 0, total = acb->nb_sectors * BDRV_SECTOR_SIZE; - unsigned long idx; - uint32_t object_size; - uint64_t oid; - uint64_t offset; - BDRVSheepdogState *s = acb->s; - SheepdogInode *inode = &s->inode; - AIOReq *aio_req; - - if (acb->aiocb_type == AIOCB_WRITE_UDATA && s->is_snapshot) { - /* - * In the case we open the snapshot VDI, Sheepdog creates the - * writable VDI when we do a write operation first. 
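The splitting loop that follows is the core of sd_co_rw_vector(): a guest request is cut at object boundaries, the first piece may start mid-object, and every later piece starts at offset 0. A standalone sketch of the same arithmetic, using 4 KiB objects purely to keep the numbers small:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        uint64_t object_size = 4096;            /* stand-in for 1 << block_size_shift */
        uint64_t start = 5000, total = 10000;   /* deliberately unaligned request */
        uint64_t idx = start / object_size;
        uint64_t offset = start % object_size;
        uint64_t done = 0;

        while (done != total) {
            uint64_t len = MIN(total - done, object_size - offset);

            printf("object #%" PRIu64 ": offset=%" PRIu64 " len=%" PRIu64 "\n",
                   idx, offset, len);
            offset = 0;                         /* later pieces are object-aligned */
            idx++;
            done += len;
        }
        return 0;
    }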
- */ - ret = sd_create_branch(s); - if (ret) { - acb->ret = -EIO; - return; - } - } - - object_size = (UINT32_C(1) << inode->block_size_shift); - idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size; - offset = (acb->sector_num * BDRV_SECTOR_SIZE) % object_size; - - /* - * Make sure we don't free the aiocb before we are done with all requests. - * This additional reference is dropped at the end of this function. - */ - acb->nr_pending++; - - while (done != total) { - uint8_t flags = 0; - uint64_t old_oid = 0; - bool create = false; - - oid = vid_to_data_oid(inode->data_vdi_id[idx], idx); - - len = MIN(total - done, object_size - offset); - - switch (acb->aiocb_type) { - case AIOCB_READ_UDATA: - if (!inode->data_vdi_id[idx]) { - qemu_iovec_memset(acb->qiov, done, 0, len); - goto done; - } - break; - case AIOCB_WRITE_UDATA: - if (!inode->data_vdi_id[idx]) { - create = true; - } else if (!is_data_obj_writable(inode, idx)) { - /* Copy-On-Write */ - create = true; - old_oid = oid; - flags = SD_FLAG_CMD_COW; - } - break; - case AIOCB_DISCARD_OBJ: - /* - * We discard the object only when the whole object is - * 1) allocated 2) trimmed. Otherwise, simply skip it. - */ - if (len != object_size || inode->data_vdi_id[idx] == 0) { - goto done; - } - break; - default: - break; - } - - if (create) { - trace_sheepdog_co_rw_vector_update(inode->vdi_id, oid, - vid_to_data_oid(inode->data_vdi_id[idx], idx), - idx); - oid = vid_to_data_oid(inode->vdi_id, idx); - trace_sheepdog_co_rw_vector_new(oid); - } - - aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create, - old_oid, - acb->aiocb_type == AIOCB_DISCARD_OBJ ? - 0 : done); - add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, - acb->aiocb_type); - done: - offset = 0; - idx++; - done += len; - } - if (--acb->nr_pending) { - qemu_coroutine_yield(); - } -} - -static void sd_aio_complete(SheepdogAIOCB *acb) -{ - BDRVSheepdogState *s; - if (acb->aiocb_type == AIOCB_FLUSH_CACHE) { - return; - } - - s = acb->s; - qemu_co_mutex_lock(&s->queue_lock); - QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overlapping_queue); - qemu_co_mutex_unlock(&s->queue_lock); -} - -static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, - int flags) -{ - SheepdogAIOCB acb; - int ret; - int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE; - BDRVSheepdogState *s = bs->opaque; - - assert(!flags); - if (offset > s->inode.vdi_size) { - ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, 0, NULL); - if (ret < 0) { - return ret; - } - } - - sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_WRITE_UDATA); - sd_co_rw_vector(&acb); - sd_write_done(&acb); - sd_aio_complete(&acb); - - return acb.ret; -} - -static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - SheepdogAIOCB acb; - BDRVSheepdogState *s = bs->opaque; - - sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_READ_UDATA); - sd_co_rw_vector(&acb); - sd_aio_complete(&acb); - - return acb.ret; -} - -static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) -{ - BDRVSheepdogState *s = bs->opaque; - SheepdogAIOCB acb; - AIOReq *aio_req; - - if (s->cache_flags != SD_FLAG_CMD_CACHE) { - return 0; - } - - sd_aio_setup(&acb, s, NULL, 0, 0, AIOCB_FLUSH_CACHE); - - acb.nr_pending++; - aio_req = alloc_aio_req(s, &acb, vid_to_vdi_oid(s->inode.vdi_id), - 0, 0, 0, false, 0, 0); - add_aio_request(s, aio_req, NULL, 0, acb.aiocb_type); - - if 
(--acb.nr_pending) { - qemu_coroutine_yield(); - } - - sd_aio_complete(&acb); - return acb.ret; -} - -static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) -{ - Error *local_err = NULL; - BDRVSheepdogState *s = bs->opaque; - int ret, fd; - uint32_t new_vid; - SheepdogInode *inode; - unsigned int datalen; - - trace_sheepdog_snapshot_create_info(sn_info->name, sn_info->id_str, s->name, - sn_info->vm_state_size, s->is_snapshot); - - if (s->is_snapshot) { - error_report("You can't create a snapshot of a snapshot VDI, " - "%s (%" PRIu32 ").", s->name, s->inode.vdi_id); - - return -EINVAL; - } - - trace_sheepdog_snapshot_create(sn_info->name, sn_info->id_str); - - s->inode.vm_state_size = sn_info->vm_state_size; - s->inode.vm_clock_nsec = sn_info->vm_clock_nsec; - /* It appears that inode.tag does not require a NUL terminator, - * which means this use of strncpy is ok. - */ - strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag)); - /* we don't need to update entire object */ - datalen = SD_INODE_HEADER_SIZE; - inode = g_malloc(datalen); - - /* refresh inode. */ - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - ret = fd; - goto cleanup; - } - - ret = write_object(fd, s->bs, (char *)&s->inode, - vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies, - datalen, 0, false, s->cache_flags); - if (ret < 0) { - error_report("failed to write snapshot's inode."); - goto cleanup; - } - - ret = do_sd_create(s, &new_vid, 1, &local_err); - if (ret < 0) { - error_reportf_err(local_err, - "failed to create inode for snapshot: "); - goto cleanup; - } - - ret = read_object(fd, s->bs, (char *)inode, - vid_to_vdi_oid(new_vid), s->inode.nr_copies, datalen, 0, - s->cache_flags); - - if (ret < 0) { - error_report("failed to read new inode info. %s", strerror(errno)); - goto cleanup; - } - - memcpy(&s->inode, inode, datalen); - trace_sheepdog_snapshot_create_inode(s->inode.name, s->inode.snap_id, - s->inode.vdi_id); - -cleanup: - g_free(inode); - closesocket(fd); - return ret; -} - -/* - * We implement rollback(loadvm) operation to the specified snapshot by - * 1) switch to the snapshot - * 2) rely on sd_create_branch to delete working VDI and - * 3) create a new working VDI based on the specified snapshot - */ -static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) -{ - BDRVSheepdogState *s = bs->opaque; - BDRVSheepdogState *old_s; - char tag[SD_MAX_VDI_TAG_LEN]; - uint32_t snapid = 0; - int ret; - - if (!sd_parse_snapid_or_tag(snapshot_id, &snapid, tag)) { - return -EINVAL; - } - - old_s = g_new(BDRVSheepdogState, 1); - - memcpy(old_s, s, sizeof(BDRVSheepdogState)); - - ret = reload_inode(s, snapid, tag); - if (ret) { - goto out; - } - - ret = sd_create_branch(s); - if (ret) { - goto out; - } - - g_free(old_s); - - return 0; -out: - /* recover bdrv_sd_state */ - memcpy(s, old_s, sizeof(BDRVSheepdogState)); - g_free(old_s); - - error_report("failed to open. 
recover old bdrv_sd_state."); - - return ret; -} - -#define NR_BATCHED_DISCARD 128 - -static int remove_objects(BDRVSheepdogState *s, Error **errp) -{ - int fd, i = 0, nr_objs = 0; - int ret; - SheepdogInode *inode = &s->inode; - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - return fd; - } - - nr_objs = count_data_objs(inode); - while (i < nr_objs) { - int start_idx, nr_filled_idx; - - while (i < nr_objs && !inode->data_vdi_id[i]) { - i++; - } - start_idx = i; - - nr_filled_idx = 0; - while (i < nr_objs && nr_filled_idx < NR_BATCHED_DISCARD) { - if (inode->data_vdi_id[i]) { - inode->data_vdi_id[i] = 0; - nr_filled_idx++; - } - - i++; - } - - ret = write_object(fd, s->bs, - (char *)&inode->data_vdi_id[start_idx], - vid_to_vdi_oid(s->inode.vdi_id), inode->nr_copies, - (i - start_idx) * sizeof(uint32_t), - offsetof(struct SheepdogInode, - data_vdi_id[start_idx]), - false, s->cache_flags); - if (ret < 0) { - error_setg(errp, "Failed to discard snapshot inode"); - goto out; - } - } - - ret = 0; -out: - closesocket(fd); - return ret; -} - -static int sd_snapshot_delete(BlockDriverState *bs, - const char *snapshot_id, - const char *name, - Error **errp) -{ - /* - * FIXME should delete the snapshot matching both @snapshot_id and - * @name, but @name not used here - */ - unsigned long snap_id = 0; - char snap_tag[SD_MAX_VDI_TAG_LEN]; - int fd, ret; - char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN]; - BDRVSheepdogState *s = bs->opaque; - unsigned int wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN, rlen = 0; - uint32_t vid; - SheepdogVdiReq hdr = { - .opcode = SD_OP_DEL_VDI, - .data_length = wlen, - .flags = SD_FLAG_CMD_WRITE, - }; - SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; - - ret = remove_objects(s, errp); - if (ret) { - return ret; - } - - memset(buf, 0, sizeof(buf)); - memset(snap_tag, 0, sizeof(snap_tag)); - pstrcpy(buf, SD_MAX_VDI_LEN, s->name); - /* TODO Use sd_parse_snapid() once this mess is cleaned up */ - ret = qemu_strtoul(snapshot_id, NULL, 10, &snap_id); - if (ret || snap_id > UINT32_MAX) { - /* - * FIXME Since qemu_strtoul() returns -EINVAL when - * @snapshot_id is null, @snapshot_id is mandatory. Correct - * would be to require at least one of @snapshot_id and @name. - */ - error_setg(errp, "Invalid snapshot ID: %s", - snapshot_id ? 
snapshot_id : ""); - return -EINVAL; - } - - if (snap_id) { - hdr.snapid = (uint32_t) snap_id; - } else { - /* FIXME I suspect we should use @name here */ - /* FIXME don't truncate silently */ - pstrcpy(snap_tag, sizeof(snap_tag), snapshot_id); - pstrcpy(buf + SD_MAX_VDI_LEN, SD_MAX_VDI_TAG_LEN, snap_tag); - } - - ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true, errp); - if (ret) { - return ret; - } - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - return fd; - } - - ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, - buf, &wlen, &rlen); - closesocket(fd); - if (ret) { - error_setg_errno(errp, -ret, "Couldn't send request to server"); - return ret; - } - - switch (rsp->result) { - case SD_RES_NO_VDI: - error_setg(errp, "Can't find the snapshot"); - return -ENOENT; - case SD_RES_SUCCESS: - break; - default: - error_setg(errp, "%s", sd_strerror(rsp->result)); - return -EIO; - } - - return 0; -} - -static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) -{ - Error *local_err = NULL; - BDRVSheepdogState *s = bs->opaque; - SheepdogReq req; - int fd, nr = 1024, ret, max = BITS_TO_LONGS(SD_NR_VDIS) * sizeof(long); - QEMUSnapshotInfo *sn_tab = NULL; - unsigned wlen, rlen; - int found = 0; - SheepdogInode *inode; - unsigned long *vdi_inuse; - unsigned int start_nr; - uint64_t hval; - uint32_t vid; - - vdi_inuse = g_malloc(max); - inode = g_malloc(SD_INODE_HEADER_SIZE); - - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - ret = fd; - goto out; - } - - rlen = max; - wlen = 0; - - memset(&req, 0, sizeof(req)); - - req.opcode = SD_OP_READ_VDIS; - req.data_length = max; - - ret = do_req(fd, s->bs, &req, vdi_inuse, &wlen, &rlen); - - closesocket(fd); - if (ret) { - goto out; - } - - sn_tab = g_new0(QEMUSnapshotInfo, nr); - - /* calculate a vdi id with hash function */ - hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT); - start_nr = hval & (SD_NR_VDIS - 1); - - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - ret = fd; - goto out; - } - - for (vid = start_nr; found < nr; vid = (vid + 1) % SD_NR_VDIS) { - if (!test_bit(vid, vdi_inuse)) { - break; - } - - /* we don't need to read entire object */ - ret = read_object(fd, s->bs, (char *)inode, - vid_to_vdi_oid(vid), - 0, SD_INODE_HEADER_SIZE, 0, - s->cache_flags); - - if (ret) { - continue; - } - - if (!strcmp(inode->name, s->name) && is_snapshot(inode)) { - sn_tab[found].date_sec = inode->snap_ctime >> 32; - sn_tab[found].date_nsec = inode->snap_ctime & 0xffffffff; - sn_tab[found].vm_state_size = inode->vm_state_size; - sn_tab[found].vm_clock_nsec = inode->vm_clock_nsec; - - snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), - "%" PRIu32, inode->snap_id); - pstrcpy(sn_tab[found].name, - MIN(sizeof(sn_tab[found].name), sizeof(inode->tag)), - inode->tag); - found++; - } - } - - closesocket(fd); -out: - *psn_tab = sn_tab; - - g_free(vdi_inuse); - g_free(inode); - - if (ret < 0) { - return ret; - } - - return found; -} - -static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, - int64_t pos, int size, int load) -{ - Error *local_err = NULL; - bool create; - int fd, ret = 0, remaining = size; - unsigned int data_len; - uint64_t vmstate_oid; - uint64_t offset; - uint32_t vdi_index; - uint32_t vdi_id = load ? 
s->inode.parent_vdi_id : s->inode.vdi_id; - uint32_t object_size = (UINT32_C(1) << s->inode.block_size_shift); - - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - return fd; - } - - while (remaining) { - vdi_index = pos / object_size; - offset = pos % object_size; - - data_len = MIN(remaining, object_size - offset); - - vmstate_oid = vid_to_vmstate_oid(vdi_id, vdi_index); - - create = (offset == 0); - if (load) { - ret = read_object(fd, s->bs, (char *)data, vmstate_oid, - s->inode.nr_copies, data_len, offset, - s->cache_flags); - } else { - ret = write_object(fd, s->bs, (char *)data, vmstate_oid, - s->inode.nr_copies, data_len, offset, create, - s->cache_flags); - } - - if (ret < 0) { - error_report("failed to save vmstate %s", strerror(errno)); - goto cleanup; - } - - pos += data_len; - data += data_len; - remaining -= data_len; - } - ret = size; -cleanup: - closesocket(fd); - return ret; -} - -static int sd_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, - int64_t pos) -{ - BDRVSheepdogState *s = bs->opaque; - void *buf; - int ret; - - buf = qemu_blockalign(bs, qiov->size); - qemu_iovec_to_buf(qiov, 0, buf, qiov->size); - ret = do_load_save_vmstate(s, (uint8_t *) buf, pos, qiov->size, 0); - qemu_vfree(buf); - - return ret; -} - -static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, - int64_t pos) -{ - BDRVSheepdogState *s = bs->opaque; - void *buf; - int ret; - - buf = qemu_blockalign(bs, qiov->size); - ret = do_load_save_vmstate(s, buf, pos, qiov->size, 1); - qemu_iovec_from_buf(qiov, 0, buf, qiov->size); - qemu_vfree(buf); - - return ret; -} - - -static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset, - int bytes) -{ - SheepdogAIOCB acb; - BDRVSheepdogState *s = bs->opaque; - QEMUIOVector discard_iov; - struct iovec iov; - uint32_t zero = 0; - - if (!s->discard_supported) { - return 0; - } - - memset(&discard_iov, 0, sizeof(discard_iov)); - memset(&iov, 0, sizeof(iov)); - iov.iov_base = &zero; - iov.iov_len = sizeof(zero); - discard_iov.iov = &iov; - discard_iov.niov = 1; - if (!QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)) { - return -ENOTSUP; - } - sd_aio_setup(&acb, s, &discard_iov, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ); - sd_co_rw_vector(&acb); - sd_aio_complete(&acb); - - return acb.ret; -} - -static coroutine_fn int -sd_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file) -{ - BDRVSheepdogState *s = bs->opaque; - SheepdogInode *inode = &s->inode; - uint32_t object_size = (UINT32_C(1) << inode->block_size_shift); - unsigned long start = offset / object_size, - end = DIV_ROUND_UP(offset + bytes, object_size); - unsigned long idx; - *map = offset; - int ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; - - for (idx = start; idx < end; idx++) { - if (inode->data_vdi_id[idx] == 0) { - break; - } - } - if (idx == start) { - /* Get the longest length of unallocated sectors */ - ret = 0; - for (idx = start + 1; idx < end; idx++) { - if (inode->data_vdi_id[idx] != 0) { - break; - } - } - } - - *pnum = (idx - start) * object_size; - if (*pnum > bytes) { - *pnum = bytes; - } - if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) { - *file = bs; - } - return ret; -} - -static int64_t sd_get_allocated_file_size(BlockDriverState *bs) -{ - BDRVSheepdogState *s = bs->opaque; - SheepdogInode *inode = &s->inode; - uint32_t object_size = (UINT32_C(1) << inode->block_size_shift); - unsigned long 
i, last = DIV_ROUND_UP(inode->vdi_size, object_size); - uint64_t size = 0; - - for (i = 0; i < last; i++) { - if (inode->data_vdi_id[i] == 0) { - continue; - } - size += object_size; - } - return size; -} - -static QemuOptsList sd_create_opts = { - .name = "sheepdog-create-opts", - .head = QTAILQ_HEAD_INITIALIZER(sd_create_opts.head), - .desc = { - { - .name = BLOCK_OPT_SIZE, - .type = QEMU_OPT_SIZE, - .help = "Virtual disk size" - }, - { - .name = BLOCK_OPT_BACKING_FILE, - .type = QEMU_OPT_STRING, - .help = "File name of a base image" - }, - { - .name = BLOCK_OPT_BACKING_FMT, - .type = QEMU_OPT_STRING, - .help = "Must be 'sheepdog' if present", - }, - { - .name = BLOCK_OPT_PREALLOC, - .type = QEMU_OPT_STRING, - .help = "Preallocation mode (allowed values: off, full)" - }, - { - .name = BLOCK_OPT_REDUNDANCY, - .type = QEMU_OPT_STRING, - .help = "Redundancy of the image" - }, - { - .name = BLOCK_OPT_OBJECT_SIZE, - .type = QEMU_OPT_SIZE, - .help = "Object size of the image" - }, - { /* end of list */ } - } -}; - -static const char *const sd_strong_runtime_opts[] = { - "vdi", - "snap-id", - "tag", - "server.", - - NULL -}; - -static BlockDriver bdrv_sheepdog = { - .format_name = "sheepdog", - .protocol_name = "sheepdog", - .instance_size = sizeof(BDRVSheepdogState), - .bdrv_parse_filename = sd_parse_filename, - .bdrv_file_open = sd_open, - .bdrv_reopen_prepare = sd_reopen_prepare, - .bdrv_reopen_commit = sd_reopen_commit, - .bdrv_reopen_abort = sd_reopen_abort, - .bdrv_close = sd_close, - .bdrv_co_create = sd_co_create, - .bdrv_co_create_opts = sd_co_create_opts, - .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_getlength = sd_getlength, - .bdrv_get_allocated_file_size = sd_get_allocated_file_size, - .bdrv_co_truncate = sd_co_truncate, - - .bdrv_co_readv = sd_co_readv, - .bdrv_co_writev = sd_co_writev, - .bdrv_co_flush_to_disk = sd_co_flush_to_disk, - .bdrv_co_pdiscard = sd_co_pdiscard, - .bdrv_co_block_status = sd_co_block_status, - - .bdrv_snapshot_create = sd_snapshot_create, - .bdrv_snapshot_goto = sd_snapshot_goto, - .bdrv_snapshot_delete = sd_snapshot_delete, - .bdrv_snapshot_list = sd_snapshot_list, - - .bdrv_save_vmstate = sd_save_vmstate, - .bdrv_load_vmstate = sd_load_vmstate, - - .bdrv_detach_aio_context = sd_detach_aio_context, - .bdrv_attach_aio_context = sd_attach_aio_context, - - .create_opts = &sd_create_opts, - .strong_runtime_opts = sd_strong_runtime_opts, -}; - -static BlockDriver bdrv_sheepdog_tcp = { - .format_name = "sheepdog", - .protocol_name = "sheepdog+tcp", - .instance_size = sizeof(BDRVSheepdogState), - .bdrv_parse_filename = sd_parse_filename, - .bdrv_file_open = sd_open, - .bdrv_reopen_prepare = sd_reopen_prepare, - .bdrv_reopen_commit = sd_reopen_commit, - .bdrv_reopen_abort = sd_reopen_abort, - .bdrv_close = sd_close, - .bdrv_co_create = sd_co_create, - .bdrv_co_create_opts = sd_co_create_opts, - .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_getlength = sd_getlength, - .bdrv_get_allocated_file_size = sd_get_allocated_file_size, - .bdrv_co_truncate = sd_co_truncate, - - .bdrv_co_readv = sd_co_readv, - .bdrv_co_writev = sd_co_writev, - .bdrv_co_flush_to_disk = sd_co_flush_to_disk, - .bdrv_co_pdiscard = sd_co_pdiscard, - .bdrv_co_block_status = sd_co_block_status, - - .bdrv_snapshot_create = sd_snapshot_create, - .bdrv_snapshot_goto = sd_snapshot_goto, - .bdrv_snapshot_delete = sd_snapshot_delete, - .bdrv_snapshot_list = sd_snapshot_list, - - .bdrv_save_vmstate = sd_save_vmstate, - .bdrv_load_vmstate = sd_load_vmstate, - - .bdrv_detach_aio_context = 
sd_detach_aio_context, - .bdrv_attach_aio_context = sd_attach_aio_context, - - .create_opts = &sd_create_opts, - .strong_runtime_opts = sd_strong_runtime_opts, -}; - -static BlockDriver bdrv_sheepdog_unix = { - .format_name = "sheepdog", - .protocol_name = "sheepdog+unix", - .instance_size = sizeof(BDRVSheepdogState), - .bdrv_parse_filename = sd_parse_filename, - .bdrv_file_open = sd_open, - .bdrv_reopen_prepare = sd_reopen_prepare, - .bdrv_reopen_commit = sd_reopen_commit, - .bdrv_reopen_abort = sd_reopen_abort, - .bdrv_close = sd_close, - .bdrv_co_create = sd_co_create, - .bdrv_co_create_opts = sd_co_create_opts, - .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_getlength = sd_getlength, - .bdrv_get_allocated_file_size = sd_get_allocated_file_size, - .bdrv_co_truncate = sd_co_truncate, - - .bdrv_co_readv = sd_co_readv, - .bdrv_co_writev = sd_co_writev, - .bdrv_co_flush_to_disk = sd_co_flush_to_disk, - .bdrv_co_pdiscard = sd_co_pdiscard, - .bdrv_co_block_status = sd_co_block_status, - - .bdrv_snapshot_create = sd_snapshot_create, - .bdrv_snapshot_goto = sd_snapshot_goto, - .bdrv_snapshot_delete = sd_snapshot_delete, - .bdrv_snapshot_list = sd_snapshot_list, - - .bdrv_save_vmstate = sd_save_vmstate, - .bdrv_load_vmstate = sd_load_vmstate, - - .bdrv_detach_aio_context = sd_detach_aio_context, - .bdrv_attach_aio_context = sd_attach_aio_context, - - .create_opts = &sd_create_opts, - .strong_runtime_opts = sd_strong_runtime_opts, -}; - -static void bdrv_sheepdog_init(void) -{ - bdrv_register(&bdrv_sheepdog); - bdrv_register(&bdrv_sheepdog_tcp); - bdrv_register(&bdrv_sheepdog_unix); -} -block_init(bdrv_sheepdog_init); diff --git a/block/snapshot.c b/block/snapshot.c index e8ae9a28c11..ccacda8bd59 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -275,13 +275,16 @@ int bdrv_snapshot_goto(BlockDriverState *bs, qobject_unref(file_options); g_free(subqdict_prefix); + /* Force .bdrv_open() below to re-attach fallback_bs on *fallback_ptr */ qdict_put_str(options, (*fallback_ptr)->name, bdrv_get_node_name(fallback_bs)); + /* Now close bs, apply the snapshot on fallback_bs, and re-open bs */ if (drv->bdrv_close) { drv->bdrv_close(bs); } + /* .bdrv_open() will re-attach it */ bdrv_unref_child(bs, *fallback_ptr); *fallback_ptr = NULL; @@ -296,7 +299,16 @@ int bdrv_snapshot_goto(BlockDriverState *bs, return ret < 0 ? ret : open_ret; } - assert(fallback_bs == (*fallback_ptr)->bs); + /* + * fallback_ptr is &bs->file or &bs->backing. *fallback_ptr + * was closed above and set to NULL, but the .bdrv_open() call + * has opened it again, because we set the respective option + * (with the qdict_put_str() call above). + * Assert that .bdrv_open() has attached some child on + * *fallback_ptr, and that it has attached the one we wanted + * it to (i.e., fallback_bs). 
+ */ + assert(*fallback_ptr && fallback_bs == (*fallback_ptr)->bs); bdrv_unref(fallback_bs); return ret; } @@ -415,7 +427,7 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, error_setg(errp, "snapshot_id and name are both NULL"); return -EINVAL; } - if (!bs->read_only) { + if (!bdrv_is_read_only(bs)) { error_setg(errp, "Device is not readonly"); return -EINVAL; } diff --git a/block/ssh.c b/block/ssh.c index ebe3d8b631f..e0fbb4934ba 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -237,9 +237,7 @@ static int parse_uri(const char *filename, QDict *options, Error **errp) return 0; err: - if (uri) { - uri_free(uri); - } + uri_free(uri); return -EINVAL; } @@ -277,7 +275,6 @@ static void ssh_parse_filename(const char *filename, QDict *options, static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp) { int ret; -#ifdef HAVE_LIBSSH_0_8 enum ssh_known_hosts_e state; int r; ssh_key pubkey; @@ -343,46 +340,6 @@ static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp) error_setg(errp, "error while checking for known server (%d)", state); goto out; } -#else /* !HAVE_LIBSSH_0_8 */ - int state; - - state = ssh_is_server_known(s->session); - trace_ssh_server_status(state); - - switch (state) { - case SSH_SERVER_KNOWN_OK: - /* OK */ - trace_ssh_check_host_key_knownhosts(); - break; - case SSH_SERVER_KNOWN_CHANGED: - ret = -EINVAL; - error_setg(errp, - "host key does not match the one in known_hosts; this " - "may be a possible attack"); - goto out; - case SSH_SERVER_FOUND_OTHER: - ret = -EINVAL; - error_setg(errp, - "host key for this server not found, another type exists"); - goto out; - case SSH_SERVER_FILE_NOT_FOUND: - ret = -ENOENT; - error_setg(errp, "known_hosts file not found"); - goto out; - case SSH_SERVER_NOT_KNOWN: - ret = -EINVAL; - error_setg(errp, "no host key was found in known_hosts"); - goto out; - case SSH_SERVER_ERROR: - ret = -EINVAL; - error_setg(errp, "server error"); - goto out; - default: - ret = -EINVAL; - error_setg(errp, "error while checking for known server (%d)", state); - goto out; - } -#endif /* !HAVE_LIBSSH_0_8 */ /* known_hosts checking successful. 
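With the #else branch above removed, libssh 0.8 becomes a hard requirement and the known-hosts verification always goes through ssh_session_is_known_server(). A compressed sketch of handling that API's result, assuming an already-connected ssh_session (the driver's error-propagation plumbing is omitted):

    #include <libssh/libssh.h>
    #include <stdio.h>

    static int known_hosts_ok(ssh_session session)
    {
        switch (ssh_session_is_known_server(session)) {
        case SSH_KNOWN_HOSTS_OK:
            return 1;                       /* host key matches known_hosts */
        case SSH_KNOWN_HOSTS_CHANGED:
            fprintf(stderr, "host key changed - possible attack\n");
            return 0;
        case SSH_KNOWN_HOSTS_OTHER:
            fprintf(stderr, "a key of a different type is recorded for this host\n");
            return 0;
        case SSH_KNOWN_HOSTS_NOT_FOUND:     /* no known_hosts file at all */
        case SSH_KNOWN_HOSTS_UNKNOWN:       /* host not listed */
        default:
            return 0;
        }
    }

The SHA256 hash-based check added further down in this file goes through the same ssh_get_server_publickey() path instead of known_hosts.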
*/ ret = 0; @@ -438,11 +395,7 @@ check_host_key_hash(BDRVSSHState *s, const char *hash, unsigned char *server_hash; size_t server_hash_len; -#ifdef HAVE_LIBSSH_0_8 r = ssh_get_server_publickey(s->session, &pubkey); -#else - r = ssh_get_publickey(s->session, &pubkey); -#endif if (r != SSH_OK) { session_error_setg(errp, s, "failed to read remote host key"); return -EINVAL; @@ -487,6 +440,9 @@ static int check_host_key(BDRVSSHState *s, SshHostKeyCheck *hkc, Error **errp) } else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA1) { return check_host_key_hash(s, hkc->u.hash.hash, SSH_PUBLICKEY_HASH_SHA1, errp); + } else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA256) { + return check_host_key_hash(s, hkc->u.hash.hash, + SSH_PUBLICKEY_HASH_SHA256, errp); } g_assert_not_reached(); break; @@ -1233,8 +1189,6 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what) } } -#ifdef HAVE_LIBSSH_0_8 - static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs) { int r; @@ -1271,18 +1225,6 @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs) return ret; } -#else /* !HAVE_LIBSSH_0_8 */ - -static coroutine_fn int ssh_co_flush(BlockDriverState *bs) -{ - BDRVSSHState *s = bs->opaque; - - unsafe_flush_warning(s, "libssh >= 0.8.0"); - return 0; -} - -#endif /* !HAVE_LIBSSH_0_8 */ - static int64_t ssh_getlength(BlockDriverState *bs) { BDRVSSHState *s = bs->opaque; diff --git a/block/stream.c b/block/stream.c index 97bee482dce..e45113aed6a 100644 --- a/block/stream.c +++ b/block/stream.c @@ -54,8 +54,8 @@ static int stream_prepare(Job *job) { StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs); - BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base); - BlockDriverState *unfiltered_base = bdrv_skip_filters(base); + BlockDriverState *base; + BlockDriverState *unfiltered_base; Error *local_err = NULL; int ret = 0; @@ -63,6 +63,9 @@ static int stream_prepare(Job *job) bdrv_cor_filter_drop(s->cor_filter_bs); s->cor_filter_bs = NULL; + base = bdrv_filter_or_cow_bs(s->above_base); + unfiltered_base = bdrv_skip_filters(base); + if (bdrv_cow_child(unfiltered_bs)) { const char *base_id = NULL, *base_fmt = NULL; if (unfiltered_base) { diff --git a/block/throttle.c b/block/throttle.c index b685166ad4a..6e8d52fa245 100644 --- a/block/throttle.c +++ b/block/throttle.c @@ -112,8 +112,9 @@ static int64_t throttle_getlength(BlockDriverState *bs) } static int coroutine_fn throttle_co_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { ThrottleGroupMember *tgm = bs->opaque; @@ -123,8 +124,9 @@ static int coroutine_fn throttle_co_preadv(BlockDriverState *bs, } static int coroutine_fn throttle_co_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { ThrottleGroupMember *tgm = bs->opaque; throttle_group_co_io_limits_intercept(tgm, bytes, true); @@ -133,7 +135,7 @@ static int coroutine_fn throttle_co_pwritev(BlockDriverState *bs, } static int coroutine_fn throttle_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { ThrottleGroupMember *tgm = bs->opaque; @@ -143,7 +145,7 @@ static int coroutine_fn throttle_co_pwrite_zeroes(BlockDriverState *bs, } static int coroutine_fn 
throttle_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { ThrottleGroupMember *tgm = bs->opaque; throttle_group_co_io_limits_intercept(tgm, bytes, true); @@ -152,8 +154,8 @@ static int coroutine_fn throttle_co_pdiscard(BlockDriverState *bs, } static int coroutine_fn throttle_co_pwritev_compressed(BlockDriverState *bs, - uint64_t offset, - uint64_t bytes, + int64_t offset, + int64_t bytes, QEMUIOVector *qiov) { return throttle_co_pwritev(bs, offset, bytes, qiov, diff --git a/block/trace-events b/block/trace-events index 1a12d634e2e..549090d453e 100644 --- a/block/trace-events +++ b/block/trace-events @@ -1,12 +1,12 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # ../block.c bdrv_open_common(void *bs, const char *filename, int flags, const char *format_name) "bs %p filename \"%s\" flags 0x%x format_name \"%s\"" bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d" # block-backend.c -blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x" -blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x" +blk_co_preadv(void *blk, void *bs, int64_t offset, int64_t bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %" PRId64 " flags 0x%x" +blk_co_pwritev(void *blk, void *bs, int64_t offset, int64_t bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %" PRId64 " flags 0x%x" blk_root_attach(void *child, void *blk, void *bs) "child %p blk %p bs %p" blk_root_detach(void *child, void *blk, void *bs) "child %p blk %p bs %p" @@ -75,13 +75,13 @@ luring_resubmit_short_read(void *s, void *luringcb, int nread) "LuringState %p l # qcow2.c qcow2_add_task(void *co, void *bs, void *pool, const char *action, int cluster_type, uint64_t host_offset, uint64_t offset, uint64_t bytes, void *qiov, size_t qiov_offset) "co %p bs %p pool %p: %s: cluster_type %d file_cluster_offset %" PRIu64 " offset %" PRIu64 " bytes %" PRIu64 " qiov %p qiov_offset %zu" -qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d" +qcow2_writev_start_req(void *co, int64_t offset, int64_t bytes) "co %p offset 0x%" PRIx64 " bytes %" PRId64 qcow2_writev_done_req(void *co, int ret) "co %p ret %d" qcow2_writev_start_part(void *co) "co %p" qcow2_writev_done_part(void *co, int cur_bytes) "co %p cur_bytes %d" qcow2_writev_data(void *co, uint64_t offset) "co %p offset 0x%" PRIx64 -qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d" -qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d" +qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int64_t bytes) "co %p offset 0x%" PRIx64 " bytes %" PRId64 +qcow2_pwrite_zeroes(void *co, int64_t offset, int64_t bytes) "co %p offset 0x%" PRIx64 " bytes %" PRId64 qcow2_skip_cow(void *co, uint64_t offset, int nb_clusters) "co %p offset 0x%" PRIx64 " nb_clusters %d" # qcow2-cluster.c @@ -152,12 +152,12 @@ nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p off nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x" nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset 0x%"PRIx64" bytes %"PRId64" niov %d is_write %d" nvme_rw_done(void *s, int is_write, uint64_t offset, 
uint64_t bytes, int ret) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" ret %d" -nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64"" -nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d" +nvme_dsm(void *s, int64_t offset, int64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64"" +nvme_dsm_done(void *s, int64_t offset, int64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d" nvme_dma_map_flush(void *s) "s %p" nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u" -nvme_create_queue_pair(unsigned q_index, void *q, unsigned size, void *aio_context, int fd) "index %u q %p size %u aioctx %p fd %d" -nvme_free_queue_pair(unsigned q_index, void *q) "index %u q %p" +nvme_create_queue_pair(unsigned q_index, void *q, size_t size, void *aio_context, int fd) "index %u q %p size %zu aioctx %p fd %d" +nvme_free_queue_pair(unsigned q_index, void *q, void *cq, void *sq) "index %u q %p cq %p sq %p" nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d" nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64 nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pages %d" @@ -206,20 +206,7 @@ file_copy_file_range(void *bs, int src, int64_t src_off, int dst, int64_t dst_of file_FindEjectableOpticalMedia(const char *media) "Matching using %s" file_setup_cdrom(const char *partition) "Using %s as optical disc" file_hdev_is_sg(int type, int version) "SG device found: type=%d, version=%d" - -# sheepdog.c -sheepdog_reconnect_to_sdog(void) "Wait for connection to be established" -sheepdog_aio_read_response(void) "disable cache since the server doesn't support it" -sheepdog_open(uint32_t vid) "0x%" PRIx32 " snapshot inode was open" -sheepdog_close(const char *name) "%s" -sheepdog_create_branch_snapshot(uint32_t vdi) "0x%" PRIx32 " is snapshot" -sheepdog_create_branch_created(uint32_t vdi) "0x%" PRIx32 " is created" -sheepdog_create_branch_new(uint32_t vdi) "0x%" PRIx32 " was newly created" -sheepdog_co_rw_vector_update(uint32_t vdi, uint64_t oid, uint64_t data, long idx) "update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld" -sheepdog_co_rw_vector_new(uint64_t oid) "new oid 0x%" PRIx64 -sheepdog_snapshot_create_info(const char *sn_name, const char *id, const char *name, int64_t size, int is_snapshot) "sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " " "is_snapshot %d" -sheepdog_snapshot_create(const char *sn_name, const char *id) "%s %s" -sheepdog_snapshot_create_inode(const char *name, uint32_t snap, uint32_t vdi) "s->inode: name %s snap_id 0x%" PRIx32 " vdi 0x%" PRIx32 +file_flush_fdatasync_failed(int err) "errno %d" # ssh.c sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)" diff --git a/block/vdi.c b/block/vdi.c index 548f8a057b8..bdc58d726ee 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -544,8 +544,8 @@ static int coroutine_fn vdi_co_block_status(BlockDriverState *bs, } static int coroutine_fn -vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vdi_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVVdiState *s = bs->opaque; QEMUIOVector local_qiov; @@ -600,8 +600,8 @@ vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } static int 
coroutine_fn -vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vdi_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVVdiState *s = bs->opaque; QEMUIOVector local_qiov; diff --git a/block/vhdx-log.c b/block/vhdx-log.c index 404fb5f3cb0..7672161d955 100644 --- a/block/vhdx-log.c +++ b/block/vhdx-log.c @@ -801,7 +801,7 @@ int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed, } if (logs.valid) { - if (bs->read_only) { + if (bdrv_is_read_only(bs)) { bdrv_refresh_filename(bs); ret = -EPERM; error_setg(errp, diff --git a/block/vmdk.c b/block/vmdk.c index 4499f136bdf..0dfab6e9413 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -60,6 +60,7 @@ #define VMDK_ZEROED (-3) #define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain" +#define BLOCK_OPT_TOOLSVERSION "toolsversion" typedef struct { uint32_t version; @@ -1888,8 +1889,8 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset, } static int coroutine_fn -vmdk_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vmdk_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVVmdkState *s = bs->opaque; int ret; @@ -2068,8 +2069,8 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, } static int coroutine_fn -vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vmdk_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { int ret; BDRVVmdkState *s = bs->opaque; @@ -2080,8 +2081,8 @@ vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } static int coroutine_fn -vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov) +vmdk_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov) { if (bytes == 0) { /* The caller will write bytes 0 to signal EOF. 
@@ -2109,7 +2110,7 @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int bytes, + int64_t bytes, BdrvRequestFlags flags) { int ret; @@ -2344,6 +2345,7 @@ static int coroutine_fn vmdk_co_do_create(int64_t size, BlockdevVmdkAdapterType adapter_type, const char *backing_file, const char *hw_version, + const char *toolsversion, bool compat6, bool zeroed_grain, vmdk_create_extent_fn extent_fn, @@ -2384,7 +2386,8 @@ static int coroutine_fn vmdk_co_do_create(int64_t size, "ddb.geometry.cylinders = \"%" PRId64 "\"\n" "ddb.geometry.heads = \"%" PRIu32 "\"\n" "ddb.geometry.sectors = \"63\"\n" - "ddb.adapterType = \"%s\"\n"; + "ddb.adapterType = \"%s\"\n" + "ddb.toolsVersion = \"%s\"\n"; ext_desc_lines = g_string_new(NULL); @@ -2401,6 +2404,9 @@ static int coroutine_fn vmdk_co_do_create(int64_t size, if (!hw_version) { hw_version = "4"; } + if (!toolsversion) { + toolsversion = "2147483647"; + } if (adapter_type != BLOCKDEV_VMDK_ADAPTER_TYPE_IDE) { /* that's the number of heads with which vmware operates when @@ -2525,7 +2531,8 @@ static int coroutine_fn vmdk_co_do_create(int64_t size, size / (int64_t)(63 * number_heads * BDRV_SECTOR_SIZE), number_heads, - BlockdevVmdkAdapterType_str(adapter_type)); + BlockdevVmdkAdapterType_str(adapter_type), + toolsversion); desc_len = strlen(desc); /* the descriptor offset = 0x200 */ if (!split && !flat) { @@ -2617,6 +2624,7 @@ static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv, BlockdevVmdkAdapterType adapter_type_enum; char *backing_file = NULL; char *hw_version = NULL; + char *toolsversion = NULL; char *fmt = NULL; BlockdevVmdkSubformat subformat; int ret = 0; @@ -2649,6 +2657,7 @@ static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv, adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE); backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); hw_version = qemu_opt_get_del(opts, BLOCK_OPT_HWVERSION); + toolsversion = qemu_opt_get_del(opts, BLOCK_OPT_TOOLSVERSION); compat6 = qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false); if (strcmp(hw_version, "undefined") == 0) { g_free(hw_version); @@ -2692,14 +2701,15 @@ static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv, .opts = opts, }; ret = vmdk_co_do_create(total_size, subformat, adapter_type_enum, - backing_file, hw_version, compat6, zeroed_grain, - vmdk_co_create_opts_cb, &data, errp); + backing_file, hw_version, toolsversion, compat6, + zeroed_grain, vmdk_co_create_opts_cb, &data, errp); exit: g_free(backing_fmt); g_free(adapter_type); g_free(backing_file); g_free(hw_version); + g_free(toolsversion); g_free(fmt); g_free(desc); g_free(path); @@ -2782,6 +2792,7 @@ static int coroutine_fn vmdk_co_create(BlockdevCreateOptions *create_options, opts->adapter_type, opts->backing_file, opts->hwversion, + opts->toolsversion, false, opts->zeroed_grain, vmdk_co_create_cb, @@ -3031,6 +3042,11 @@ static QemuOptsList vmdk_create_opts = { .help = "VMDK hardware version", .def_value_str = "undefined" }, + { + .name = BLOCK_OPT_TOOLSVERSION, + .type = QEMU_OPT_STRING, + .help = "VMware guest tools version", + }, { .name = BLOCK_OPT_SUBFMT, .type = QEMU_OPT_STRING, diff --git a/block/vpc.c b/block/vpc.c index 17a705b482a..297a26262ab 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -276,7 +276,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, if (ret < 0) { goto fail; } - if (strncmp(footer->creator, "conectix", 8)) { + if (strncmp(footer->creator, "conectix", 
8) || + be32_to_cpu(footer->type) != VHD_FIXED) { error_setg(errp, "invalid VPC image"); ret = -EINVAL; goto fail; @@ -608,8 +609,8 @@ static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) } static int coroutine_fn -vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vpc_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVVPCState *s = bs->opaque; int ret; @@ -658,8 +659,8 @@ vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } static int coroutine_fn -vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vpc_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVVPCState *s = bs->opaque; int64_t image_offset; diff --git a/block/vvfat.c b/block/vvfat.c index 54807f82ca1..5dacc6cfac4 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1279,8 +1279,18 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, qemu_co_mutex_init(&s->lock); - ret = 0; + qemu_opts_del(opts); + + return 0; + fail: + g_free(s->qcow_filename); + s->qcow_filename = NULL; + g_free(s->cluster_buffer); + s->cluster_buffer = NULL; + g_free(s->used_clusters); + s->used_clusters = NULL; + qemu_opts_del(opts); return ret; } @@ -1522,8 +1532,8 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num, } static int coroutine_fn -vvfat_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vvfat_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { int ret; BDRVVVFATState *s = bs->opaque; @@ -3061,8 +3071,8 @@ DLOG(checkpoint()); } static int coroutine_fn -vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vvfat_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { int ret; BDRVVVFATState *s = bs->opaque; @@ -3098,26 +3108,6 @@ static int coroutine_fn vvfat_co_block_status(BlockDriverState *bs, return BDRV_BLOCK_DATA; } -static int coroutine_fn -write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) -{ - int ret; - - BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque); - qemu_co_mutex_lock(&s->lock); - ret = try_commit(s); - qemu_co_mutex_unlock(&s->lock); - - return ret; -} - -static BlockDriver vvfat_write_target = { - .format_name = "vvfat_write_target", - .instance_size = sizeof(void*), - .bdrv_co_pwritev = write_target_commit, -}; - static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format, int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options) @@ -3127,22 +3117,18 @@ static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format, qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); } -static const BdrvChildClass child_vvfat_qcow = { - .parent_is_bds = true, - .inherit_options = vvfat_qcow_options, -}; +static BdrvChildClass child_vvfat_qcow; static int enable_write_target(BlockDriverState *bs, Error **errp) { BDRVVVFATState *s = bs->opaque; BlockDriver *bdrv_qcow = NULL; - BlockDriverState *backing; QemuOpts *opts = NULL; int ret; int size = sector2cluster(s, s->sector_count); QDict *options; - s->used_clusters = calloc(size, 1); + s->used_clusters = g_malloc0(size); array_init(&(s->commits), sizeof(commit_t)); @@ 
-3187,18 +3173,9 @@ static int enable_write_target(BlockDriverState *bs, Error **errp) unlink(s->qcow_filename); #endif - backing = bdrv_new_open_driver(&vvfat_write_target, NULL, BDRV_O_ALLOW_RDWR, - &error_abort); - *(void**) backing->opaque = s; - - bdrv_set_backing_hd(s->bs, backing, &error_abort); - bdrv_unref(backing); - return 0; err: - g_free(s->qcow_filename); - s->qcow_filename = NULL; return ret; } @@ -3208,20 +3185,10 @@ static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { - BDRVVVFATState *s = bs->opaque; - - assert(c == s->qcow || (role & BDRV_CHILD_COW)); - - if (c == s->qcow) { - /* This is a private node, nobody should try to attach to it */ - *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; - *nshared = BLK_PERM_WRITE_UNCHANGED; - } else { - /* The backing file is there so 'commit' can use it. vvfat doesn't - * access it in any way. */ - *nperm = 0; - *nshared = BLK_PERM_ALL; - } + assert(role & BDRV_CHILD_DATA); + /* This is a private node, nobody should try to attach to it */ + *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; + *nshared = BLK_PERM_WRITE_UNCHANGED; } static void vvfat_close(BlockDriverState *bs) @@ -3270,6 +3237,8 @@ static BlockDriver bdrv_vvfat = { static void bdrv_vvfat_init(void) { + child_vvfat_qcow = child_of_bds; + child_vvfat_qcow.inherit_options = vvfat_qcow_options; bdrv_register(&bdrv_vvfat); } diff --git a/block/write-threshold.c b/block/write-threshold.c index 85b78dc2a9b..35cafbc22d9 100644 --- a/block/write-threshold.c +++ b/block/write-threshold.c @@ -12,9 +12,7 @@ #include "qemu/osdep.h" #include "block/block_int.h" -#include "qemu/coroutine.h" #include "block/write-threshold.h" -#include "qemu/notify.h" #include "qapi/error.h" #include "qapi/qapi-commands-block-core.h" #include "qapi/qapi-events-block-core.h" @@ -24,82 +22,9 @@ uint64_t bdrv_write_threshold_get(const BlockDriverState *bs) return bs->write_threshold_offset; } -bool bdrv_write_threshold_is_set(const BlockDriverState *bs) -{ - return bs->write_threshold_offset > 0; -} - -static void write_threshold_disable(BlockDriverState *bs) -{ - if (bdrv_write_threshold_is_set(bs)) { - notifier_with_return_remove(&bs->write_threshold_notifier); - bs->write_threshold_offset = 0; - } -} - -uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, - const BdrvTrackedRequest *req) -{ - if (bdrv_write_threshold_is_set(bs)) { - if (req->offset > bs->write_threshold_offset) { - return (req->offset - bs->write_threshold_offset) + req->bytes; - } - if ((req->offset + req->bytes) > bs->write_threshold_offset) { - return (req->offset + req->bytes) - bs->write_threshold_offset; - } - } - return 0; -} - -static int coroutine_fn before_write_notify(NotifierWithReturn *notifier, - void *opaque) -{ - BdrvTrackedRequest *req = opaque; - BlockDriverState *bs = req->bs; - uint64_t amount = 0; - - amount = bdrv_write_threshold_exceeded(bs, req); - if (amount > 0) { - qapi_event_send_block_write_threshold( - bs->node_name, - amount, - bs->write_threshold_offset); - - /* autodisable to avoid flooding the monitor */ - write_threshold_disable(bs); - } - - return 0; /* should always let other notifiers run */ -} - -static void write_threshold_register_notifier(BlockDriverState *bs) -{ - bs->write_threshold_notifier.notify = before_write_notify; - bdrv_add_before_write_notifier(bs, &bs->write_threshold_notifier); -} - -static void write_threshold_update(BlockDriverState *bs, - int64_t threshold_bytes) -{ - 
bs->write_threshold_offset = threshold_bytes; -} - void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes) { - if (bdrv_write_threshold_is_set(bs)) { - if (threshold_bytes > 0) { - write_threshold_update(bs, threshold_bytes); - } else { - write_threshold_disable(bs); - } - } else { - if (threshold_bytes > 0) { - /* avoid multiple registration */ - write_threshold_register_notifier(bs); - write_threshold_update(bs, threshold_bytes); - } - /* discard bogus disable request */ - } + bs->write_threshold_offset = threshold_bytes; } void qmp_block_set_write_threshold(const char *node_name, @@ -122,3 +47,17 @@ void qmp_block_set_write_threshold(const char *node_name, aio_context_release(aio_context); } + +void bdrv_write_threshold_check_write(BlockDriverState *bs, int64_t offset, + int64_t bytes) +{ + int64_t end = offset + bytes; + uint64_t wtr = bs->write_threshold_offset; + + if (wtr > 0 && end > wtr) { + qapi_event_send_block_write_threshold(bs->node_name, end - wtr, wtr); + + /* autodisable to avoid flooding the monitor */ + bdrv_write_threshold_set(bs, 0); + } +} diff --git a/blockdev-nbd.c b/blockdev-nbd.c index b264620b98d..bdfa7ed3a5a 100644 --- a/blockdev-nbd.c +++ b/blockdev-nbd.c @@ -108,9 +108,9 @@ static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp) return NULL; } - if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { - error_setg(errp, - "Expecting TLS credentials with a server endpoint"); + if (!qcrypto_tls_creds_check_endpoint(creds, + QCRYPTO_TLS_CREDS_ENDPOINT_SERVER, + errp)) { return NULL; } object_ref(obj); diff --git a/blockdev.c b/blockdev.c index a57590aae40..b35072644eb 100644 --- a/blockdev.c +++ b/blockdev.c @@ -56,7 +56,6 @@ #include "sysemu/iothread.h" #include "block/block_int.h" #include "block/trace.h" -#include "sysemu/arch_init.h" #include "sysemu/runstate.h" #include "sysemu/replay.h" #include "qemu/cutils.h" @@ -583,8 +582,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); blk_rs = blk_get_root_state(blk); - blk_rs->open_flags = bdrv_flags; - blk_rs->read_only = read_only; + blk_rs->open_flags = bdrv_flags | (read_only ? 0 : BDRV_O_RDWR); blk_rs->detect_zeroes = detect_zeroes; qobject_unref(bs_opts); @@ -1576,10 +1574,6 @@ static void external_snapshot_prepare(BlkActionState *common, goto out; } - /* This removes our old bs and adds the new bs. This is an operation that - * can fail, so we need to do it in .prepare; undoing it for abort is - * always possible. 
*/ - bdrv_ref(state->new_bs); ret = bdrv_append(state->new_bs, state->old_bs, errp); if (ret < 0) { goto out; @@ -1719,6 +1713,7 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); + state->bs = bs; /* Paired with .clean() */ bdrv_drained_begin(bs); @@ -1818,8 +1813,6 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) } } - state->bs = bs; - state->job = do_backup_common(qapi_DriveBackup_base(backup), bs, target_bs, aio_context, common->block_job_txn, errp); @@ -1854,7 +1847,7 @@ static void drive_backup_abort(BlkActionState *common) aio_context = bdrv_get_aio_context(state->bs); aio_context_acquire(aio_context); - job_cancel_sync(&state->job->job); + job_cancel_sync(&state->job->job, true); aio_context_release(aio_context); } @@ -1955,7 +1948,7 @@ static void blockdev_backup_abort(BlkActionState *common) aio_context = bdrv_get_aio_context(state->bs); aio_context_acquire(aio_context); - job_cancel_sync(&state->job->job); + job_cancel_sync(&state->job->job, true); aio_context_release(aio_context); } @@ -3565,46 +3558,60 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp) visit_free(v); } -void qmp_x_blockdev_reopen(BlockdevOptions *options, Error **errp) +void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) { - BlockDriverState *bs; - AioContext *ctx; - QObject *obj; - Visitor *v = qobject_output_visitor_new(&obj); - BlockReopenQueue *queue; - QDict *qdict; + BlockReopenQueue *queue = NULL; + GSList *drained = NULL; - /* Check for the selected node name */ - if (!options->has_node_name) { - error_setg(errp, "node-name not specified"); - goto fail; - } + /* Add each one of the BDS that we want to reopen to the queue */ + for (; reopen_list != NULL; reopen_list = reopen_list->next) { + BlockdevOptions *options = reopen_list->value; + BlockDriverState *bs; + AioContext *ctx; + QObject *obj; + Visitor *v; + QDict *qdict; - bs = bdrv_find_node(options->node_name); - if (!bs) { - error_setg(errp, "Failed to find node with node-name='%s'", - options->node_name); - goto fail; - } + /* Check for the selected node name */ + if (!options->has_node_name) { + error_setg(errp, "node-name not specified"); + goto fail; + } - /* Put all options in a QDict and flatten it */ - visit_type_BlockdevOptions(v, NULL, &options, &error_abort); - visit_complete(v, &obj); - qdict = qobject_to(QDict, obj); + bs = bdrv_find_node(options->node_name); + if (!bs) { + error_setg(errp, "Failed to find node with node-name='%s'", + options->node_name); + goto fail; + } - qdict_flatten(qdict); + /* Put all options in a QDict and flatten it */ + v = qobject_output_visitor_new(&obj); + visit_type_BlockdevOptions(v, NULL, &options, &error_abort); + visit_complete(v, &obj); + visit_free(v); + + qdict = qobject_to(QDict, obj); + + qdict_flatten(qdict); + + ctx = bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + + bdrv_subtree_drained_begin(bs); + queue = bdrv_reopen_queue(queue, bs, qdict, false); + drained = g_slist_prepend(drained, bs); + + aio_context_release(ctx); + } /* Perform the reopen operation */ - ctx = bdrv_get_aio_context(bs); - aio_context_acquire(ctx); - bdrv_subtree_drained_begin(bs); - queue = bdrv_reopen_queue(NULL, bs, qdict, false); bdrv_reopen_multiple(queue, errp); - bdrv_subtree_drained_end(bs); - aio_context_release(ctx); + queue = NULL; fail: - visit_free(v); + bdrv_reopen_queue_free(queue); + g_slist_free_full(drained, (GDestroyNotify) 
bdrv_subtree_drained_end); } void qmp_blockdev_del(const char *node_name, Error **errp) diff --git a/blockjob.c b/blockjob.c index 207e8c7fd91..4bad1408cb5 100644 --- a/blockjob.c +++ b/blockjob.c @@ -87,6 +87,7 @@ void block_job_free(Job *job) block_job_remove_all_bdrv(bjob); blk_unref(bjob->blk); + ratelimit_destroy(&bjob->limit); error_free(bjob->blocker); } @@ -163,6 +164,13 @@ static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx, job->job.aio_context = ctx; } +static AioContext *child_job_get_parent_aio_context(BdrvChild *c) +{ + BlockJob *job = c->opaque; + + return job->job.aio_context; +} + static const BdrvChildClass child_job = { .get_parent_desc = child_job_get_parent_desc, .drained_begin = child_job_drained_begin, @@ -171,6 +179,7 @@ static const BdrvChildClass child_job = { .can_set_aio_ctx = child_job_can_set_aio_ctx, .set_aio_ctx = child_job_set_aio_ctx, .stay_at_node = true, + .get_parent_aio_context = child_job_get_parent_aio_context, }; void block_job_remove_all_bdrv(BlockJob *job) @@ -221,8 +230,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) { aio_context_release(job->job.aio_context); } - c = bdrv_root_attach_child(bs, name, &child_job, 0, - job->job.aio_context, perm, shared_perm, job, + c = bdrv_root_attach_child(bs, name, &child_job, 0, perm, shared_perm, job, errp); if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) { aio_context_acquire(job->job.aio_context); @@ -292,28 +300,29 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n) { - if (!job->speed) { - return 0; - } - return ratelimit_calculate_delay(&job->limit, n); } BlockJobInfo *block_job_query(BlockJob *job, Error **errp) { BlockJobInfo *info; + uint64_t progress_current, progress_total; if (block_job_is_internal(job)) { error_setg(errp, "Cannot query QEMU internal jobs"); return NULL; } + + progress_get_snapshot(&job->job.progress, &progress_current, + &progress_total); + info = g_new0(BlockJobInfo, 1); info->type = g_strdup(job_type_str(&job->job)); info->device = g_strdup(job->job.id); info->busy = qatomic_read(&job->job.busy); info->paused = job->job.pause_count > 0; - info->offset = job->job.progress.current; - info->len = job->job.progress.total; + info->offset = progress_current; + info->len = progress_total; info->speed = job->speed; info->io_status = job->iostatus; info->ready = job_is_ready(&job->job), @@ -340,15 +349,19 @@ static void block_job_iostatus_set_err(BlockJob *job, int error) static void block_job_event_cancelled(Notifier *n, void *opaque) { BlockJob *job = opaque; + uint64_t progress_current, progress_total; if (block_job_is_internal(job)) { return; } + progress_get_snapshot(&job->job.progress, &progress_current, + &progress_total); + qapi_event_send_block_job_cancelled(job_type(&job->job), job->job.id, - job->job.progress.total, - job->job.progress.current, + progress_total, + progress_current, job->speed); } @@ -356,6 +369,7 @@ static void block_job_event_completed(Notifier *n, void *opaque) { BlockJob *job = opaque; const char *msg = NULL; + uint64_t progress_current, progress_total; if (block_job_is_internal(job)) { return; @@ -365,10 +379,13 @@ static void block_job_event_completed(Notifier *n, void *opaque) msg = error_get_pretty(job->job.err); } + progress_get_snapshot(&job->job.progress, &progress_current, + &progress_total); + 
qapi_event_send_block_job_completed(job_type(&job->job), job->job.id, - job->job.progress.total, - job->job.progress.current, + progress_total, + progress_current, job->speed, !!msg, msg); @@ -389,15 +406,19 @@ static void block_job_event_pending(Notifier *n, void *opaque) static void block_job_event_ready(Notifier *n, void *opaque) { BlockJob *job = opaque; + uint64_t progress_current, progress_total; if (block_job_is_internal(job)) { return; } + progress_get_snapshot(&job->job.progress, &progress_current, + &progress_total); + qapi_event_send_block_job_ready(job_type(&job->job), job->job.id, - job->job.progress.total, - job->job.progress.current, + progress_total, + progress_current, job->speed); } @@ -435,6 +456,8 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, assert(job->job.driver->free == &block_job_free); assert(job->job.driver->user_resume == &block_job_user_resume); + ratelimit_init(&job->limit); + job->blk = blk; job->finalize_cancelled_notifier.notify = block_job_event_cancelled; @@ -462,12 +485,9 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, blk_set_disable_request_queuing(blk, true); blk_set_allow_aio_context_change(blk, true); - /* Only set speed when necessary to avoid NotSupported error */ - if (speed != 0) { - if (!block_job_set_speed(job, speed, errp)) { - job_early_fail(&job->job); - return NULL; - } + if (!block_job_set_speed(job, speed, errp)) { + job_early_fail(&job->job); + return NULL; } return job; diff --git a/bsd-user/bsd-mman.h b/bsd-user/bsd-mman.h deleted file mode 100644 index 910e8c19210..00000000000 --- a/bsd-user/bsd-mman.h +++ /dev/null @@ -1,121 +0,0 @@ -/*- - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)mman.h 8.2 (Berkeley) 1/9/95 - * $FreeBSD: src/sys/sys/mman.h,v 1.42 2008/03/28 04:29:27 ps Exp $ - */ - -#define TARGET_FREEBSD_MAP_RESERVED0080 0x0080 /* previously misimplemented MAP_INHERIT */ -#define TARGET_FREEBSD_MAP_RESERVED0100 0x0100 /* previously unimplemented MAP_NOEXTEND */ -#define TARGET_FREEBSD_MAP_STACK 0x0400 /* region grows down, like a stack */ -#define TARGET_FREEBSD_MAP_NOSYNC 0x0800 /* page to but do not sync underlying file */ - -#define TARGET_FREEBSD_MAP_FLAGMASK 0x1ff7 - -/* $NetBSD: mman.h,v 1.42 2008/11/18 22:13:49 ad Exp $ */ - -/*- - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)mman.h 8.2 (Berkeley) 1/9/95 - */ -#define TARGET_NETBSD_MAP_INHERIT 0x0080 /* region is retained after exec */ -#define TARGET_NETBSD_MAP_TRYFIXED 0x0400 /* attempt hint address, even within break */ -#define TARGET_NETBSD_MAP_WIRED 0x0800 /* mlock() mapping when it is established */ - -#define TARGET_NETBSD_MAP_STACK 0x2000 /* allocated from memory, swap space (stack) */ - -#define TARGET_NETBSD_MAP_FLAGMASK 0x3ff7 - -/* $OpenBSD: mman.h,v 1.18 2003/07/21 22:52:19 tedu Exp $ */ -/* $NetBSD: mman.h,v 1.11 1995/03/26 20:24:23 jtc Exp $ */ - -/*- - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)mman.h 8.1 (Berkeley) 6/2/93 - */ - -#define TARGET_OPENBSD_MAP_INHERIT 0x0080 /* region is retained after exec */ -#define TARGET_OPENBSD_MAP_NOEXTEND 0x0100 /* for MAP_FILE, don't change file size */ -#define TARGET_OPENBSD_MAP_TRYFIXED 0x0400 /* attempt hint address, even within heap */ - -#define TARGET_OPENBSD_MAP_FLAGMASK 0x17f7 - -// XXX -#define TARGET_BSD_MAP_FLAGMASK 0x3ff7 diff --git a/bsd-user/bsdload.c b/bsd-user/bsdload.c index f38c4faacf8..5b3c061a452 100644 --- a/bsd-user/bsdload.c +++ b/bsd-user/bsdload.c @@ -1,11 +1,24 @@ -/* Code for loading BSD executables. Mostly linux kernel code. */ +/* + * Load BSD executables. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ #include "qemu/osdep.h" #include "qemu.h" -#define TARGET_NGROUPS 32 - /* ??? This should really be somewhere else. */ abi_long memcpy_to_target(abi_ulong dest, const void *src, unsigned long len) @@ -13,47 +26,48 @@ abi_long memcpy_to_target(abi_ulong dest, const void *src, void *host_ptr; host_ptr = lock_user(VERIFY_WRITE, dest, len, 0); - if (!host_ptr) + if (!host_ptr) { return -TARGET_EFAULT; + } memcpy(host_ptr, src, len); unlock_user(host_ptr, dest, 1); return 0; } -static int count(char ** vec) +static int count(char **vec) { int i; - for(i = 0; *vec; i++) { + for (i = 0; *vec; i++) { vec++; } - return(i); + return i; } -static int prepare_binprm(struct linux_binprm *bprm) +static int prepare_binprm(struct bsd_binprm *bprm) { struct stat st; int mode; int retval; - if(fstat(bprm->fd, &st) < 0) { - return(-errno); + if (fstat(bprm->fd, &st) < 0) { + return -errno; } mode = st.st_mode; - if(!S_ISREG(mode)) { /* Must be regular file */ - return(-EACCES); + if (!S_ISREG(mode)) { /* Must be regular file */ + return -EACCES; } - if(!(mode & 0111)) { /* Must have at least one execute bit set */ - return(-EACCES); + if (!(mode & 0111)) { /* Must have at least one execute bit set */ + return -EACCES; } bprm->e_uid = geteuid(); bprm->e_gid = getegid(); /* Set-uid? 
*/ - if(mode & S_ISUID) { + if (mode & S_ISUID) { bprm->e_uid = st.st_uid; } @@ -69,22 +83,20 @@ static int prepare_binprm(struct linux_binprm *bprm) memset(bprm->buf, 0, sizeof(bprm->buf)); retval = lseek(bprm->fd, 0L, SEEK_SET); - if(retval >= 0) { + if (retval >= 0) { retval = read(bprm->fd, bprm->buf, 128); } - if(retval < 0) { + if (retval < 0) { perror("prepare_binprm"); exit(-1); - /* return(-errno); */ - } - else { - return(retval); + } else { + return retval; } } /* Construct the envp and argv tables on the target stack. */ abi_ulong loader_build_argptr(int envc, int argc, abi_ulong sp, - abi_ulong stringp, int push_ptr) + abi_ulong stringp) { int n = sizeof(abi_ulong); abi_ulong envp; @@ -94,13 +106,6 @@ abi_ulong loader_build_argptr(int envc, int argc, abi_ulong sp, envp = sp; sp -= (argc + 1) * n; argv = sp; - if (push_ptr) { - /* FIXME - handle put_user() failures */ - sp -= n; - put_user_ual(envp, sp); - sp -= n; - put_user_ual(argv, sp); - } sp -= n; /* FIXME - handle put_user() failures */ put_user_ual(argc, sp); @@ -125,49 +130,85 @@ abi_ulong loader_build_argptr(int envc, int argc, abi_ulong sp, return sp; } -int loader_exec(const char * filename, char ** argv, char ** envp, - struct target_pt_regs * regs, struct image_info *infop) +static bool is_there(const char *candidate) { - struct linux_binprm bprm; - int retval; - int i; + struct stat fin; + + /* XXX work around access(2) false positives for superuser */ + if (access(candidate, X_OK) == 0 && stat(candidate, &fin) == 0 && + S_ISREG(fin.st_mode) && (getuid() != 0 || + (fin.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0)) { + return true; + } + + return false; +} + +int loader_exec(const char *filename, char **argv, char **envp, + struct target_pt_regs *regs, struct image_info *infop, + struct bsd_binprm *bprm) +{ + char *path, fullpath[PATH_MAX]; + int retval, i; - bprm.p = TARGET_PAGE_SIZE*MAX_ARG_PAGES-sizeof(unsigned int); - for (i=0 ; ip = TARGET_PAGE_SIZE * MAX_ARG_PAGES; + for (i = 0; i < MAX_ARG_PAGES; i++) { /* clear page-table */ + bprm->page[i] = NULL; + } + + if (strchr(filename, '/') != NULL) { + path = realpath(filename, fullpath); + if (path == NULL) { + /* Failed to resolve. */ + return -1; + } + if (!is_there(path)) { + return -1; + } + } else { + path = g_find_program_in_path(filename); + if (path == NULL) { + return -1; + } + } + + retval = open(path, O_RDONLY); + if (retval < 0) { + g_free(path); return retval; - bprm.fd = retval; - bprm.filename = (char *)filename; - bprm.argc = count(argv); - bprm.argv = argv; - bprm.envc = count(envp); - bprm.envp = envp; - - retval = prepare_binprm(&bprm); - - if(retval>=0) { - if (bprm.buf[0] == 0x7f - && bprm.buf[1] == 'E' - && bprm.buf[2] == 'L' - && bprm.buf[3] == 'F') { - retval = load_elf_binary(&bprm,regs,infop); + } + + bprm->fullpath = path; + bprm->fd = retval; + bprm->filename = (char *)filename; + bprm->argc = count(argv); + bprm->argv = argv; + bprm->envc = count(envp); + bprm->envp = envp; + + retval = prepare_binprm(bprm); + + if (retval >= 0) { + if (bprm->buf[0] == 0x7f + && bprm->buf[1] == 'E' + && bprm->buf[2] == 'L' + && bprm->buf[3] == 'F') { + retval = load_elf_binary(bprm, regs, infop); } else { fprintf(stderr, "Unknown binary format\n"); return -1; } } - if(retval>=0) { + if (retval >= 0) { /* success. 
Initialize important registers */ do_init_thread(regs, infop); return retval; } /* Something went wrong, return the inode and free the argument pages*/ - for (i=0 ; ipage[i]); } - return(retval); + return retval; } diff --git a/bsd-user/elfcore.c b/bsd-user/elfcore.c new file mode 100644 index 00000000000..c49d9280e2d --- /dev/null +++ b/bsd-user/elfcore.c @@ -0,0 +1,10 @@ +/* Stubbed out version of core dump support, explicitly in public domain */ + +static int elf_core_dump(int signr, CPUArchState *env) +{ + struct elf_note en = { 0 }; + + bswap_note(&en); + + return 0; +} diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c index 5f4d824d78f..142a5bfac26 100644 --- a/bsd-user/elfload.c +++ b/bsd-user/elfload.c @@ -1,563 +1,53 @@ -/* This is the Linux kernel elf-loading code, ported into user space */ - -#include "qemu/osdep.h" - -#include "qemu.h" -#include "disas/disas.h" -#include "qemu/path.h" - -#ifdef _ARCH_PPC64 -#undef ARCH_DLINFO -#undef ELF_PLATFORM -#undef ELF_HWCAP -#undef ELF_CLASS -#undef ELF_DATA -#undef ELF_ARCH -#endif - -/* from personality.h */ - /* - * Flags for bug emulation. + * ELF loading code * - * These occupy the top three bytes. - */ -enum { - ADDR_NO_RANDOMIZE = 0x0040000, /* disable randomization of VA space */ - FDPIC_FUNCPTRS = 0x0080000, /* userspace function ptrs point to descriptors - * (signal handling) - */ - MMAP_PAGE_ZERO = 0x0100000, - ADDR_COMPAT_LAYOUT = 0x0200000, - READ_IMPLIES_EXEC = 0x0400000, - ADDR_LIMIT_32BIT = 0x0800000, - SHORT_INODE = 0x1000000, - WHOLE_SECONDS = 0x2000000, - STICKY_TIMEOUTS = 0x4000000, - ADDR_LIMIT_3GB = 0x8000000, -}; - -/* - * Personality types. + * Copyright (c) 2013 Stacey D. Son * - * These go in the low byte. Avoid using the top bit, it will - * conflict with error returns. - */ -enum { - PER_LINUX = 0x0000, - PER_LINUX_32BIT = 0x0000 | ADDR_LIMIT_32BIT, - PER_LINUX_FDPIC = 0x0000 | FDPIC_FUNCPTRS, - PER_SVR4 = 0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO, - PER_SVR3 = 0x0002 | STICKY_TIMEOUTS | SHORT_INODE, - PER_SCOSVR3 = 0x0003 | STICKY_TIMEOUTS | - WHOLE_SECONDS | SHORT_INODE, - PER_OSR5 = 0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS, - PER_WYSEV386 = 0x0004 | STICKY_TIMEOUTS | SHORT_INODE, - PER_ISCR4 = 0x0005 | STICKY_TIMEOUTS, - PER_BSD = 0x0006, - PER_SUNOS = 0x0006 | STICKY_TIMEOUTS, - PER_XENIX = 0x0007 | STICKY_TIMEOUTS | SHORT_INODE, - PER_LINUX32 = 0x0008, - PER_LINUX32_3GB = 0x0008 | ADDR_LIMIT_3GB, - PER_IRIX32 = 0x0009 | STICKY_TIMEOUTS,/* IRIX5 32-bit */ - PER_IRIXN32 = 0x000a | STICKY_TIMEOUTS,/* IRIX6 new 32-bit */ - PER_IRIX64 = 0x000b | STICKY_TIMEOUTS,/* IRIX6 64-bit */ - PER_RISCOS = 0x000c, - PER_SOLARIS = 0x000d | STICKY_TIMEOUTS, - PER_UW7 = 0x000e | STICKY_TIMEOUTS | MMAP_PAGE_ZERO, - PER_OSF4 = 0x000f, /* OSF/1 v4 */ - PER_HPUX = 0x0010, - PER_MASK = 0x00ff, -}; - -/* - * Return the base personality without flags. 
- */ -#define personality(pers) (pers & PER_MASK) - -/* this flag is uneffective under linux too, should be deleted */ -#ifndef MAP_DENYWRITE -#define MAP_DENYWRITE 0 -#endif - -/* should probably go in elf.h */ -#ifndef ELIBBAD -#define ELIBBAD 80 -#endif - -#ifdef TARGET_I386 - -#define ELF_PLATFORM get_elf_platform() - -static const char *get_elf_platform(void) -{ - static char elf_platform[] = "i386"; - int family = object_property_get_int(OBJECT(thread_cpu), "family", NULL); - if (family > 6) - family = 6; - if (family >= 3) - elf_platform[1] = '0' + family; - return elf_platform; -} - -#define ELF_HWCAP get_elf_hwcap() - -static uint32_t get_elf_hwcap(void) -{ - X86CPU *cpu = X86_CPU(thread_cpu); - - return cpu->env.features[FEAT_1_EDX]; -} - -#ifdef TARGET_X86_64 -#define ELF_START_MMAP 0x2aaaaab000ULL -#define elf_check_arch(x) ( ((x) == ELF_ARCH) ) - -#define ELF_CLASS ELFCLASS64 -#define ELF_DATA ELFDATA2LSB -#define ELF_ARCH EM_X86_64 - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - regs->rax = 0; - regs->rsp = infop->start_stack; - regs->rip = infop->entry; - if (bsd_type == target_freebsd) { - regs->rdi = infop->start_stack; - } -} - -#else - -#define ELF_START_MMAP 0x80000000 - -/* - * This is used to ensure we don't load something for the wrong architecture. - */ -#define elf_check_arch(x) ( ((x) == EM_386) || ((x) == EM_486) ) - -/* - * These are used to set parameters in the core dumps. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . */ -#define ELF_CLASS ELFCLASS32 -#define ELF_DATA ELFDATA2LSB -#define ELF_ARCH EM_386 - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - regs->esp = infop->start_stack; - regs->eip = infop->entry; - - /* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program - starts %edx contains a pointer to a function which might be - registered using `atexit'. This provides a mean for the - dynamic linker to call DT_FINI functions for shared libraries - that have been loaded before the code runs. - - A value of 0 tells we have no such handler. */ - regs->edx = 0; -} -#endif - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 4096 - -#endif - -#ifdef TARGET_ARM - -#define ELF_START_MMAP 0x80000000 -#define elf_check_arch(x) ( (x) == EM_ARM ) - -#define ELF_CLASS ELFCLASS32 -#ifdef TARGET_WORDS_BIGENDIAN -#define ELF_DATA ELFDATA2MSB -#else -#define ELF_DATA ELFDATA2LSB -#endif -#define ELF_ARCH EM_ARM - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - abi_long stack = infop->start_stack; - memset(regs, 0, sizeof(*regs)); - regs->ARM_cpsr = 0x10; - if (infop->entry & 1) - regs->ARM_cpsr |= CPSR_T; - regs->ARM_pc = infop->entry & 0xfffffffe; - regs->ARM_sp = infop->start_stack; - /* FIXME - what to for failure of get_user()? 
*/ - get_user_ual(regs->ARM_r2, stack + 8); /* envp */ - get_user_ual(regs->ARM_r1, stack + 4); /* envp */ - /* XXX: it seems that r0 is zeroed after ! */ - regs->ARM_r0 = 0; - /* For uClinux PIC binaries. */ - /* XXX: Linux does this only on ARM with no MMU (do we care ?) */ - regs->ARM_r10 = infop->start_data; -} - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 4096 - -enum -{ - ARM_HWCAP_ARM_SWP = 1 << 0, - ARM_HWCAP_ARM_HALF = 1 << 1, - ARM_HWCAP_ARM_THUMB = 1 << 2, - ARM_HWCAP_ARM_26BIT = 1 << 3, - ARM_HWCAP_ARM_FAST_MULT = 1 << 4, - ARM_HWCAP_ARM_FPA = 1 << 5, - ARM_HWCAP_ARM_VFP = 1 << 6, - ARM_HWCAP_ARM_EDSP = 1 << 7, -}; - -#define ELF_HWCAP (ARM_HWCAP_ARM_SWP | ARM_HWCAP_ARM_HALF \ - | ARM_HWCAP_ARM_THUMB | ARM_HWCAP_ARM_FAST_MULT \ - | ARM_HWCAP_ARM_FPA | ARM_HWCAP_ARM_VFP) - -#endif - -#ifdef TARGET_SPARC -#ifdef TARGET_SPARC64 - -#define ELF_START_MMAP 0x80000000 - -#ifndef TARGET_ABI32 -#define elf_check_arch(x) ( (x) == EM_SPARCV9 || (x) == EM_SPARC32PLUS ) -#else -#define elf_check_arch(x) ( (x) == EM_SPARC32PLUS || (x) == EM_SPARC ) -#endif - -#define ELF_CLASS ELFCLASS64 -#define ELF_DATA ELFDATA2MSB -#define ELF_ARCH EM_SPARCV9 - -#define STACK_BIAS 2047 - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ -#ifndef TARGET_ABI32 - regs->tstate = 0; -#endif - regs->pc = infop->entry; - regs->npc = regs->pc + 4; - regs->y = 0; -#ifdef TARGET_ABI32 - regs->u_regs[14] = infop->start_stack - 16 * 4; -#else - if (personality(infop->personality) == PER_LINUX32) - regs->u_regs[14] = infop->start_stack - 16 * 4; - else { - regs->u_regs[14] = infop->start_stack - 16 * 8 - STACK_BIAS; - if (bsd_type == target_freebsd) { - regs->u_regs[8] = infop->start_stack; - regs->u_regs[11] = infop->start_stack; - } - } -#endif -} - -#else -#define ELF_START_MMAP 0x80000000 - -#define elf_check_arch(x) ( (x) == EM_SPARC ) - -#define ELF_CLASS ELFCLASS32 -#define ELF_DATA ELFDATA2MSB -#define ELF_ARCH EM_SPARC - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - regs->psr = 0; - regs->pc = infop->entry; - regs->npc = regs->pc + 4; - regs->y = 0; - regs->u_regs[14] = infop->start_stack - 16 * 4; -} - -#endif -#endif - -#ifdef TARGET_PPC - -#define ELF_START_MMAP 0x80000000 - -#if defined(TARGET_PPC64) && !defined(TARGET_ABI32) - -#define elf_check_arch(x) ( (x) == EM_PPC64 ) - -#define ELF_CLASS ELFCLASS64 - -#else - -#define elf_check_arch(x) ( (x) == EM_PPC ) - -#define ELF_CLASS ELFCLASS32 - -#endif - -#ifdef TARGET_WORDS_BIGENDIAN -#define ELF_DATA ELFDATA2MSB -#else -#define ELF_DATA ELFDATA2LSB -#endif -#define ELF_ARCH EM_PPC - -/* - * We need to put in some extra aux table entries to tell glibc what - * the cache block size is, so it can use the dcbz instruction safely. - */ -#define AT_DCACHEBSIZE 19 -#define AT_ICACHEBSIZE 20 -#define AT_UCACHEBSIZE 21 -/* A special ignored type value for PPC, for glibc compatibility. */ -#define AT_IGNOREPPC 22 -/* - * The requirements here are: - * - keep the final alignment of sp (sp & 0xf) - * - make sure the 32-bit value at the first 16 byte aligned position of - * AUXV is greater than 16 for glibc compatibility. - * AT_IGNOREPPC is used for that. - * - for compatibility with glibc ARCH_DLINFO must always be defined on PPC, - * even if DLINFO_ARCH_ITEMS goes to zero or is undefined. 
- */ -#define DLINFO_ARCH_ITEMS 5 -#define ARCH_DLINFO \ -do { \ - NEW_AUX_ENT(AT_DCACHEBSIZE, 0x20); \ - NEW_AUX_ENT(AT_ICACHEBSIZE, 0x20); \ - NEW_AUX_ENT(AT_UCACHEBSIZE, 0); \ - /* \ - * Now handle glibc compatibility. \ - */ \ - NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \ - NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \ - } while (0) - -static inline void init_thread(struct target_pt_regs *_regs, struct image_info *infop) -{ - abi_ulong pos = infop->start_stack; - abi_ulong tmp; -#if defined(TARGET_PPC64) && !defined(TARGET_ABI32) - abi_ulong entry, toc; -#endif - - _regs->gpr[1] = infop->start_stack; -#if defined(TARGET_PPC64) && !defined(TARGET_ABI32) - get_user_u64(entry, infop->entry); - entry += infop->load_addr; - get_user_u64(toc, infop->entry + 8); - toc += infop->load_addr; - _regs->gpr[2] = toc; - infop->entry = entry; -#endif - _regs->nip = infop->entry; - /* Note that isn't exactly what regular kernel does - * but this is what the ABI wants and is needed to allow - * execution of PPC BSD programs. - */ - /* FIXME - what to for failure of get_user()? */ - get_user_ual(_regs->gpr[3], pos); - pos += sizeof(abi_ulong); - _regs->gpr[4] = pos; - for (tmp = 1; tmp != 0; pos += sizeof(abi_ulong)) { - get_user_ual(tmp, pos); - } - _regs->gpr[5] = pos; -} - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 4096 - -#endif - -#ifdef TARGET_MIPS - -#define ELF_START_MMAP 0x80000000 - -#define elf_check_arch(x) ( (x) == EM_MIPS ) - -#ifdef TARGET_MIPS64 -#define ELF_CLASS ELFCLASS64 -#else -#define ELF_CLASS ELFCLASS32 -#endif -#ifdef TARGET_WORDS_BIGENDIAN -#define ELF_DATA ELFDATA2MSB -#else -#define ELF_DATA ELFDATA2LSB -#endif -#define ELF_ARCH EM_MIPS - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - regs->cp0_status = 2 << CP0St_KSU; - regs->cp0_epc = infop->entry; - regs->regs[29] = infop->start_stack; -} - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 4096 - -#endif /* TARGET_MIPS */ - -#ifdef TARGET_SH4 - -#define ELF_START_MMAP 0x80000000 - -#define elf_check_arch(x) ( (x) == EM_SH ) - -#define ELF_CLASS ELFCLASS32 -#define ELF_DATA ELFDATA2LSB -#define ELF_ARCH EM_SH - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - /* Check other registers XXXXX */ - regs->pc = infop->entry; - regs->regs[15] = infop->start_stack; -} - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 4096 - -#endif - -#ifdef TARGET_CRIS - -#define ELF_START_MMAP 0x80000000 - -#define elf_check_arch(x) ( (x) == EM_CRIS ) - -#define ELF_CLASS ELFCLASS32 -#define ELF_DATA ELFDATA2LSB -#define ELF_ARCH EM_CRIS - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - regs->erp = infop->entry; -} - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 8192 - -#endif - -#ifdef TARGET_M68K - -#define ELF_START_MMAP 0x80000000 - -#define elf_check_arch(x) ( (x) == EM_68K ) - -#define ELF_CLASS ELFCLASS32 -#define ELF_DATA ELFDATA2MSB -#define ELF_ARCH EM_68K - -/* ??? Does this need to do anything? 
-#define ELF_PLAT_INIT(_r) */ - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - regs->usp = infop->start_stack; - regs->sr = 0; - regs->pc = infop->entry; -} - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 8192 - -#endif - -#ifdef TARGET_ALPHA - -#define ELF_START_MMAP (0x30000000000ULL) - -#define elf_check_arch(x) ( (x) == ELF_ARCH ) - -#define ELF_CLASS ELFCLASS64 -#define ELF_DATA ELFDATA2MSB -#define ELF_ARCH EM_ALPHA - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - regs->pc = infop->entry; - regs->ps = 8; - regs->usp = infop->start_stack; - regs->unique = infop->start_data; /* ? */ - printf("Set unique value to " TARGET_FMT_lx " (" TARGET_FMT_lx ")\n", - regs->unique, infop->start_data); -} - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 8192 - -#endif /* TARGET_ALPHA */ - -#ifndef ELF_PLATFORM -#define ELF_PLATFORM (NULL) -#endif - -#ifndef ELF_HWCAP -#define ELF_HWCAP 0 -#endif - -#ifdef TARGET_ABI32 -#undef ELF_CLASS -#define ELF_CLASS ELFCLASS32 -#undef bswaptls -#define bswaptls(ptr) bswap32s(ptr) -#endif - -#include "elf.h" - -struct exec -{ - unsigned int a_info; /* Use macros N_MAGIC, etc for access */ - unsigned int a_text; /* length of text, in bytes */ - unsigned int a_data; /* length of data, in bytes */ - unsigned int a_bss; /* length of uninitialized data area, in bytes */ - unsigned int a_syms; /* length of symbol table data in file, in bytes */ - unsigned int a_entry; /* start address */ - unsigned int a_trsize; /* length of relocation info for text, in bytes */ - unsigned int a_drsize; /* length of relocation info for data, in bytes */ -}; - - -#define N_MAGIC(exec) ((exec).a_info & 0xffff) -#define OMAGIC 0407 -#define NMAGIC 0410 -#define ZMAGIC 0413 -#define QMAGIC 0314 +#include "qemu/osdep.h" -/* max code+data+bss space allocated to elf interpreter */ -#define INTERP_MAP_SIZE (32 * 1024 * 1024) +#include "qemu.h" +#include "disas/disas.h" +#include "qemu/path.h" -/* max code+data+bss+brk space allocated to ET_DYN executables */ -#define ET_DYN_MAP_SIZE (128 * 1024 * 1024) +static abi_ulong target_auxents; /* Where the AUX entries are in target */ +static size_t target_auxents_sz; /* Size of AUX entries including AT_NULL */ -/* Necessary parameters */ -#define TARGET_ELF_EXEC_PAGESIZE TARGET_PAGE_SIZE -#define TARGET_ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(TARGET_ELF_EXEC_PAGESIZE-1)) -#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE-1)) +#include "target_arch_reg.h" +#include "target_os_elf.h" +#include "target_os_stack.h" +#include "target_os_thread.h" +#include "target_os_user.h" -#define INTERPRETER_NONE 0 -#define INTERPRETER_AOUT 1 -#define INTERPRETER_ELF 2 +abi_ulong target_stksiz; +abi_ulong target_stkbas; -#define DLINFO_ITEMS 12 +static int elf_core_dump(int signr, CPUArchState *env); +static int load_elf_sections(const struct elfhdr *hdr, struct elf_phdr *phdr, + int fd, abi_ulong rbase, abi_ulong *baddrp); -static inline void memcpy_fromfs(void * to, const void * from, unsigned long n) +static inline void memcpy_fromfs(void *to, const void *from, unsigned long n) { - memcpy(to, from, n); + memcpy(to, from, n); } -static int load_aout_interp(void * exptr, int interp_fd); - #ifdef BSWAP_NEEDED static void bswap_ehdr(struct elfhdr *ehdr) { - bswap16s(&ehdr->e_type); /* Object file type */ + bswap16s(&ehdr->e_type); /* Object file type */ bswap16s(&ehdr->e_machine); /* Architecture */ bswap32s(&ehdr->e_version); /* Object file 
version */ bswaptls(&ehdr->e_entry); /* Entry point virtual address */ @@ -565,37 +55,45 @@ static void bswap_ehdr(struct elfhdr *ehdr) bswaptls(&ehdr->e_shoff); /* Section header table file offset */ bswap32s(&ehdr->e_flags); /* Processor-specific flags */ bswap16s(&ehdr->e_ehsize); /* ELF header size in bytes */ - bswap16s(&ehdr->e_phentsize); /* Program header table entry size */ + bswap16s(&ehdr->e_phentsize); /* Program header table entry size */ bswap16s(&ehdr->e_phnum); /* Program header table entry count */ - bswap16s(&ehdr->e_shentsize); /* Section header table entry size */ + bswap16s(&ehdr->e_shentsize); /* Section header table entry size */ bswap16s(&ehdr->e_shnum); /* Section header table entry count */ - bswap16s(&ehdr->e_shstrndx); /* Section header string table index */ + bswap16s(&ehdr->e_shstrndx); /* Section header string table index */ } -static void bswap_phdr(struct elf_phdr *phdr) +static void bswap_phdr(struct elf_phdr *phdr, int phnum) { - bswap32s(&phdr->p_type); /* Segment type */ - bswaptls(&phdr->p_offset); /* Segment file offset */ - bswaptls(&phdr->p_vaddr); /* Segment virtual address */ - bswaptls(&phdr->p_paddr); /* Segment physical address */ - bswaptls(&phdr->p_filesz); /* Segment size in file */ - bswaptls(&phdr->p_memsz); /* Segment size in memory */ - bswap32s(&phdr->p_flags); /* Segment flags */ - bswaptls(&phdr->p_align); /* Segment alignment */ + int i; + + for (i = 0; i < phnum; i++, phdr++) { + bswap32s(&phdr->p_type); /* Segment type */ + bswap32s(&phdr->p_flags); /* Segment flags */ + bswaptls(&phdr->p_offset); /* Segment file offset */ + bswaptls(&phdr->p_vaddr); /* Segment virtual address */ + bswaptls(&phdr->p_paddr); /* Segment physical address */ + bswaptls(&phdr->p_filesz); /* Segment size in file */ + bswaptls(&phdr->p_memsz); /* Segment size in memory */ + bswaptls(&phdr->p_align); /* Segment alignment */ + } } -static void bswap_shdr(struct elf_shdr *shdr) +static void bswap_shdr(struct elf_shdr *shdr, int shnum) { - bswap32s(&shdr->sh_name); - bswap32s(&shdr->sh_type); - bswaptls(&shdr->sh_flags); - bswaptls(&shdr->sh_addr); - bswaptls(&shdr->sh_offset); - bswaptls(&shdr->sh_size); - bswap32s(&shdr->sh_link); - bswap32s(&shdr->sh_info); - bswaptls(&shdr->sh_addralign); - bswaptls(&shdr->sh_entsize); + int i; + + for (i = 0; i < shnum; i++, shdr++) { + bswap32s(&shdr->sh_name); + bswap32s(&shdr->sh_type); + bswaptls(&shdr->sh_flags); + bswaptls(&shdr->sh_addr); + bswaptls(&shdr->sh_offset); + bswaptls(&shdr->sh_size); + bswap32s(&shdr->sh_link); + bswap32s(&shdr->sh_info); + bswaptls(&shdr->sh_addralign); + bswaptls(&shdr->sh_entsize); + } } static void bswap_sym(struct elf_sym *sym) @@ -605,7 +103,25 @@ static void bswap_sym(struct elf_sym *sym) bswaptls(&sym->st_size); bswap16s(&sym->st_shndx); } -#endif + +static void bswap_note(struct elf_note *en) +{ + bswap32s(&en->n_namesz); + bswap32s(&en->n_descsz); + bswap32s(&en->n_type); +} + +#else /* ! BSWAP_NEEDED */ + +static void bswap_ehdr(struct elfhdr *ehdr) { } +static void bswap_phdr(struct elf_phdr *phdr, int phnum) { } +static void bswap_shdr(struct elf_shdr *shdr, int shnum) { } +static void bswap_sym(struct elf_sym *sym) { } +static void bswap_note(struct elf_note *en) { } + +#endif /* ! BSWAP_NEEDED */ + +#include "elfcore.c" /* * 'copy_elf_strings()' copies argument/envelope strings from user @@ -613,7 +129,7 @@ static void bswap_sym(struct elf_sym *sym) * to be put directly into the top of new user memory. 
* */ -static abi_ulong copy_elf_strings(int argc,char ** argv, void **page, +static abi_ulong copy_elf_strings(int argc, char **argv, void **page, abi_ulong p) { char *tmp, *tmp1, *pag = NULL; @@ -629,27 +145,29 @@ static abi_ulong copy_elf_strings(int argc,char ** argv, void **page, exit(-1); } tmp1 = tmp; - while (*tmp++); + while (*tmp++) { + continue; + } len = tmp - tmp1; if (p < len) { /* this shouldn't happen - 128kB */ - return 0; + return 0; } while (len) { --p; --tmp; --len; if (--offset < 0) { offset = p % TARGET_PAGE_SIZE; - pag = (char *)page[p/TARGET_PAGE_SIZE]; + pag = (char *)page[p / TARGET_PAGE_SIZE]; if (!pag) { pag = g_try_malloc0(TARGET_PAGE_SIZE); - page[p/TARGET_PAGE_SIZE] = pag; - if (!pag) + page[p / TARGET_PAGE_SIZE] = pag; + if (!pag) { return 0; + } } } if (len == 0 || offset == 0) { *(pag + offset) = *tmp; - } - else { + } else { int bytes_to_copy = (len > offset) ? offset : len; tmp -= bytes_to_copy; p -= bytes_to_copy; @@ -662,331 +180,174 @@ static abi_ulong copy_elf_strings(int argc,char ** argv, void **page, return p; } -static abi_ulong setup_arg_pages(abi_ulong p, struct linux_binprm *bprm, - struct image_info *info) +static void setup_arg_pages(struct bsd_binprm *bprm, struct image_info *info, + abi_ulong *stackp, abi_ulong *stringp) { - abi_ulong stack_base, size, error; - int i; + abi_ulong stack_base, size; + abi_long addr; - /* Create enough stack to hold everything. If we don't use - * it for args, we'll use it for something else... + /* + * Create enough stack to hold everything. If we don't use it for args, + * we'll use it for something else... */ - size = x86_stack_size; - if (size < MAX_ARG_PAGES*TARGET_PAGE_SIZE) - size = MAX_ARG_PAGES*TARGET_PAGE_SIZE; - error = target_mmap(0, - size + qemu_host_page_size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, - -1, 0); - if (error == -1) { + size = target_dflssiz; + stack_base = TARGET_USRSTACK - size; + addr = target_mmap(stack_base , size + qemu_host_page_size, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (addr == -1) { perror("stk mmap"); exit(-1); } /* we reserve one extra page at the top of the stack as guard */ - target_mprotect(error + size, qemu_host_page_size, PROT_NONE); + target_mprotect(addr + size, qemu_host_page_size, PROT_NONE); - stack_base = error + size - MAX_ARG_PAGES*TARGET_PAGE_SIZE; - p += stack_base; + target_stksiz = size; + target_stkbas = addr; - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { - if (bprm->page[i]) { - info->rss++; - /* FIXME - check return value of memcpy_to_target() for failure */ - memcpy_to_target(stack_base, bprm->page[i], TARGET_PAGE_SIZE); - g_free(bprm->page[i]); - } - stack_base += TARGET_PAGE_SIZE; + if (setup_initial_stack(bprm, stackp, stringp) != 0) { + perror("stk setup"); + exit(-1); } - return p; } static void set_brk(abi_ulong start, abi_ulong end) { - /* page-align the start and end addresses... */ - start = HOST_PAGE_ALIGN(start); - end = HOST_PAGE_ALIGN(end); - if (end <= start) - return; - if(target_mmap(start, end - start, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == -1) { - perror("cannot mmap brk"); - exit(-1); - } + /* page-align the start and end addresses... 
*/ + start = HOST_PAGE_ALIGN(start); + end = HOST_PAGE_ALIGN(end); + if (end <= start) { + return; + } + if (target_mmap(start, end - start, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == -1) { + perror("cannot mmap brk"); + exit(-1); + } } -/* We need to explicitly zero any fractional pages after the data - section (i.e. bss). This would contain the junk from the file that - should not be in memory. */ +/* + * We need to explicitly zero any fractional pages after the data + * section (i.e. bss). This would contain the junk from the file that + * should not be in memory. + */ static void padzero(abi_ulong elf_bss, abi_ulong last_bss) { - abi_ulong nbyte; - - if (elf_bss >= last_bss) - return; - - /* XXX: this is really a hack : if the real host page size is - smaller than the target page size, some pages after the end - of the file may not be mapped. A better fix would be to - patch target_mmap(), but it is more complicated as the file - size must be known */ - if (qemu_real_host_page_size < qemu_host_page_size) { - abi_ulong end_addr, end_addr1; - end_addr1 = REAL_HOST_PAGE_ALIGN(elf_bss); - end_addr = HOST_PAGE_ALIGN(elf_bss); - if (end_addr1 < end_addr) { - mmap((void *)g2h_untagged(end_addr1), end_addr - end_addr1, - PROT_READ|PROT_WRITE|PROT_EXEC, - MAP_FIXED|MAP_PRIVATE|MAP_ANON, -1, 0); - } - } - - nbyte = elf_bss & (qemu_host_page_size-1); - if (nbyte) { - nbyte = qemu_host_page_size - nbyte; - do { - /* FIXME - what to do if put_user() fails? */ - put_user_u8(0, elf_bss); - elf_bss++; - } while (--nbyte); - } -} + abi_ulong nbyte; + if (elf_bss >= last_bss) { + return; + } -static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, - struct elfhdr * exec, - abi_ulong load_addr, - abi_ulong load_bias, - abi_ulong interp_load_addr, int ibcs, - struct image_info *info) -{ - abi_ulong sp; - int size; - abi_ulong u_platform; - const char *k_platform; - const int n = sizeof(elf_addr_t); - - sp = p; - u_platform = 0; - k_platform = ELF_PLATFORM; - if (k_platform) { - size_t len = strlen(k_platform) + 1; - sp -= (len + n - 1) & ~(n - 1); - u_platform = sp; - /* FIXME - check return value of memcpy_to_target() for failure */ - memcpy_to_target(sp, k_platform, len); + /* + * XXX: this is really a hack : if the real host page size is + * smaller than the target page size, some pages after the end + * of the file may not be mapped. A better fix would be to + * patch target_mmap(), but it is more complicated as the file + * size must be known. + */ + if (qemu_real_host_page_size < qemu_host_page_size) { + abi_ulong end_addr, end_addr1; + end_addr1 = REAL_HOST_PAGE_ALIGN(elf_bss); + end_addr = HOST_PAGE_ALIGN(elf_bss); + if (end_addr1 < end_addr) { + mmap((void *)g2h_untagged(end_addr1), end_addr - end_addr1, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0); } - /* - * Force 16 byte _final_ alignment here for generality. - */ - sp = sp &~ (abi_ulong)15; - size = (DLINFO_ITEMS + 1) * 2; - if (k_platform) - size += 2; -#ifdef DLINFO_ARCH_ITEMS - size += DLINFO_ARCH_ITEMS * 2; -#endif - size += envc + argc + 2; - size += (!ibcs ? 3 : 1); /* argc itself */ - size *= n; - if (size & 15) - sp -= 16 - (size & 15); - - /* This is correct because Linux defines - * elf_addr_t as Elf32_Off / Elf64_Off - */ -#define NEW_AUX_ENT(id, val) do { \ - sp -= n; put_user_ual(val, sp); \ - sp -= n; put_user_ual(id, sp); \ - } while(0) - - NEW_AUX_ENT (AT_NULL, 0); - - /* There must be exactly DLINFO_ITEMS entries here. 
*/ - NEW_AUX_ENT(AT_PHDR, (abi_ulong)(load_addr + exec->e_phoff)); - NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof (struct elf_phdr))); - NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum)); - NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); - NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_load_addr)); - NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0); - NEW_AUX_ENT(AT_ENTRY, load_bias + exec->e_entry); - NEW_AUX_ENT(AT_UID, (abi_ulong) getuid()); - NEW_AUX_ENT(AT_EUID, (abi_ulong) geteuid()); - NEW_AUX_ENT(AT_GID, (abi_ulong) getgid()); - NEW_AUX_ENT(AT_EGID, (abi_ulong) getegid()); - NEW_AUX_ENT(AT_HWCAP, (abi_ulong) ELF_HWCAP); - NEW_AUX_ENT(AT_CLKTCK, (abi_ulong) sysconf(_SC_CLK_TCK)); - if (k_platform) - NEW_AUX_ENT(AT_PLATFORM, u_platform); -#ifdef ARCH_DLINFO - /* - * ARCH_DLINFO must come last so platform specific code can enforce - * special alignment requirements on the AUXV if necessary (eg. PPC). - */ - ARCH_DLINFO; -#endif -#undef NEW_AUX_ENT + } - sp = loader_build_argptr(envc, argc, sp, p, !ibcs); - return sp; + nbyte = elf_bss & (qemu_host_page_size - 1); + if (nbyte) { + nbyte = qemu_host_page_size - nbyte; + do { + /* FIXME - what to do if put_user() fails? */ + put_user_u8(0, elf_bss); + elf_bss++; + } while (--nbyte); + } } - -static abi_ulong load_elf_interp(struct elfhdr * interp_elf_ex, +static abi_ulong load_elf_interp(struct elfhdr *interp_elf_ex, int interpreter_fd, abi_ulong *interp_load_addr) { - struct elf_phdr *elf_phdata = NULL; - struct elf_phdr *eppnt; - abi_ulong load_addr = 0; - int load_addr_set = 0; - int retval; - abi_ulong last_bss, elf_bss; - abi_ulong error; - int i; + struct elf_phdr *elf_phdata = NULL; + abi_ulong rbase; + int retval; + abi_ulong baddr, error; - elf_bss = 0; - last_bss = 0; - error = 0; + error = 0; -#ifdef BSWAP_NEEDED - bswap_ehdr(interp_elf_ex); -#endif - /* First of all, some simple consistency checks */ - if ((interp_elf_ex->e_type != ET_EXEC && - interp_elf_ex->e_type != ET_DYN) || - !elf_check_arch(interp_elf_ex->e_machine)) { - return ~((abi_ulong)0UL); - } + bswap_ehdr(interp_elf_ex); + /* First of all, some simple consistency checks */ + if ((interp_elf_ex->e_type != ET_EXEC && interp_elf_ex->e_type != ET_DYN) || + !elf_check_arch(interp_elf_ex->e_machine)) { + return ~((abi_ulong)0UL); + } - /* Now read in all of the header information */ + /* Now read in all of the header information */ + if (sizeof(struct elf_phdr) * interp_elf_ex->e_phnum > TARGET_PAGE_SIZE) { + return ~(abi_ulong)0UL; + } - if (sizeof(struct elf_phdr) * interp_elf_ex->e_phnum > TARGET_PAGE_SIZE) - return ~(abi_ulong)0UL; + elf_phdata = (struct elf_phdr *) malloc(sizeof(struct elf_phdr) * + interp_elf_ex->e_phnum); - elf_phdata = (struct elf_phdr *) - malloc(sizeof(struct elf_phdr) * interp_elf_ex->e_phnum); + if (!elf_phdata) { + return ~((abi_ulong)0UL); + } - if (!elf_phdata) - return ~((abi_ulong)0UL); + /* + * If the size of this structure has changed, then punt, since + * we will be doing the wrong thing. 
+ */ + if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr)) { + free(elf_phdata); + return ~((abi_ulong)0UL); + } + + retval = lseek(interpreter_fd, interp_elf_ex->e_phoff, SEEK_SET); + if (retval >= 0) { + retval = read(interpreter_fd, (char *) elf_phdata, + sizeof(struct elf_phdr) * interp_elf_ex->e_phnum); + } + if (retval < 0) { + perror("load_elf_interp"); + exit(-1); + free(elf_phdata); + return retval; + } + bswap_phdr(elf_phdata, interp_elf_ex->e_phnum); + rbase = 0; + if (interp_elf_ex->e_type == ET_DYN) { /* - * If the size of this structure has changed, then punt, since - * we will be doing the wrong thing. + * In order to avoid hardcoding the interpreter load + * address in qemu, we allocate a big enough memory zone. */ - if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr)) { - free(elf_phdata); - return ~((abi_ulong)0UL); - } - - retval = lseek(interpreter_fd, interp_elf_ex->e_phoff, SEEK_SET); - if(retval >= 0) { - retval = read(interpreter_fd, - (char *) elf_phdata, - sizeof(struct elf_phdr) * interp_elf_ex->e_phnum); - } - if (retval < 0) { - perror("load_elf_interp"); - exit(-1); - free (elf_phdata); - return retval; - } -#ifdef BSWAP_NEEDED - eppnt = elf_phdata; - for (i=0; ie_phnum; i++, eppnt++) { - bswap_phdr(eppnt); - } -#endif - - if (interp_elf_ex->e_type == ET_DYN) { - /* in order to avoid hardcoding the interpreter load - address in qemu, we allocate a big enough memory zone */ - error = target_mmap(0, INTERP_MAP_SIZE, - PROT_NONE, MAP_PRIVATE | MAP_ANON, - -1, 0); - if (error == -1) { - perror("mmap"); - exit(-1); - } - load_addr = error; - load_addr_set = 1; + rbase = target_mmap(0, INTERP_MAP_SIZE, PROT_NONE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (rbase == -1) { + perror("mmap"); + exit(-1); } + } - eppnt = elf_phdata; - for(i=0; ie_phnum; i++, eppnt++) - if (eppnt->p_type == PT_LOAD) { - int elf_type = MAP_PRIVATE | MAP_DENYWRITE; - int elf_prot = 0; - abi_ulong vaddr = 0; - abi_ulong k; - - if (eppnt->p_flags & PF_R) elf_prot = PROT_READ; - if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; - if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; - if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) { - elf_type |= MAP_FIXED; - vaddr = eppnt->p_vaddr; - } - error = target_mmap(load_addr+TARGET_ELF_PAGESTART(vaddr), - eppnt->p_filesz + TARGET_ELF_PAGEOFFSET(eppnt->p_vaddr), - elf_prot, - elf_type, - interpreter_fd, - eppnt->p_offset - TARGET_ELF_PAGEOFFSET(eppnt->p_vaddr)); - - if (error == -1) { - /* Real error */ - close(interpreter_fd); - free(elf_phdata); - return ~((abi_ulong)0UL); - } - - if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) { - load_addr = error; - load_addr_set = 1; - } - - /* - * Find the end of the file mapping for this phdr, and keep - * track of the largest address we see for this. - */ - k = load_addr + eppnt->p_vaddr + eppnt->p_filesz; - if (k > elf_bss) elf_bss = k; - - /* - * Do the same thing for the memory mapping - between - * elf_bss and last_bss is the bss section. - */ - k = load_addr + eppnt->p_memsz + eppnt->p_vaddr; - if (k > last_bss) last_bss = k; - } - - /* Now use mmap to map the library into memory. */ - - close(interpreter_fd); + error = load_elf_sections(interp_elf_ex, elf_phdata, interpreter_fd, rbase, + &baddr); + if (error != 0) { + perror("load_elf_sections"); + exit(-1); + } - /* - * Now fill out the bss section. First pad the last page up - * to the page boundary, and then perform a mmap to make sure - * that there are zeromapped pages up to and including the last - * bss page. 
- */ - padzero(elf_bss, last_bss); - elf_bss = TARGET_ELF_PAGESTART(elf_bss + qemu_host_page_size - 1); /* What we have mapped so far */ - - /* Map the last of the bss segment */ - if (last_bss > elf_bss) { - target_mmap(elf_bss, last_bss-elf_bss, - PROT_READ|PROT_WRITE|PROT_EXEC, - MAP_FIXED|MAP_PRIVATE|MAP_ANON, -1, 0); - } - free(elf_phdata); + /* Now use mmap to map the library into memory. */ + close(interpreter_fd); + free(elf_phdata); - *interp_load_addr = load_addr; - return ((abi_ulong) interp_elf_ex->e_entry) + load_addr; + *interp_load_addr = baddr; + return ((abi_ulong) interp_elf_ex->e_entry) + rbase; } static int symfind(const void *s0, const void *s1) @@ -1010,7 +371,7 @@ static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr) struct elf_sym *syms = s->disas_symtab.elf64; #endif - // binary search + /* binary search */ struct elf_sym *sym; sym = bsearch(&orig_addr, syms, s->disas_num_syms, sizeof(*syms), symfind); @@ -1026,9 +387,8 @@ static int symcmp(const void *s0, const void *s1) { struct elf_sym *sym0 = (struct elf_sym *)s0; struct elf_sym *sym1 = (struct elf_sym *)s1; - return (sym0->st_value < sym1->st_value) - ? -1 - : ((sym0->st_value > sym1->st_value) ? 1 : 0); + return (sym0->st_value < sym1->st_value) ? -1 : + ((sym0->st_value > sym1->st_value) ? 1 : 0); } /* Best attempt to load symbols from this ELF object. */ @@ -1042,27 +402,24 @@ static void load_symbols(struct elfhdr *hdr, int fd) lseek(fd, hdr->e_shoff, SEEK_SET); for (i = 0; i < hdr->e_shnum; i++) { - if (read(fd, &sechdr, sizeof(sechdr)) != sizeof(sechdr)) + if (read(fd, &sechdr, sizeof(sechdr)) != sizeof(sechdr)) { return; -#ifdef BSWAP_NEEDED - bswap_shdr(&sechdr); -#endif + } + bswap_shdr(&sechdr, 1); if (sechdr.sh_type == SHT_SYMTAB) { symtab = sechdr; - lseek(fd, hdr->e_shoff - + sizeof(sechdr) * sechdr.sh_link, SEEK_SET); - if (read(fd, &strtab, sizeof(strtab)) - != sizeof(strtab)) + lseek(fd, hdr->e_shoff + sizeof(sechdr) * sechdr.sh_link, + SEEK_SET); + if (read(fd, &strtab, sizeof(strtab)) != sizeof(strtab)) { return; -#ifdef BSWAP_NEEDED - bswap_shdr(&strtab); -#endif + } + bswap_shdr(&strtab, 1); goto found; } } return; /* Shouldn't happen... */ - found: +found: /* Now know where the strtab and symtab are. Snarf them. */ s = malloc(sizeof(*s)); syms = malloc(symtab.sh_size); @@ -1089,10 +446,8 @@ static void load_symbols(struct elfhdr *hdr, int fd) i = 0; while (i < nsyms) { -#ifdef BSWAP_NEEDED bswap_sym(syms + i); -#endif - // Throw away entries which we do not need. + /* Throw away entries which we do not need. */ if (syms[i].st_shndx == SHN_UNDEF || syms[i].st_shndx >= SHN_LORESERVE || ELF_ST_TYPE(syms[i].st_info) != STT_FUNC) { @@ -1102,17 +457,19 @@ static void load_symbols(struct elfhdr *hdr, int fd) } continue; } -#if defined(TARGET_ARM) || defined (TARGET_MIPS) +#if defined(TARGET_ARM) || defined(TARGET_MIPS) /* The bottom address bit marks a Thumb or MIPS16 symbol. */ syms[i].st_value &= ~(target_ulong)1; #endif i++; } - /* Attempt to free the storage associated with the local symbols - that we threw away. Whether or not this has any effect on the - memory allocation depends on the malloc implementation and how - many symbols we managed to discard. */ + /* + * Attempt to free the storage associated with the local symbols + * that we threw away. Whether or not this has any effect on the + * memory allocation depends on the malloc implementation and how + * many symbols we managed to discard. 
+ */ new_syms = realloc(syms, nsyms * sizeof(*syms)); if (new_syms == NULL) { free(s); @@ -1143,142 +500,191 @@ static void load_symbols(struct elfhdr *hdr, int fd) syminfos = s; } -int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, - struct image_info * info) +/* Check the elf header and see if this a target elf binary. */ +int is_target_elf_binary(int fd) +{ + uint8_t buf[128]; + struct elfhdr elf_ex; + + if (lseek(fd, 0L, SEEK_SET) < 0) { + return 0; + } + if (read(fd, buf, sizeof(buf)) < 0) { + return 0; + } + + elf_ex = *((struct elfhdr *)buf); + bswap_ehdr(&elf_ex); + + if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) || + (!elf_check_arch(elf_ex.e_machine))) { + return 0; + } else { + return 1; + } +} + +static int +load_elf_sections(const struct elfhdr *hdr, struct elf_phdr *phdr, int fd, + abi_ulong rbase, abi_ulong *baddrp) +{ + struct elf_phdr *elf_ppnt; + abi_ulong baddr; + int i; + bool first; + + /* + * Now we do a little grungy work by mmaping the ELF image into + * the correct location in memory. At this point, we assume that + * the image should be loaded at fixed address, not at a variable + * address. + */ + first = true; + for (i = 0, elf_ppnt = phdr; i < hdr->e_phnum; i++, elf_ppnt++) { + int elf_prot = 0; + abi_ulong error; + + /* XXX Skip memsz == 0. */ + if (elf_ppnt->p_type != PT_LOAD) { + continue; + } + + if (elf_ppnt->p_flags & PF_R) { + elf_prot |= PROT_READ; + } + if (elf_ppnt->p_flags & PF_W) { + elf_prot |= PROT_WRITE; + } + if (elf_ppnt->p_flags & PF_X) { + elf_prot |= PROT_EXEC; + } + + error = target_mmap(TARGET_ELF_PAGESTART(rbase + elf_ppnt->p_vaddr), + (elf_ppnt->p_filesz + + TARGET_ELF_PAGEOFFSET(elf_ppnt->p_vaddr)), + elf_prot, + (MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE), + fd, + (elf_ppnt->p_offset - + TARGET_ELF_PAGEOFFSET(elf_ppnt->p_vaddr))); + if (error == -1) { + perror("mmap"); + exit(-1); + } else if (elf_ppnt->p_memsz != elf_ppnt->p_filesz) { + abi_ulong start_bss, end_bss; + + start_bss = rbase + elf_ppnt->p_vaddr + elf_ppnt->p_filesz; + end_bss = rbase + elf_ppnt->p_vaddr + elf_ppnt->p_memsz; + + /* + * Calling set_brk effectively mmaps the pages that we need for the + * bss and break sections. 
+ */ + set_brk(start_bss, end_bss); + padzero(start_bss, end_bss); + } + + if (first) { + baddr = TARGET_ELF_PAGESTART(rbase + elf_ppnt->p_vaddr); + first = false; + } + } + + if (baddrp != NULL) { + *baddrp = baddr; + } + return 0; +} + +int load_elf_binary(struct bsd_binprm *bprm, struct target_pt_regs *regs, + struct image_info *info) { struct elfhdr elf_ex; struct elfhdr interp_elf_ex; - struct exec interp_ex; int interpreter_fd = -1; /* avoid warning */ - abi_ulong load_addr, load_bias; - int load_addr_set = 0; - unsigned int interpreter_type = INTERPRETER_NONE; - unsigned char ibcs2_interpreter; + abi_ulong load_addr; int i; - struct elf_phdr * elf_ppnt; + struct elf_phdr *elf_ppnt; struct elf_phdr *elf_phdata; - abi_ulong elf_bss, k, elf_brk; - int retval; - char * elf_interpreter; - abi_ulong elf_entry, interp_load_addr = 0; - abi_ulong start_code, end_code, start_data, end_data; + abi_ulong elf_brk; + int error, retval; + char *elf_interpreter; + abi_ulong baddr, elf_entry, et_dyn_addr, interp_load_addr = 0; abi_ulong reloc_func_desc = 0; -#ifdef LOW_ELF_STACK - abi_ulong elf_stack = ~((abi_ulong)0UL); -#endif - char passed_fileno[6]; - ibcs2_interpreter = 0; load_addr = 0; - load_bias = 0; elf_ex = *((struct elfhdr *) bprm->buf); /* exec-header */ -#ifdef BSWAP_NEEDED bswap_ehdr(&elf_ex); -#endif /* First of all, some simple consistency checks */ if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) || - (! elf_check_arch(elf_ex.e_machine))) { + (!elf_check_arch(elf_ex.e_machine))) { return -ENOEXEC; } bprm->p = copy_elf_strings(1, &bprm->filename, bprm->page, bprm->p); - bprm->p = copy_elf_strings(bprm->envc,bprm->envp,bprm->page,bprm->p); - bprm->p = copy_elf_strings(bprm->argc,bprm->argv,bprm->page,bprm->p); + bprm->p = copy_elf_strings(bprm->envc, bprm->envp, bprm->page, bprm->p); + bprm->p = copy_elf_strings(bprm->argc, bprm->argv, bprm->page, bprm->p); if (!bprm->p) { retval = -E2BIG; } /* Now read in all of the header information */ - elf_phdata = (struct elf_phdr *)malloc(elf_ex.e_phentsize*elf_ex.e_phnum); + elf_phdata = (struct elf_phdr *)malloc(elf_ex.e_phentsize * elf_ex.e_phnum); if (elf_phdata == NULL) { return -ENOMEM; } retval = lseek(bprm->fd, elf_ex.e_phoff, SEEK_SET); - if(retval > 0) { - retval = read(bprm->fd, (char *) elf_phdata, + if (retval > 0) { + retval = read(bprm->fd, (char *)elf_phdata, elf_ex.e_phentsize * elf_ex.e_phnum); } if (retval < 0) { perror("load_elf_binary"); exit(-1); - free (elf_phdata); + free(elf_phdata); return -errno; } -#ifdef BSWAP_NEEDED - elf_ppnt = elf_phdata; - for (i=0; ip_type == PT_INTERP) { - if ( elf_interpreter != NULL ) - { - free (elf_phdata); + if (elf_interpreter != NULL) { + free(elf_phdata); free(elf_interpreter); close(bprm->fd); return -EINVAL; } - /* This is the program interpreter used for - * shared libraries - for now assume that this - * is an a.out format binary - */ - elf_interpreter = (char *)malloc(elf_ppnt->p_filesz); - if (elf_interpreter == NULL) { - free (elf_phdata); + free(elf_phdata); close(bprm->fd); return -ENOMEM; } retval = lseek(bprm->fd, elf_ppnt->p_offset, SEEK_SET); - if(retval >= 0) { + if (retval >= 0) { retval = read(bprm->fd, elf_interpreter, elf_ppnt->p_filesz); } - if(retval < 0) { + if (retval < 0) { perror("load_elf_binary2"); exit(-1); } - /* If the program interpreter is one of these two, - then assume an iBCS2 image. Otherwise assume - a native linux image. */ - - /* JRP - Need to add X86 lib dir stuff here... 
*/ - - if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 || - strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0) { - ibcs2_interpreter = 1; - } - -#if 0 - printf("Using ELF interpreter %s\n", path(elf_interpreter)); -#endif if (retval >= 0) { retval = open(path(elf_interpreter), O_RDONLY); - if(retval >= 0) { + if (retval >= 0) { interpreter_fd = retval; - } - else { + } else { perror(elf_interpreter); exit(-1); /* retval = -errno; */ @@ -1287,18 +693,17 @@ int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, if (retval >= 0) { retval = lseek(interpreter_fd, 0, SEEK_SET); - if(retval >= 0) { - retval = read(interpreter_fd,bprm->buf,128); + if (retval >= 0) { + retval = read(interpreter_fd, bprm->buf, 128); } } if (retval >= 0) { - interp_ex = *((struct exec *) bprm->buf); /* aout exec-header */ - interp_elf_ex = *((struct elfhdr *) bprm->buf); /* elf exec-header */ + interp_elf_ex = *((struct elfhdr *) bprm->buf); } if (retval < 0) { perror("load_elf_binary3"); exit(-1); - free (elf_phdata); + free(elf_phdata); free(elf_interpreter); close(bprm->fd); return retval; @@ -1308,21 +713,9 @@ int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, } /* Some simple consistency checks for the interpreter */ - if (elf_interpreter){ - interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT; - - /* Now figure out which format our binary is */ - if ((N_MAGIC(interp_ex) != OMAGIC) && (N_MAGIC(interp_ex) != ZMAGIC) && - (N_MAGIC(interp_ex) != QMAGIC)) { - interpreter_type = INTERPRETER_ELF; - } - + if (elf_interpreter) { if (interp_elf_ex.e_ident[0] != 0x7f || - strncmp((char *)&interp_elf_ex.e_ident[1], "ELF",3) != 0) { - interpreter_type &= ~INTERPRETER_ELF; - } - - if (!interpreter_type) { + strncmp((char *)&interp_elf_ex.e_ident[1], "ELF", 3) != 0) { free(elf_interpreter); free(elf_phdata); close(bprm->fd); @@ -1330,27 +723,15 @@ int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, } } - /* OK, we are done with that, now set up the arg stuff, - and then start this sucker up */ - - { - char * passed_p; - - if (interpreter_type == INTERPRETER_AOUT) { - snprintf(passed_fileno, sizeof(passed_fileno), "%d", bprm->fd); - passed_p = passed_fileno; - - if (elf_interpreter) { - bprm->p = copy_elf_strings(1,&passed_p,bprm->page,bprm->p); - bprm->argc++; - } - } - if (!bprm->p) { - free(elf_interpreter); - free (elf_phdata); - close(bprm->fd); - return -E2BIG; - } + /* + * OK, we are done with that, now set up the arg stuff, and then start this + * sucker up + */ + if (!bprm->p) { + free(elf_interpreter); + free(elf_phdata); + close(bprm->fd); + return -E2BIG; } /* OK, This is the point of no return */ @@ -1360,129 +741,49 @@ int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, info->mmap = 0; elf_entry = (abi_ulong) elf_ex.e_entry; - /* - * In case where user has not explicitly set the guest_base, we - * probe here that should we set it automatically. - */ - if (!have_guest_base) { - /* - * Go through ELF program header table and find out whether - * any of the segments drop below our current mmap_min_addr and - * in that case set guest_base to corresponding address. - */ - for (i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; - i++, elf_ppnt++) { - if (elf_ppnt->p_type != PT_LOAD) - continue; - if (HOST_PAGE_ALIGN(elf_ppnt->p_vaddr) < mmap_min_addr) { - guest_base = HOST_PAGE_ALIGN(mmap_min_addr); - break; - } + /* XXX Join this with PT_INTERP search? 
*/ + baddr = 0; + for (i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) { + if (elf_ppnt->p_type != PT_LOAD) { + continue; } + baddr = elf_ppnt->p_vaddr; + break; } - /* Do this so that we can load the interpreter, if need be. We will - change some of these later */ - info->rss = 0; - bprm->p = setup_arg_pages(bprm->p, bprm, info); - info->start_stack = bprm->p; + et_dyn_addr = 0; + if (elf_ex.e_type == ET_DYN && baddr == 0) { + et_dyn_addr = ELF_ET_DYN_LOAD_ADDR; + } - /* Now we do a little grungy work by mmaping the ELF image into - * the correct location in memory. At this point, we assume that - * the image should be loaded at fixed address, not at a variable - * address. + /* + * Do this so that we can load the interpreter, if need be. We will + * change some of these later */ + info->rss = 0; + setup_arg_pages(bprm, info, &bprm->p, &bprm->stringp); + info->start_stack = bprm->p; - for(i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) { - int elf_prot = 0; - int elf_flags = 0; - abi_ulong error; + info->elf_flags = elf_ex.e_flags; - if (elf_ppnt->p_type != PT_LOAD) + error = load_elf_sections(&elf_ex, elf_phdata, bprm->fd, et_dyn_addr, + &load_addr); + for (i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) { + if (elf_ppnt->p_type != PT_LOAD) { continue; - - if (elf_ppnt->p_flags & PF_R) elf_prot |= PROT_READ; - if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; - if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; - elf_flags = MAP_PRIVATE | MAP_DENYWRITE; - if (elf_ex.e_type == ET_EXEC || load_addr_set) { - elf_flags |= MAP_FIXED; - } else if (elf_ex.e_type == ET_DYN) { - /* Try and get dynamic programs out of the way of the default mmap - base, as well as whatever program they might try to exec. This - is because the brk will follow the loader, and is not movable. 
*/ - /* NOTE: for qemu, we do a big mmap to get enough space - without hardcoding any address */ - error = target_mmap(0, ET_DYN_MAP_SIZE, - PROT_NONE, MAP_PRIVATE | MAP_ANON, - -1, 0); - if (error == -1) { - perror("mmap"); - exit(-1); - } - load_bias = TARGET_ELF_PAGESTART(error - elf_ppnt->p_vaddr); } - - error = target_mmap(TARGET_ELF_PAGESTART(load_bias + elf_ppnt->p_vaddr), - (elf_ppnt->p_filesz + - TARGET_ELF_PAGEOFFSET(elf_ppnt->p_vaddr)), - elf_prot, - (MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE), - bprm->fd, - (elf_ppnt->p_offset - - TARGET_ELF_PAGEOFFSET(elf_ppnt->p_vaddr))); - if (error == -1) { - perror("mmap"); - exit(-1); - } - -#ifdef LOW_ELF_STACK - if (TARGET_ELF_PAGESTART(elf_ppnt->p_vaddr) < elf_stack) - elf_stack = TARGET_ELF_PAGESTART(elf_ppnt->p_vaddr); -#endif - - if (!load_addr_set) { - load_addr_set = 1; - load_addr = elf_ppnt->p_vaddr - elf_ppnt->p_offset; - if (elf_ex.e_type == ET_DYN) { - load_bias += error - - TARGET_ELF_PAGESTART(load_bias + elf_ppnt->p_vaddr); - load_addr += load_bias; - reloc_func_desc = load_bias; - } - } - k = elf_ppnt->p_vaddr; - if (k < start_code) - start_code = k; - if (start_data < k) - start_data = k; - k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz; - if (k > elf_bss) - elf_bss = k; - if ((elf_ppnt->p_flags & PF_X) && end_code < k) - end_code = k; - if (end_data < k) - end_data = k; - k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz; - if (k > elf_brk) elf_brk = k; + if (elf_ppnt->p_memsz > elf_ppnt->p_filesz) + elf_brk = MAX(elf_brk, et_dyn_addr + elf_ppnt->p_vaddr + + elf_ppnt->p_memsz); + } + if (error != 0) { + perror("load_elf_sections"); + exit(-1); } - - elf_entry += load_bias; - elf_bss += load_bias; - elf_brk += load_bias; - start_code += load_bias; - end_code += load_bias; - start_data += load_bias; - end_data += load_bias; if (elf_interpreter) { - if (interpreter_type & 1) { - elf_entry = load_aout_interp(&interp_ex, interpreter_fd); - } - else if (interpreter_type & 2) { - elf_entry = load_elf_interp(&interp_elf_ex, interpreter_fd, - &interp_load_addr); - } + elf_entry = load_elf_interp(&interp_elf_ex, interpreter_fd, + &interp_load_addr); reloc_func_desc = interp_load_addr; close(interpreter_fd); @@ -1494,72 +795,40 @@ int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, exit(-1); return 0; } + } else { + interp_load_addr = et_dyn_addr; + elf_entry += interp_load_addr; } free(elf_phdata); - if (qemu_log_enabled()) + if (qemu_log_enabled()) { load_symbols(&elf_ex, bprm->fd); + } - if (interpreter_type != INTERPRETER_AOUT) close(bprm->fd); - info->personality = (ibcs2_interpreter ? PER_SVR4 : PER_LINUX); + close(bprm->fd); -#ifdef LOW_ELF_STACK - info->start_stack = bprm->p = elf_stack - 4; -#endif - bprm->p = create_elf_tables(bprm->p, - bprm->argc, - bprm->envc, - &elf_ex, - load_addr, load_bias, - interp_load_addr, - (interpreter_type == INTERPRETER_AOUT ? 
0 : 1), - info); + bprm->p = target_create_elf_tables(bprm->p, bprm->argc, bprm->envc, + bprm->stringp, &elf_ex, load_addr, + et_dyn_addr, interp_load_addr, info); info->load_addr = reloc_func_desc; info->start_brk = info->brk = elf_brk; - info->end_code = end_code; - info->start_code = start_code; - info->start_data = start_data; - info->end_data = end_data; info->start_stack = bprm->p; + info->load_bias = 0; - /* Calling set_brk effectively mmaps the pages that we need for the bss and break - sections */ - set_brk(elf_bss, elf_brk); - - padzero(elf_bss, elf_brk); + info->entry = elf_entry; -#if 0 - printf("(start_brk) %x\n" , info->start_brk); - printf("(end_code) %x\n" , info->end_code); - printf("(start_code) %x\n" , info->start_code); - printf("(end_data) %x\n" , info->end_data); - printf("(start_stack) %x\n" , info->start_stack); - printf("(brk) %x\n" , info->brk); +#ifdef USE_ELF_CORE_DUMP + bprm->core_dump = &elf_core_dump; +#else + bprm->core_dump = NULL; #endif - if ( info->personality == PER_SVR4 ) - { - /* Why this, you ask??? Well SVr4 maps page 0 as read-only, - and some applications "depend" upon this behavior. - Since we do not have the power to recompile these, we - emulate the SVr4 behavior. Sigh. */ - target_mmap(0, qemu_host_page_size, PROT_READ | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE, -1, 0); - } - - info->entry = elf_entry; - return 0; } -static int load_aout_interp(void * exptr, int interp_fd) -{ - printf("a.out interpreter not yet supported\n"); - return(0); -} - void do_init_thread(struct target_pt_regs *regs, struct image_info *infop) { - init_thread(regs, infop); + + target_thread_init(regs, infop); } diff --git a/bsd-user/errno_defs.h b/bsd-user/errno_defs.h index 1efa502a129..832671354fd 100644 --- a/bsd-user/errno_defs.h +++ b/bsd-user/errno_defs.h @@ -1,6 +1,3 @@ -/* $OpenBSD: errno.h,v 1.20 2007/09/03 14:37:52 millert Exp $ */ -/* $NetBSD: errno.h,v 1.10 1996/01/20 01:33:53 jtc Exp $ */ - /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. @@ -37,6 +34,9 @@ * @(#)errno.h 8.5 (Berkeley) 1/21/94 */ +#ifndef _ERRNO_DEFS_H_ +#define _ERRNO_DEFS_H_ + #define TARGET_EPERM 1 /* Operation not permitted */ #define TARGET_ENOENT 2 /* No such file or directory */ #define TARGET_ESRCH 3 /* No such process */ @@ -147,3 +147,10 @@ #define TARGET_EIDRM 89 /* Identifier removed */ #define TARGET_ENOMSG 90 /* No message of desired type */ #define TARGET_ELAST 90 /* Must be equal largest errno */ + +/* Internal errors: */ +#define TARGET_EJUSTRETURN 254 /* Just return without modifing regs */ +#define TARGET_ERESTART 255 /* Restart syscall */ +#define TARGET_ERESTARTSYS TARGET_ERESTART /* Linux compat */ + +#endif /* ! _ERRNO_DEFS_H_ */ diff --git a/bsd-user/freebsd/host-os.h b/bsd-user/freebsd/host-os.h new file mode 100644 index 00000000000..dfb8344b7b6 --- /dev/null +++ b/bsd-user/freebsd/host-os.h @@ -0,0 +1,25 @@ +/* + * FreeBSD host dependent code and definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _HOST_OS_H_ +#define _HOST_OS_H_ + +#define HOST_DEFAULT_BSD_TYPE target_freebsd + +#endif /*!_HOST_OS_H_ */ diff --git a/bsd-user/freebsd/meson.build b/bsd-user/freebsd/meson.build new file mode 100644 index 00000000000..4b69cca7b90 --- /dev/null +++ b/bsd-user/freebsd/meson.build @@ -0,0 +1,3 @@ +bsd_user_ss.add(files( + 'os-sys.c', +)) diff --git a/bsd-user/freebsd/os-sys.c b/bsd-user/freebsd/os-sys.c new file mode 100644 index 00000000000..309e27b9d63 --- /dev/null +++ b/bsd-user/freebsd/os-sys.c @@ -0,0 +1,27 @@ +/* + * FreeBSD sysctl() and sysarch() system call emulation + * + * Copyright (c) 2013-15 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu.h" +#include "target_arch_sysarch.h" + +/* sysarch() is architecture dependent. */ +abi_long do_freebsd_sysarch(void *cpu_env, abi_long arg1, abi_long arg2) +{ + return do_freebsd_arch_sysarch(cpu_env, arg1, arg2); +} diff --git a/bsd-user/freebsd/strace.list b/bsd-user/freebsd/strace.list index b01b5f36e88..275d2dbe274 100644 --- a/bsd-user/freebsd/strace.list +++ b/bsd-user/freebsd/strace.list @@ -33,10 +33,6 @@ { TARGET_FREEBSD_NR___syscall, "__syscall", NULL, NULL, NULL }, { TARGET_FREEBSD_NR___sysctl, "__sysctl", NULL, print_sysctl, NULL }, { TARGET_FREEBSD_NR__umtx_op, "_umtx_op", "%s(%#x, %d, %d, %#x, %#x)", NULL, NULL }, -#if defined(__FreeBSD_version) && __FreeBSD_version < 1000000 -{ TARGET_FREEBSD_NR__umtx_lock, "__umtx_lock", NULL, NULL, NULL }, -{ TARGET_FREEBSD_NR__umtx_unlock, "__umtx_unlock", NULL, NULL, NULL }, -#endif { TARGET_FREEBSD_NR_accept, "accept", "%s(%d,%#x,%#x)", NULL, NULL }, { TARGET_FREEBSD_NR_accept4, "accept4", "%s(%d,%d,%#x,%#x)", NULL, NULL }, { TARGET_FREEBSD_NR_access, "access", "%s(\"%s\",%#o)", NULL, NULL }, @@ -49,10 +45,6 @@ { TARGET_FREEBSD_NR_cap_fcntls_get, "cap_fcntls_get", NULL, NULL, NULL }, { TARGET_FREEBSD_NR_cap_fcntls_limit, "cap_fcntls_limit", NULL, NULL, NULL }, { TARGET_FREEBSD_NR_cap_getmode, "cap_getmode", NULL, NULL, NULL }, -#if defined(__FreeBSD_version) && __FreeBSD_version < 1000000 -{ TARGET_FREEBSD_NR_cap_getrights, "cap_getrights", NULL, NULL, NULL }, -{ TARGET_FREEBSD_NR_cap_new, "cap_new", NULL, NULL, NULL }, -#endif { TARGET_FREEBSD_NR_cap_ioctls_get, "cap_ioctls_get", NULL, NULL, NULL }, { TARGET_FREEBSD_NR_cap_ioctls_limit, "cap_ioctls_limit", NULL, NULL, NULL }, { TARGET_FREEBSD_NR_cap_rights_limit, "cap_rights_limit", NULL, NULL, NULL }, @@ -146,9 +138,6 @@ { TARGET_FREEBSD_NR_freebsd11_kevent, "freebsd11_kevent", NULL, NULL, NULL }, { TARGET_FREEBSD_NR_kevent, "kevent", NULL, NULL, NULL }, { TARGET_FREEBSD_NR_kill, "kill", NULL, NULL, NULL }, -#if defined(__FreeBSD_version) && __FreeBSD_version < 1000000 -{ TARGET_FREEBSD_NR_killpg, "killpg", NULL, NULL, NULL }, -#endif { TARGET_FREEBSD_NR_kqueue, "kqueue", NULL, NULL, NULL }, { 
TARGET_FREEBSD_NR_ktrace, "ktrace", NULL, NULL, NULL }, { TARGET_FREEBSD_NR_lchown, "lchown", NULL, NULL, NULL }, diff --git a/bsd-user/freebsd/target_os_elf.h b/bsd-user/freebsd/target_os_elf.h new file mode 100644 index 00000000000..e5ac8e8e501 --- /dev/null +++ b/bsd-user/freebsd/target_os_elf.h @@ -0,0 +1,137 @@ +/* + * freebsd ELF definitions + * + * Copyright (c) 2013-15 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#ifndef _TARGET_OS_ELF_H_ +#define _TARGET_OS_ELF_H_ + +#include "target_arch_elf.h" +#include "elf.h" + +#define bsd_get_ncpu() 1 /* until we pull in bsd-proc.[hc] */ + +/* this flag is uneffective under linux too, should be deleted */ +#ifndef MAP_DENYWRITE +#define MAP_DENYWRITE 0 +#endif + +/* should probably go in elf.h */ +#ifndef ELIBBAD +#define ELIBBAD 80 +#endif + +#ifndef ELF_PLATFORM +#define ELF_PLATFORM (NULL) +#endif + +/* XXX Look at the other conflicting AT_* values. */ +#define FREEBSD_AT_NCPUS 19 +#define FREEBSD_AT_HWCAP 25 +#define FREEBSD_AT_HWCAP2 26 + +#ifdef TARGET_ABI32 +#undef ELF_CLASS +#define ELF_CLASS ELFCLASS32 +#undef bswaptls +#define bswaptls(ptr) bswap32s(ptr) +#endif + +/* max code+data+bss space allocated to elf interpreter */ +#define INTERP_MAP_SIZE (32 * 1024 * 1024) + +/* max code+data+bss+brk space allocated to ET_DYN executables */ +#define ET_DYN_MAP_SIZE (128 * 1024 * 1024) + +/* Necessary parameters */ +#define TARGET_ELF_EXEC_PAGESIZE TARGET_PAGE_SIZE +#define TARGET_ELF_PAGESTART(_v) ((_v) & \ + ~(unsigned long)(TARGET_ELF_EXEC_PAGESIZE - 1)) +#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE - 1)) + +#define DLINFO_ITEMS 14 + +static abi_ulong target_create_elf_tables(abi_ulong p, int argc, int envc, + abi_ulong stringp, + struct elfhdr *exec, + abi_ulong load_addr, + abi_ulong load_bias, + abi_ulong interp_load_addr, + struct image_info *info) +{ + abi_ulong features, sp; + int size; + const int n = sizeof(elf_addr_t); + + target_auxents_sz = 0; + sp = p; + /* + * Force 16 byte _final_ alignment here for generality. + */ + sp = sp & ~(abi_ulong)15; + size = (DLINFO_ITEMS + 1) * 2; + size += envc + argc + 2; + size += 1; /* argc itself */ + size *= n; + if (size & 15) { + sp -= 16 - (size & 15); + } + + /* + * FreeBSD defines elf_addr_t as Elf32_Off / Elf64_Off + */ +#define NEW_AUX_ENT(id, val) do { \ + sp -= n; put_user_ual(val, sp); \ + sp -= n; put_user_ual(id, sp); \ + target_auxents_sz += 2 * n; \ + } while (0) + + NEW_AUX_ENT(AT_NULL, 0); + + /* There must be exactly DLINFO_ITEMS entries here. 
*/ + NEW_AUX_ENT(AT_PHDR, (abi_ulong)(load_addr + exec->e_phoff)); + NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof(struct elf_phdr))); + NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum)); + NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); + NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_load_addr)); + NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0); + NEW_AUX_ENT(FREEBSD_AT_NCPUS, (abi_ulong)bsd_get_ncpu()); + NEW_AUX_ENT(AT_ENTRY, load_bias + exec->e_entry); + features = ELF_HWCAP; + NEW_AUX_ENT(FREEBSD_AT_HWCAP, features); +#ifdef ELF_HWCAP2 + features = ELF_HWCAP2; + NEW_AUX_ENT(FREEBSD_AT_HWCAP2, features); +#endif + NEW_AUX_ENT(AT_UID, (abi_ulong)getuid()); + NEW_AUX_ENT(AT_EUID, (abi_ulong)geteuid()); + NEW_AUX_ENT(AT_GID, (abi_ulong)getgid()); + NEW_AUX_ENT(AT_EGID, (abi_ulong)getegid()); + target_auxents = sp; /* Note where the aux entries are in the target */ +#ifdef ARCH_DLINFO + /* + * ARCH_DLINFO must come last so platform specific code can enforce + * special alignment requirements on the AUXV if necessary (eg. PPC). + */ + ARCH_DLINFO; +#endif +#undef NEW_AUX_ENT + + sp = loader_build_argptr(envc, argc, sp, stringp); + return sp; +} + +#endif /* _TARGET_OS_ELF_H_ */ diff --git a/bsd-user/freebsd/target_os_siginfo.h b/bsd-user/freebsd/target_os_siginfo.h new file mode 100644 index 00000000000..84944faa4d3 --- /dev/null +++ b/bsd-user/freebsd/target_os_siginfo.h @@ -0,0 +1,145 @@ +/* + * FreeBSD siginfo related definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ +#ifndef _TARGET_OS_SIGINFO_H_ +#define _TARGET_OS_SIGINFO_H_ + +#define TARGET_NSIG 128 +#define TARGET_NSIG_BPW (sizeof(uint32_t) * 8) +#define TARGET_NSIG_WORDS (TARGET_NSIG / TARGET_NSIG_BPW) + +/* this struct defines a stack used during syscall handling */ +typedef struct target_sigaltstack { + abi_long ss_sp; + abi_ulong ss_size; + abi_long ss_flags; +} target_stack_t; + +typedef struct { + uint32_t __bits[TARGET_NSIG_WORDS]; +} target_sigset_t; + +struct target_sigaction { + abi_ulong _sa_handler; + int32_t sa_flags; + target_sigset_t sa_mask; +}; + +typedef union target_sigval { + int32_t sival_int; + abi_ulong sival_ptr; + int32_t sigval_int; + abi_ulong sigval_ptr; +} target_sigval_t; + +typedef struct target_siginfo { + int32_t si_signo; /* signal number */ + int32_t si_errno; /* errno association */ + int32_t si_code; /* signal code */ + int32_t si_pid; /* sending process */ + int32_t si_uid; /* sender's ruid */ + int32_t si_status; /* exit value */ + abi_ulong si_addr; /* faulting instruction */ + union target_sigval si_value; /* signal value */ + union { + struct { + int32_t _trapno; /* machine specific trap code */ + } _fault; + + /* POSIX.1b timers */ + struct { + int32_t _timerid; + int32_t _overrun; + } _timer; + + struct { + int32_t _mqd; + } _mesgp; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + } _poll; + + struct { + abi_long __spare1__; + int32_t __spare2_[7]; + } __spare__; + } _reason; +} target_siginfo_t; + +struct target_sigevent { + abi_int sigev_notify; + abi_int sigev_signo; + target_sigval_t sigev_value; + union { + abi_int _threadid; + + /* + * The kernel (and thus QEMU) never looks at these; + * they're only used as part of the ABI between a + * userspace program and libc. + */ + struct { + abi_ulong _function; + abi_ulong _attribute; + } _sigev_thread; + abi_ushort _kevent_flags; + abi_long _pad[8]; + } _sigev_un; +}; + +#define target_si_signo si_signo +#define target_si_code si_code +#define target_si_errno si_errno +#define target_si_addr si_addr + +/* SIGILL si_codes */ +#define TARGET_ILL_ILLOPC (1) /* Illegal opcode. */ +#define TARGET_ILL_ILLOPN (2) /* Illegal operand. */ +#define TARGET_ILL_ILLADR (3) /* Illegal addressing mode. */ +#define TARGET_ILL_ILLTRP (4) /* Illegal trap. */ +#define TARGET_ILL_PRVOPC (5) /* Privileged opcode. */ +#define TARGET_ILL_PRVREG (6) /* Privileged register. */ +#define TARGET_ILL_COPROC (7) /* Coprocessor error. */ +#define TARGET_ILL_BADSTK (8) /* Internal stack error. */ + +/* SIGSEGV si_codes */ +#define TARGET_SEGV_MAPERR (1) /* address not mapped to object */ +#define TARGET_SEGV_ACCERR (2) /* invalid permissions for mapped object */ + +/* SIGTRAP si_codes */ +#define TARGET_TRAP_BRKPT (1) /* process beakpoint */ +#define TARGET_TRAP_TRACE (2) /* process trace trap */ + +/* SIGBUS si_codes */ +#define TARGET_BUS_ADRALN (1) +#define TARGET_BUS_ADRERR (2) +#define TARGET_BUS_OBJERR (3) + +/* SIGFPE codes */ +#define TARGET_FPE_INTOVF (1) /* Integer overflow. */ +#define TARGET_FPE_INTDIV (2) /* Integer divide by zero. */ +#define TARGET_FPE_FLTDIV (3) /* Floating point divide by zero. */ +#define TARGET_FPE_FLTOVF (4) /* Floating point overflow. */ +#define TARGET_FPE_FLTUND (5) /* Floating point underflow. */ +#define TARGET_FPE_FLTRES (6) /* Floating point inexact result. */ +#define TARGET_FPE_FLTINV (7) /* Invalid floating point operation. */ +#define TARGET_FPE_FLTSUB (8) /* Subscript out of range. 
*/ + +#endif /* !_TARGET_OS_SIGINFO_H_ */ diff --git a/bsd-user/freebsd/target_os_signal.h b/bsd-user/freebsd/target_os_signal.h new file mode 100644 index 00000000000..1a4c5faf19e --- /dev/null +++ b/bsd-user/freebsd/target_os_signal.h @@ -0,0 +1,81 @@ +#ifndef _TARGET_OS_SIGNAL_H_ +#define _TARGET_OS_SIGNAL_H_ + +/* FreeBSD's sys/ucontext.h defines this */ +#define TARGET_MC_GET_CLEAR_RET 0x0001 + +#include "target_os_siginfo.h" +#include "target_arch_signal.h" + +/* Compare to sys/signal.h */ +#define TARGET_SIGHUP 1 /* hangup */ +#define TARGET_SIGINT 2 /* interrupt */ +#define TARGET_SIGQUIT 3 /* quit */ +#define TARGET_SIGILL 4 /* illegal instruction (not reset when caught) */ +#define TARGET_SIGTRAP 5 /* trace trap (not reset when caught) */ +#define TARGET_SIGABRT 6 /* abort() */ +#define TARGET_SIGIOT SIGABRT /* compatibility */ +#define TARGET_SIGEMT 7 /* EMT instruction */ +#define TARGET_SIGFPE 8 /* floating point exception */ +#define TARGET_SIGKILL 9 /* kill (cannot be caught or ignored) */ +#define TARGET_SIGBUS 10 /* bus error */ +#define TARGET_SIGSEGV 11 /* segmentation violation */ +#define TARGET_SIGSYS 12 /* bad argument to system call */ +#define TARGET_SIGPIPE 13 /* write on a pipe with no one to read it */ +#define TARGET_SIGALRM 14 /* alarm clock */ +#define TARGET_SIGTERM 15 /* software termination signal from kill */ +#define TARGET_SIGURG 16 /* urgent condition on IO channel */ +#define TARGET_SIGSTOP 17 /* sendable stop signal not from tty */ +#define TARGET_SIGTSTP 18 /* stop signal from tty */ +#define TARGET_SIGCONT 19 /* continue a stopped process */ +#define TARGET_SIGCHLD 20 /* to parent on child stop or exit */ +#define TARGET_SIGTTIN 21 /* to readers pgrp upon background tty read */ +#define TARGET_SIGTTOU 22 /* like TTIN for output if(tp->t_local<OSTOP)*/ +#define TARGET_SIGIO 23 /* input/output possible signal */ +#define TARGET_SIGXCPU 24 /* exceeded CPU time limit */ +#define TARGET_SIGXFSZ 25 /* exceeded file size limit */ +#define TARGET_SIGVTALRM 26 /* virtual time alarm */ +#define TARGET_SIGPROF 27 /* profiling time alarm */ +#define TARGET_SIGWINCH 28 /* window size changes */ +#define TARGET_SIGINFO 29 /* information request */ +#define TARGET_SIGUSR1 30 /* user defined signal 1 */ +#define TARGET_SIGUSR2 31 /* user defined signal 2 */ +#define TARGET_SIGTHR 32 /* reserved by thread library */ +#define TARGET_SIGLWP SIGTHR /* compatibility */ +#define TARGET_SIGLIBRT 33 /* reserved by the real-time library */ +#define TARGET_SIGRTMIN 65 +#define TARGET_SIGRTMAX 126 + +/* + * Language spec says we must list exactly one parameter, even though we + * actually supply three. Ugh! 
+ */ +#define TARGET_SIG_DFL ((abi_long)0) /* default signal handling */ +#define TARGET_SIG_IGN ((abi_long)1) /* ignore signal */ +#define TARGET_SIG_ERR ((abi_long)-1) /* error return from signal */ + +#define TARGET_SA_ONSTACK 0x0001 /* take signal on signal stack */ +#define TARGET_SA_RESTART 0x0002 /* restart system on signal return */ +#define TARGET_SA_RESETHAND 0x0004 /* reset to SIG_DFL when taking signal */ +#define TARGET_SA_NODEFER 0x0010 /* don't mask the signal we're delivering */ +#define TARGET_SA_NOCLDWAIT 0x0020 /* don't create zombies (assign to pid 1) */ +#define TARGET_SA_USERTRAMP 0x0100 /* do not bounce off kernel's sigtramp */ +#define TARGET_SA_NOCLDSTOP 0x0008 /* do not generate SIGCHLD on child stop */ +#define TARGET_SA_SIGINFO 0x0040 /* generate siginfo_t */ + +/* + * Flags for sigprocmask: + */ +#define TARGET_SIG_BLOCK 1 /* block specified signal set */ +#define TARGET_SIG_UNBLOCK 2 /* unblock specified signal set */ +#define TARGET_SIG_SETMASK 3 /* set specified signal set */ + +#define TARGET_BADSIG SIG_ERR + +/* + * sigaltstack control + */ +#define TARGET_SS_ONSTACK 0x0001 /* take signals on alternate stack */ +#define TARGET_SS_DISABLE 0x0004 /* disable taking signals on alternate stack*/ + +#endif /* !_TARGET_OS_SIGNAL_H_ */ diff --git a/bsd-user/freebsd/target_os_stack.h b/bsd-user/freebsd/target_os_stack.h new file mode 100644 index 00000000000..1bb1a2bf569 --- /dev/null +++ b/bsd-user/freebsd/target_os_stack.h @@ -0,0 +1,181 @@ +/* + * FreeBSD setup_initial_stack() implementation. + * + * Copyright (c) 2013-14 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _TARGET_OS_STACK_H_ +#define _TARGET_OS_STACK_H_ + +#include <sys/param.h> +#include "target_arch_sigtramp.h" +#include "qemu/guest-random.h" + +/* + * The initial FreeBSD stack is as follows: + * (see kern/kern_exec.c exec_copyout_strings() ) + * + * Hi Address -> char **ps_argvstr (struct ps_strings for ps, w, etc.) + * unsigned ps_nargvstr + * char **ps_envstr + * PS_STRINGS -> unsigned ps_nenvstr + * + * machine dependent sigcode (sv_sigcode of size + * sv_szsigcode) + * + * execpath (absolute image path for rtld) + * + * SSP Canary (sizeof(long) * 8) + * + * page sizes array (usually sizeof(u_long) ) + * + * "destp" -> argv, env strings (up to 262144 bytes) + */ +static inline int setup_initial_stack(struct bsd_binprm *bprm, + abi_ulong *ret_addr, abi_ulong *stringp) +{ + int i; + abi_ulong stack_hi_addr; + size_t execpath_len, stringspace; + abi_ulong destp, argvp, envp, p; + struct target_ps_strings ps_strs; + char canary[sizeof(abi_long) * 8]; + + stack_hi_addr = p = target_stkbas + target_stksiz; + + /* Save some space for ps_strings. */ + p -= sizeof(struct target_ps_strings); + + /* Add machine dependent sigcode.
*/ + p -= TARGET_SZSIGCODE; + if (setup_sigtramp(p, (unsigned)offsetof(struct target_sigframe, sf_uc), + TARGET_FREEBSD_NR_sigreturn)) { + errno = EFAULT; + return -1; + } + if (bprm->fullpath) { + execpath_len = strlen(bprm->fullpath) + 1; + p -= roundup(execpath_len, sizeof(abi_ulong)); + if (memcpy_to_target(p, bprm->fullpath, execpath_len)) { + errno = EFAULT; + return -1; + } + } + /* Add canary for SSP. */ + qemu_guest_getrandom_nofail(canary, sizeof(canary)); + p -= roundup(sizeof(canary), sizeof(abi_ulong)); + if (memcpy_to_target(p, canary, sizeof(canary))) { + errno = EFAULT; + return -1; + } + /* Add page sizes array. */ + p -= sizeof(abi_ulong); + if (put_user_ual(TARGET_PAGE_SIZE, p)) { + errno = EFAULT; + return -1; + } + /* + * Deviate from FreeBSD stack layout: force stack to new page here + * so that signal trampoline is not sharing the page with user stack + * frames. This is actively harmful in qemu as it marks pages with + * code it translated as read-only, which is somewhat problematic + * for user trying to use the stack as intended. + */ + p = rounddown(p, TARGET_PAGE_SIZE); + + /* Calculate the string space needed */ + stringspace = 0; + for (i = 0; i < bprm->argc; ++i) { + stringspace += strlen(bprm->argv[i]) + 1; + } + for (i = 0; i < bprm->envc; ++i) { + stringspace += strlen(bprm->envp[i]) + 1; + } + if (stringspace > TARGET_ARG_MAX) { + errno = ENOMEM; + return -1; + } + /* Make room for the argv and envp strings */ + destp = rounddown(p - stringspace, sizeof(abi_ulong)); + p = argvp = destp - (bprm->argc + bprm->envc + 2) * sizeof(abi_ulong); + /* Remember the strings pointer */ + if (stringp) { + *stringp = destp; + } + /* + * Add argv strings. Note that the argv[] vectors are added by + * loader_build_argptr() + */ + /* XXX need to make room for auxargs */ + ps_strs.ps_argvstr = tswapl(argvp); + ps_strs.ps_nargvstr = tswap32(bprm->argc); + for (i = 0; i < bprm->argc; ++i) { + size_t len = strlen(bprm->argv[i]) + 1; + + if (memcpy_to_target(destp, bprm->argv[i], len)) { + errno = EFAULT; + return -1; + } + if (put_user_ual(destp, argvp)) { + errno = EFAULT; + return -1; + } + argvp += sizeof(abi_ulong); + destp += len; + } + if (put_user_ual(0, argvp)) { + errno = EFAULT; + return -1; + } + /* + * Add env strings. Note that the envp[] vectors are added by + * loader_build_argptr(). + */ + envp = argvp + sizeof(abi_ulong); + ps_strs.ps_envstr = tswapl(envp); + ps_strs.ps_nenvstr = tswap32(bprm->envc); + for (i = 0; i < bprm->envc; ++i) { + size_t len = strlen(bprm->envp[i]) + 1; + + if (memcpy_to_target(destp, bprm->envp[i], len)) { + errno = EFAULT; + return -1; + } + if (put_user_ual(destp, envp)) { + errno = EFAULT; + return -1; + } + envp += sizeof(abi_ulong); + destp += len; + } + if (put_user_ual(0, envp)) { + errno = EFAULT; + return -1; + } + if (memcpy_to_target(stack_hi_addr - sizeof(ps_strs), &ps_strs, + sizeof(ps_strs))) { + errno = EFAULT; + return -1; + } + + if (ret_addr) { + *ret_addr = p; + } + + return 0; + } + +#endif /* !_TARGET_OS_STACK_H_ */ diff --git a/bsd-user/freebsd/target_os_thread.h b/bsd-user/freebsd/target_os_thread.h new file mode 100644 index 00000000000..77433acdff8 --- /dev/null +++ b/bsd-user/freebsd/target_os_thread.h @@ -0,0 +1,25 @@ +/* + * FreeBSD thread dependent code and definitions + * + * Copyright (c) 2013 Stacey D. 
Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_OS_THREAD_H_ +#define _TARGET_OS_THREAD_H_ + +#include "target_arch_thread.h" + +#endif /* !_TARGET_OS_THREAD_H_ */ diff --git a/bsd-user/freebsd/target_os_user.h b/bsd-user/freebsd/target_os_user.h new file mode 100644 index 00000000000..19892c5071b --- /dev/null +++ b/bsd-user/freebsd/target_os_user.h @@ -0,0 +1,329 @@ +/* + * sys/user.h definitions + * + * Copyright (c) 2015 Stacey D. Son (sson at FreeBSD) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_OS_USER_H_ +#define _TARGET_OS_USER_H_ + +/* + * from sys/priority.h + */ +struct target_priority { + uint8_t pri_class; /* Scheduling class. */ + uint8_t pri_level; /* Normal priority level. */ + uint8_t pri_native; /* Priority before propagation. */ + uint8_t pri_user; /* User priority based on p_cpu and p_nice.
*/ +}; + +/* + * sys/caprights.h + */ +#define TARGET_CAP_RIGHTS_VERSION 0 + +typedef struct target_cap_rights { + uint64_t cr_rights[TARGET_CAP_RIGHTS_VERSION + 2]; +} target_cap_rights_t; + +/* + * From sys/_socketaddr_storage.h + * + */ +#define TARGET_SS_MAXSIZE 128U +#define TARGET_SS_ALIGNSIZE (sizeof(__int64_t)) +#define TARGET_SS_PAD1SIZE (TARGET_SS_ALIGNSIZE - sizeof(unsigned char) - \ + sizeof(uint8_t)) +#define TARGET_SS_PAD2SIZE (TARGET_SS_MAXSIZE - sizeof(unsigned char) - \ + sizeof(uint8_t) - TARGET_SS_PAD1SIZE - TARGET_SS_ALIGNSIZE) + +struct target_sockaddr_storage { + unsigned char ss_len; /* address length */ + uint8_t ss_family; /* address family */ + char __ss_pad1[TARGET_SS_PAD1SIZE]; + __int64_t __ss_align; /* force desired struct alignment */ + char __ss_pad2[TARGET_SS_PAD2SIZE]; +}; + +/* + * from sys/user.h + */ +#define TARGET_KI_NSPARE_INT 2 +#define TARGET_KI_NSPARE_LONG 12 +#define TARGET_KI_NSPARE_PTR 6 + +#define TARGET_WMESGLEN 8 +#define TARGET_LOCKNAMELEN 8 +#define TARGET_TDNAMLEN 16 +#define TARGET_COMMLEN 19 +#define TARGET_KI_EMULNAMELEN 16 +#define TARGET_KI_NGROUPS 16 +#define TARGET_LOGNAMELEN 17 +#define TARGET_LOGINCLASSLEN 17 + +#define TARGET_KF_TYPE_NONE 0 +#define TARGET_KF_TYPE_VNODE 1 +#define TARGET_KF_TYPE_SOCKET 2 +#define TARGET_KF_TYPE_PIPE 3 +#define TARGET_KF_TYPE_FIFO 4 +#define TARGET_KF_TYPE_KQUEUE 5 +#define TARGET_KF_TYPE_CRYPTO 6 +#define TARGET_KF_TYPE_MQUEUE 7 +#define TARGET_KF_TYPE_SHM 8 +#define TARGET_KF_TYPE_SEM 9 +#define TARGET_KF_TYPE_PTS 10 +#define TARGET_KF_TYPE_PROCDESC 11 +#define TARGET_KF_TYPE_DEV 12 +#define TARGET_KF_TYPE_UNKNOWN 255 + +struct target_kinfo_proc { + int32_t ki_structsize; /* size of this structure */ + int32_t ki_layout; /* reserved: layout identifier */ + abi_ulong ki_args; /* address of command arguments */ + abi_ulong ki_paddr; /* address of proc */ + abi_ulong ki_addr; /* kernel virtual addr of u-area */ + abi_ulong ki_tracep; /* pointer to trace file */ + abi_ulong ki_textvp; /* pointer to executable file */ + abi_ulong ki_fd; /* pointer to open file info */ + abi_ulong ki_vmspace; /* pointer to kernel vmspace struct */ + abi_ulong ki_wchan; /* sleep address */ + int32_t ki_pid; /* Process identifier */ + int32_t ki_ppid; /* parent process id */ + int32_t ki_pgid; /* process group id */ + int32_t ki_tpgid; /* tty process group id */ + int32_t ki_sid; /* Process session ID */ + int32_t ki_tsid; /* Terminal session ID */ + int16_t ki_jobc; /* job control counter */ + int16_t ki_spare_short1; /* unused (just here for alignment) */ + int32_t ki_tdev__freebsd11; /* controlling tty dev */ + target_sigset_t ki_siglist; /* Signals arrived but not delivered */ + target_sigset_t ki_sigmask; /* Current signal mask */ + target_sigset_t ki_sigignore; /* Signals being ignored */ + target_sigset_t ki_sigcatch; /* Signals being caught by user */ + + int32_t ki_uid; /* effective user id */ + int32_t ki_ruid; /* Real user id */ + int32_t ki_svuid; /* Saved effective user id */ + int32_t ki_rgid; /* Real group id */ + int32_t ki_svgid; /* Saved effective group id */ + int16_t ki_ngroups; /* number of groups */ + int16_t ki_spare_short2; /* unused (just here for alignment) */ + int32_t ki_groups[TARGET_KI_NGROUPS]; /* groups */ + + abi_long ki_size; /* virtual size */ + + abi_long ki_rssize; /* current resident set size in pages */ + abi_long ki_swrss; /* resident set size before last swap */ + abi_long ki_tsize; /* text size (pages) XXX */ + abi_long ki_dsize; /* data size (pages) XXX */ + abi_long ki_ssize; 
/* stack size (pages) */ + + uint16_t ki_xstat; /* Exit status for wait & stop signal */ + uint16_t ki_acflag; /* Accounting flags */ + + uint32_t ki_pctcpu; /* %cpu for process during ki_swtime */ + + uint32_t ki_estcpu; /* Time averaged value of ki_cpticks */ + uint32_t ki_slptime; /* Time since last blocked */ + uint32_t ki_swtime; /* Time swapped in or out */ + uint32_t ki_cow; /* number of copy-on-write faults */ + uint64_t ki_runtime; /* Real time in microsec */ + + struct target_freebsd_timeval ki_start; /* starting time */ + struct target_freebsd_timeval ki_childtime; /* time used by process children */ + + abi_long ki_flag; /* P_* flags */ + abi_long ki_kiflag; /* KI_* flags (below) */ + int32_t ki_traceflag; /* Kernel trace points */ + char ki_stat; /* S* process status */ + int8_t ki_nice; /* Process "nice" value */ + char ki_lock; /* Process lock (prevent swap) count */ + char ki_rqindex; /* Run queue index */ + u_char ki_oncpu_old; /* Which cpu we are on (legacy) */ + u_char ki_lastcpu_old; /* Last cpu we were on (legacy) */ + char ki_tdname[TARGET_TDNAMLEN + 1]; /* thread name */ + char ki_wmesg[TARGET_WMESGLEN + 1]; /* wchan message */ + char ki_login[TARGET_LOGNAMELEN + 1]; /* setlogin name */ + char ki_lockname[TARGET_LOCKNAMELEN + 1]; /* lock name */ + char ki_comm[TARGET_COMMLEN + 1]; /* command name */ + char ki_emul[TARGET_KI_EMULNAMELEN + 1]; /* emulation name */ + char ki_loginclass[TARGET_LOGINCLASSLEN + 1]; /* login class */ + + char ki_sparestrings[50]; /* spare string space */ + int32_t ki_spareints[TARGET_KI_NSPARE_INT]; /* spare room for growth */ + uint64_t ki_tdev; /* controlling tty dev */ + int32_t ki_oncpu; /* Which cpu we are on */ + int32_t ki_lastcpu; /* Last cpu we were on */ + int32_t ki_tracer; /* Pid of tracing process */ + int32_t ki_flag2; /* P2_* flags */ + int32_t ki_fibnum; /* Default FIB number */ + uint32_t ki_cr_flags; /* Credential flags */ + int32_t ki_jid; /* Process jail ID */ + int32_t ki_numthreads; /* XXXKSE number of threads in total */ + + int32_t ki_tid; /* XXXKSE thread id */ + + struct target_priority ki_pri; /* process priority */ + struct target_freebsd_rusage ki_rusage; /* process rusage statistics */ + /* XXX - most fields in ki_rusage_ch are not (yet) filled in */ + struct target_freebsd_rusage ki_rusage_ch; /* rusage of children processes */ + + + abi_ulong ki_pcb; /* kernel virtual addr of pcb */ + abi_ulong ki_kstack; /* kernel virtual addr of stack */ + abi_ulong ki_udata; /* User convenience pointer */ + abi_ulong ki_tdaddr; /* address of thread */ + + abi_ulong ki_spareptrs[TARGET_KI_NSPARE_PTR]; /* spare room for growth */ + abi_long ki_sparelongs[TARGET_KI_NSPARE_LONG];/* spare room for growth */ + abi_long ki_sflag; /* PS_* flags */ + abi_long ki_tdflags; /* XXXKSE kthread flag */ +}; + +struct target_kinfo_file { + int32_t kf_structsize; /* Variable size of record. */ + int32_t kf_type; /* Descriptor type. */ + int32_t kf_fd; /* Array index. */ + int32_t kf_ref_count; /* Reference count. */ + int32_t kf_flags; /* Flags. */ + int32_t kf_pad0; /* Round to 64 bit alignment. */ + int64_t kf_offset; /* Seek location. */ + union { + struct { + uint32_t kf_spareint; + /* Socket domain. */ + int kf_sock_domain0; + /* Socket type. */ + int kf_sock_type0; + /* Socket protocol. */ + int kf_sock_protocol0; + /* Socket address. */ + struct sockaddr_storage kf_sa_local; + /* Peer address. */ + struct sockaddr_storage kf_sa_peer; + /* Address of so_pcb. */ + uint64_t kf_sock_pcb; + /* Address of inp_ppcb. 
*/ + uint64_t kf_sock_inpcb; + /* Address of unp_conn. */ + uint64_t kf_sock_unpconn; + /* Send buffer state. */ + uint16_t kf_sock_snd_sb_state; + /* Receive buffer state. */ + uint16_t kf_sock_rcv_sb_state; + /* Round to 64 bit alignment. */ + uint32_t kf_sock_pad0; + } kf_sock; + struct { + /* Vnode type. */ + int kf_file_type; + /* Space for future use */ + int kf_spareint[3]; + uint64_t kf_spareint64[30]; + /* Vnode filesystem id. */ + uint64_t kf_file_fsid; + /* File device. */ + uint64_t kf_file_rdev; + /* Global file id. */ + uint64_t kf_file_fileid; + /* File size. */ + uint64_t kf_file_size; + /* Vnode filesystem id, FreeBSD 11 compat. */ + uint32_t kf_file_fsid_freebsd11; + /* File device, FreeBSD 11 compat. */ + uint32_t kf_file_rdev_freebsd11; + /* File mode. */ + uint16_t kf_file_mode; + /* Round to 64 bit alignment. */ + uint16_t kf_file_pad0; + uint32_t kf_file_pad1; + } kf_file; + struct { + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; + uint32_t kf_sem_value; + uint16_t kf_sem_mode; + } kf_sem; + struct { + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; + uint64_t kf_pipe_addr; + uint64_t kf_pipe_peer; + uint32_t kf_pipe_buffer_cnt; + /* Round to 64 bit alignment. */ + uint32_t kf_pipe_pad0[3]; + } kf_pipe; + struct { + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; + uint32_t kf_pts_dev_freebsd11; + uint32_t kf_pts_pad0; + uint64_t kf_pts_dev; + /* Round to 64 bit alignment. */ + uint32_t kf_pts_pad1[4]; + } kf_pts; + struct { + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; + int32_t kf_pid; + } kf_proc; + } kf_un; + uint16_t kf_status; /* Status flags. */ + uint16_t kf_pad1; /* Round to 32 bit alignment. */ + int32_t _kf_ispare0; /* Space for more stuff. */ + target_cap_rights_t kf_cap_rights; /* Capability rights. */ + uint64_t _kf_cap_spare; /* Space for future cap_rights_t. */ + /* Truncated before copyout in sysctl */ + char kf_path[PATH_MAX]; /* Path to file, if any. */ +}; + +struct target_kinfo_vmentry { + int32_t kve_structsize; /* Variable size of record. */ + int32_t kve_type; /* Type of map entry. */ + uint64_t kve_start; /* Starting address. */ + uint64_t kve_end; /* Finishing address. */ + uint64_t kve_offset; /* Mapping offset in object */ + uint64_t kve_vn_fileid; /* inode number if vnode */ + uint32_t kve_vn_fsid_freebsd11; /* dev_t of vnode location */ + int32_t kve_flags; /* Flags on map entry. */ + int32_t kve_resident; /* Number of resident pages. */ + int32_t kve_private_resident; /* Number of private pages. */ + int32_t kve_protection; /* Protection bitmask. */ + int32_t kve_ref_count; /* VM obj ref count. */ + int32_t kve_shadow_count; /* VM obj shadow count. */ + int32_t kve_vn_type; /* Vnode type. */ + uint64_t kve_vn_size; /* File size. */ + uint32_t kve_vn_rdev_freebsd11; /* Device id if device. */ + uint16_t kve_vn_mode; /* File mode. */ + uint16_t kve_status; /* Status flags. */ +#if (__FreeBSD_version >= 1300501 && __FreeBSD_version < 1400000) || \ + __FreeBSD_version >= 1400009 + union { + uint64_t _kve_vn_fsid; /* dev_t of vnode location */ + uint64_t _kve_obj; /* handle of anon obj */ + } kve_type_spec; +#define kve_vn_fsid kve_type_spec._kve_vn_fsid +#define kve_obj kve_type_spec._kve_obj +#else + uint64_t kve_vn_fsid; /* dev_t of vnode location */ +#endif + uint64_t kve_vn_rdev; /* Device id if device. */ + int _kve_ispare[8]; /* Space for more stuff. */ + /* Truncated before copyout in sysctl */ + char kve_path[PATH_MAX]; /* Path to VM obj, if any. */ +}; + +#endif /* ! 
_TARGET_OS_USER_H_ */ diff --git a/bsd-user/freebsd/target_os_vmparam.h b/bsd-user/freebsd/target_os_vmparam.h new file mode 100644 index 00000000000..990300c619f --- /dev/null +++ b/bsd-user/freebsd/target_os_vmparam.h @@ -0,0 +1,38 @@ +/* + * FreeBSD VM parameters definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#ifndef _TARGET_OS_VMPARAM_H_ +#define _TARGET_OS_VMPARAM_H_ + +#include "target_arch_vmparam.h" + +/* Compare to sys/exec.h */ +struct target_ps_strings { + abi_ulong ps_argvstr; + uint32_t ps_nargvstr; + abi_ulong ps_envstr; + uint32_t ps_nenvstr; +}; + +extern abi_ulong target_stkbas; +extern abi_ulong target_stksiz; + +#define TARGET_PS_STRINGS ((target_stkbas + target_stksiz) - \ + sizeof(struct target_ps_strings)) + +#endif /* !TARGET_OS_VMPARAM_H_ */ diff --git a/bsd-user/i386/target_arch.h b/bsd-user/i386/target_arch.h new file mode 100644 index 00000000000..73e9a028feb --- /dev/null +++ b/bsd-user/i386/target_arch.h @@ -0,0 +1,31 @@ +/* + * Intel x86 specific prototypes for bsd-user + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_ARCH_H_ +#define _TARGET_ARCH_H_ + +/* target_arch_cpu.c */ +void bsd_i386_write_dt(void *ptr, unsigned long addr, unsigned long limit, + int flags); +void bsd_i386_set_idt(int n, unsigned int dpl); +void bsd_i386_set_idt_base(uint64_t base); + +#define target_cpu_set_tls(env, newtls) + +#endif /* ! _TARGET_ARCH_H_ */ diff --git a/bsd-user/i386/target_arch_cpu.c b/bsd-user/i386/target_arch_cpu.c new file mode 100644 index 00000000000..d349e452997 --- /dev/null +++ b/bsd-user/i386/target_arch_cpu.c @@ -0,0 +1,71 @@ +/* + * i386 cpu related code + * + * Copyright (c) 2013 Stacey Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include + +#include "qemu/osdep.h" +#include "cpu.h" +#include "qemu.h" +#include "qemu/timer.h" + +#include "target_arch.h" + +static uint64_t *idt_table; + +uint64_t cpu_get_tsc(CPUX86State *env) +{ + return cpu_get_host_ticks(); +} + +void bsd_i386_write_dt(void *ptr, unsigned long addr, unsigned long limit, + int flags) +{ + unsigned int e1, e2; + uint32_t *p; + e1 = (addr << 16) | (limit & 0xffff); + e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000); + e2 |= flags; + p = ptr; + p[0] = tswap32(e1); + p[1] = tswap32(e2); +} + + +static void set_gate(void *ptr, unsigned int type, unsigned int dpl, + uint32_t addr, unsigned int sel) +{ + uint32_t *p, e1, e2; + e1 = (addr & 0xffff) | (sel << 16); + e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); + p = ptr; + p[0] = tswap32(e1); + p[1] = tswap32(e2); +} + +/* only dpl matters as we do only user space emulation */ +void bsd_i386_set_idt(int n, unsigned int dpl) +{ + set_gate(idt_table + n, 0, dpl, 0, 0); +} + +void bsd_i386_set_idt_base(uint64_t base) +{ + idt_table = g2h_untagged(base); +} + diff --git a/bsd-user/i386/target_arch_cpu.h b/bsd-user/i386/target_arch_cpu.h new file mode 100644 index 00000000000..b28602adbbd --- /dev/null +++ b/bsd-user/i386/target_arch_cpu.h @@ -0,0 +1,207 @@ +/* + * i386 cpu init and loop + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#ifndef _TARGET_ARCH_CPU_H_ +#define _TARGET_ARCH_CPU_H_ + +#include "target_arch.h" + +#define TARGET_DEFAULT_CPU_MODEL "qemu32" + +static inline void target_cpu_init(CPUX86State *env, + struct target_pt_regs *regs) +{ + uint64_t *gdt_table; + + env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; + env->hflags |= HF_PE_MASK | HF_CPL_MASK; + if (env->features[FEAT_1_EDX] & CPUID_SSE) { + env->cr[4] |= CR4_OSFXSR_MASK; + env->hflags |= HF_OSFXSR_MASK; + } + + /* flags setup : we activate the IRQs by default as in user mode */ + env->eflags |= IF_MASK; + + /* register setup */ + env->regs[R_EAX] = regs->eax; + env->regs[R_EBX] = regs->ebx; + env->regs[R_ECX] = regs->ecx; + env->regs[R_EDX] = regs->edx; + env->regs[R_ESI] = regs->esi; + env->regs[R_EDI] = regs->edi; + env->regs[R_EBP] = regs->ebp; + env->regs[R_ESP] = regs->esp; + env->eip = regs->eip; + + /* interrupt setup */ + env->idt.limit = 255; + + env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1), + PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + bsd_i386_set_idt_base(env->idt.base); + bsd_i386_set_idt(0, 0); + bsd_i386_set_idt(1, 0); + bsd_i386_set_idt(2, 0); + bsd_i386_set_idt(3, 3); + bsd_i386_set_idt(4, 3); + bsd_i386_set_idt(5, 0); + bsd_i386_set_idt(6, 0); + bsd_i386_set_idt(7, 0); + bsd_i386_set_idt(8, 0); + bsd_i386_set_idt(9, 0); + bsd_i386_set_idt(10, 0); + bsd_i386_set_idt(11, 0); + bsd_i386_set_idt(12, 0); + bsd_i386_set_idt(13, 0); + bsd_i386_set_idt(14, 0); + bsd_i386_set_idt(15, 0); + bsd_i386_set_idt(16, 0); + bsd_i386_set_idt(17, 0); + bsd_i386_set_idt(18, 0); + bsd_i386_set_idt(19, 0); + bsd_i386_set_idt(0x80, 3); + + /* segment setup */ + env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES, + PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1; + gdt_table = g2h_untagged(env->gdt.base); + + bsd_i386_write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | + (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); + + bsd_i386_write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | + (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT)); + + cpu_x86_load_seg(env, R_CS, __USER_CS); + cpu_x86_load_seg(env, R_SS, __USER_DS); + cpu_x86_load_seg(env, R_DS, __USER_DS); + cpu_x86_load_seg(env, R_ES, __USER_DS); + cpu_x86_load_seg(env, R_FS, __USER_DS); + cpu_x86_load_seg(env, R_GS, __USER_DS); + /* This hack makes Wine work... 
*/ + env->segs[R_FS].selector = 0; +} + +static inline void target_cpu_loop(CPUX86State *env) +{ + CPUState *cs = env_cpu(env); + int trapnr; + abi_ulong pc; + /* target_siginfo_t info; */ + + for (;;) { + cpu_exec_start(cs); + trapnr = cpu_exec(cs); + cpu_exec_end(cs); + process_queued_cpu_work(cs); + + switch (trapnr) { + case 0x80: + /* syscall from int $0x80 */ + if (bsd_type == target_freebsd) { + abi_ulong params = (abi_ulong) env->regs[R_ESP] + + sizeof(int32_t); + int32_t syscall_nr = env->regs[R_EAX]; + int32_t arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8; + + if (syscall_nr == TARGET_FREEBSD_NR_syscall) { + get_user_s32(syscall_nr, params); + params += sizeof(int32_t); + } else if (syscall_nr == TARGET_FREEBSD_NR___syscall) { + get_user_s32(syscall_nr, params); + params += sizeof(int64_t); + } + get_user_s32(arg1, params); + params += sizeof(int32_t); + get_user_s32(arg2, params); + params += sizeof(int32_t); + get_user_s32(arg3, params); + params += sizeof(int32_t); + get_user_s32(arg4, params); + params += sizeof(int32_t); + get_user_s32(arg5, params); + params += sizeof(int32_t); + get_user_s32(arg6, params); + params += sizeof(int32_t); + get_user_s32(arg7, params); + params += sizeof(int32_t); + get_user_s32(arg8, params); + env->regs[R_EAX] = do_freebsd_syscall(env, + syscall_nr, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8); + } else { /* if (bsd_type == target_openbsd) */ + env->regs[R_EAX] = do_openbsd_syscall(env, + env->regs[R_EAX], + env->regs[R_EBX], + env->regs[R_ECX], + env->regs[R_EDX], + env->regs[R_ESI], + env->regs[R_EDI], + env->regs[R_EBP]); + } + if (((abi_ulong)env->regs[R_EAX]) >= (abi_ulong)(-515)) { + env->regs[R_EAX] = -env->regs[R_EAX]; + env->eflags |= CC_C; + } else { + env->eflags &= ~CC_C; + } + break; + + case EXCP_INTERRUPT: + /* just indicate that signals should be handled asap */ + break; + + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; + + default: + pc = env->segs[R_CS].base + env->eip; + fprintf(stderr, "qemu: 0x%08lx: unhandled CPU exception 0x%x - " + "aborting\n", (long)pc, trapnr); + abort(); + } + process_pending_signals(env); + } +} + +static inline void target_cpu_clone_regs(CPUX86State *env, target_ulong newsp) +{ + if (newsp) { + env->regs[R_ESP] = newsp; + } + env->regs[R_EAX] = 0; +} + +static inline void target_cpu_reset(CPUArchState *cpu) +{ + cpu_reset(env_cpu(cpu)); +} + +#endif /* ! _TARGET_ARCH_CPU_H_ */ diff --git a/bsd-user/i386/target_arch_elf.h b/bsd-user/i386/target_arch_elf.h new file mode 100644 index 00000000000..eb760e07faa --- /dev/null +++ b/bsd-user/i386/target_arch_elf.h @@ -0,0 +1,35 @@ +/* + * i386 ELF definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ +#ifndef _TARGET_ARCH_ELF_H_ +#define _TARGET_ARCH_ELF_H_ + +#define ELF_START_MMAP 0x80000000 +#define ELF_ET_DYN_LOAD_ADDR 0x01001000 +#define elf_check_arch(x) (((x) == EM_386) || ((x) == EM_486)) + +#define ELF_HWCAP 0 /* FreeBSD doesn't do AT_HWCAP{,2} on x86 */ + +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_386 + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + +#endif /* _TARGET_ARCH_ELF_H_ */ diff --git a/bsd-user/i386/target_arch_reg.h b/bsd-user/i386/target_arch_reg.h new file mode 100644 index 00000000000..1fce1daf015 --- /dev/null +++ b/bsd-user/i386/target_arch_reg.h @@ -0,0 +1,82 @@ +/* + * FreeBSD i386 register structures + * + * Copyright (c) 2015 Stacey Son + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_ARCH_REG_H_ +#define _TARGET_ARCH_REG_H_ + +/* See sys/i386/include/reg.h */ +typedef struct target_reg { + uint32_t r_fs; + uint32_t r_es; + uint32_t r_ds; + uint32_t r_edi; + uint32_t r_esi; + uint32_t r_ebp; + uint32_t r_isp; + uint32_t r_ebx; + uint32_t r_edx; + uint32_t r_ecx; + uint32_t r_eax; + uint32_t r_trapno; + uint32_t r_err; + uint32_t r_eip; + uint32_t r_cs; + uint32_t r_eflags; + uint32_t r_esp; + uint32_t r_ss; + uint32_t r_gs; +} target_reg_t; + +typedef struct target_fpreg { + uint32_t fpr_env[7]; + uint8_t fpr_acc[8][10]; + uint32_t fpr_ex_sw; + uint8_t fpr_pad[64]; +} target_fpreg_t; + +static inline void target_copy_regs(target_reg_t *regs, const CPUX86State *env) +{ + + regs->r_fs = env->segs[R_FS].selector & 0xffff; + regs->r_es = env->segs[R_ES].selector & 0xffff; + regs->r_ds = env->segs[R_DS].selector & 0xffff; + + regs->r_edi = env->regs[R_EDI]; + regs->r_esi = env->regs[R_ESI]; + regs->r_ebp = env->regs[R_EBP]; + /* regs->r_isp = env->regs[R_ISP]; XXX */ + regs->r_ebx = env->regs[R_EBX]; + regs->r_edx = env->regs[R_EDX]; + regs->r_ecx = env->regs[R_ECX]; + regs->r_eax = env->regs[R_EAX]; + /* regs->r_trapno = env->regs[R_TRAPNO]; XXX */ + regs->r_err = env->error_code; /* XXX ? */ + regs->r_eip = env->eip; + + regs->r_cs = env->segs[R_CS].selector & 0xffff; + + regs->r_eflags = env->eflags; + regs->r_esp = env->regs[R_ESP]; + + regs->r_ss = env->segs[R_SS].selector & 0xffff; + regs->r_gs = env->segs[R_GS].selector & 0xffff; +} + +#endif /* !_TARGET_ARCH_REG_H_ */ diff --git a/bsd-user/i386/target_arch_signal.h b/bsd-user/i386/target_arch_signal.h new file mode 100644 index 00000000000..a90750d602c --- /dev/null +++ b/bsd-user/i386/target_arch_signal.h @@ -0,0 +1,92 @@ +/* + * i386 dependent signal definitions + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#ifndef TARGET_ARCH_SIGNAL_H +#define TARGET_ARCH_SIGNAL_H + +#include "cpu.h" + +/* Size of the signal trampoline code placed on the stack. */ +#define TARGET_SZSIGCODE 0 + +/* compare to x86/include/_limits.h */ +#define TARGET_MINSIGSTKSZ (512 * 4) /* min sig stack size */ +#define TARGET_SIGSTKSZ (MINSIGSTKSZ + 32768) /* recommended size */ + +struct target_sigcontext { + /* to be added */ +}; + +typedef struct target_mcontext { +} target_mcontext_t; + +typedef struct target_ucontext { + target_sigset_t uc_sigmask; + target_mcontext_t uc_mcontext; + abi_ulong uc_link; + target_stack_t uc_stack; + int32_t uc_flags; + int32_t __spare__[4]; +} target_ucontext_t; + +struct target_sigframe { + abi_ulong sf_signum; + abi_ulong sf_siginfo; /* code or pointer to sf_si */ + abi_ulong sf_ucontext; /* points to sf_uc */ + abi_ulong sf_addr; /* undocumented 4th arg */ + target_ucontext_t sf_uc; /* = *sf_ucontext */ + target_siginfo_t sf_si; /* = *sf_siginfo (SA_SIGINFO case)*/ + uint32_t __spare__[2]; +}; + +/* + * Compare to i386/i386/machdep.c sendsig() + * Assumes that target stack frame memory is locked. + */ +static inline abi_long set_sigtramp_args(CPUX86State *regs, + int sig, struct target_sigframe *frame, abi_ulong frame_addr, + struct target_sigaction *ka) +{ + /* XXX return -TARGET_EOPNOTSUPP; */ + return 0; +} + +/* Compare to i386/i386/machdep.c get_mcontext() */ +static inline abi_long get_mcontext(CPUX86State *regs, + target_mcontext_t *mcp, int flags) +{ + /* XXX */ + return -TARGET_EOPNOTSUPP; +} + +/* Compare to i386/i386/machdep.c set_mcontext() */ +static inline abi_long set_mcontext(CPUX86State *regs, + target_mcontext_t *mcp, int srflag) +{ + /* XXX */ + return -TARGET_EOPNOTSUPP; +} + +static inline abi_long get_ucontext_sigreturn(CPUX86State *regs, + abi_ulong target_sf, abi_ulong *target_uc) +{ + /* XXX */ + *target_uc = 0; + return -TARGET_EOPNOTSUPP; +} + +#endif /* TARGET_ARCH_SIGNAL_H */ diff --git a/bsd-user/i386/target_arch_sigtramp.h b/bsd-user/i386/target_arch_sigtramp.h new file mode 100644 index 00000000000..cb4e89b0b0d --- /dev/null +++ b/bsd-user/i386/target_arch_sigtramp.h @@ -0,0 +1,29 @@ +/* + * Intel i386 sigcode for bsd-user + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see .
+ */ + +#ifndef _TARGET_ARCH_SIGTRAMP_H_ +#define _TARGET_ARCH_SIGTRAMP_H_ + +static inline abi_long setup_sigtramp(abi_ulong offset, unsigned sigf_uc, + unsigned sys_sigreturn) +{ + + return 0; +} +#endif /* _TARGET_ARCH_SIGTRAMP_H_ */ diff --git a/bsd-user/i386/target_arch_thread.h b/bsd-user/i386/target_arch_thread.h new file mode 100644 index 00000000000..e65e476f757 --- /dev/null +++ b/bsd-user/i386/target_arch_thread.h @@ -0,0 +1,47 @@ +/* + * i386 thread support + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#ifndef _TARGET_ARCH_THREAD_H_ +#define _TARGET_ARCH_THREAD_H_ + +/* Compare to vm_machdep.c cpu_set_upcall_kse() */ +static inline void target_thread_set_upcall(CPUX86State *regs, abi_ulong entry, + abi_ulong arg, abi_ulong stack_base, abi_ulong stack_size) +{ + /* XXX */ +} + +static inline void target_thread_init(struct target_pt_regs *regs, + struct image_info *infop) +{ + regs->esp = infop->start_stack; + regs->eip = infop->entry; + + /* + * SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx + * contains a pointer to a function which might be registered using + * `atexit'. This provides a means for the dynamic linker to call DT_FINI + * functions for shared libraries that have been loaded before the code + * runs. + * + * A value of 0 tells us we have no such handler. + */ + regs->edx = 0; +} + +#endif /* !_TARGET_ARCH_THREAD_H_ */ diff --git a/bsd-user/i386/target_arch_vmparam.h b/bsd-user/i386/target_arch_vmparam.h new file mode 100644 index 00000000000..bb7718265b2 --- /dev/null +++ b/bsd-user/i386/target_arch_vmparam.h @@ -0,0 +1,46 @@ +/* + * i386 VM parameters definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see .
+ */ +#ifndef _TARGET_ARCH_VMPARAM_H_ +#define _TARGET_ARCH_VMPARAM_H_ + +#include "cpu.h" + +/* compare to i386/include/vmparam.h */ +#define TARGET_MAXTSIZ (128 * MiB) /* max text size */ +#define TARGET_DFLDSIZ (128 * MiB) /* initial data size limit */ +#define TARGET_MAXDSIZ (512 * MiB) /* max data size */ +#define TARGET_DFLSSIZ (8 * MiB) /* initial stack size limit */ +#define TARGET_MAXSSIZ (64 * MiB) /* max stack size */ +#define TARGET_SGROWSIZ (128 * KiB) /* amount to grow stack */ + +#define TARGET_RESERVED_VA 0xf7000000 + +#define TARGET_USRSTACK (0xbfc00000) + +static inline abi_ulong get_sp_from_cpustate(CPUX86State *state) +{ + return state->regs[R_ESP]; +} + +static inline void set_second_rval(CPUX86State *state, abi_ulong retval2) +{ + state->regs[R_EDX] = retval2; +} + +#endif /* !_TARGET_ARCH_VMPARAM_H_ */ diff --git a/bsd-user/i386/target_signal.h b/bsd-user/i386/target_signal.h deleted file mode 100644 index 2ef36d1f980..00000000000 --- a/bsd-user/i386/target_signal.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef TARGET_SIGNAL_H -#define TARGET_SIGNAL_H - -#include "cpu.h" - -/* this struct defines a stack used during syscall handling */ - -typedef struct target_sigaltstack { - abi_ulong ss_sp; - abi_long ss_flags; - abi_ulong ss_size; -} target_stack_t; - - -static inline abi_ulong get_sp_from_cpustate(CPUX86State *state) -{ - return state->regs[R_ESP]; -} - -#endif /* TARGET_SIGNAL_H */ diff --git a/bsd-user/main.c b/bsd-user/main.c index 798aba512c1..cb5ea402361 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -1,7 +1,8 @@ /* - * qemu user main + * qemu bsd user main * * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2013-14 Stacey Son * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,6 +18,11 @@ * along with this program; if not, see . */ +#include +#include +#include +#include + #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/units.h" @@ -32,632 +38,115 @@ #include "qemu/path.h" #include "qemu/help_option.h" #include "qemu/module.h" -#include "cpu.h" #include "exec/exec-all.h" #include "tcg/tcg.h" #include "qemu/timer.h" #include "qemu/envlist.h" +#include "qemu/cutils.h" #include "exec/log.h" #include "trace/control.h" +#include "crypto/init.h" +#include "qemu/guest-random.h" + +#include "host-os.h" +#include "target_arch_cpu.h" int singlestep; -unsigned long mmap_min_addr; uintptr_t guest_base; bool have_guest_base; +/* + * When running 32-on-64 we should make sure we can fit all of the possible + * guest address space into a contiguous chunk of virtual host memory. + * + * This way we will never overlap with our own libraries or binaries or stack + * or anything else that QEMU maps. + * + * Many cpus reserve the high bit (or more than one for some 64-bit cpus) + * of the address for the kernel. Some cpus rely on this and user space + * uses the high bit(s) for pointer tagging and the like. For them, we + * must preserve the expected address space. + */ +#ifndef MAX_RESERVED_VA +# if HOST_LONG_BITS > TARGET_VIRT_ADDR_SPACE_BITS +# if TARGET_VIRT_ADDR_SPACE_BITS == 32 && \ + (TARGET_LONG_BITS == 32 || defined(TARGET_ABI32)) +/* + * There are a number of places where we assign reserved_va to a variable + * of type abi_ulong and expect it to fit. Avoid the last page. 
+ */ +# define MAX_RESERVED_VA (0xfffffffful & TARGET_PAGE_MASK) +# else +# define MAX_RESERVED_VA (1ul << TARGET_VIRT_ADDR_SPACE_BITS) +# endif +# else +# define MAX_RESERVED_VA 0 +# endif +#endif + +/* + * That said, reserving *too* much vm space via mmap can run into problems + * with rlimits, oom due to page table creation, etc. We will still try it, + * if directed by the command-line option, but not by default. + */ +#if HOST_LONG_BITS == 64 && TARGET_VIRT_ADDR_SPACE_BITS <= 32 +unsigned long reserved_va = MAX_RESERVED_VA; +#else unsigned long reserved_va; +#endif static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX; const char *qemu_uname_release; -extern char **environ; enum BSDType bsd_type; +char qemu_proc_pathname[PATH_MAX]; /* full path to exeutable */ -/* XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so - we allocate a bigger stack. Need a better solution, for example - by remapping the process stack directly at the right place */ -unsigned long x86_stack_size = 512 * 1024; +unsigned long target_maxtsiz = TARGET_MAXTSIZ; /* max text size */ +unsigned long target_dfldsiz = TARGET_DFLDSIZ; /* initial data size limit */ +unsigned long target_maxdsiz = TARGET_MAXDSIZ; /* max data size */ +unsigned long target_dflssiz = TARGET_DFLSSIZ; /* initial data size limit */ +unsigned long target_maxssiz = TARGET_MAXSSIZ; /* max stack size */ +unsigned long target_sgrowsiz = TARGET_SGROWSIZ; /* amount to grow stack */ -void gemu_log(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); -} - -#if defined(TARGET_I386) -int cpu_get_pic_interrupt(CPUX86State *env) -{ - return -1; -} -#endif +/* Helper routines for implementing atomic operations. */ void fork_start(void) { + start_exclusive(); + cpu_list_lock(); + mmap_fork_start(); } void fork_end(int child) { if (child) { - gdbserver_fork(thread_cpu); - } -} - -#ifdef TARGET_I386 -/***********************************************************/ -/* CPUX86 core interface */ - -uint64_t cpu_get_tsc(CPUX86State *env) -{ - return cpu_get_host_ticks(); -} - -static void write_dt(void *ptr, unsigned long addr, unsigned long limit, - int flags) -{ - unsigned int e1, e2; - uint32_t *p; - e1 = (addr << 16) | (limit & 0xffff); - e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000); - e2 |= flags; - p = ptr; - p[0] = tswap32(e1); - p[1] = tswap32(e2); -} - -static uint64_t *idt_table; -#ifdef TARGET_X86_64 -static void set_gate64(void *ptr, unsigned int type, unsigned int dpl, - uint64_t addr, unsigned int sel) -{ - uint32_t *p, e1, e2; - e1 = (addr & 0xffff) | (sel << 16); - e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); - p = ptr; - p[0] = tswap32(e1); - p[1] = tswap32(e2); - p[2] = tswap32(addr >> 32); - p[3] = 0; -} -/* only dpl matters as we do only user space emulation */ -static void set_idt(int n, unsigned int dpl) -{ - set_gate64(idt_table + n * 2, 0, dpl, 0, 0); -} -#else -static void set_gate(void *ptr, unsigned int type, unsigned int dpl, - uint32_t addr, unsigned int sel) -{ - uint32_t *p, e1, e2; - e1 = (addr & 0xffff) | (sel << 16); - e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); - p = ptr; - p[0] = tswap32(e1); - p[1] = tswap32(e2); -} - -/* only dpl matters as we do only user space emulation */ -static void set_idt(int n, unsigned int dpl) -{ - set_gate(idt_table + n, 0, dpl, 0, 0); -} -#endif - -void cpu_loop(CPUX86State *env) -{ - CPUState *cs = env_cpu(env); - int trapnr; - abi_ulong pc; - //target_siginfo_t info; 
- - for(;;) { - cpu_exec_start(cs); - trapnr = cpu_exec(cs); - cpu_exec_end(cs); - process_queued_cpu_work(cs); - - switch(trapnr) { - case 0x80: - /* syscall from int $0x80 */ - if (bsd_type == target_freebsd) { - abi_ulong params = (abi_ulong) env->regs[R_ESP] + - sizeof(int32_t); - int32_t syscall_nr = env->regs[R_EAX]; - int32_t arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8; - - if (syscall_nr == TARGET_FREEBSD_NR_syscall) { - get_user_s32(syscall_nr, params); - params += sizeof(int32_t); - } else if (syscall_nr == TARGET_FREEBSD_NR___syscall) { - get_user_s32(syscall_nr, params); - params += sizeof(int64_t); - } - get_user_s32(arg1, params); - params += sizeof(int32_t); - get_user_s32(arg2, params); - params += sizeof(int32_t); - get_user_s32(arg3, params); - params += sizeof(int32_t); - get_user_s32(arg4, params); - params += sizeof(int32_t); - get_user_s32(arg5, params); - params += sizeof(int32_t); - get_user_s32(arg6, params); - params += sizeof(int32_t); - get_user_s32(arg7, params); - params += sizeof(int32_t); - get_user_s32(arg8, params); - env->regs[R_EAX] = do_freebsd_syscall(env, - syscall_nr, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8); - } else { //if (bsd_type == target_openbsd) - env->regs[R_EAX] = do_openbsd_syscall(env, - env->regs[R_EAX], - env->regs[R_EBX], - env->regs[R_ECX], - env->regs[R_EDX], - env->regs[R_ESI], - env->regs[R_EDI], - env->regs[R_EBP]); - } - if (((abi_ulong)env->regs[R_EAX]) >= (abi_ulong)(-515)) { - env->regs[R_EAX] = -env->regs[R_EAX]; - env->eflags |= CC_C; - } else { - env->eflags &= ~CC_C; - } - break; -#ifndef TARGET_ABI32 - case EXCP_SYSCALL: - /* syscall from syscall instruction */ - if (bsd_type == target_freebsd) - env->regs[R_EAX] = do_freebsd_syscall(env, - env->regs[R_EAX], - env->regs[R_EDI], - env->regs[R_ESI], - env->regs[R_EDX], - env->regs[R_ECX], - env->regs[8], - env->regs[9], 0, 0); - else { //if (bsd_type == target_openbsd) - env->regs[R_EAX] = do_openbsd_syscall(env, - env->regs[R_EAX], - env->regs[R_EDI], - env->regs[R_ESI], - env->regs[R_EDX], - env->regs[10], - env->regs[8], - env->regs[9]); - } - env->eip = env->exception_next_eip; - if (((abi_ulong)env->regs[R_EAX]) >= (abi_ulong)(-515)) { - env->regs[R_EAX] = -env->regs[R_EAX]; - env->eflags |= CC_C; - } else { - env->eflags &= ~CC_C; - } - break; -#endif -#if 0 - case EXCP0B_NOSEG: - case EXCP0C_STACK: - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, &info); - break; - case EXCP0D_GPF: - /* XXX: potential problem if ABI32 */ -#ifndef TARGET_X86_64 - if (env->eflags & VM_MASK) { - handle_vm86_fault(env); - } else -#endif - { - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, &info); - } - break; - case EXCP0E_PAGE: - info.si_signo = SIGSEGV; - info.si_errno = 0; - if (!(env->error_code & 1)) - info.si_code = TARGET_SEGV_MAPERR; - else - info.si_code = TARGET_SEGV_ACCERR; - info._sifields._sigfault._addr = env->cr[2]; - queue_signal(env, info.si_signo, &info); - break; - case EXCP00_DIVZ: -#ifndef TARGET_X86_64 - if (env->eflags & VM_MASK) { - handle_vm86_trap(env, trapnr); - } else -#endif - { - /* division by zero */ - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = TARGET_FPE_INTDIV; - info._sifields._sigfault._addr = env->eip; - queue_signal(env, info.si_signo, &info); + CPUState *cpu, *next_cpu; + /* + * Child processes 
created by fork() only have a single thread. Discard + * information about the parent threads. + */ + CPU_FOREACH_SAFE(cpu, next_cpu) { + if (cpu != thread_cpu) { + QTAILQ_REMOVE_RCU(&cpus, cpu, node); } - break; - case EXCP01_DB: - case EXCP03_INT3: -#ifndef TARGET_X86_64 - if (env->eflags & VM_MASK) { - handle_vm86_trap(env, trapnr); - } else -#endif - { - info.si_signo = SIGTRAP; - info.si_errno = 0; - if (trapnr == EXCP01_DB) { - info.si_code = TARGET_TRAP_BRKPT; - info._sifields._sigfault._addr = env->eip; - } else { - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - } - queue_signal(env, info.si_signo, &info); - } - break; - case EXCP04_INTO: - case EXCP05_BOUND: -#ifndef TARGET_X86_64 - if (env->eflags & VM_MASK) { - handle_vm86_trap(env, trapnr); - } else -#endif - { - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, &info); - } - break; - case EXCP06_ILLOP: - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = TARGET_ILL_ILLOPN; - info._sifields._sigfault._addr = env->eip; - queue_signal(env, info.si_signo, &info); - break; -#endif - case EXCP_INTERRUPT: - /* just indicate that signals should be handled asap */ - break; -#if 0 - case EXCP_DEBUG: - { - int sig; - - sig = gdb_handlesig (env, TARGET_SIGTRAP); - if (sig) - { - info.si_signo = sig; - info.si_errno = 0; - info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, &info); - } - } - break; -#endif - default: - pc = env->segs[R_CS].base + env->eip; - fprintf(stderr, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n", - (long)pc, trapnr); - abort(); } - process_pending_signals(env); - } -} -#endif - -#ifdef TARGET_SPARC -#define SPARC64_STACK_BIAS 2047 - -//#define DEBUG_WIN -/* WARNING: dealing with register windows _is_ complicated. More info - can be found at http://www.sics.se/~psm/sparcstack.html */ -static inline int get_reg_index(CPUSPARCState *env, int cwp, int index) -{ - index = (index + cwp * 16) % (16 * env->nwindows); - /* wrap handling : if cwp is on the last window, then we use the - registers 'after' the end */ - if (index < 8 && env->cwp == env->nwindows - 1) - index += 16 * env->nwindows; - return index; -} - -/* save the register window 'cwp1' */ -static inline void save_window_offset(CPUSPARCState *env, int cwp1) -{ - unsigned int i; - abi_ulong sp_ptr; - - sp_ptr = env->regbase[get_reg_index(env, cwp1, 6)]; -#ifdef TARGET_SPARC64 - if (sp_ptr & 3) - sp_ptr += SPARC64_STACK_BIAS; -#endif -#if defined(DEBUG_WIN) - printf("win_overflow: sp_ptr=0x" TARGET_ABI_FMT_lx " save_cwp=%d\n", - sp_ptr, cwp1); -#endif - for(i = 0; i < 16; i++) { - /* FIXME - what to do if put_user() fails? 
*/ - put_user_ual(env->regbase[get_reg_index(env, cwp1, 8 + i)], sp_ptr); - sp_ptr += sizeof(abi_ulong); - } -} - -static void save_window(CPUSPARCState *env) -{ -#ifndef TARGET_SPARC64 - unsigned int new_wim; - new_wim = ((env->wim >> 1) | (env->wim << (env->nwindows - 1))) & - ((1LL << env->nwindows) - 1); - save_window_offset(env, cpu_cwp_dec(env, env->cwp - 2)); - env->wim = new_wim; -#else - /* - * cansave is zero if the spill trap handler is triggered by `save` and - * nonzero if triggered by a `flushw` - */ - save_window_offset(env, cpu_cwp_dec(env, env->cwp - env->cansave - 2)); - env->cansave++; - env->canrestore--; -#endif -} - -static void restore_window(CPUSPARCState *env) -{ -#ifndef TARGET_SPARC64 - unsigned int new_wim; -#endif - unsigned int i, cwp1; - abi_ulong sp_ptr; - -#ifndef TARGET_SPARC64 - new_wim = ((env->wim << 1) | (env->wim >> (env->nwindows - 1))) & - ((1LL << env->nwindows) - 1); -#endif - - /* restore the invalid window */ - cwp1 = cpu_cwp_inc(env, env->cwp + 1); - sp_ptr = env->regbase[get_reg_index(env, cwp1, 6)]; -#ifdef TARGET_SPARC64 - if (sp_ptr & 3) - sp_ptr += SPARC64_STACK_BIAS; -#endif -#if defined(DEBUG_WIN) - printf("win_underflow: sp_ptr=0x" TARGET_ABI_FMT_lx " load_cwp=%d\n", - sp_ptr, cwp1); -#endif - for(i = 0; i < 16; i++) { - /* FIXME - what to do if get_user() fails? */ - get_user_ual(env->regbase[get_reg_index(env, cwp1, 8 + i)], sp_ptr); - sp_ptr += sizeof(abi_ulong); + mmap_fork_end(child); + /* + * qemu_init_cpu_list() takes care of reinitializing the exclusive + * state, so we don't need to end_exclusive() here. + */ + qemu_init_cpu_list(); + gdbserver_fork(thread_cpu); + } else { + mmap_fork_end(child); + cpu_list_unlock(); + end_exclusive(); } -#ifdef TARGET_SPARC64 - env->canrestore++; - if (env->cleanwin < env->nwindows - 1) - env->cleanwin++; - env->cansave--; -#else - env->wim = new_wim; -#endif } -static void flush_windows(CPUSPARCState *env) +void cpu_loop(CPUArchState *env) { - int offset, cwp1; - - offset = 1; - for(;;) { - /* if restore would invoke restore_window(), then we can stop */ - cwp1 = cpu_cwp_inc(env, env->cwp + offset); -#ifndef TARGET_SPARC64 - if (env->wim & (1 << cwp1)) - break; -#else - if (env->canrestore == 0) - break; - env->cansave++; - env->canrestore--; -#endif - save_window_offset(env, cwp1); - offset++; - } - cwp1 = cpu_cwp_inc(env, env->cwp + 1); -#ifndef TARGET_SPARC64 - /* set wim so that restore will reload the registers */ - env->wim = 1 << cwp1; -#endif -#if defined(DEBUG_WIN) - printf("flush_windows: nb=%d\n", offset - 1); -#endif + target_cpu_loop(env); } -void cpu_loop(CPUSPARCState *env) -{ - CPUState *cs = env_cpu(env); - int trapnr, ret, syscall_nr; - //target_siginfo_t info; - - while (1) { - cpu_exec_start(cs); - trapnr = cpu_exec(cs); - cpu_exec_end(cs); - process_queued_cpu_work(cs); - - switch (trapnr) { -#ifndef TARGET_SPARC64 - case 0x80: -#else - /* FreeBSD uses 0x141 for syscalls too */ - case 0x141: - if (bsd_type != target_freebsd) - goto badtrap; - /* fallthrough */ - case 0x100: -#endif - syscall_nr = env->gregs[1]; - if (bsd_type == target_freebsd) - ret = do_freebsd_syscall(env, syscall_nr, - env->regwptr[0], env->regwptr[1], - env->regwptr[2], env->regwptr[3], - env->regwptr[4], env->regwptr[5], 0, 0); - else if (bsd_type == target_netbsd) - ret = do_netbsd_syscall(env, syscall_nr, - env->regwptr[0], env->regwptr[1], - env->regwptr[2], env->regwptr[3], - env->regwptr[4], env->regwptr[5]); - else { //if (bsd_type == target_openbsd) -#if defined(TARGET_SPARC64) - syscall_nr 
&= ~(TARGET_OPENBSD_SYSCALL_G7RFLAG | - TARGET_OPENBSD_SYSCALL_G2RFLAG); -#endif - ret = do_openbsd_syscall(env, syscall_nr, - env->regwptr[0], env->regwptr[1], - env->regwptr[2], env->regwptr[3], - env->regwptr[4], env->regwptr[5]); - } - if ((unsigned int)ret >= (unsigned int)(-515)) { - ret = -ret; -#if defined(TARGET_SPARC64) && !defined(TARGET_ABI32) - env->xcc |= PSR_CARRY; -#else - env->psr |= PSR_CARRY; -#endif - } else { -#if defined(TARGET_SPARC64) && !defined(TARGET_ABI32) - env->xcc &= ~PSR_CARRY; -#else - env->psr &= ~PSR_CARRY; -#endif - } - env->regwptr[0] = ret; - /* next instruction */ -#if defined(TARGET_SPARC64) - if (bsd_type == target_openbsd && - env->gregs[1] & TARGET_OPENBSD_SYSCALL_G2RFLAG) { - env->pc = env->gregs[2]; - env->npc = env->pc + 4; - } else if (bsd_type == target_openbsd && - env->gregs[1] & TARGET_OPENBSD_SYSCALL_G7RFLAG) { - env->pc = env->gregs[7]; - env->npc = env->pc + 4; - } else { - env->pc = env->npc; - env->npc = env->npc + 4; - } -#else - env->pc = env->npc; - env->npc = env->npc + 4; -#endif - break; - case 0x83: /* flush windows */ -#ifdef TARGET_ABI32 - case 0x103: -#endif - flush_windows(env); - /* next instruction */ - env->pc = env->npc; - env->npc = env->npc + 4; - break; -#ifndef TARGET_SPARC64 - case TT_WIN_OVF: /* window overflow */ - save_window(env); - break; - case TT_WIN_UNF: /* window underflow */ - restore_window(env); - break; - case TT_TFAULT: - case TT_DFAULT: -#if 0 - { - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* XXX: check env->error_code */ - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = env->mmuregs[4]; - queue_signal(env, info.si_signo, &info); - } -#endif - break; -#else - case TT_SPILL: /* window overflow */ - save_window(env); - break; - case TT_FILL: /* window underflow */ - restore_window(env); - break; - case TT_TFAULT: - case TT_DFAULT: -#if 0 - { - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* XXX: check env->error_code */ - info.si_code = TARGET_SEGV_MAPERR; - if (trapnr == TT_DFAULT) - info._sifields._sigfault._addr = env->dmmuregs[4]; - else - info._sifields._sigfault._addr = env->tsptr->tpc; - //queue_signal(env, info.si_signo, &info); - } -#endif - break; -#endif - case EXCP_INTERRUPT: - /* just indicate that signals should be handled asap */ - break; - case EXCP_DEBUG: - { -#if 0 - int sig = -#endif - gdb_handlesig(cs, TARGET_SIGTRAP); -#if 0 - if (sig) - { - info.si_signo = sig; - info.si_errno = 0; - info.si_code = TARGET_TRAP_BRKPT; - //queue_signal(env, info.si_signo, &info); - } -#endif - } - break; - default: -#ifdef TARGET_SPARC64 - badtrap: -#endif - printf ("Unhandled trap: 0x%x\n", trapnr); - cpu_dump_state(cs, stderr, 0); - exit (1); - } - process_pending_signals (env); - } -} - -#endif - static void usage(void) { printf("qemu-" TARGET_NAME " version " QEMU_FULL_VERSION @@ -681,7 +170,6 @@ static void usage(void) "-d item1[,...] enable logging of specified items\n" " (use '-d help' for a list of log items)\n" "-D logfile write logs to 'logfile' (default stderr)\n" - "-p pagesize set the host page size to 'pagesize'\n" "-singlestep always run in singlestep mode\n" "-strace log system calls\n" "-trace [[enable=]][,events=][,file=]\n" @@ -701,11 +189,20 @@ static void usage(void) , TARGET_NAME, interp_prefix, - x86_stack_size); + target_dflssiz); exit(1); } -THREAD CPUState *thread_cpu; +__thread CPUState *thread_cpu; + +void stop_all_tasks(void) +{ + /* + * We trust when using NPTL (pthreads) start_exclusive() handles thread + * stopping correctly. 
+ */ + start_exclusive(); +} bool qemu_cpu_is_self(CPUState *cpu) { @@ -722,7 +219,6 @@ void init_task_state(TaskState *ts) { int i; - ts->used = 1; ts->first_free = ts->sigqueue_table; for (i = 0; i < MAX_SIGQUEUE_SIZE - 1; i++) { ts->sigqueue_table[i].next = &ts->sigqueue_table[i + 1]; @@ -730,6 +226,48 @@ void init_task_state(TaskState *ts) ts->sigqueue_table[i].next = NULL; } +void gemu_log(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static void +adjust_ssize(void) +{ + struct rlimit rl; + + if (getrlimit(RLIMIT_STACK, &rl) != 0) { + return; + } + + target_maxssiz = MIN(target_maxssiz, rl.rlim_max); + target_dflssiz = MIN(MAX(target_dflssiz, rl.rlim_cur), target_maxssiz); + + rl.rlim_max = target_maxssiz; + rl.rlim_cur = target_dflssiz; + setrlimit(RLIMIT_STACK, &rl); +} + +static void save_proc_pathname(char *argv0) +{ + int mib[4]; + size_t len; + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PATHNAME; + mib[3] = -1; + + len = sizeof(qemu_proc_pathname); + if (sysctl(mib, 4, qemu_proc_pathname, &len, NULL, 0)) { + perror("sysctl"); + } +} + int main(int argc, char **argv) { const char *filename; @@ -737,20 +275,28 @@ int main(int argc, char **argv) const char *cpu_type; const char *log_file = NULL; const char *log_mask = NULL; + const char *seed_optarg = NULL; struct target_pt_regs regs1, *regs = ®s1; struct image_info info1, *info = &info1; - TaskState ts1, *ts = &ts1; + struct bsd_binprm bprm; + TaskState *ts; CPUArchState *env; CPUState *cpu; - int optind; + int optind, rv; const char *r; const char *gdbstub = NULL; char **target_environ, **wrk; envlist_t *envlist = NULL; - bsd_type = target_openbsd; + bsd_type = HOST_DEFAULT_BSD_TYPE; + char *argv0 = NULL; + + adjust_ssize(); - if (argc <= 1) + if (argc <= 1) { usage(); + } + + save_proc_pathname(argv[0]); error_init(argv[0]); module_call_init(MODULE_INIT_TRACE); @@ -770,11 +316,13 @@ int main(int argc, char **argv) optind = 1; for (;;) { - if (optind >= argc) + if (optind >= argc) { break; + } r = argv[optind]; - if (r[0] != '-') + if (r[0] != '-') { break; + } optind++; r++; if (!strcmp(r, "-")) { @@ -791,24 +339,31 @@ int main(int argc, char **argv) log_file = argv[optind++]; } else if (!strcmp(r, "E")) { r = argv[optind++]; - if (envlist_setenv(envlist, r) != 0) + if (envlist_setenv(envlist, r) != 0) { usage(); + } } else if (!strcmp(r, "ignore-environment")) { envlist_free(envlist); envlist = envlist_create(); } else if (!strcmp(r, "U")) { r = argv[optind++]; - if (envlist_unsetenv(envlist, r) != 0) + if (envlist_unsetenv(envlist, r) != 0) { usage(); + } } else if (!strcmp(r, "s")) { r = argv[optind++]; - x86_stack_size = strtol(r, (char **)&r, 0); - if (x86_stack_size <= 0) + rv = qemu_strtoul(r, &r, 0, &target_dflssiz); + if (rv < 0 || target_dflssiz <= 0) { usage(); - if (*r == 'M') - x86_stack_size *= MiB; - else if (*r == 'k' || *r == 'K') - x86_stack_size *= KiB; + } + if (*r == 'M') { + target_dflssiz *= 1024 * 1024; + } else if (*r == 'k' || *r == 'K') { + target_dflssiz *= 1024; + } + if (target_dflssiz > target_maxssiz) { + usage(); + } } else if (!strcmp(r, "L")) { interp_prefix = argv[optind++]; } else if (!strcmp(r, "p")) { @@ -825,15 +380,18 @@ int main(int argc, char **argv) } else if (!strcmp(r, "cpu")) { cpu_model = argv[optind++]; if (is_help_option(cpu_model)) { -/* XXX: implement xxx_cpu_list for targets that still miss it */ + /* XXX: implement xxx_cpu_list for targets that still miss it */ #if defined(cpu_list) - 
cpu_list(); + cpu_list(); #endif exit(1); } } else if (!strcmp(r, "B")) { - guest_base = strtol(argv[optind++], NULL, 0); - have_guest_base = true; + rv = qemu_strtoul(argv[optind++], NULL, 0, &guest_base); + if (rv < 0) { + usage(); + } + have_guest_base = true; } else if (!strcmp(r, "drop-ld-preload")) { (void) envlist_unsetenv(envlist, "LD_PRELOAD"); } else if (!strcmp(r, "bsd")) { @@ -847,12 +405,16 @@ int main(int argc, char **argv) usage(); } optind++; + } else if (!strcmp(r, "seed")) { + seed_optarg = optarg; } else if (!strcmp(r, "singlestep")) { singlestep = 1; } else if (!strcmp(r, "strace")) { do_strace = 1; } else if (!strcmp(r, "trace")) { trace_opt_parse(optarg); + } else if (!strcmp(r, "0")) { + argv0 = argv[optind++]; } else { usage(); } @@ -876,6 +438,9 @@ int main(int argc, char **argv) usage(); } filename = argv[optind]; + if (argv0) { + argv[optind] = argv0; + } if (!trace_init_backends()) { exit(1); @@ -885,6 +450,9 @@ int main(int argc, char **argv) /* Zero out regs */ memset(regs, 0, sizeof(struct target_pt_regs)); + /* Zero bsd params */ + memset(&bprm, 0, sizeof(bprm)); + /* Zero out image_info */ memset(info, 0, sizeof(struct image_info)); @@ -892,36 +460,21 @@ int main(int argc, char **argv) init_paths(interp_prefix); if (cpu_model == NULL) { -#if defined(TARGET_I386) -#ifdef TARGET_X86_64 - cpu_model = "qemu64"; -#else - cpu_model = "qemu32"; -#endif -#elif defined(TARGET_SPARC) -#ifdef TARGET_SPARC64 - cpu_model = "TI UltraSparc II"; -#else - cpu_model = "Fujitsu MB86904"; -#endif -#else - cpu_model = "any"; -#endif + cpu_model = TARGET_DEFAULT_CPU_MODEL; } cpu_type = parse_cpu_option(cpu_model); + /* init tcg before creating CPUs and to get qemu_host_page_size */ { AccelClass *ac = ACCEL_GET_CLASS(current_accel()); - ac->init_machine(NULL); accel_init_interfaces(ac); + ac->init_machine(NULL); } cpu = cpu_create(cpu_type); env = cpu->env_ptr; -#if defined(TARGET_SPARC) || defined(TARGET_PPC) cpu_reset(cpu); -#endif thread_cpu = cpu; if (getenv("QEMU_STRACE")) { @@ -931,34 +484,31 @@ int main(int argc, char **argv) target_environ = envlist_to_environ(envlist, NULL); envlist_free(envlist); + if (reserved_va) { + mmap_next_start = reserved_va; + } + + { + Error *err = NULL; + if (seed_optarg != NULL) { + qemu_guest_random_seed_main(seed_optarg, &err); + } else { + qcrypto_init(&err); + } + if (err) { + error_reportf_err(err, "cannot initialize crypto: "); + exit(1); + } + } + /* - * Now that page sizes are configured in tcg_exec_init() we can do + * Now that page sizes are configured we can do * proper page alignment for guest_base. */ guest_base = HOST_PAGE_ALIGN(guest_base); - /* - * Read in mmap_min_addr kernel parameter. This value is used - * When loading the ELF image to determine whether guest_base - * is needed. - * - * When user has explicitly set the quest base, we skip this - * test. 
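Editor's note: once page sizes are known, the hunk above rounds guest_base up to a host page boundary with HOST_PAGE_ALIGN(). The rounding is the usual power-of-two trick; a minimal sketch (page size queried with sysconf purely for illustration):

```c
#include <stdio.h>
#include <unistd.h>

/* Round addr up to the next multiple of align (align must be a power of two). */
static unsigned long align_up(unsigned long addr, unsigned long align)
{
    return (addr + align - 1) & ~(align - 1);
}

int main(void)
{
    unsigned long page = (unsigned long)sysconf(_SC_PAGESIZE);
    unsigned long base = 0x12345;

    printf("0x%lx aligned to 0x%lx-byte pages is 0x%lx\n",
           base, page, align_up(base, page));
    return 0;
}
```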
- */ - if (!have_guest_base) { - FILE *fp; - - if ((fp = fopen("/proc/sys/vm/mmap_min_addr", "r")) != NULL) { - unsigned long tmp; - if (fscanf(fp, "%lu", &tmp) == 1) { - mmap_min_addr = tmp; - qemu_log_mask(CPU_LOG_PAGE, "host mmap_min_addr=0x%lx\n", mmap_min_addr); - } - fclose(fp); - } - } - - if (loader_exec(filename, argv+optind, target_environ, regs, info) != 0) { + if (loader_exec(filename, argv + optind, target_environ, regs, info, + &bprm) != 0) { printf("Error loading %s\n", filename); _exit(1); } @@ -986,151 +536,25 @@ int main(int argc, char **argv) qemu_log("entry 0x" TARGET_ABI_FMT_lx "\n", info->entry); } - target_set_brk(info->brk); - syscall_init(); - signal_init(); - - /* Now that we've loaded the binary, GUEST_BASE is fixed. Delay - generating the prologue until now so that the prologue can take - the real value of GUEST_BASE into account. */ - tcg_prologue_init(tcg_ctx); - tcg_region_init(); - /* build Task State */ - memset(ts, 0, sizeof(TaskState)); + ts = g_new0(TaskState, 1); init_task_state(ts); ts->info = info; + ts->bprm = &bprm; cpu->opaque = ts; -#if defined(TARGET_I386) - env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; - env->hflags |= HF_PE_MASK | HF_CPL_MASK; - if (env->features[FEAT_1_EDX] & CPUID_SSE) { - env->cr[4] |= CR4_OSFXSR_MASK; - env->hflags |= HF_OSFXSR_MASK; - } -#ifndef TARGET_ABI32 - /* enable 64 bit mode if possible */ - if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) { - fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n"); - exit(1); - } - env->cr[4] |= CR4_PAE_MASK; - env->efer |= MSR_EFER_LMA | MSR_EFER_LME; - env->hflags |= HF_LMA_MASK; -#endif - - /* flags setup : we activate the IRQs by default as in user mode */ - env->eflags |= IF_MASK; - - /* linux register setup */ -#ifndef TARGET_ABI32 - env->regs[R_EAX] = regs->rax; - env->regs[R_EBX] = regs->rbx; - env->regs[R_ECX] = regs->rcx; - env->regs[R_EDX] = regs->rdx; - env->regs[R_ESI] = regs->rsi; - env->regs[R_EDI] = regs->rdi; - env->regs[R_EBP] = regs->rbp; - env->regs[R_ESP] = regs->rsp; - env->eip = regs->rip; -#else - env->regs[R_EAX] = regs->eax; - env->regs[R_EBX] = regs->ebx; - env->regs[R_ECX] = regs->ecx; - env->regs[R_EDX] = regs->edx; - env->regs[R_ESI] = regs->esi; - env->regs[R_EDI] = regs->edi; - env->regs[R_EBP] = regs->ebp; - env->regs[R_ESP] = regs->esp; - env->eip = regs->eip; -#endif + target_set_brk(info->brk); + syscall_init(); + signal_init(); - /* linux interrupt setup */ -#ifndef TARGET_ABI32 - env->idt.limit = 511; -#else - env->idt.limit = 255; -#endif - env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1), - PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - idt_table = g2h_untagged(env->idt.base); - set_idt(0, 0); - set_idt(1, 0); - set_idt(2, 0); - set_idt(3, 3); - set_idt(4, 3); - set_idt(5, 0); - set_idt(6, 0); - set_idt(7, 0); - set_idt(8, 0); - set_idt(9, 0); - set_idt(10, 0); - set_idt(11, 0); - set_idt(12, 0); - set_idt(13, 0); - set_idt(14, 0); - set_idt(15, 0); - set_idt(16, 0); - set_idt(17, 0); - set_idt(18, 0); - set_idt(19, 0); - set_idt(0x80, 3); - - /* linux segment setup */ - { - uint64_t *gdt_table; - env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES, - PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1; - gdt_table = g2h_untagged(env->gdt.base); -#ifdef TARGET_ABI32 - write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, - DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | - (3 << 
DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); -#else - /* 64 bit code segment */ - write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, - DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | - DESC_L_MASK | - (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); -#endif - write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff, - DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | - (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT)); - } + /* + * Now that we've loaded the binary, GUEST_BASE is fixed. Delay + * generating the prologue until now so that the prologue can take + * the real value of GUEST_BASE into account. + */ + tcg_prologue_init(tcg_ctx); - cpu_x86_load_seg(env, R_CS, __USER_CS); - cpu_x86_load_seg(env, R_SS, __USER_DS); -#ifdef TARGET_ABI32 - cpu_x86_load_seg(env, R_DS, __USER_DS); - cpu_x86_load_seg(env, R_ES, __USER_DS); - cpu_x86_load_seg(env, R_FS, __USER_DS); - cpu_x86_load_seg(env, R_GS, __USER_DS); - /* This hack makes Wine work... */ - env->segs[R_FS].selector = 0; -#else - cpu_x86_load_seg(env, R_DS, 0); - cpu_x86_load_seg(env, R_ES, 0); - cpu_x86_load_seg(env, R_FS, 0); - cpu_x86_load_seg(env, R_GS, 0); -#endif -#elif defined(TARGET_SPARC) - { - int i; - env->pc = regs->pc; - env->npc = regs->npc; - env->y = regs->y; - for(i = 0; i < 8; i++) - env->gregs[i] = regs->u_regs[i]; - for(i = 0; i < 8; i++) - env->regwptr[i] = regs->u_regs[i + 8]; - } -#else -#error unsupported target CPU -#endif + target_cpu_init(env, regs); if (gdbstub) { gdbserver_start(gdbstub); diff --git a/bsd-user/meson.build b/bsd-user/meson.build index 03695493408..87885d91edc 100644 --- a/bsd-user/meson.build +++ b/bsd-user/meson.build @@ -1,3 +1,7 @@ +if not have_bsd_user + subdir_done() +endif + bsd_user_ss.add(files( 'bsdload.c', 'elfload.c', @@ -8,3 +12,6 @@ bsd_user_ss.add(files( 'syscall.c', 'uaccess.c', )) + +# Pull in the OS-specific build glue, if any +subdir(targetos) diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c index 01ec8080038..13cb32dba13 100644 --- a/bsd-user/mmap.c +++ b/bsd-user/mmap.c @@ -20,10 +20,6 @@ #include "qemu.h" #include "qemu-common.h" -#include "bsd-mman.h" -#include "exec/exec-all.h" - -//#define DEBUG_MMAP static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER; static __thread int mmap_lock_count; @@ -69,14 +65,11 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot) abi_ulong end, host_start, host_end, addr; int prot1, ret; -#ifdef DEBUG_MMAP - printf("mprotect: start=0x" TARGET_FMT_lx - " len=0x" TARGET_FMT_lx " prot=%c%c%c\n", start, len, - prot & PROT_READ ? 'r' : '-', - prot & PROT_WRITE ? 'w' : '-', - prot & PROT_EXEC ? 'x' : '-'); -#endif - + qemu_log_mask(CPU_LOG_PAGE, "mprotect: start=0x" TARGET_ABI_FMT_lx + " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len, + prot & PROT_READ ? 'r' : '-', + prot & PROT_WRITE ? 'w' : '-', + prot & PROT_EXEC ? 
'x' : '-'); if ((start & ~TARGET_PAGE_MASK) != 0) return -EINVAL; len = TARGET_PAGE_ALIGN(len); @@ -93,11 +86,11 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot) if (start > host_start) { /* handle host page containing start */ prot1 = prot; - for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) { + for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) { prot1 |= page_get_flags(addr); } if (host_end == host_start + qemu_host_page_size) { - for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) { + for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) { prot1 |= page_get_flags(addr); } end = host_end; @@ -110,7 +103,7 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot) } if (end < host_end) { prot1 = prot; - for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) { + for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) { prot1 |= page_get_flags(addr); } ret = mprotect(g2h_untagged(host_end - qemu_host_page_size), @@ -134,7 +127,27 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot) return ret; } -/* map an incomplete host page */ +/* + * map an incomplete host page + * + * mmap_frag can be called with a valid fd, if flags doesn't contain one of + * MAP_ANON, MAP_STACK, MAP_GUARD. If we need to map a page in those cases, we + * pass fd == -1. However, if flags contains MAP_GUARD then MAP_ANON cannot be + * added. + * + * * If fd is valid (not -1) we want to map the pages with MAP_ANON. + * * If flags contains MAP_GUARD we don't want to add MAP_ANON because it + * will be rejected. See kern_mmap's enforcing of constraints for MAP_GUARD + * in sys/vm/vm_mmap.c. + * * If flags contains MAP_ANON it doesn't matter if we add it or not. + * * If flags contains MAP_STACK, mmap adds MAP_ANON when called so doesn't + * matter if we add it or not either. See enforcing of constraints for + * MAP_STACK in kern_mmap. + * + * Don't add MAP_ANON for the flags that use fd == -1 without specifying the + * flags directly, with the assumption that future flags that require fd == -1 + * will also not require MAP_ANON. + */ static int mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong end, int prot, int flags, int fd, abi_ulong offset) @@ -148,15 +161,15 @@ static int mmap_frag(abi_ulong real_start, /* get the protection of the target pages outside the mapping */ prot1 = 0; - for(addr = real_start; addr < real_end; addr++) { + for (addr = real_start; addr < real_end; addr++) { if (addr < start || addr >= end) prot1 |= page_get_flags(addr); } if (prot1 == 0) { - /* no page was there, so we allocate one */ + /* no page was there, so we allocate one. See also above. */ void *p = mmap(host_start, qemu_host_page_size, prot, - flags | MAP_ANON, -1, 0); + flags | ((fd != -1) ? 
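Editor's note: when the target page size is smaller than the host's, target_mprotect() cannot change protection for only part of a host page; it ORs together the flags of every target page sharing that host page and applies the union. A sketch of that merging step, with page_get_flags() replaced by a toy per-page flag array (an assumption made for illustration):

```c
#include <stdio.h>

#define TARGET_PAGE_SIZE 4096UL
#define HOST_PAGE_SIZE   16384UL   /* pretend the host uses 16K pages */
#define NPAGES           64

static int page_flags[NPAGES];     /* toy stand-in for page_get_flags() */

static int flags_of(unsigned long addr)
{
    return page_flags[addr / TARGET_PAGE_SIZE];
}

/*
 * Protection to apply to the host page containing [start, end): the
 * requested prot merged with whatever the neighbouring target pages
 * inside the same host page already have.
 */
static int host_page_prot(unsigned long start, unsigned long end, int prot)
{
    unsigned long host_start = start & ~(HOST_PAGE_SIZE - 1);
    unsigned long host_end = host_start + HOST_PAGE_SIZE;
    unsigned long a;
    int merged = prot;

    for (a = host_start; a < start; a += TARGET_PAGE_SIZE) {
        merged |= flags_of(a);
    }
    for (a = end; a < host_end; a += TARGET_PAGE_SIZE) {
        merged |= flags_of(a);
    }
    return merged;
}

int main(void)
{
    page_flags[0] = 0x1;           /* the neighbouring target page is readable */
    printf("merged prot: 0x%x\n",
           host_page_prot(TARGET_PAGE_SIZE, 2 * TARGET_PAGE_SIZE, 0x2));
    return 0;
}
```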
MAP_ANON : 0), -1, 0); if (p == MAP_FAILED) return -1; prot1 = prot; @@ -164,7 +177,7 @@ static int mmap_frag(abi_ulong real_start, prot1 &= PAGE_BITS; prot_new = prot | prot1; - if (!(flags & MAP_ANON)) { + if (fd != -1) { /* msync() won't work here, so we return an error if write is possible while it is a shared mapping */ if ((flags & TARGET_BSD_MAP_FLAGMASK) == MAP_SHARED && @@ -176,144 +189,379 @@ static int mmap_frag(abi_ulong real_start, mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE); /* read the corresponding file data */ - pread(fd, g2h_untagged(start), end - start, offset); + if (pread(fd, g2h_untagged(start), end - start, offset) == -1) { + return -1; + } /* put final protection */ if (prot_new != (prot1 | PROT_WRITE)) mprotect(host_start, qemu_host_page_size, prot_new); } else { - /* just update the protection */ if (prot_new != prot1) { mprotect(host_start, qemu_host_page_size, prot_new); } + if (prot_new & PROT_WRITE) { + memset(g2h_untagged(start), 0, end - start); + } } return 0; } -static abi_ulong mmap_next_start = 0x40000000; +#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64 +# define TASK_UNMAPPED_BASE (1ul << 38) +#else +# define TASK_UNMAPPED_BASE 0x40000000 +#endif +abi_ulong mmap_next_start = TASK_UNMAPPED_BASE; unsigned long last_brk; -/* find a free memory area of size 'size'. The search starts at - 'start'. If 'start' == 0, then a default start address is used. - Return -1 if error. -*/ -/* page_init() marks pages used by the host as reserved to be sure not - to use them. */ -static abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size) +/* + * Subroutine of mmap_find_vma, used when we have pre-allocated a chunk of guest + * address space. + */ +static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size, + abi_ulong alignment) { - abi_ulong addr, addr1, addr_start; + abi_ulong addr; + abi_ulong end_addr; int prot; - unsigned long new_brk; + int looped = 0; + + if (size > reserved_va) { + return (abi_ulong)-1; + } + + size = HOST_PAGE_ALIGN(size) + alignment; + end_addr = start + size; + if (end_addr > reserved_va) { + end_addr = reserved_va; + } + addr = end_addr - qemu_host_page_size; - new_brk = (unsigned long)sbrk(0); - if (last_brk && last_brk < new_brk && last_brk == (target_ulong)last_brk) { - /* This is a hack to catch the host allocating memory with brk(). - If it uses mmap then we loose. - FIXME: We really want to avoid the host allocating memory in - the first place, and maybe leave some slack to avoid switching - to mmap. */ - page_set_flags(last_brk & TARGET_PAGE_MASK, - TARGET_PAGE_ALIGN(new_brk), - PAGE_RESERVED); + while (1) { + if (addr > end_addr) { + if (looped) { + return (abi_ulong)-1; + } + end_addr = reserved_va; + addr = end_addr - qemu_host_page_size; + looped = 1; + continue; + } + prot = page_get_flags(addr); + if (prot) { + end_addr = addr; + } + if (end_addr - addr >= size) { + break; + } + addr -= qemu_host_page_size; + } + + if (start == mmap_next_start) { + mmap_next_start = addr; + } + /* addr is sufficiently low to align it up */ + if (alignment != 0) { + addr = (addr + alignment) & ~(alignment - 1); + } + return addr; +} + +/* + * Find and reserve a free memory area of size 'size'. The search + * starts at 'start'. + * It must be called with mmap_lock() held. + * Return -1 if error. 
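Editor's note: mmap_frag() above fills an incomplete host page in three steps: create an anonymous mapping if no page exists yet, copy the file contents in with pread() for file-backed requests, then drop to the final protection. A condensed, self-contained sketch of that sequence (the file being mapped — the program's own binary — is an arbitrary choice for illustration):

```c
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    (void)argc;
    long page = sysconf(_SC_PAGESIZE);
    int fd = open(argv[0], O_RDONLY);      /* any readable file will do */
    if (fd < 0) {
        return 1;
    }

    /* 1. Allocate the host page anonymously, writable for the copy-in. */
    char *p = mmap(NULL, page, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANON, -1, 0);
    if (p == MAP_FAILED) {
        return 1;
    }

    /* 2. Read the file data that should appear in (part of) that page. */
    ssize_t n = pread(fd, p, page / 2, 0);
    if (n < 0) {
        return 1;
    }

    /* 3. Drop to the protection the caller actually asked for. */
    if (mprotect(p, page, PROT_READ) != 0) {
        return 1;
    }

    printf("copied %zd bytes into a read-only page at %p\n", n, (void *)p);
    munmap(p, page);
    close(fd);
    return 0;
}
```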
+ */ +static abi_ulong mmap_find_vma_aligned(abi_ulong start, abi_ulong size, + abi_ulong alignment) +{ + void *ptr, *prev; + abi_ulong addr; + int flags; + int wrapped, repeat; + + /* If 'start' == 0, then a default start address is used. */ + if (start == 0) { + start = mmap_next_start; + } else { + start &= qemu_host_page_mask; } - last_brk = new_brk; size = HOST_PAGE_ALIGN(size); - start = start & qemu_host_page_mask; + + if (reserved_va) { + return mmap_find_vma_reserved(start, size, + (alignment != 0 ? 1 << alignment : 0)); + } + addr = start; - if (addr == 0) - addr = mmap_next_start; - addr_start = addr; - for(;;) { - prot = 0; - for(addr1 = addr; addr1 < (addr + size); addr1 += TARGET_PAGE_SIZE) { - prot |= page_get_flags(addr1); + wrapped = repeat = 0; + prev = 0; + flags = MAP_ANON | MAP_PRIVATE; + if (alignment != 0) { + flags |= MAP_ALIGNED(alignment); + } + + for (;; prev = ptr) { + /* + * Reserve needed memory area to avoid a race. + * It should be discarded using: + * - mmap() with MAP_FIXED flag + * - mremap() with MREMAP_FIXED flag + * - shmat() with SHM_REMAP flag + */ + ptr = mmap(g2h_untagged(addr), size, PROT_NONE, + flags, -1, 0); + + /* ENOMEM, if host address space has no memory */ + if (ptr == MAP_FAILED) { + return (abi_ulong)-1; } - if (prot == 0) - break; - addr += qemu_host_page_size; - /* we found nothing */ - if (addr == addr_start) + + /* + * Count the number of sequential returns of the same address. + * This is used to modify the search algorithm below. + */ + repeat = (ptr == prev ? repeat + 1 : 0); + + if (h2g_valid(ptr + size - 1)) { + addr = h2g(ptr); + + if ((addr & ~TARGET_PAGE_MASK) == 0) { + /* Success. */ + if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) { + mmap_next_start = addr + size; + } + return addr; + } + + /* The address is not properly aligned for the target. */ + switch (repeat) { + case 0: + /* + * Assume the result that the kernel gave us is the + * first with enough free space, so start again at the + * next higher target page. + */ + addr = TARGET_PAGE_ALIGN(addr); + break; + case 1: + /* + * Sometimes the kernel decides to perform the allocation + * at the top end of memory instead. + */ + addr &= TARGET_PAGE_MASK; + break; + case 2: + /* Start over at low memory. */ + addr = 0; + break; + default: + /* Fail. This unaligned block must the last. */ + addr = -1; + break; + } + } else { + /* + * Since the result the kernel gave didn't fit, start + * again at low memory. If any repetition, fail. + */ + addr = (repeat ? -1 : 0); + } + + /* Unmap and try again. */ + munmap(ptr, size); + + /* ENOMEM if we checked the whole of the target address space. */ + if (addr == (abi_ulong)-1) { return (abi_ulong)-1; + } else if (addr == 0) { + if (wrapped) { + return (abi_ulong)-1; + } + wrapped = 1; + /* + * Don't actually use 0 when wrapping, instead indicate + * that we'd truly like an allocation in low memory. 
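Editor's note: mmap_find_vma_reserved() above walks downwards through the pre-reserved guest address space, resetting the candidate window whenever it meets an already-used page and wrapping around at most once. A toy version of the same scan over a boolean "page used" array (array, page size and area size are invented for illustration):

```c
#include <stdbool.h>
#include <stdio.h>

#define PAGE   4096UL
#define NPAGES 256UL                    /* pretend reserved area: 1 MiB */

static bool used[NPAGES];               /* stand-in for page_get_flags() */

/* Find 'size' bytes of free space at or below 'start'; ~0UL on failure. */
static unsigned long find_free_desc(unsigned long start, unsigned long size)
{
    unsigned long reserved_top = NPAGES * PAGE;
    unsigned long end = start + size;
    unsigned long addr;
    int looped = 0;

    if (end > reserved_top) {
        end = reserved_top;
    }
    addr = end - PAGE;

    for (;;) {
        if (addr > end) {               /* ran off the bottom: wrap once */
            if (looped) {
                return ~0UL;
            }
            end = reserved_top;
            addr = end - PAGE;
            looped = 1;
            continue;
        }
        if (used[addr / PAGE]) {
            end = addr;                 /* restart the window below this page */
        }
        if (end - addr >= size) {
            return addr;
        }
        addr -= PAGE;
    }
}

int main(void)
{
    used[100] = true;                   /* one page already taken */
    printf("found 0x%lx\n", find_free_desc(128 * PAGE, 8 * PAGE));
    return 0;
}
```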
+ */ + addr = TARGET_PAGE_SIZE; + } else if (wrapped && addr >= start) { + return (abi_ulong)-1; + } } - if (start == 0) - mmap_next_start = addr + size; - return addr; +} + +abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size) +{ + return mmap_find_vma_aligned(start, size, 0); } /* NOTE: all the constants are the HOST ones */ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, - int flags, int fd, abi_ulong offset) + int flags, int fd, off_t offset) { abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len; - unsigned long host_start; mmap_lock(); -#ifdef DEBUG_MMAP - { - printf("mmap: start=0x" TARGET_FMT_lx - " len=0x" TARGET_FMT_lx " prot=%c%c%c flags=", - start, len, - prot & PROT_READ ? 'r' : '-', - prot & PROT_WRITE ? 'w' : '-', - prot & PROT_EXEC ? 'x' : '-'); - if (flags & MAP_FIXED) - printf("MAP_FIXED "); - if (flags & MAP_ANON) - printf("MAP_ANON "); - switch(flags & TARGET_BSD_MAP_FLAGMASK) { - case MAP_PRIVATE: - printf("MAP_PRIVATE "); - break; - case MAP_SHARED: - printf("MAP_SHARED "); - break; - default: - printf("[MAP_FLAGMASK=0x%x] ", flags & TARGET_BSD_MAP_FLAGMASK); - break; + if (qemu_loglevel_mask(CPU_LOG_PAGE)) { + qemu_log("mmap: start=0x" TARGET_ABI_FMT_lx + " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=", + start, len, + prot & PROT_READ ? 'r' : '-', + prot & PROT_WRITE ? 'w' : '-', + prot & PROT_EXEC ? 'x' : '-'); + if (flags & MAP_ALIGNMENT_MASK) { + qemu_log("MAP_ALIGNED(%u) ", + (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT); + } + if (flags & MAP_GUARD) { + qemu_log("MAP_GUARD "); + } + if (flags & MAP_FIXED) { + qemu_log("MAP_FIXED "); + } + if (flags & MAP_ANON) { + qemu_log("MAP_ANON "); } - printf("fd=%d offset=" TARGET_FMT_lx "\n", fd, offset); + if (flags & MAP_EXCL) { + qemu_log("MAP_EXCL "); + } + if (flags & MAP_PRIVATE) { + qemu_log("MAP_PRIVATE "); + } + if (flags & MAP_SHARED) { + qemu_log("MAP_SHARED "); + } + if (flags & MAP_NOCORE) { + qemu_log("MAP_NOCORE "); + } + if (flags & MAP_STACK) { + qemu_log("MAP_STACK "); + } + qemu_log("fd=%d offset=0x%lx\n", fd, offset); + } + + if ((flags & MAP_ANON) && fd != -1) { + errno = EINVAL; + goto fail; + } + if (flags & MAP_STACK) { + if ((fd != -1) || ((prot & (PROT_READ | PROT_WRITE)) != + (PROT_READ | PROT_WRITE))) { + errno = EINVAL; + goto fail; + } + } + if ((flags & MAP_GUARD) && (prot != PROT_NONE || fd != -1 || + offset != 0 || (flags & (MAP_SHARED | MAP_PRIVATE | + /* MAP_PREFAULT | */ /* MAP_PREFAULT not in mman.h */ + MAP_PREFAULT_READ | MAP_ANON | MAP_STACK)) != 0)) { + errno = EINVAL; + goto fail; } -#endif if (offset & ~TARGET_PAGE_MASK) { errno = EINVAL; goto fail; } + if (len == 0) { + errno = EINVAL; + goto fail; + } + + /* Check for overflows */ len = TARGET_PAGE_ALIGN(len); - if (len == 0) - goto the_end; + if (len == 0) { + errno = ENOMEM; + goto fail; + } + real_start = start & qemu_host_page_mask; + host_offset = offset & qemu_host_page_mask; + /* + * If the user is asking for the kernel to find a location, do that + * before we truncate the length for mapping files below. 
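Editor's note: the new argument checks in target_mmap() mirror the constraints kern_mmap enforces, as described in the mmap_frag() comment earlier: MAP_ANON requests take no file descriptor, MAP_STACK mappings must be anonymous and read/write, and MAP_GUARD mappings allow no protection, fd, offset or sharing flags. A compact sketch of that validation as a predicate (FreeBSD-only flags are given zero fallbacks so the sketch compiles on any host; MAP_PREFAULT_READ is omitted):

```c
#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>

#ifndef MAP_STACK
#define MAP_STACK 0
#endif
#ifndef MAP_GUARD
#define MAP_GUARD 0
#endif

/* Return 0 if the (prot, flags, fd, offset) combination is acceptable,
 * otherwise EINVAL, following the constraints described above. */
static int check_mmap_args(int prot, int flags, int fd, long offset)
{
    if ((flags & MAP_ANON) && fd != -1) {
        return EINVAL;                      /* anonymous maps take no fd */
    }
    if (flags & MAP_STACK) {
        if (fd != -1 ||
            (prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)) {
            return EINVAL;                  /* stacks are anonymous and rw */
        }
    }
    if (flags & MAP_GUARD) {
        if (prot != PROT_NONE || fd != -1 || offset != 0 ||
            (flags & (MAP_SHARED | MAP_PRIVATE | MAP_ANON | MAP_STACK))) {
            return EINVAL;                  /* guards only reserve address space */
        }
    }
    return 0;
}

int main(void)
{
    printf("%d %d\n",
           check_mmap_args(PROT_READ, MAP_ANON | MAP_PRIVATE, 3, 0),   /* EINVAL */
           check_mmap_args(PROT_READ, MAP_ANON | MAP_PRIVATE, -1, 0)); /* 0 */
    return 0;
}
```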
+ */ if (!(flags & MAP_FIXED)) { - abi_ulong mmap_start; - void *p; - host_offset = offset & qemu_host_page_mask; host_len = len + offset - host_offset; host_len = HOST_PAGE_ALIGN(host_len); - mmap_start = mmap_find_vma(real_start, host_len); - if (mmap_start == (abi_ulong)-1) { + if ((flags & MAP_ALIGNMENT_MASK) != 0) + start = mmap_find_vma_aligned(real_start, host_len, + (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT); + else + start = mmap_find_vma(real_start, host_len); + if (start == (abi_ulong)-1) { errno = ENOMEM; goto fail; } - /* Note: we prefer to control the mapping address. It is - especially important if qemu_host_page_size > - qemu_real_host_page_size */ - p = mmap(g2h_untagged(mmap_start), - host_len, prot, flags | MAP_FIXED, fd, host_offset); + } + + /* + * When mapping files into a memory area larger than the file, accesses + * to pages beyond the file size will cause a SIGBUS. + * + * For example, if mmaping a file of 100 bytes on a host with 4K pages + * emulating a target with 8K pages, the target expects to be able to + * access the first 8K. But the host will trap us on any access beyond + * 4K. + * + * When emulating a target with a larger page-size than the hosts, we + * may need to truncate file maps at EOF and add extra anonymous pages + * up to the targets page boundary. + */ + + if ((qemu_real_host_page_size < qemu_host_page_size) && fd != -1) { + struct stat sb; + + if (fstat(fd, &sb) == -1) { + goto fail; + } + + /* Are we trying to create a map beyond EOF?. */ + if (offset + len > sb.st_size) { + /* + * If so, truncate the file map at eof aligned with + * the hosts real pagesize. Additional anonymous maps + * will be created beyond EOF. + */ + len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset); + } + } + + if (!(flags & MAP_FIXED)) { + unsigned long host_start; + void *p; + + host_len = len + offset - host_offset; + host_len = HOST_PAGE_ALIGN(host_len); + + /* + * Note: we prefer to control the mapping address. It is + * especially important if qemu_host_page_size > + * qemu_real_host_page_size + */ + p = mmap(g2h_untagged(start), host_len, prot, + flags | MAP_FIXED | ((fd != -1) ? MAP_ANON : 0), -1, 0); if (p == MAP_FAILED) goto fail; /* update start so that it points to the file position at 'offset' */ host_start = (unsigned long)p; - if (!(flags & MAP_ANON)) + if (fd != -1) { + p = mmap(g2h_untagged(start), len, prot, + flags | MAP_FIXED, fd, host_offset); + if (p == MAP_FAILED) { + munmap(g2h_untagged(start), host_len); + goto fail; + } host_start += offset - host_offset; + } start = h2g(host_start); } else { - int flg; - target_ulong addr; - if (start & ~TARGET_PAGE_MASK) { errno = EINVAL; goto fail; @@ -321,20 +569,26 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, end = start + len; real_end = HOST_PAGE_ALIGN(end); - for(addr = real_start; addr < real_end; addr += TARGET_PAGE_SIZE) { - flg = page_get_flags(addr); - if (flg & PAGE_RESERVED) { - errno = ENXIO; - goto fail; - } + /* + * Test if requested memory area fits target address space + * It can fail only on 64-bit host with 32-bit target. + * On any other target/host host mmap() handles this error correctly. 
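Editor's note: the comment above explains why file-backed maps must be cut off at end-of-file when the guest page size exceeds the host's: pages past EOF would raise SIGBUS on the host even though the guest expects them to be accessible. The adjustment itself is just fstat() plus rounding to the host page size; a sketch with illustrative names:

```c
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

/* Round up to the host's real page size (power of two). */
static unsigned long real_host_page_align(unsigned long v, unsigned long host_page)
{
    return (v + host_page - 1) & ~(host_page - 1);
}

/*
 * Given a request to map 'len' bytes of 'fd' starting at 'offset', return
 * the length that can actually be file-backed; the remainder, up to the
 * guest page boundary, would have to come from anonymous pages.
 */
static unsigned long file_backed_len(int fd, unsigned long offset,
                                     unsigned long len, unsigned long host_page)
{
    struct stat sb;

    if (fstat(fd, &sb) == -1) {
        return len;                         /* caller will fail the mmap anyway */
    }
    if (offset + len > (unsigned long)sb.st_size) {
        len = real_host_page_align((unsigned long)sb.st_size - offset, host_page);
    }
    return len;
}

int main(void)
{
    int fd = open("/dev/null", O_RDONLY);   /* size 0: nothing is file-backed */
    printf("file-backed length: %lu\n", file_backed_len(fd, 0, 8192, 4096));
    close(fd);
    return 0;
}
```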
+ */ + if (!guest_range_valid_untagged(start, len)) { + errno = EINVAL; + goto fail; } - /* worst case: we cannot map the file because the offset is not - aligned, so we read it */ - if (!(flags & MAP_ANON) && + /* + * worst case: we cannot map the file because the offset is not + * aligned, so we read it + */ + if (fd != -1 && (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) { - /* msync() won't work here, so we return an error if write is - possible while it is a shared mapping */ + /* + * msync() won't work here, so we return an error if write is + * possible while it is a shared mapping + */ if ((flags & TARGET_BSD_MAP_FLAGMASK) == MAP_SHARED && (prot & PROT_WRITE)) { errno = EINVAL; @@ -345,17 +599,22 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, -1, 0); if (retaddr == -1) goto fail; - pread(fd, g2h_untagged(start), len, offset); + if (pread(fd, g2h_untagged(start), len, offset) == -1) { + goto fail; + } if (!(prot & PROT_WRITE)) { ret = target_mprotect(start, len, prot); - if (ret != 0) { - start = ret; - goto the_end; - } + assert(ret == 0); } goto the_end; } + /* Reject the mapping if any page within the range is mapped */ + if ((flags & MAP_EXCL) && page_check_range(start, len, 0) < 0) { + errno = EINVAL; + goto fail; + } + /* handle the start of the mapping */ if (start > real_start) { if (real_end == real_start + qemu_host_page_size) { @@ -375,7 +634,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, /* handle the end of the mapping */ if (end < real_end) { ret = mmap_frag(real_end - qemu_host_page_size, - real_end - qemu_host_page_size, real_end, + real_end - qemu_host_page_size, end, prot, flags, fd, offset + real_end - qemu_host_page_size - start); if (ret == -1) @@ -401,10 +660,11 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, page_set_flags(start, start + len, prot | PAGE_VALID); the_end: #ifdef DEBUG_MMAP - printf("ret=0x" TARGET_FMT_lx "\n", start); + printf("ret=0x" TARGET_ABI_FMT_lx "\n", start); page_dump(stdout); printf("\n"); #endif + tb_invalidate_phys_range(start, start + len); mmap_unlock(); return start; fail: @@ -412,13 +672,57 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, return -1; } +static void mmap_reserve(abi_ulong start, abi_ulong size) +{ + abi_ulong real_start; + abi_ulong real_end; + abi_ulong addr; + abi_ulong end; + int prot; + + real_start = start & qemu_host_page_mask; + real_end = HOST_PAGE_ALIGN(start + size); + end = start + size; + if (start > real_start) { + /* handle host page containing start */ + prot = 0; + for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) { + prot |= page_get_flags(addr); + } + if (real_end == real_start + qemu_host_page_size) { + for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) { + prot |= page_get_flags(addr); + } + end = real_end; + } + if (prot != 0) { + real_start += qemu_host_page_size; + } + } + if (end < real_end) { + prot = 0; + for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) { + prot |= page_get_flags(addr); + } + if (prot != 0) { + real_end -= qemu_host_page_size; + } + } + if (real_start != real_end) { + mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + } +} + int target_munmap(abi_ulong start, abi_ulong len) { abi_ulong end, real_start, real_end, addr; int prot, ret; #ifdef DEBUG_MMAP - printf("munmap: start=0x%lx len=0x%lx\n", start, len); + printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x" + TARGET_ABI_FMT_lx 
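Editor's note: with a reserved guest address space (reserved_va), the new mmap_reserve() above means an "unmap" does not return pages to the host; the range is re-mapped as inaccessible anonymous memory so the reservation survives and no host allocation can land there. A minimal demonstration of that "unmap by re-mapping PROT_NONE" pattern:

```c
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    long page = sysconf(_SC_PAGESIZE);

    /* A small "reserved" area standing in for the guest address space. */
    char *base = mmap(NULL, 4 * page, PROT_NONE,
                      MAP_PRIVATE | MAP_ANON, -1, 0);
    if (base == MAP_FAILED) {
        return 1;
    }

    /* Guest "maps" one page inside the reservation ... */
    mmap(base + page, page, PROT_READ | PROT_WRITE,
         MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
    memset(base + page, 0xab, page);

    /* ... and "unmaps" it again: re-map PROT_NONE instead of munmap(), so
     * the address range stays reserved for the guest. */
    mmap(base + page, page, PROT_NONE,
         MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);

    printf("range %p..%p is reserved but inaccessible again\n",
           (void *)(base + page), (void *)(base + 2 * page));
    munmap(base, 4 * page);
    return 0;
}
```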
"\n", + start, len); #endif if (start & ~TARGET_PAGE_MASK) return -EINVAL; @@ -433,11 +737,11 @@ int target_munmap(abi_ulong start, abi_ulong len) if (start > real_start) { /* handle host page containing start */ prot = 0; - for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) { + for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) { prot |= page_get_flags(addr); } if (real_end == real_start + qemu_host_page_size) { - for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) { + for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) { prot |= page_get_flags(addr); } end = real_end; @@ -447,7 +751,7 @@ int target_munmap(abi_ulong start, abi_ulong len) } if (end < real_end) { prot = 0; - for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) { + for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) { prot |= page_get_flags(addr); } if (prot != 0) @@ -457,11 +761,17 @@ int target_munmap(abi_ulong start, abi_ulong len) ret = 0; /* unmap what we can */ if (real_start < real_end) { - ret = munmap(g2h_untagged(real_start), real_end - real_start); + if (reserved_va) { + mmap_reserve(real_start, real_end - real_start); + } else { + ret = munmap(g2h_untagged(real_start), real_end - real_start); + } } - if (ret == 0) + if (ret == 0) { page_set_flags(start, start + len, 0); + tb_invalidate_phys_range(start, start + len); + } mmap_unlock(); return ret; } diff --git a/bsd-user/netbsd/host-os.h b/bsd-user/netbsd/host-os.h new file mode 100644 index 00000000000..c0be51a7ef4 --- /dev/null +++ b/bsd-user/netbsd/host-os.h @@ -0,0 +1,25 @@ +/* + * NetBSD host dependent code and definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _HOST_OS_H_ +#define _HOST_OS_H_ + +#define HOST_DEFAULT_BSD_TYPE target_netbsd + +#endif /*!_HOST_OS_H_ */ diff --git a/bsd-user/netbsd/target_os_elf.h b/bsd-user/netbsd/target_os_elf.h new file mode 100644 index 00000000000..21b475f458c --- /dev/null +++ b/bsd-user/netbsd/target_os_elf.h @@ -0,0 +1,146 @@ +/* + * netbsd ELF definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ +#ifndef _TARGET_OS_ELF_H_ +#define _TARGET_OS_ELF_H_ + +#include "target_arch_elf.h" +#include "elf.h" + +/* this flag is uneffective under linux too, should be deleted */ +#ifndef MAP_DENYWRITE +#define MAP_DENYWRITE 0 +#endif + +/* should probably go in elf.h */ +#ifndef ELIBBAD +#define ELIBBAD 80 +#endif + +#ifndef ELF_PLATFORM +#define ELF_PLATFORM (NULL) +#endif + +#ifndef ELF_HWCAP +#define ELF_HWCAP 0 +#endif + +#ifdef TARGET_ABI32 +#undef ELF_CLASS +#define ELF_CLASS ELFCLASS32 +#undef bswaptls +#define bswaptls(ptr) bswap32s(ptr) +#endif + +/* max code+data+bss space allocated to elf interpreter */ +#define INTERP_MAP_SIZE (32 * 1024 * 1024) + +/* max code+data+bss+brk space allocated to ET_DYN executables */ +#define ET_DYN_MAP_SIZE (128 * 1024 * 1024) + +/* Necessary parameters */ +#define TARGET_ELF_EXEC_PAGESIZE TARGET_PAGE_SIZE +#define TARGET_ELF_PAGESTART(_v) ((_v) & \ + ~(unsigned long)(TARGET_ELF_EXEC_PAGESIZE - 1)) +#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE - 1)) + +#define DLINFO_ITEMS 12 + +static abi_ulong target_create_elf_tables(abi_ulong p, int argc, int envc, + abi_ulong stringp, + struct elfhdr *exec, + abi_ulong load_addr, + abi_ulong load_bias, + abi_ulong interp_load_addr, + struct image_info *info) +{ + abi_ulong sp; + int size; + abi_ulong u_platform; + const char *k_platform; + const int n = sizeof(elf_addr_t); + + sp = p; + u_platform = 0; + k_platform = ELF_PLATFORM; + if (k_platform) { + size_t len = strlen(k_platform) + 1; + sp -= (len + n - 1) & ~(n - 1); + u_platform = sp; + /* FIXME - check return value of memcpy_to_target() for failure */ + memcpy_to_target(sp, k_platform, len); + } + /* + * Force 16 byte _final_ alignment here for generality. + */ + sp = sp & ~(abi_ulong)15; + size = (DLINFO_ITEMS + 1) * 2; + if (k_platform) { + size += 2; + } +#ifdef DLINFO_ARCH_ITEMS + size += DLINFO_ARCH_ITEMS * 2; +#endif + size += envc + argc + 2; + size += 1; /* argc itself */ + size *= n; + if (size & 15) { + sp -= 16 - (size & 15); + } + + /* + * NetBSD defines elf_addr_t as Elf32_Off / Elf64_Off + */ +#define NEW_AUX_ENT(id, val) do { \ + sp -= n; put_user_ual(val, sp); \ + sp -= n; put_user_ual(id, sp); \ + } while (0) + + NEW_AUX_ENT(AT_NULL, 0); + + /* There must be exactly DLINFO_ITEMS entries here. */ + NEW_AUX_ENT(AT_PHDR, (abi_ulong)(load_addr + exec->e_phoff)); + NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof(struct elf_phdr))); + NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum)); + NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); + NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_load_addr)); + NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0); + NEW_AUX_ENT(AT_ENTRY, load_bias + exec->e_entry); + NEW_AUX_ENT(AT_UID, (abi_ulong)getuid()); + NEW_AUX_ENT(AT_EUID, (abi_ulong)geteuid()); + NEW_AUX_ENT(AT_GID, (abi_ulong)getgid()); + NEW_AUX_ENT(AT_EGID, (abi_ulong)getegid()); + NEW_AUX_ENT(AT_HWCAP, (abi_ulong)ELF_HWCAP); + NEW_AUX_ENT(AT_CLKTCK, (abi_ulong)sysconf(_SC_CLK_TCK)); + if (k_platform) { + NEW_AUX_ENT(AT_PLATFORM, u_platform); + } +#ifdef ARCH_DLINFO + /* + * ARCH_DLINFO must come last so platform specific code can enforce + * special alignment requirements on the AUXV if necessary (eg. PPC). 
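Editor's note: target_create_elf_tables() builds the ELF auxiliary vector by pushing (id, value) pairs onto the downward-growing guest stack, with AT_NULL pushed first so it ends up last in memory. A host-side sketch of the same push pattern into a plain array used as a stack (put_user_ual() replaced by direct stores; assumes <elf.h> provides the AT_* constants):

```c
#include <elf.h>
#include <stdio.h>
#include <unistd.h>

#define STACK_WORDS 64

static unsigned long stack[STACK_WORDS];
static unsigned long sp = STACK_WORDS;        /* grows downwards */

static void new_aux_ent(unsigned long id, unsigned long val)
{
    stack[--sp] = val;                         /* value first ...           */
    stack[--sp] = id;                          /* ... then the tag below it */
}

int main(void)
{
    new_aux_ent(AT_NULL, 0);                   /* pushed first, read last */
    new_aux_ent(AT_PAGESZ, (unsigned long)sysconf(_SC_PAGESIZE));
    new_aux_ent(AT_UID, (unsigned long)getuid());

    /* Walk the vector back the way a guest's libc would. */
    for (unsigned long i = sp; stack[i] != AT_NULL; i += 2) {
        printf("aux tag %lu = %lu\n", stack[i], stack[i + 1]);
    }
    return 0;
}
```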
+ */ + ARCH_DLINFO; +#endif +#undef NEW_AUX_ENT + + sp = loader_build_argptr(envc, argc, sp, stringp); + return sp; +} + +#endif /* _TARGET_OS_ELF_H_ */ diff --git a/bsd-user/netbsd/target_os_siginfo.h b/bsd-user/netbsd/target_os_siginfo.h new file mode 100644 index 00000000000..667c19cc7ce --- /dev/null +++ b/bsd-user/netbsd/target_os_siginfo.h @@ -0,0 +1,82 @@ +#ifndef _TARGET_OS_SIGINFO_H_ +#define _TARGET_OS_SIGINFO_H_ + +#define TARGET_NSIG 32 /* counting 0; could be 33 (mask is 1-32) */ +#define TARGET_NSIG_BPW (sizeof(uint32_t) * 8) +#define TARGET_NSIG_WORDS (TARGET_NSIG / TARGET_NSIG_BPW) + +/* this struct defines a stack used during syscall handling */ +typedef struct target_sigaltstack { + abi_long ss_sp; + abi_ulong ss_size; + abi_long ss_flags; +} target_stack_t; + +typedef struct { + uint32_t __bits[TARGET_NSIG_WORDS]; +} target_sigset_t + +struct target_sigaction { + abi_ulong _sa_handler; + int32_t sa_flags; + target_sigset_t sa_mask; +}; + +/* Compare to sys/siginfo.h */ +typedef union target_sigval { + int sival_int; + abi_ulong sival_ptr; +} target_sigval_t; + +struct target_ksiginfo { + int32_t _signo; + int32_t _code; + int32_t _errno; +#if TARGET_ABI_BITS == 64 + int32_t _pad; +#endif + union { + struct { + int32_t _pid; + int32_t _uid; + target_sigval_t _value; + } _rt; + + struct { + int32_t _pid; + int32_t _uid; + int32_t _struct; + /* clock_t _utime; */ + /* clock_t _stime; */ + } _child; + + struct { + abi_ulong _addr; + int32_t _trap; + } _fault; + + struct { + long _band; + int _fd; + } _poll; + } _reason; +}; + +typedef union target_siginfo { + int8_t si_pad[128]; + struct target_ksiginfo _info; +} target_siginfo_t; + +#define target_si_signo _info._signo +#define target_si_code _info._code +#define target_si_errno _info._errno +#define target_si_addr _info._reason._fault._addr + +#define TARGET_SEGV_MAPERR 1 +#define TARGET_SEGV_ACCERR 2 + +#define TARGET_TRAP_BRKPT 1 +#define TARGET_TRAP_TRACE 2 + + +#endif /* ! 
_TARGET_OS_SIGINFO_H_ */ diff --git a/bsd-user/netbsd/target_os_signal.h b/bsd-user/netbsd/target_os_signal.h new file mode 100644 index 00000000000..a373922f7e8 --- /dev/null +++ b/bsd-user/netbsd/target_os_signal.h @@ -0,0 +1,69 @@ +#ifndef _TARGET_OS_SIGNAL_H_ +#define _TARGET_OS_SIGNAL_H_ + +#include "target_os_siginfo.h" +#include "target_arch_signal.h" + +#define TARGET_SIGHUP 1 /* hangup */ +#define TARGET_SIGINT 2 /* interrupt */ +#define TARGET_SIGQUIT 3 /* quit */ +#define TARGET_SIGILL 4 /* illegal instruction (not reset when caught) */ +#define TARGET_SIGTRAP 5 /* trace trap (not reset when caught) */ +#define TARGET_SIGABRT 6 /* abort() */ +#define TARGET_SIGIOT SIGABRT /* compatibility */ +#define TARGET_SIGEMT 7 /* EMT instruction */ +#define TARGET_SIGFPE 8 /* floating point exception */ +#define TARGET_SIGKILL 9 /* kill (cannot be caught or ignored) */ +#define TARGET_SIGBUS 10 /* bus error */ +#define TARGET_SIGSEGV 11 /* segmentation violation */ +#define TARGET_SIGSYS 12 /* bad argument to system call */ +#define TARGET_SIGPIPE 13 /* write on a pipe with no one to read it */ +#define TARGET_SIGALRM 14 /* alarm clock */ +#define TARGET_SIGTERM 15 /* software termination signal from kill */ +#define TARGET_SIGURG 16 /* urgent condition on IO channel */ +#define TARGET_SIGSTOP 17 /* sendable stop signal not from tty */ +#define TARGET_SIGTSTP 18 /* stop signal from tty */ +#define TARGET_SIGCONT 19 /* continue a stopped process */ +#define TARGET_SIGCHLD 20 /* to parent on child stop or exit */ +#define TARGET_SIGTTIN 21 /* to readers pgrp upon background tty read */ +#define TARGET_SIGTTOU 22 /* like TTIN for out if (tp->t_local<OSTOP) */ +#define TARGET_SIGIO 23 /* input/output possible signal */ +#define TARGET_SIGXCPU 24 /* exceeded CPU time limit */ +#define TARGET_SIGXFSZ 25 /* exceeded file size limit */ +#define TARGET_SIGVTALRM 26 /* virtual time alarm */ +#define TARGET_SIGPROF 27 /* profiling time alarm */ +#define TARGET_SIGWINCH 28 /* window size changes */ +#define TARGET_SIGINFO 29 /* information request */ +#define TARGET_SIGUSR1 30 /* user defined signal 1 */ +#define TARGET_SIGUSR2 31 /* user defined signal 2 */ + +/* + * Language spec says we must list exactly one parameter, even though we + * actually supply three. Ugh! 
+ */ +#define TARGET_SIG_DFL ((void (*)(int))0) +#define TARGET_SIG_IGN ((void (*)(int))1) +#define TARGET_SIG_ERR ((void (*)(int))-1) + +#define TARGET_SA_ONSTACK 0x0001 /* take signal on signal stack */ +#define TARGET_SA_RESTART 0x0002 /* restart system on signal return */ +#define TARGET_SA_RESETHAND 0x0004 /* reset to SIG_DFL when taking signal */ +#define TARGET_SA_NODEFER 0x0010 /* don't mask the signal we're delivering */ +#define TARGET_SA_NOCLDWAIT 0x0020 /* don't create zombies (assign to pid 1) */ +#define TARGET_SA_USERTRAMP 0x0100 /* do not bounce off kernel's sigtramp */ +#define TARGET_SA_NOCLDSTOP 0x0008 /* do not generate SIGCHLD on child stop */ +#define TARGET_SA_SIGINFO 0x0040 /* generate siginfo_t */ + +/* + * Flags for sigprocmask: + */ +#define TARGET_SIG_BLOCK 1 /* block specified signal set */ +#define TARGET_SIG_UNBLOCK 2 /* unblock specified signal set */ +#define TARGET_SIG_SETMASK 3 /* set specified signal set */ + +#define TARGET_BADSIG SIG_ERR + +#define TARGET_SS_ONSTACK 0x0001 /* take signals on alternate stack */ +#define TARGET_SS_DISABLE 0x0004 /* disable taking signals on alternate stack */ + +#endif /* !_TARGET_OS_SIGNAL_H_ */ diff --git a/bsd-user/netbsd/target_os_stack.h b/bsd-user/netbsd/target_os_stack.h new file mode 100644 index 00000000000..503279c1a90 --- /dev/null +++ b/bsd-user/netbsd/target_os_stack.h @@ -0,0 +1,56 @@ +/* + * NetBSD setup_initial_stack() implementation. + * + * Copyright (c) 2013-14 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_OS_STACK_H_ +#define _TARGET_OS_STACK_H_ + +#include "target_arch_sigtramp.h" + +static inline int setup_initial_stack(struct bsd_binprm *bprm, abi_ulong *p, + abi_ulong *stringp) +{ + int i; + abi_ulong stack_base; + + stack_base = (target_stkbas + target_stksiz) - + MAX_ARG_PAGES * TARGET_PAGE_SIZE; + if (p) { + *p = stack_base; + } + if (stringp) { + *stringp = stack_base; + } + + for (i = 0; i < MAX_ARG_PAGES; i++) { + if (bprm->page[i]) { + info->rss++; + if (!memcpy_to_target(stack_base, bprm->page[i], + TARGET_PAGE_SIZE)) { + errno = EFAULT; + return -1; + } + g_free(bprm->page[i]); + } + stack_base += TARGET_PAGE_SIZE; + } + + return 0; +} + +#endif /* !_TARGET_OS_STACK_H_ */ diff --git a/bsd-user/netbsd/target_os_thread.h b/bsd-user/netbsd/target_os_thread.h new file mode 100644 index 00000000000..904dd1bf782 --- /dev/null +++ b/bsd-user/netbsd/target_os_thread.h @@ -0,0 +1,25 @@ +/* + * NetBSD thread dependent code and definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
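Editor's note: setup_initial_stack() above copies the MAX_ARG_PAGES argument/environment pages staged by the loader in bprm->page[] onto the top of the guest stack, releasing each staging page as it goes. A simplified host-only sketch of that copy loop (the staging array, page size and string contents are invented for illustration):

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096
#define MAX_ARG_PAGES 4

int main(void)
{
    char *staged[MAX_ARG_PAGES] = { 0 };       /* pages filled by the "loader" */
    char stack[MAX_ARG_PAGES * PAGE_SIZE];     /* top of the "guest" stack     */
    char *dst = stack;

    staged[0] = calloc(1, PAGE_SIZE);          /* only the first page is used */
    memcpy(staged[0], "arg0\0HOME=/tmp", sizeof("arg0\0HOME=/tmp"));

    for (int i = 0; i < MAX_ARG_PAGES; i++) {
        if (staged[i]) {
            memcpy(dst, staged[i], PAGE_SIZE); /* copy into the stack area */
            free(staged[i]);                   /* staging page no longer needed */
            staged[i] = NULL;
        }
        dst += PAGE_SIZE;                      /* advance even over empty slots */
    }

    printf("first string on the stack: %s\n", stack);
    return 0;
}
```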
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_OS_THREAD_H_ +#define _TARGET_OS_THREAD_H_ + +#include "target_arch_thread.h" + +#endif /* !_TARGET_OS_THREAD_H_ */ diff --git a/bsd-user/openbsd/host-os.h b/bsd-user/openbsd/host-os.h new file mode 100644 index 00000000000..eb8fdf15679 --- /dev/null +++ b/bsd-user/openbsd/host-os.h @@ -0,0 +1,25 @@ +/* + * OpenBSD host dependent code and definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _HOST_OS_H_ +#define _HOST_OS_H_ + +#define HOST_DEFAULT_BSD_TYPE target_openbsd + +#endif /*!_HOST_OS_H_ */ diff --git a/bsd-user/openbsd/target_os_elf.h b/bsd-user/openbsd/target_os_elf.h new file mode 100644 index 00000000000..a5cfcd3aff8 --- /dev/null +++ b/bsd-user/openbsd/target_os_elf.h @@ -0,0 +1,146 @@ +/* + * openbsd ELF definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ +#ifndef _TARGET_OS_ELF_H_ +#define _TARGET_OS_ELF_H_ + +#include "target_arch_elf.h" +#include "elf.h" + +/* this flag is uneffective under linux too, should be deleted */ +#ifndef MAP_DENYWRITE +#define MAP_DENYWRITE 0 +#endif + +/* should probably go in elf.h */ +#ifndef ELIBBAD +#define ELIBBAD 80 +#endif + +#ifndef ELF_PLATFORM +#define ELF_PLATFORM (NULL) +#endif + +#ifndef ELF_HWCAP +#define ELF_HWCAP 0 +#endif + +#ifdef TARGET_ABI32 +#undef ELF_CLASS +#define ELF_CLASS ELFCLASS32 +#undef bswaptls +#define bswaptls(ptr) bswap32s(ptr) +#endif + +/* max code+data+bss space allocated to elf interpreter */ +#define INTERP_MAP_SIZE (32 * 1024 * 1024) + +/* max code+data+bss+brk space allocated to ET_DYN executables */ +#define ET_DYN_MAP_SIZE (128 * 1024 * 1024) + +/* Necessary parameters */ +#define TARGET_ELF_EXEC_PAGESIZE TARGET_PAGE_SIZE +#define TARGET_ELF_PAGESTART(_v) ((_v) & \ + ~(unsigned long)(TARGET_ELF_EXEC_PAGESIZE - 1)) +#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE - 1)) + +#define DLINFO_ITEMS 12 + +static abi_ulong target_create_elf_tables(abi_ulong p, int argc, int envc, + abi_ulong stringp, + struct elfhdr *exec, + abi_ulong load_addr, + abi_ulong load_bias, + abi_ulong interp_load_addr, + struct image_info *info) +{ + abi_ulong sp; + int size; + abi_ulong u_platform; + const char *k_platform; + const int n = sizeof(elf_addr_t); + + sp = p; + u_platform = 0; + k_platform = ELF_PLATFORM; + if (k_platform) { + size_t len = strlen(k_platform) + 1; + sp -= (len + n - 1) & ~(n - 1); + u_platform = sp; + /* FIXME - check return value of memcpy_to_target() for failure */ + memcpy_to_target(sp, k_platform, len); + } + /* + * Force 16 byte _final_ alignment here for generality. + */ + sp = sp & ~(abi_ulong)15; + size = (DLINFO_ITEMS + 1) * 2; + if (k_platform) { + size += 2; + } +#ifdef DLINFO_ARCH_ITEMS + size += DLINFO_ARCH_ITEMS * 2; +#endif + size += envc + argc + 2; + size += 1; /* argc itself */ + size *= n; + if (size & 15) { + sp -= 16 - (size & 15); + } + + /* + * OpenBSD defines elf_addr_t as Elf32_Off / Elf64_Off + */ +#define NEW_AUX_ENT(id, val) do { \ + sp -= n; put_user_ual(val, sp); \ + sp -= n; put_user_ual(id, sp); \ + } while (0) + + NEW_AUX_ENT(AT_NULL, 0); + + /* There must be exactly DLINFO_ITEMS entries here. */ + NEW_AUX_ENT(AT_PHDR, (abi_ulong)(load_addr + exec->e_phoff)); + NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof(struct elf_phdr))); + NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum)); + NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); + NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_load_addr)); + NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0); + NEW_AUX_ENT(AT_ENTRY, load_bias + exec->e_entry); + NEW_AUX_ENT(AT_UID, (abi_ulong)getuid()); + NEW_AUX_ENT(AT_EUID, (abi_ulong)geteuid()); + NEW_AUX_ENT(AT_GID, (abi_ulong)getgid()); + NEW_AUX_ENT(AT_EGID, (abi_ulong)getegid()); + NEW_AUX_ENT(AT_HWCAP, (abi_ulong)ELF_HWCAP); + NEW_AUX_ENT(AT_CLKTCK, (abi_ulong)sysconf(_SC_CLK_TCK)); + if (k_platform) { + NEW_AUX_ENT(AT_PLATFORM, u_platform); + } +#ifdef ARCH_DLINFO + /* + * ARCH_DLINFO must come last so platform specific code can enforce + * special alignment requirements on the AUXV if necessary (eg. PPC). 
+ */ + ARCH_DLINFO; +#endif +#undef NEW_AUX_ENT + + sp = loader_build_argptr(envc, argc, sp, stringp); + return sp; +} + +#endif /* _TARGET_OS_ELF_H_ */ diff --git a/bsd-user/openbsd/target_os_siginfo.h b/bsd-user/openbsd/target_os_siginfo.h new file mode 100644 index 00000000000..baf646a5ab3 --- /dev/null +++ b/bsd-user/openbsd/target_os_siginfo.h @@ -0,0 +1,82 @@ +#ifndef _TARGET_OS_SIGINFO_H_ +#define _TARGET_OS_SIGINFO_H_ + +#define TARGET_NSIG 32 /* counting 0; could be 33 (mask is 1-32) */ +#define TARGET_NSIG_BPW (sizeof(uint32_t) * 8) +#define TARGET_NSIG_WORDS (TARGET_NSIG / TARGET_NSIG_BPW) + +/* this struct defines a stack used during syscall handling */ +typedef struct target_sigaltstack { + abi_long ss_sp; + abi_ulong ss_size; + abi_long ss_flags; +} target_stack_t; + +typedef struct { + uint32_t __bits[TARGET_NSIG_WORDS]; +} target_sigset_t + +struct target_sigaction { + abi_ulong _sa_handler; + int32_t sa_flags; + target_sigset_t sa_mask; +}; + +/* Compare to sys/siginfo.h */ +typedef union target_sigval { + int sival_int; + abi_ulong sival_ptr; +} target_sigval_t; + +struct target_ksiginfo { + int32_t _signo; + int32_t _code; + int32_t _errno; +#if TARGET_ABI_BITS == 64 + int32_t _pad; +#endif + union { + struct { + int32_t _pid; + int32_t _uid; + target_sigval_t _value; + } _rt; + + struct { + int32_t _pid; + int32_t _uid; + int32_t _struct; + /* clock_t _utime; */ + /* clock_t _stime; */ + } _child; + + struct { + abi_ulong _addr; + int32_t _trap; + } _fault; + + struct { + long _band; + int _fd; + } _poll; + } _reason; +}; + +typedef union target_siginfo { + int8_t si_pad[128]; + struct target_ksiginfo _info; +} target_siginfo_t; + +#define target_si_signo _info._signo +#define target_si_code _info._code +#define target_si_errno _info._errno +#define target_si_addr _info._reason._fault._addr + +#define TARGET_SEGV_MAPERR 1 +#define TARGET_SEGV_ACCERR 2 + +#define TARGET_TRAP_BRKPT 1 +#define TARGET_TRAP_TRACE 2 + + +#endif /* ! 
_TARGET_OS_SIGINFO_H_ */ diff --git a/bsd-user/openbsd/target_os_signal.h b/bsd-user/openbsd/target_os_signal.h new file mode 100644 index 00000000000..a373922f7e8 --- /dev/null +++ b/bsd-user/openbsd/target_os_signal.h @@ -0,0 +1,69 @@ +#ifndef _TARGET_OS_SIGNAL_H_ +#define _TARGET_OS_SIGNAL_H_ + +#include "target_os_siginfo.h" +#include "target_arch_signal.h" + +#define TARGET_SIGHUP 1 /* hangup */ +#define TARGET_SIGINT 2 /* interrupt */ +#define TARGET_SIGQUIT 3 /* quit */ +#define TARGET_SIGILL 4 /* illegal instruction (not reset when caught) */ +#define TARGET_SIGTRAP 5 /* trace trap (not reset when caught) */ +#define TARGET_SIGABRT 6 /* abort() */ +#define TARGET_SIGIOT SIGABRT /* compatibility */ +#define TARGET_SIGEMT 7 /* EMT instruction */ +#define TARGET_SIGFPE 8 /* floating point exception */ +#define TARGET_SIGKILL 9 /* kill (cannot be caught or ignored) */ +#define TARGET_SIGBUS 10 /* bus error */ +#define TARGET_SIGSEGV 11 /* segmentation violation */ +#define TARGET_SIGSYS 12 /* bad argument to system call */ +#define TARGET_SIGPIPE 13 /* write on a pipe with no one to read it */ +#define TARGET_SIGALRM 14 /* alarm clock */ +#define TARGET_SIGTERM 15 /* software termination signal from kill */ +#define TARGET_SIGURG 16 /* urgent condition on IO channel */ +#define TARGET_SIGSTOP 17 /* sendable stop signal not from tty */ +#define TARGET_SIGTSTP 18 /* stop signal from tty */ +#define TARGET_SIGCONT 19 /* continue a stopped process */ +#define TARGET_SIGCHLD 20 /* to parent on child stop or exit */ +#define TARGET_SIGTTIN 21 /* to readers pgrp upon background tty read */ +#define TARGET_SIGTTOU 22 /* like TTIN for out if (tp->t_local<OSTOP) */ +#define TARGET_SIGIO 23 /* input/output possible signal */ +#define TARGET_SIGXCPU 24 /* exceeded CPU time limit */ +#define TARGET_SIGXFSZ 25 /* exceeded file size limit */ +#define TARGET_SIGVTALRM 26 /* virtual time alarm */ +#define TARGET_SIGPROF 27 /* profiling time alarm */ +#define TARGET_SIGWINCH 28 /* window size changes */ +#define TARGET_SIGINFO 29 /* information request */ +#define TARGET_SIGUSR1 30 /* user defined signal 1 */ +#define TARGET_SIGUSR2 31 /* user defined signal 2 */ + +/* + * Language spec says we must list exactly one parameter, even though we + * actually supply three. Ugh! 
+ */ +#define TARGET_SIG_DFL ((void (*)(int))0) +#define TARGET_SIG_IGN ((void (*)(int))1) +#define TARGET_SIG_ERR ((void (*)(int))-1) + +#define TARGET_SA_ONSTACK 0x0001 /* take signal on signal stack */ +#define TARGET_SA_RESTART 0x0002 /* restart system on signal return */ +#define TARGET_SA_RESETHAND 0x0004 /* reset to SIG_DFL when taking signal */ +#define TARGET_SA_NODEFER 0x0010 /* don't mask the signal we're delivering */ +#define TARGET_SA_NOCLDWAIT 0x0020 /* don't create zombies (assign to pid 1) */ +#define TARGET_SA_USERTRAMP 0x0100 /* do not bounce off kernel's sigtramp */ +#define TARGET_SA_NOCLDSTOP 0x0008 /* do not generate SIGCHLD on child stop */ +#define TARGET_SA_SIGINFO 0x0040 /* generate siginfo_t */ + +/* + * Flags for sigprocmask: + */ +#define TARGET_SIG_BLOCK 1 /* block specified signal set */ +#define TARGET_SIG_UNBLOCK 2 /* unblock specified signal set */ +#define TARGET_SIG_SETMASK 3 /* set specified signal set */ + +#define TARGET_BADSIG SIG_ERR + +#define TARGET_SS_ONSTACK 0x0001 /* take signals on alternate stack */ +#define TARGET_SS_DISABLE 0x0004 /* disable taking signals on alternate stack */ + +#endif /* !_TARGET_OS_SIGNAL_H_ */ diff --git a/bsd-user/openbsd/target_os_stack.h b/bsd-user/openbsd/target_os_stack.h new file mode 100644 index 00000000000..4b37955d3b1 --- /dev/null +++ b/bsd-user/openbsd/target_os_stack.h @@ -0,0 +1,56 @@ +/* + * OpenBSD setup_initial_stack() implementation. + * + * Copyright (c) 2013-14 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_OS_STACK_H_ +#define _TARGET_OS_STACK_H_ + +#include "target_arch_sigtramp.h" + +static inline int setup_initial_stack(struct bsd_binprm *bprm, abi_ulong *p, + abi_ulong *stringp) +{ + int i; + abi_ulong stack_base; + + stack_base = (target_stkbas + target_stksiz) - + MAX_ARG_PAGES * TARGET_PAGE_SIZE; + if (p) { + *p = stack_base; + } + if (stringp) { + *stringp = stack_base; + } + + for (i = 0; i < MAX_ARG_PAGES; i++) { + if (bprm->page[i]) { + info->rss++; + if (!memcpy_to_target(stack_base, bprm->page[i], + TARGET_PAGE_SIZE)) { + errno = EFAULT; + return -1; + } + g_free(bprm->page[i]); + } + stack_base += TARGET_PAGE_SIZE; + } + + return 0; +} + +#endif /* !_TARGET_OS_STACK_H_ */ diff --git a/bsd-user/openbsd/target_os_thread.h b/bsd-user/openbsd/target_os_thread.h new file mode 100644 index 00000000000..01ed0d9fc86 --- /dev/null +++ b/bsd-user/openbsd/target_os_thread.h @@ -0,0 +1,25 @@ +/* + * OpenBSD thread dependent code and definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_OS_THREAD_H_ +#define _TARGET_OS_THREAD_H_ + +#include "target_arch_thread.h" + +#endif /* !_TARGET_OS_THREAD_H_ */ diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h index d2bcaab7413..1b3b974afe9 100644 --- a/bsd-user/qemu.h +++ b/bsd-user/qemu.h @@ -17,16 +17,18 @@ #ifndef QEMU_H #define QEMU_H - +#include "qemu/osdep.h" #include "cpu.h" +#include "qemu/units.h" #include "exec/cpu_ldst.h" +#include "exec/exec-all.h" #undef DEBUG_REMAP -#ifdef DEBUG_REMAP -#endif /* DEBUG_REMAP */ #include "exec/user/abitypes.h" +extern char **environ; + enum BSDType { target_freebsd, target_netbsd, @@ -34,22 +36,21 @@ enum BSDType { }; extern enum BSDType bsd_type; +#include "exec/user/thunk.h" +#include "target_arch.h" #include "syscall_defs.h" #include "target_syscall.h" -#include "target_signal.h" +#include "target_os_vmparam.h" +#include "target_os_signal.h" #include "exec/gdbstub.h" -#if defined(CONFIG_USE_NPTL) -#define THREAD __thread -#else -#define THREAD -#endif - -/* This struct is used to hold certain information about the image. - * Basically, it replicates in user space what would be certain - * task_struct fields in the kernel +/* + * This struct is used to hold certain information about the image. Basically, + * it replicates in user space what would be certain task_struct fields in the + * kernel */ struct image_info { + abi_ulong load_bias; abi_ulong load_addr; abi_ulong start_code; abi_ulong end_code; @@ -64,78 +65,90 @@ struct image_info { abi_ulong entry; abi_ulong code_offset; abi_ulong data_offset; - int personality; + abi_ulong arg_start; + abi_ulong arg_end; + uint32_t elf_flags; }; #define MAX_SIGQUEUE_SIZE 1024 -struct sigqueue { - struct sigqueue *next; - //target_siginfo_t info; +struct qemu_sigqueue { + struct qemu_sigqueue *next; + target_siginfo_t info; }; struct emulated_sigtable { int pending; /* true if signal is pending */ - struct sigqueue *first; - struct sigqueue info; /* in order to always have memory for the - first signal, we put it here */ + struct qemu_sigqueue *first; + struct qemu_sigqueue info; /* Put first signal info here */ }; -/* NOTE: we force a big alignment so that the stack stored after is - aligned too */ +/* + * NOTE: we force a big alignment so that the stack stored after is aligned too + */ typedef struct TaskState { pid_t ts_tid; /* tid (or pid) of this task */ struct TaskState *next; - int used; /* non zero if used */ + struct bsd_binprm *bprm; struct image_info *info; struct emulated_sigtable sigtab[TARGET_NSIG]; - struct sigqueue sigqueue_table[MAX_SIGQUEUE_SIZE]; /* siginfo queue */ - struct sigqueue *first_free; /* first free siginfo queue entry */ + struct qemu_sigqueue sigqueue_table[MAX_SIGQUEUE_SIZE]; /* siginfo queue */ + struct qemu_sigqueue *first_free; /* first free siginfo queue entry */ int signal_pending; /* non zero if a signal may be pending */ uint8_t stack[]; } __attribute__((aligned(16))) TaskState; void init_task_state(TaskState *ts); +void stop_all_tasks(void); extern const char *qemu_uname_release; -extern unsigned long mmap_min_addr; -/* ??? See if we can avoid exposing so much of the loader internals. 
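Editor's note: the reworked TaskState above keeps a fixed pool of qemu_sigqueue entries chained through first_free, so queuing a signal never allocates memory in a context where that might not be safe. A toy version of that intrusive free list (pool size and struct fields are illustrative):

```c
#include <stddef.h>
#include <stdio.h>

#define MAX_SIGQUEUE_SIZE 8

struct sigq {
    struct sigq *next;
    int signo;                       /* stand-in for target_siginfo_t */
};

static struct sigq pool[MAX_SIGQUEUE_SIZE];
static struct sigq *first_free;

static void pool_init(void)
{
    for (int i = 0; i < MAX_SIGQUEUE_SIZE - 1; i++) {
        pool[i].next = &pool[i + 1];
    }
    pool[MAX_SIGQUEUE_SIZE - 1].next = NULL;
    first_free = &pool[0];
}

static struct sigq *alloc_sigq(void)
{
    struct sigq *q = first_free;
    if (q) {
        first_free = q->next;        /* pop from the free list */
    }
    return q;                        /* NULL means the queue is full */
}

static void free_sigq(struct sigq *q)
{
    q->next = first_free;            /* push back onto the free list */
    first_free = q;
}

int main(void)
{
    pool_init();
    struct sigq *q = alloc_sigq();
    q->signo = 10;
    printf("queued signal %d\n", q->signo);
    free_sigq(q);
    return 0;
}
```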
 */
 /*
- * MAX_ARG_PAGES defines the number of pages allocated for arguments
- * and envelope for the new program. 32 should suffice, this gives
- * a maximum env+arg of 128kB w/4KB pages!
+ * TARGET_ARG_MAX defines the number of bytes allocated for arguments
+ * and envelope for the new program. 256k should suffice for a reasonable
+ * maximum env+arg in 32-bit environments, bump it up to 512k for !ILP32
+ * platforms.
  */
-#define MAX_ARG_PAGES 32
+#if TARGET_ABI_BITS > 32
+#define TARGET_ARG_MAX (512 * KiB)
+#else
+#define TARGET_ARG_MAX (256 * KiB)
+#endif
+#define MAX_ARG_PAGES (TARGET_ARG_MAX / TARGET_PAGE_SIZE)
 /*
  * This structure is used to hold the arguments that are
  * used when loading binaries.
  */
-struct linux_binprm {
+struct bsd_binprm {
     char buf[128];
     void *page[MAX_ARG_PAGES];
     abi_ulong p;
+    abi_ulong stringp;
     int fd;
     int e_uid, e_gid;
     int argc, envc;
     char **argv;
     char **envp;
-    char * filename;    /* Name of binary */
+    char *filename;     /* (Given) Name of binary */
+    char *fullpath;     /* Full path of binary */
+    int (*core_dump)(int, CPUArchState *);
 };
 void do_init_thread(struct target_pt_regs *regs, struct image_info *infop);
 abi_ulong loader_build_argptr(int envc, int argc, abi_ulong sp,
-                              abi_ulong stringp, int push_ptr);
-int loader_exec(const char * filename, char ** argv, char ** envp,
-                struct target_pt_regs * regs, struct image_info *infop);
+                              abi_ulong stringp);
+int loader_exec(const char *filename, char **argv, char **envp,
+                struct target_pt_regs *regs, struct image_info *infop,
+                struct bsd_binprm *bprm);
-int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs,
-                    struct image_info * info);
-int load_flt_binary(struct linux_binprm * bprm, struct target_pt_regs * regs,
-                    struct image_info * info);
+int load_elf_binary(struct bsd_binprm *bprm, struct target_pt_regs *regs,
+                    struct image_info *info);
+int load_flt_binary(struct bsd_binprm *bprm, struct target_pt_regs *regs,
+                    struct image_info *info);
+int is_target_elf_binary(int fd);
 abi_long memcpy_to_target(abi_ulong dest, const void *src, unsigned long len);
@@ -153,7 +166,7 @@ abi_long do_openbsd_syscall(void *cpu_env, int num, abi_long arg1,
                            abi_long arg2, abi_long arg3, abi_long arg4,
                            abi_long arg5, abi_long arg6);
 void gemu_log(const char *fmt, ...)
GCC_FMT_ATTR(1, 2); -extern THREAD CPUState *thread_cpu; +extern __thread CPUState *thread_cpu; void cpu_loop(CPUArchState *env); char *target_strerror(int err); int get_osversion(void); @@ -193,28 +206,41 @@ extern int do_strace; /* signal.c */ void process_pending_signals(CPUArchState *cpu_env); void signal_init(void); -//int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info); -//void host_to_target_siginfo(target_siginfo_t *tinfo, const siginfo_t *info); -//void target_to_host_siginfo(siginfo_t *info, const target_siginfo_t *tinfo); long do_sigreturn(CPUArchState *env); long do_rt_sigreturn(CPUArchState *env); +void queue_signal(CPUArchState *env, int sig, target_siginfo_t *info); abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp); /* mmap.c */ int target_mprotect(abi_ulong start, abi_ulong len, int prot); abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, - int flags, int fd, abi_ulong offset); + int flags, int fd, off_t offset); int target_munmap(abi_ulong start, abi_ulong len); abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, abi_ulong new_size, unsigned long flags, abi_ulong new_addr); int target_msync(abi_ulong start, abi_ulong len, int flags); extern unsigned long last_brk; +extern abi_ulong mmap_next_start; +abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size); void mmap_fork_start(void); void mmap_fork_end(int child); /* main.c */ -extern unsigned long x86_stack_size; +extern char qemu_proc_pathname[]; +extern unsigned long target_maxtsiz; +extern unsigned long target_dfldsiz; +extern unsigned long target_maxdsiz; +extern unsigned long target_dflssiz; +extern unsigned long target_maxssiz; +extern unsigned long target_sgrowsiz; + +/* syscall.c */ +abi_long get_errno(abi_long ret); +bool is_error(abi_long ret); + +/* os-sys.c */ +abi_long do_freebsd_sysarch(void *cpu_env, abi_long arg1, abi_long arg2); /* user access */ @@ -226,14 +252,16 @@ static inline bool access_ok(int type, abi_ulong addr, abi_ulong size) return page_check_range((target_ulong)addr, size, type) == 0; } -/* NOTE __get_user and __put_user use host pointers and don't check access. */ -/* These are usually used to access struct data members once the - * struct has been locked - usually with lock_user_struct(). +/* + * NOTE __get_user and __put_user use host pointers and don't check access. + * + * These are usually used to access struct data members once the struct has been + * locked - usually with lock_user_struct(). */ #define __put_user(x, hptr)\ ({\ int size = sizeof(*hptr);\ - switch(size) {\ + switch (size) {\ case 1:\ *(uint8_t *)(hptr) = (uint8_t)(typeof(*hptr))(x);\ break;\ @@ -248,14 +276,14 @@ static inline bool access_ok(int type, abi_ulong addr, abi_ulong size) break;\ default:\ abort();\ - }\ + } \ 0;\ }) #define __get_user(x, hptr) \ ({\ int size = sizeof(*hptr);\ - switch(size) {\ + switch (size) {\ case 1:\ x = (typeof(*hptr))*(uint8_t *)(hptr);\ break;\ @@ -269,24 +297,26 @@ static inline bool access_ok(int type, abi_ulong addr, abi_ulong size) x = (typeof(*hptr))tswap64(*(uint64_t *)(hptr));\ break;\ default:\ - /* avoid warning */\ x = 0;\ abort();\ - }\ + } \ 0;\ }) -/* put_user()/get_user() take a guest address and check access */ -/* These are usually used to access an atomic data type, such as an int, - * that has been passed by address. These internally perform locking - * and unlocking on the data type. 
+/* + * put_user()/get_user() take a guest address and check access + * + * These are usually used to access an atomic data type, such as an int, that + * has been passed by address. These internally perform locking and unlocking + * on the data type. */ #define put_user(x, gaddr, target_type) \ ({ \ abi_ulong __gaddr = (gaddr); \ target_type *__hptr; \ abi_long __ret; \ - if ((__hptr = lock_user(VERIFY_WRITE, __gaddr, sizeof(target_type), 0))) { \ + __hptr = lock_user(VERIFY_WRITE, __gaddr, sizeof(target_type), 0); \ + if (__hptr) { \ __ret = __put_user((x), __hptr); \ unlock_user(__hptr, __gaddr, sizeof(target_type)); \ } else \ @@ -299,11 +329,11 @@ static inline bool access_ok(int type, abi_ulong addr, abi_ulong size) abi_ulong __gaddr = (gaddr); \ target_type *__hptr; \ abi_long __ret; \ - if ((__hptr = lock_user(VERIFY_READ, __gaddr, sizeof(target_type), 1))) { \ + __hptr = lock_user(VERIFY_READ, __gaddr, sizeof(target_type), 1); \ + if (__hptr) { \ __ret = __get_user((x), __hptr); \ unlock_user(__hptr, __gaddr, 0); \ } else { \ - /* avoid warning */ \ (x) = 0; \ __ret = -TARGET_EFAULT; \ } \ @@ -332,33 +362,41 @@ static inline bool access_ok(int type, abi_ulong addr, abi_ulong size) #define get_user_u8(x, gaddr) get_user((x), (gaddr), uint8_t) #define get_user_s8(x, gaddr) get_user((x), (gaddr), int8_t) -/* copy_from_user() and copy_to_user() are usually used to copy data +/* + * copy_from_user() and copy_to_user() are usually used to copy data * buffers between the target and host. These internally perform * locking/unlocking of the memory. */ abi_long copy_from_user(void *hptr, abi_ulong gaddr, size_t len); abi_long copy_to_user(abi_ulong gaddr, void *hptr, size_t len); -/* Functions for accessing guest memory. The tget and tput functions - read/write single values, byteswapping as necessary. The lock_user function - gets a pointer to a contiguous area of guest memory, but does not perform - any byteswapping. lock_user may return either a pointer to the guest - memory, or a temporary buffer. */ +/* + * Functions for accessing guest memory. The tget and tput functions + * read/write single values, byteswapping as necessary. The lock_user function + * gets a pointer to a contiguous area of guest memory, but does not perform + * any byteswapping. lock_user may return either a pointer to the guest + * memory, or a temporary buffer. + */ -/* Lock an area of guest memory into the host. If copy is true then the - host area will have the same contents as the guest. */ -static inline void *lock_user(int type, abi_ulong guest_addr, long len, int copy) +/* + * Lock an area of guest memory into the host. If copy is true then the + * host area will have the same contents as the guest. + */ +static inline void *lock_user(int type, abi_ulong guest_addr, long len, + int copy) { - if (!access_ok(type, guest_addr, len)) + if (!access_ok(type, guest_addr, len)) { return NULL; + } #ifdef DEBUG_REMAP { void *addr; addr = g_malloc(len); - if (copy) + if (copy) { memcpy(addr, g2h_untagged(guest_addr), len); - else + } else { memset(addr, 0, len); + } return addr; } #else @@ -366,26 +404,32 @@ static inline void *lock_user(int type, abi_ulong guest_addr, long len, int copy #endif } -/* Unlock an area of guest memory. The first LEN bytes must be - flushed back to guest memory. host_ptr = NULL is explicitly - allowed and does nothing. */ +/* + * Unlock an area of guest memory. The first LEN bytes must be flushed back to + * guest memory. host_ptr = NULL is explicitly allowed and does nothing. 
+ */ static inline void unlock_user(void *host_ptr, abi_ulong guest_addr, long len) { #ifdef DEBUG_REMAP - if (!host_ptr) + if (!host_ptr) { return; - if (host_ptr == g2h_untagged(guest_addr)) + } + if (host_ptr == g2h_untagged(guest_addr)) { return; - if (len > 0) + } + if (len > 0) { memcpy(g2h_untagged(guest_addr), host_ptr, len); + } g_free(host_ptr); #endif } -/* Return the length of a string in target memory or -TARGET_EFAULT if - access error. */ +/* + * Return the length of a string in target memory or -TARGET_EFAULT if access + * error. + */ abi_long target_strlen(abi_ulong gaddr); /* Like lock_user but for null terminated strings. */ @@ -393,8 +437,9 @@ static inline void *lock_user_string(abi_ulong guest_addr) { abi_long len; len = target_strlen(guest_addr); - if (len < 0) + if (len < 0) { return NULL; + } return lock_user(VERIFY_READ, guest_addr, (long)(len + 1), 1); } @@ -404,8 +449,6 @@ static inline void *lock_user_string(abi_ulong guest_addr) #define unlock_user_struct(host_ptr, guest_addr, copy) \ unlock_user(host_ptr, guest_addr, (copy) ? sizeof(*host_ptr) : 0) -#if defined(CONFIG_USE_NPTL) #include -#endif #endif /* QEMU_H */ diff --git a/bsd-user/signal.c b/bsd-user/signal.c index f6f7aa2427d..05b277c6422 100644 --- a/bsd-user/signal.c +++ b/bsd-user/signal.c @@ -16,10 +16,23 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ -#include "qemu/osdep.h" +#include "qemu/osdep.h" #include "qemu.h" -#include "target_signal.h" + +/* + * Stubbed out routines until we merge signal support from bsd-user + * fork. + */ + +/* + * Queue a signal so that it will be send to the virtual CPU as soon as + * possible. + */ +void queue_signal(CPUArchState *env, int sig, target_siginfo_t *info) +{ + qemu_log_mask(LOG_UNIMP, "No signal queueing, dropping signal %d\n", sig); +} void signal_init(void) { @@ -28,3 +41,19 @@ void signal_init(void) void process_pending_signals(CPUArchState *cpu_env) { } + +void cpu_loop_exit_sigsegv(CPUState *cpu, target_ulong addr, + MMUAccessType access_type, bool maperr, uintptr_t ra) +{ + qemu_log_mask(LOG_UNIMP, "No signal support for SIGSEGV\n"); + /* unreachable */ + abort(); +} + +void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr, + MMUAccessType access_type, uintptr_t ra) +{ + qemu_log_mask(LOG_UNIMP, "No signal support for SIGBUS\n"); + /* unreachable */ + abort(); +} diff --git a/bsd-user/sparc/target_arch_sysarch.h b/bsd-user/sparc/target_arch_sysarch.h deleted file mode 100644 index d0b85ef6bbb..00000000000 --- a/bsd-user/sparc/target_arch_sysarch.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * SPARC sysarch() system call emulation - * - * Copyright (c) 2013 Stacey D. Son - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . 
- */ - -#ifndef BSD_USER_ARCH_SYSARCH_H_ -#define BSD_USER_ARCH_SYSARCH_H_ - -#include "target_syscall.h" - -static inline abi_long do_freebsd_arch_sysarch(void *env, int op, - abi_ulong parms) -{ - int ret = 0; - - switch (op) { - case TARGET_SPARC_SIGTRAMP_INSTALL: - /* XXX not currently handled */ - case TARGET_SPARC_UTRAP_INSTALL: - /* XXX not currently handled */ - default: - ret = -TARGET_EINVAL; - break; - } - - return ret; -} - -static inline void do_freebsd_arch_print_sysarch( - const struct syscallname *name, abi_long arg1, abi_long arg2, - abi_long arg3, abi_long arg4, abi_long arg5, abi_long arg6) -{ - - gemu_log("%s(%d, " TARGET_ABI_FMT_lx ", " TARGET_ABI_FMT_lx ", " - TARGET_ABI_FMT_lx ")", name->name, (int)arg1, arg2, arg3, arg4); -} - -#endif /*!BSD_USER_ARCH_SYSARCH_H_ */ diff --git a/bsd-user/sparc/target_signal.h b/bsd-user/sparc/target_signal.h deleted file mode 100644 index 5b2abba40f2..00000000000 --- a/bsd-user/sparc/target_signal.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef TARGET_SIGNAL_H -#define TARGET_SIGNAL_H - -#include "cpu.h" - -/* this struct defines a stack used during syscall handling */ - -typedef struct target_sigaltstack { - abi_ulong ss_sp; - abi_long ss_flags; - abi_ulong ss_size; -} target_stack_t; - - -#ifndef UREG_I6 -#define UREG_I6 6 -#endif -#ifndef UREG_FP -#define UREG_FP UREG_I6 -#endif - -static inline abi_ulong get_sp_from_cpustate(CPUSPARCState *state) -{ - return state->regwptr[UREG_FP]; -} - -#endif /* TARGET_SIGNAL_H */ diff --git a/bsd-user/sparc/target_syscall.h b/bsd-user/sparc/target_syscall.h deleted file mode 100644 index 151284754be..00000000000 --- a/bsd-user/sparc/target_syscall.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * sparc dependent system call definitions - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ -#ifndef TARGET_SYSCALL_H -#define TARGET_SYSCALL_H - -struct target_pt_regs { - abi_ulong psr; - abi_ulong pc; - abi_ulong npc; - abi_ulong y; - abi_ulong u_regs[16]; -}; - -#define UNAME_MACHINE "sun4" -#define TARGET_HW_MACHINE "sparc" -#define TARGET_HW_MACHINE_ARCH "sparc" - -#define TARGET_SPARC_UTRAP_INSTALL 1 -#define TARGET_SPARC_SIGTRAMP_INSTALL 2 - -#endif /* TARGET_SYSCALL_H */ diff --git a/bsd-user/sparc64/target_arch_sysarch.h b/bsd-user/sparc64/target_arch_sysarch.h deleted file mode 100644 index e6f17c15045..00000000000 --- a/bsd-user/sparc64/target_arch_sysarch.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * SPARC64 sysarch() system call emulation - * - * Copyright (c) 2013 Stacey D. Son - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#ifndef BSD_USER_ARCH_SYSARCH_H_ -#define BSD_USER_ARCH_SYSARCH_H_ - -#include "target_syscall.h" - -static inline abi_long do_freebsd_arch_sysarch(void *env, int op, - abi_ulong parms) -{ - int ret = 0; - - switch (op) { - case TARGET_SPARC_SIGTRAMP_INSTALL: - /* XXX not currently handled */ - case TARGET_SPARC_UTRAP_INSTALL: - /* XXX not currently handled */ - default: - ret = -TARGET_EINVAL; - break; - } - - return ret; -} - -static inline void do_freebsd_arch_print_sysarch( - const struct syscallname *name, abi_long arg1, abi_long arg2, - abi_long arg3, abi_long arg4, abi_long arg5, abi_long arg6) -{ - - gemu_log("%s(%d, " TARGET_ABI_FMT_lx ", " TARGET_ABI_FMT_lx ", " - TARGET_ABI_FMT_lx ")", name->name, (int)arg1, arg2, arg3, arg4); -} - -#endif /*!BSD_USER_ARCH_SYSARCH_H_ */ diff --git a/bsd-user/sparc64/target_signal.h b/bsd-user/sparc64/target_signal.h deleted file mode 100644 index 5b2abba40f2..00000000000 --- a/bsd-user/sparc64/target_signal.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef TARGET_SIGNAL_H -#define TARGET_SIGNAL_H - -#include "cpu.h" - -/* this struct defines a stack used during syscall handling */ - -typedef struct target_sigaltstack { - abi_ulong ss_sp; - abi_long ss_flags; - abi_ulong ss_size; -} target_stack_t; - - -#ifndef UREG_I6 -#define UREG_I6 6 -#endif -#ifndef UREG_FP -#define UREG_FP UREG_I6 -#endif - -static inline abi_ulong get_sp_from_cpustate(CPUSPARCState *state) -{ - return state->regwptr[UREG_FP]; -} - -#endif /* TARGET_SIGNAL_H */ diff --git a/bsd-user/sparc64/target_syscall.h b/bsd-user/sparc64/target_syscall.h deleted file mode 100644 index b7d986a76d4..00000000000 --- a/bsd-user/sparc64/target_syscall.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * sparc64 dependent system call definitions - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . 
- */ -#ifndef TARGET_SYSCALL_H -#define TARGET_SYSCALL_H - -struct target_pt_regs { - abi_ulong u_regs[16]; - abi_ulong tstate; - abi_ulong pc; - abi_ulong npc; - abi_ulong y; - abi_ulong fprs; -}; - -#define UNAME_MACHINE "sun4u" -#define TARGET_HW_MACHINE "sparc" -#define TARGET_HW_MACHINE_ARCH "sparc64" - -#define TARGET_SPARC_UTRAP_INSTALL 1 -#define TARGET_SPARC_SIGTRAMP_INSTALL 2 - -#endif /* TARGET_SYSCALL_H */ diff --git a/bsd-user/strace.c b/bsd-user/strace.c index 2c3b59caf06..be40b8a20cf 100644 --- a/bsd-user/strace.c +++ b/bsd-user/strace.c @@ -128,14 +128,6 @@ static void print_syscall_ret_addr(const struct syscallname *name, abi_long ret) } } -#if 0 /* currently unused */ -static void -print_syscall_ret_raw(struct syscallname *name, abi_long ret) -{ - gemu_log(" = 0x" TARGET_ABI_FMT_lx "\n", ret); -} -#endif - /* * An array of all of the syscalls we know about */ diff --git a/bsd-user/syscall.c b/bsd-user/syscall.c index adc3d21b542..d3322760f43 100644 --- a/bsd-user/syscall.c +++ b/bsd-user/syscall.c @@ -33,18 +33,18 @@ static abi_ulong target_brk; static abi_ulong target_original_brk; -static inline abi_long get_errno(abi_long ret) +abi_long get_errno(abi_long ret) { - if (ret == -1) + if (ret == -1) { /* XXX need to translate host -> target errnos here */ return -(errno); - else - return ret; + } + return ret; } #define target_to_host_bitmask(x, tbl) (x) -static inline int is_error(abi_long ret) +bool is_error(abi_long ret) { return (abi_ulong)ret >= (abi_ulong)(-4096); } @@ -88,67 +88,6 @@ static abi_long do_obreak(abi_ulong new_brk) return 0; } -#if defined(TARGET_I386) -static abi_long do_freebsd_sysarch(CPUX86State *env, int op, abi_ulong parms) -{ - abi_long ret = 0; - abi_ulong val; - int idx; - - switch(op) { -#ifdef TARGET_ABI32 - case TARGET_FREEBSD_I386_SET_GSBASE: - case TARGET_FREEBSD_I386_SET_FSBASE: - if (op == TARGET_FREEBSD_I386_SET_GSBASE) -#else - case TARGET_FREEBSD_AMD64_SET_GSBASE: - case TARGET_FREEBSD_AMD64_SET_FSBASE: - if (op == TARGET_FREEBSD_AMD64_SET_GSBASE) -#endif - idx = R_GS; - else - idx = R_FS; - if (get_user(val, parms, abi_ulong)) - return -TARGET_EFAULT; - cpu_x86_load_seg(env, idx, 0); - env->segs[idx].base = val; - break; -#ifdef TARGET_ABI32 - case TARGET_FREEBSD_I386_GET_GSBASE: - case TARGET_FREEBSD_I386_GET_FSBASE: - if (op == TARGET_FREEBSD_I386_GET_GSBASE) -#else - case TARGET_FREEBSD_AMD64_GET_GSBASE: - case TARGET_FREEBSD_AMD64_GET_FSBASE: - if (op == TARGET_FREEBSD_AMD64_GET_GSBASE) -#endif - idx = R_GS; - else - idx = R_FS; - val = env->segs[idx].base; - if (put_user(val, parms, abi_ulong)) - return -TARGET_EFAULT; - break; - /* XXX handle the others... 
*/ - default: - ret = -TARGET_EINVAL; - break; - } - return ret; -} -#endif - -#ifdef TARGET_SPARC -static abi_long do_freebsd_sysarch(void *env, int op, abi_ulong parms) -{ - /* XXX handle - * TARGET_FREEBSD_SPARC_UTRAP_INSTALL, - * TARGET_FREEBSD_SPARC_SIGTRAMP_INSTALL - */ - return -TARGET_EINVAL; -} -#endif - #ifdef __FreeBSD__ /* * XXX this uses the undocumented oidfmt interface to find the kind of @@ -199,6 +138,7 @@ static int sysctl_oldcvt(void *holdp, size_t holdlen, uint32_t kind) #else case CTLTYPE_LONG: *(uint64_t *)holdp = tswap64(*(long *)holdp); + break; case CTLTYPE_ULONG: *(uint64_t *)holdp = tswap64(*(unsigned long *)holdp); break; @@ -271,7 +211,7 @@ static abi_long lock_iovec(int type, struct iovec *vec, abi_ulong target_addr, target_vec = lock_user(VERIFY_READ, target_addr, count * sizeof(struct target_iovec), 1); if (!target_vec) return -TARGET_EFAULT; - for(i = 0;i < count; i++) { + for (i = 0;i < count; i++) { base = tswapl(target_vec[i].iov_base); vec[i].iov_len = tswapl(target_vec[i].iov_len); if (vec[i].iov_len != 0) { @@ -297,7 +237,7 @@ static abi_long unlock_iovec(struct iovec *vec, abi_ulong target_addr, target_vec = lock_user(VERIFY_READ, target_addr, count * sizeof(struct target_iovec), 1); if (!target_vec) return -TARGET_EFAULT; - for(i = 0;i < count; i++) { + for (i = 0;i < count; i++) { if (target_vec[i].iov_base) { base = tswapl(target_vec[i].iov_base); unlock_user(vec[i].iov_base, base, copy ? vec[i].iov_len : 0); @@ -325,16 +265,16 @@ abi_long do_freebsd_syscall(void *cpu_env, int num, abi_long arg1, #endif record_syscall_start(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, 0, 0); - if(do_strace) + if (do_strace) print_freebsd_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6); - switch(num) { + switch (num) { case TARGET_FREEBSD_NR_exit: #ifdef CONFIG_GPROF _mcleanup(); #endif gdb_exit(arg1); - qemu_plugin_atexit_cb(); + qemu_plugin_user_exit(); /* XXX: should free thread stack and CPU env */ _exit(arg1); ret = 0; /* avoid warning */ @@ -427,16 +367,16 @@ abi_long do_netbsd_syscall(void *cpu_env, int num, abi_long arg1, record_syscall_start(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, 0, 0); - if(do_strace) + if (do_strace) print_netbsd_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6); - switch(num) { + switch (num) { case TARGET_NETBSD_NR_exit: #ifdef CONFIG_GPROF _mcleanup(); #endif gdb_exit(arg1); - qemu_plugin_atexit_cb(); + qemu_plugin_user_exit(); /* XXX: should free thread stack and CPU env */ _exit(arg1); ret = 0; /* avoid warning */ @@ -506,16 +446,16 @@ abi_long do_openbsd_syscall(void *cpu_env, int num, abi_long arg1, record_syscall_start(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, 0, 0); - if(do_strace) + if (do_strace) print_openbsd_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6); - switch(num) { + switch (num) { case TARGET_OPENBSD_NR_exit: #ifdef CONFIG_GPROF _mcleanup(); #endif gdb_exit(arg1); - qemu_plugin_atexit_cb(); + qemu_plugin_user_exit(); /* XXX: should free thread stack and CPU env */ _exit(arg1); ret = 0; /* avoid warning */ diff --git a/bsd-user/syscall_defs.h b/bsd-user/syscall_defs.h index 207ddeecbfa..04a1a886d7b 100644 --- a/bsd-user/syscall_defs.h +++ b/bsd-user/syscall_defs.h @@ -1,114 +1,181 @@ -/* $OpenBSD: signal.h,v 1.19 2006/01/08 14:20:16 millert Exp $ */ -/* $NetBSD: signal.h,v 1.21 1996/02/09 18:25:32 christos Exp $ */ +/* + * System call related declarations + * + * Copyright (c) 2013-15 Stacey D. 
Son (sson at FreeBSD) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _SYSCALL_DEFS_H_ +#define _SYSCALL_DEFS_H_ + +#include + +#include "errno_defs.h" + +#include "freebsd/syscall_nr.h" +#include "netbsd/syscall_nr.h" +#include "openbsd/syscall_nr.h" + +/* + * machine/_types.h + * or x86/_types.h + */ /* - * Copyright (c) 1982, 1986, 1989, 1991, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. + * time_t seems to be very inconsistly defined for the different *BSD's... * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. + * FreeBSD uses a 64bits time_t except on i386 + * so we have to add a special case here. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * On NetBSD time_t is always defined as an int64_t. On OpenBSD time_t + * is always defined as an int. 
* - * @(#)signal.h 8.2 (Berkeley) 1/21/94 */ +#if (!defined(TARGET_I386)) +typedef int64_t target_freebsd_time_t; +#else +typedef int32_t target_freebsd_time_t; +#endif -#define TARGET_NSIG 32 /* counting 0; could be 33 (mask is 1-32) */ - -#define TARGET_SIGHUP 1 /* hangup */ -#define TARGET_SIGINT 2 /* interrupt */ -#define TARGET_SIGQUIT 3 /* quit */ -#define TARGET_SIGILL 4 /* illegal instruction (not reset when caught) */ -#define TARGET_SIGTRAP 5 /* trace trap (not reset when caught) */ -#define TARGET_SIGABRT 6 /* abort() */ -#define TARGET_SIGIOT SIGABRT /* compatibility */ -#define TARGET_SIGEMT 7 /* EMT instruction */ -#define TARGET_SIGFPE 8 /* floating point exception */ -#define TARGET_SIGKILL 9 /* kill (cannot be caught or ignored) */ -#define TARGET_SIGBUS 10 /* bus error */ -#define TARGET_SIGSEGV 11 /* segmentation violation */ -#define TARGET_SIGSYS 12 /* bad argument to system call */ -#define TARGET_SIGPIPE 13 /* write on a pipe with no one to read it */ -#define TARGET_SIGALRM 14 /* alarm clock */ -#define TARGET_SIGTERM 15 /* software termination signal from kill */ -#define TARGET_SIGURG 16 /* urgent condition on IO channel */ -#define TARGET_SIGSTOP 17 /* sendable stop signal not from tty */ -#define TARGET_SIGTSTP 18 /* stop signal from tty */ -#define TARGET_SIGCONT 19 /* continue a stopped process */ -#define TARGET_SIGCHLD 20 /* to parent on child stop or exit */ -#define TARGET_SIGTTIN 21 /* to readers pgrp upon background tty read */ -#define TARGET_SIGTTOU 22 /* like TTIN for output if (tp->t_local<OSTOP) */ -#define TARGET_SIGIO 23 /* input/output possible signal */ -#define TARGET_SIGXCPU 24 /* exceeded CPU time limit */ -#define TARGET_SIGXFSZ 25 /* exceeded file size limit */ -#define TARGET_SIGVTALRM 26 /* virtual time alarm */ -#define TARGET_SIGPROF 27 /* profiling time alarm */ -#define TARGET_SIGWINCH 28 /* window size changes */ -#define TARGET_SIGINFO 29 /* information request */ -#define TARGET_SIGUSR1 30 /* user defined signal 1 */ -#define TARGET_SIGUSR2 31 /* user defined signal 2 */ +struct target_iovec { + abi_long iov_base; /* Starting address */ + abi_long iov_len; /* Number of bytes */ +}; /* - * Language spec says we must list exactly one parameter, even though we - * actually supply three. Ugh! 
+ * sys/mman.h */ -#define TARGET_SIG_DFL (void (*)(int))0 -#define TARGET_SIG_IGN (void (*)(int))1 -#define TARGET_SIG_ERR (void (*)(int))-1 - -#define TARGET_SA_ONSTACK 0x0001 /* take signal on signal stack */ -#define TARGET_SA_RESTART 0x0002 /* restart system on signal return */ -#define TARGET_SA_RESETHAND 0x0004 /* reset to SIG_DFL when taking signal */ -#define TARGET_SA_NODEFER 0x0010 /* don't mask the signal we're delivering */ -#define TARGET_SA_NOCLDWAIT 0x0020 /* don't create zombies (assign to pid 1) */ -#define TARGET_SA_USERTRAMP 0x0100 /* do not bounce off kernel's sigtramp */ -#define TARGET_SA_NOCLDSTOP 0x0008 /* do not generate SIGCHLD on child stop */ -#define TARGET_SA_SIGINFO 0x0040 /* generate siginfo_t */ +#define TARGET_FREEBSD_MAP_RESERVED0080 0x0080 /* previously misimplemented */ + /* MAP_INHERIT */ +#define TARGET_FREEBSD_MAP_RESERVED0100 0x0100 /* previously unimplemented */ + /* MAP_NOEXTEND */ +#define TARGET_FREEBSD_MAP_STACK 0x0400 /* region grows down, like a */ + /* stack */ +#define TARGET_FREEBSD_MAP_NOSYNC 0x0800 /* page to but do not sync */ + /* underlying file */ + +#define TARGET_FREEBSD_MAP_FLAGMASK 0x1ff7 + +#define TARGET_NETBSD_MAP_INHERIT 0x0080 /* region is retained after */ + /* exec */ +#define TARGET_NETBSD_MAP_TRYFIXED 0x0400 /* attempt hint address, even */ + /* within break */ +#define TARGET_NETBSD_MAP_WIRED 0x0800 /* mlock() mapping when it is */ + /* established */ + +#define TARGET_NETBSD_MAP_STACK 0x2000 /* allocated from memory, */ + /* swap space (stack) */ + +#define TARGET_NETBSD_MAP_FLAGMASK 0x3ff7 + +#define TARGET_OPENBSD_MAP_INHERIT 0x0080 /* region is retained after */ + /* exec */ +#define TARGET_OPENBSD_MAP_NOEXTEND 0x0100 /* for MAP_FILE, don't change */ + /* file size */ +#define TARGET_OPENBSD_MAP_TRYFIXED 0x0400 /* attempt hint address, */ + /* even within heap */ + +#define TARGET_OPENBSD_MAP_FLAGMASK 0x17f7 + +/* XXX */ +#define TARGET_BSD_MAP_FLAGMASK 0x3ff7 /* - * Flags for sigprocmask: + * sys/time.h + * sys/timex.h */ -#define TARGET_SIG_BLOCK 1 /* block specified signal set */ -#define TARGET_SIG_UNBLOCK 2 /* unblock specified signal set */ -#define TARGET_SIG_SETMASK 3 /* set specified signal set */ -#define TARGET_BADSIG SIG_ERR +typedef abi_long target_freebsd_suseconds_t; -#define TARGET_SS_ONSTACK 0x0001 /* take signals on alternate stack */ -#define TARGET_SS_DISABLE 0x0004 /* disable taking signals on alternate stack */ +/* compare to sys/timespec.h */ +struct target_freebsd_timespec { + target_freebsd_time_t tv_sec; /* seconds */ + abi_long tv_nsec; /* and nanoseconds */ +#if !defined(TARGET_I386) && TARGET_ABI_BITS == 32 + abi_long _pad; +#endif +}; -#include "errno_defs.h" +#define TARGET_CPUCLOCK_WHICH_PID 0 +#define TARGET_CPUCLOCK_WHICH_TID 1 -#include "freebsd/syscall_nr.h" -#include "netbsd/syscall_nr.h" -#include "openbsd/syscall_nr.h" +/* sys/umtx.h */ +struct target_freebsd__umtx_time { + struct target_freebsd_timespec _timeout; + uint32_t _flags; + uint32_t _clockid; +}; -struct target_iovec { - abi_long iov_base; /* Starting address */ - abi_long iov_len; /* Number of bytes */ +struct target_freebsd_timeval { + target_freebsd_time_t tv_sec; /* seconds */ + target_freebsd_suseconds_t tv_usec;/* and microseconds */ +#if !defined(TARGET_I386) && TARGET_ABI_BITS == 32 + abi_long _pad; +#endif +}; + +/* + * sys/resource.h + */ +#if defined(__FreeBSD__) +#define TARGET_RLIM_INFINITY RLIM_INFINITY +#else +#define TARGET_RLIM_INFINITY ((abi_ulong)-1) +#endif + +#define TARGET_RLIMIT_CPU 0 +#define 
TARGET_RLIMIT_FSIZE 1 +#define TARGET_RLIMIT_DATA 2 +#define TARGET_RLIMIT_STACK 3 +#define TARGET_RLIMIT_CORE 4 +#define TARGET_RLIMIT_RSS 5 +#define TARGET_RLIMIT_MEMLOCK 6 +#define TARGET_RLIMIT_NPROC 7 +#define TARGET_RLIMIT_NOFILE 8 +#define TARGET_RLIMIT_SBSIZE 9 +#define TARGET_RLIMIT_AS 10 +#define TARGET_RLIMIT_NPTS 11 +#define TARGET_RLIMIT_SWAP 12 + +struct target_rlimit { + uint64_t rlim_cur; + uint64_t rlim_max; +}; + +struct target_freebsd_rusage { + struct target_freebsd_timeval ru_utime; /* user time used */ + struct target_freebsd_timeval ru_stime; /* system time used */ + abi_long ru_maxrss; /* maximum resident set size */ + abi_long ru_ixrss; /* integral shared memory size */ + abi_long ru_idrss; /* integral unshared data size */ + abi_long ru_isrss; /* integral unshared stack size */ + abi_long ru_minflt; /* page reclaims */ + abi_long ru_majflt; /* page faults */ + abi_long ru_nswap; /* swaps */ + abi_long ru_inblock; /* block input operations */ + abi_long ru_oublock; /* block output operations */ + abi_long ru_msgsnd; /* messages sent */ + abi_long ru_msgrcv; /* messages received */ + abi_long ru_nsignals; /* signals received */ + abi_long ru_nvcsw; /* voluntary context switches */ + abi_long ru_nivcsw; /* involuntary context switches */ +}; + +struct target_freebsd__wrusage { + struct target_freebsd_rusage wru_self; + struct target_freebsd_rusage wru_children; }; +#endif /* ! _SYSCALL_DEFS_H_ */ diff --git a/bsd-user/uaccess.c b/bsd-user/uaccess.c index 91e2067933d..89163257f4a 100644 --- a/bsd-user/uaccess.c +++ b/bsd-user/uaccess.c @@ -46,7 +46,7 @@ abi_long target_strlen(abi_ulong guest_addr1) int max_len, len; guest_addr = guest_addr1; - for(;;) { + for (;;) { max_len = TARGET_PAGE_SIZE - (guest_addr & ~TARGET_PAGE_MASK); ptr = lock_user(VERIFY_READ, guest_addr, max_len, 1); if (!ptr) diff --git a/bsd-user/x86_64/target_arch.h b/bsd-user/x86_64/target_arch.h new file mode 100644 index 00000000000..e558e1b956e --- /dev/null +++ b/bsd-user/x86_64/target_arch.h @@ -0,0 +1,31 @@ +/* + * Intel x86_64 specific prototypes for bsd-user + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#ifndef _TARGET_ARCH_H_ +#define _TARGET_ARCH_H_ + +/* target_arch_cpu.c */ +void bsd_x86_64_write_dt(void *ptr, unsigned long addr, unsigned long limit, + int flags); +void bsd_x86_64_set_idt(int n, unsigned int dpl); +void bsd_x86_64_set_idt_base(uint64_t base); + +#define target_cpu_set_tls(env, newtls) + +#endif /* !_TARGET_ARCH_H_ */ diff --git a/bsd-user/x86_64/target_arch_cpu.c b/bsd-user/x86_64/target_arch_cpu.c new file mode 100644 index 00000000000..be7bd107200 --- /dev/null +++ b/bsd-user/x86_64/target_arch_cpu.c @@ -0,0 +1,71 @@ +/* + * x86_64 cpu related code + * + * Copyright (c) 2013 Stacey Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include + +#include "qemu/osdep.h" +#include "cpu.h" +#include "qemu.h" +#include "qemu/timer.h" + +#include "target_arch.h" + +static uint64_t *idt_table; + +uint64_t cpu_get_tsc(CPUX86State *env) +{ + return cpu_get_host_ticks(); +} + +void bsd_x86_64_write_dt(void *ptr, unsigned long addr, + unsigned long limit, int flags) +{ + unsigned int e1, e2; + uint32_t *p; + e1 = (addr << 16) | (limit & 0xffff); + e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000); + e2 |= flags; + p = ptr; + p[0] = tswap32(e1); + p[1] = tswap32(e2); +} + +static void set_gate64(void *ptr, unsigned int type, unsigned int dpl, + uint64_t addr, unsigned int sel) +{ + uint32_t *p, e1, e2; + e1 = (addr & 0xffff) | (sel << 16); + e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); + p = ptr; + p[0] = tswap32(e1); + p[1] = tswap32(e2); + p[2] = tswap32(addr >> 32); + p[3] = 0; +} + +/* only dpl matters as we do only user space emulation */ +void bsd_x86_64_set_idt(int n, unsigned int dpl) +{ + set_gate64(idt_table + n * 2, 0, dpl, 0, 0); +} + +void bsd_x86_64_set_idt_base(uint64_t base) +{ + idt_table = g2h_untagged(base); +} diff --git a/bsd-user/x86_64/target_arch_cpu.h b/bsd-user/x86_64/target_arch_cpu.h new file mode 100644 index 00000000000..5172b230f09 --- /dev/null +++ b/bsd-user/x86_64/target_arch_cpu.h @@ -0,0 +1,245 @@ +/* + * x86_64 cpu init and loop + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#ifndef _TARGET_ARCH_CPU_H_ +#define _TARGET_ARCH_CPU_H_ + +#include "target_arch.h" + +#define TARGET_DEFAULT_CPU_MODEL "qemu64" + +static inline void target_cpu_init(CPUX86State *env, + struct target_pt_regs *regs) +{ + uint64_t *gdt_table; + + env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; + env->hflags |= HF_PE_MASK | HF_CPL_MASK; + if (env->features[FEAT_1_EDX] & CPUID_SSE) { + env->cr[4] |= CR4_OSFXSR_MASK; + env->hflags |= HF_OSFXSR_MASK; + } + + /* enable 64 bit mode if possible */ + if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) { + fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n"); + exit(1); + } + env->cr[4] |= CR4_PAE_MASK; + env->efer |= MSR_EFER_LMA | MSR_EFER_LME; + env->hflags |= HF_LMA_MASK; + + /* flags setup : we activate the IRQs by default as in user mode */ + env->eflags |= IF_MASK; + + /* register setup */ + env->regs[R_EAX] = regs->rax; + env->regs[R_EBX] = regs->rbx; + env->regs[R_ECX] = regs->rcx; + env->regs[R_EDX] = regs->rdx; + env->regs[R_ESI] = regs->rsi; + env->regs[R_EDI] = regs->rdi; + env->regs[R_EBP] = regs->rbp; + env->regs[R_ESP] = regs->rsp; + env->eip = regs->rip; + + /* interrupt setup */ + env->idt.limit = 511; + + env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1), + PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + bsd_x86_64_set_idt_base(env->idt.base); + bsd_x86_64_set_idt(0, 0); + bsd_x86_64_set_idt(1, 0); + bsd_x86_64_set_idt(2, 0); + bsd_x86_64_set_idt(3, 3); + bsd_x86_64_set_idt(4, 3); + bsd_x86_64_set_idt(5, 0); + bsd_x86_64_set_idt(6, 0); + bsd_x86_64_set_idt(7, 0); + bsd_x86_64_set_idt(8, 0); + bsd_x86_64_set_idt(9, 0); + bsd_x86_64_set_idt(10, 0); + bsd_x86_64_set_idt(11, 0); + bsd_x86_64_set_idt(12, 0); + bsd_x86_64_set_idt(13, 0); + bsd_x86_64_set_idt(14, 0); + bsd_x86_64_set_idt(15, 0); + bsd_x86_64_set_idt(16, 0); + bsd_x86_64_set_idt(17, 0); + bsd_x86_64_set_idt(18, 0); + bsd_x86_64_set_idt(19, 0); + bsd_x86_64_set_idt(0x80, 3); + + /* segment setup */ + env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES, + PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1; + gdt_table = g2h_untagged(env->gdt.base); + + /* 64 bit code segment */ + bsd_x86_64_write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | DESC_L_MASK + | (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); + + bsd_x86_64_write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | + (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT)); + + cpu_x86_load_seg(env, R_CS, __USER_CS); + cpu_x86_load_seg(env, R_SS, __USER_DS); + cpu_x86_load_seg(env, R_DS, 0); + cpu_x86_load_seg(env, R_ES, 0); + cpu_x86_load_seg(env, R_FS, 0); + cpu_x86_load_seg(env, R_GS, 0); +} + +static inline void target_cpu_loop(CPUX86State *env) +{ + CPUState *cs = env_cpu(env); + int trapnr; + abi_ulong pc; + /* target_siginfo_t info; */ + + for (;;) { + cpu_exec_start(cs); + trapnr = cpu_exec(cs); + cpu_exec_end(cs); + process_queued_cpu_work(cs); + + switch (trapnr) { + case 0x80: + /* syscall from int $0x80 */ + if (bsd_type == target_freebsd) { + abi_ulong params = (abi_ulong) env->regs[R_ESP] + + sizeof(int32_t); + int32_t syscall_nr = env->regs[R_EAX]; + int32_t arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8; + + if (syscall_nr == TARGET_FREEBSD_NR_syscall) { + get_user_s32(syscall_nr, params); + params += sizeof(int32_t); + } else if 
(syscall_nr == TARGET_FREEBSD_NR___syscall) { + get_user_s32(syscall_nr, params); + params += sizeof(int64_t); + } + get_user_s32(arg1, params); + params += sizeof(int32_t); + get_user_s32(arg2, params); + params += sizeof(int32_t); + get_user_s32(arg3, params); + params += sizeof(int32_t); + get_user_s32(arg4, params); + params += sizeof(int32_t); + get_user_s32(arg5, params); + params += sizeof(int32_t); + get_user_s32(arg6, params); + params += sizeof(int32_t); + get_user_s32(arg7, params); + params += sizeof(int32_t); + get_user_s32(arg8, params); + env->regs[R_EAX] = do_freebsd_syscall(env, + syscall_nr, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8); + } else { /* if (bsd_type == target_openbsd) */ + env->regs[R_EAX] = do_openbsd_syscall(env, + env->regs[R_EAX], + env->regs[R_EBX], + env->regs[R_ECX], + env->regs[R_EDX], + env->regs[R_ESI], + env->regs[R_EDI], + env->regs[R_EBP]); + } + if (((abi_ulong)env->regs[R_EAX]) >= (abi_ulong)(-515)) { + env->regs[R_EAX] = -env->regs[R_EAX]; + env->eflags |= CC_C; + } else { + env->eflags &= ~CC_C; + } + break; + + case EXCP_SYSCALL: + /* syscall from syscall instruction */ + if (bsd_type == target_freebsd) { + env->regs[R_EAX] = do_freebsd_syscall(env, + env->regs[R_EAX], + env->regs[R_EDI], + env->regs[R_ESI], + env->regs[R_EDX], + env->regs[R_ECX], + env->regs[8], + env->regs[9], 0, 0); + } else { /* if (bsd_type == target_openbsd) */ + env->regs[R_EAX] = do_openbsd_syscall(env, + env->regs[R_EAX], + env->regs[R_EDI], + env->regs[R_ESI], + env->regs[R_EDX], + env->regs[10], + env->regs[8], + env->regs[9]); + } + env->eip = env->exception_next_eip; + if (((abi_ulong)env->regs[R_EAX]) >= (abi_ulong)(-515)) { + env->regs[R_EAX] = -env->regs[R_EAX]; + env->eflags |= CC_C; + } else { + env->eflags &= ~CC_C; + } + break; + + case EXCP_INTERRUPT: + /* just indicate that signals should be handled asap */ + break; + + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; + + default: + pc = env->segs[R_CS].base + env->eip; + fprintf(stderr, "qemu: 0x%08lx: unhandled CPU exception 0x%x - " + "aborting\n", (long)pc, trapnr); + abort(); + } + process_pending_signals(env); + } +} + +static inline void target_cpu_clone_regs(CPUX86State *env, target_ulong newsp) +{ + if (newsp) { + env->regs[R_ESP] = newsp; + } + env->regs[R_EAX] = 0; +} + +static inline void target_cpu_reset(CPUArchState *cpu) +{ + cpu_reset(env_cpu(cpu)); +} + +#endif /* ! _TARGET_ARCH_CPU_H_ */ diff --git a/bsd-user/x86_64/target_arch_elf.h b/bsd-user/x86_64/target_arch_elf.h new file mode 100644 index 00000000000..c2f85539626 --- /dev/null +++ b/bsd-user/x86_64/target_arch_elf.h @@ -0,0 +1,35 @@ +/* + * x86_64 ELF definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ +#ifndef _TARGET_ARCH_ELF_H_ +#define _TARGET_ARCH_ELF_H_ + +#define ELF_START_MMAP 0x2aaaaab000ULL +#define ELF_ET_DYN_LOAD_ADDR 0x01021000 +#define elf_check_arch(x) (((x) == ELF_ARCH)) + +#define ELF_HWCAP 0 /* FreeBSD doesn't do AT_HWCAP{,2} on x86 */ + +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_X86_64 + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + +#endif /* _TARGET_ARCH_ELF_H_ */ diff --git a/bsd-user/x86_64/target_arch_reg.h b/bsd-user/x86_64/target_arch_reg.h new file mode 100644 index 00000000000..00e96245178 --- /dev/null +++ b/bsd-user/x86_64/target_arch_reg.h @@ -0,0 +1,92 @@ +/* + * FreeBSD amd64 register structures + * + * Copyright (c) 2015 Stacey Son + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_ARCH_REG_H_ +#define _TARGET_ARCH_REG_H_ + +/* See sys/amd64/include/reg.h */ +typedef struct target_reg { + uint64_t r_r15; + uint64_t r_r14; + uint64_t r_r13; + uint64_t r_r12; + uint64_t r_r11; + uint64_t r_r10; + uint64_t r_r9; + uint64_t r_r8; + uint64_t r_rdi; + uint64_t r_rsi; + uint64_t r_rbp; + uint64_t r_rbx; + uint64_t r_rdx; + uint64_t r_rcx; + uint64_t r_rax; + uint32_t r_trapno; + uint16_t r_fs; + uint16_t r_gs; + uint32_t r_err; + uint16_t r_es; + uint16_t r_ds; + uint64_t r_rip; + uint64_t r_cs; + uint64_t r_rflags; + uint64_t r_rsp; + uint64_t r_ss; +} target_reg_t; + +typedef struct target_fpreg { + uint64_t fpr_env[4]; + uint8_t fpr_acc[8][16]; + uint8_t fpr_xacc[16][16]; + uint64_t fpr_spare[12]; +} target_fpreg_t; + +static inline void target_copy_regs(target_reg_t *regs, const CPUX86State *env) +{ + + regs->r_r15 = env->regs[15]; + regs->r_r14 = env->regs[14]; + regs->r_r13 = env->regs[13]; + regs->r_r12 = env->regs[12]; + regs->r_r11 = env->regs[11]; + regs->r_r10 = env->regs[10]; + regs->r_r9 = env->regs[9]; + regs->r_r8 = env->regs[8]; + regs->r_rdi = env->regs[R_EDI]; + regs->r_rsi = env->regs[R_ESI]; + regs->r_rbp = env->regs[R_EBP]; + regs->r_rbx = env->regs[R_EBX]; + regs->r_rdx = env->regs[R_EDX]; + regs->r_rcx = env->regs[R_ECX]; + regs->r_rax = env->regs[R_EAX]; + /* regs->r_trapno = env->regs[R_TRAPNO]; XXX */ + regs->r_fs = env->segs[R_FS].selector & 0xffff; + regs->r_gs = env->segs[R_GS].selector & 0xffff; + regs->r_err = env->error_code; /* XXX ? 
 */
+    regs->r_es = env->segs[R_ES].selector & 0xffff;
+    regs->r_ds = env->segs[R_DS].selector & 0xffff;
+    regs->r_rip = env->eip;
+    regs->r_cs = env->segs[R_CS].selector & 0xffff;
+    regs->r_rflags = env->eflags;
+    regs->r_rsp = env->regs[R_ESP];
+    regs->r_ss = env->segs[R_SS].selector & 0xffff;
+}
+
+#endif /* !_TARGET_ARCH_REG_H_ */
diff --git a/bsd-user/x86_64/target_arch_signal.h b/bsd-user/x86_64/target_arch_signal.h
new file mode 100644
index 00000000000..4bb753b08bb
--- /dev/null
+++ b/bsd-user/x86_64/target_arch_signal.h
@@ -0,0 +1,92 @@
+/*
+ * x86_64 signal definitions
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see .
+ */
+#ifndef _TARGET_ARCH_SIGNAL_H_
+#define _TARGET_ARCH_SIGNAL_H_
+
+#include "cpu.h"
+
+/* Size of the signal trampoline code placed on the stack. */
+#define TARGET_SZSIGCODE 0
+
+/* compare to x86/include/_limits.h */
+#define TARGET_MINSIGSTKSZ (512 * 4)           /* min sig stack size */
+#define TARGET_SIGSTKSZ (MINSIGSTKSZ + 32768)  /* recommended size */
+
+struct target_sigcontext {
+    /* to be added */
+};
+
+typedef struct target_mcontext {
+} target_mcontext_t;
+
+typedef struct target_ucontext {
+    target_sigset_t   uc_sigmask;
+    target_mcontext_t uc_mcontext;
+    abi_ulong         uc_link;
+    target_stack_t    uc_stack;
+    int32_t           uc_flags;
+    int32_t           __spare__[4];
+} target_ucontext_t;
+
+struct target_sigframe {
+    abi_ulong          sf_signum;
+    abi_ulong          sf_siginfo;  /* code or pointer to sf_si */
+    abi_ulong          sf_ucontext; /* points to sf_uc */
+    abi_ulong          sf_addr;     /* undocumented 4th arg */
+    target_ucontext_t  sf_uc;       /* = *sf_ucontext */
+    target_siginfo_t   sf_si;       /* = *sf_siginfo (SA_SIGINFO case) */
+    uint32_t           __spare__[2];
+};
+
+/*
+ * Compare to amd64/amd64/machdep.c sendsig()
+ * Assumes that target stack frame memory is locked.
+ */
+static inline abi_long set_sigtramp_args(CPUX86State *regs,
+        int sig, struct target_sigframe *frame, abi_ulong frame_addr,
+        struct target_sigaction *ka)
+{
+    /* XXX return -TARGET_EOPNOTSUPP; */
+    return 0;
+}
+
+/* Compare to amd64/amd64/machdep.c get_mcontext() */
+static inline abi_long get_mcontext(CPUX86State *regs,
+        target_mcontext_t *mcp, int flags)
+{
+    /* XXX */
+    return -TARGET_EOPNOTSUPP;
+}
+
+/* Compare to amd64/amd64/machdep.c set_mcontext() */
+static inline abi_long set_mcontext(CPUX86State *regs,
+        target_mcontext_t *mcp, int srflag)
+{
+    /* XXX */
+    return -TARGET_EOPNOTSUPP;
+}
+
+static inline abi_long get_ucontext_sigreturn(CPUX86State *regs,
+        abi_ulong target_sf, abi_ulong *target_uc)
+{
+    /* XXX */
+    *target_uc = 0;
+    return -TARGET_EOPNOTSUPP;
+}
+
+#endif /* !TARGET_ARCH_SIGNAL_H_ */
diff --git a/bsd-user/x86_64/target_arch_sigtramp.h b/bsd-user/x86_64/target_arch_sigtramp.h
new file mode 100644
index 00000000000..29d4a8b55f3
--- /dev/null
+++ b/bsd-user/x86_64/target_arch_sigtramp.h
@@ -0,0 +1,29 @@
+/*
+ * Intel x86_64 sigcode for bsd-user
+ *
+ * Copyright (c) 2013 Stacey D.
Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_ARCH_SIGTRAMP_H_ +#define _TARGET_ARCH_SIGTRAMP_H_ + +static inline abi_long setup_sigtramp(abi_ulong offset, unsigned sigf_uc, + unsigned sys_sigreturn) +{ + + return 0; +} +#endif /* _TARGET_ARCH_SIGTRAMP_H_ */ diff --git a/bsd-user/x86_64/target_arch_thread.h b/bsd-user/x86_64/target_arch_thread.h new file mode 100644 index 00000000000..d105e43fd35 --- /dev/null +++ b/bsd-user/x86_64/target_arch_thread.h @@ -0,0 +1,40 @@ +/* + * x86_64 thread support + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#ifndef _TARGET_ARCH_THREAD_H_ +#define _TARGET_ARCH_THREAD_H_ + +/* Compare to vm_machdep.c cpu_set_upcall_kse() */ +static inline void target_thread_set_upcall(CPUX86State *regs, abi_ulong entry, + abi_ulong arg, abi_ulong stack_base, abi_ulong stack_size) +{ + /* XXX */ +} + +static inline void target_thread_init(struct target_pt_regs *regs, + struct image_info *infop) +{ + regs->rax = 0; + regs->rsp = infop->start_stack; + regs->rip = infop->entry; + if (bsd_type == target_freebsd) { + regs->rdi = infop->start_stack; + } +} + +#endif /* !_TARGET_ARCH_THREAD_H_ */ diff --git a/bsd-user/x86_64/target_arch_vmparam.h b/bsd-user/x86_64/target_arch_vmparam.h new file mode 100644 index 00000000000..81a915f2e55 --- /dev/null +++ b/bsd-user/x86_64/target_arch_vmparam.h @@ -0,0 +1,46 @@ +/* + * Intel x86_64 VM parameters definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ +#ifndef _TARGET_ARCH_VMPARAM_H_ +#define _TARGET_ARCH_VMPARAM_H_ + +#include "cpu.h" + +/* compare to amd64/include/vmparam.h */ +#define TARGET_MAXTSIZ (128 * MiB) /* max text size */ +#define TARGET_DFLDSIZ (32 * GiB) /* initial data size limit */ +#define TARGET_MAXDSIZ (32 * GiB) /* max data size */ +#define TARGET_DFLSSIZ (8 * MiB) /* initial stack size limit */ +#define TARGET_MAXSSIZ (512 * MiB) /* max stack size */ +#define TARGET_SGROWSIZ (128 * KiB) /* amount to grow stack */ + +#define TARGET_VM_MAXUSER_ADDRESS (0x00007fffff000000UL) + +#define TARGET_USRSTACK (TARGET_VM_MAXUSER_ADDRESS - TARGET_PAGE_SIZE) + +static inline abi_ulong get_sp_from_cpustate(CPUX86State *state) +{ + return state->regs[R_ESP]; +} + +static inline void set_second_rval(CPUX86State *state, abi_ulong retval2) +{ + state->regs[R_EDX] = retval2; +} + +#endif /* !_TARGET_ARCH_VMPARAM_H_ */ diff --git a/bsd-user/x86_64/target_signal.h b/bsd-user/x86_64/target_signal.h deleted file mode 100644 index 659cd401b82..00000000000 --- a/bsd-user/x86_64/target_signal.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef TARGET_SIGNAL_H -#define TARGET_SIGNAL_H - -#include "cpu.h" - -/* this struct defines a stack used during syscall handling */ - -typedef struct target_sigaltstack { - abi_ulong ss_sp; - abi_long ss_flags; - abi_ulong ss_size; -} target_stack_t; - -static inline abi_ulong get_sp_from_cpustate(CPUX86State *state) -{ - return state->regs[R_ESP]; -} - -#endif /* TARGET_SIGNAL_H */ diff --git a/chardev/baum.c b/chardev/baum.c index 5deca778bc4..79d618e3504 100644 --- a/chardev/baum.c +++ b/chardev/baum.c @@ -680,6 +680,7 @@ static const TypeInfo char_braille_type_info = { .instance_finalize = char_braille_finalize, .class_init = char_braille_class_init, }; +module_obj(TYPE_CHARDEV_BRAILLE); static void register_types(void) { diff --git a/chardev/char-fd.c b/chardev/char-fd.c index 1cd62f2779b..93c56913b49 100644 --- a/chardev/char-fd.c +++ b/chardev/char-fd.c @@ -28,6 +28,7 @@ #include "qemu/sockets.h" #include "qapi/error.h" #include "chardev/char.h" +#include "chardev/char-fe.h" #include "io/channel-file.h" #include "chardev/char-fd.h" @@ -38,6 +39,10 @@ static int fd_chr_write(Chardev *chr, const uint8_t *buf, int len) { FDChardev *s = FD_CHARDEV(chr); + if (!s->ioc_out) { + return -1; + } + return io_channel_send(s->ioc_out, buf, len); } @@ -80,10 +85,85 @@ static int fd_chr_read_poll(void *opaque) return s->max_size; } +typedef struct FDSource { + GSource parent; + + GIOCondition cond; +} FDSource; + +static gboolean +fd_source_prepare(GSource *source, + gint *timeout_) +{ + FDSource *src = (FDSource *)source; + + return src->cond != 0; +} + +static gboolean +fd_source_check(GSource *source) +{ + FDSource *src = (FDSource *)source; + + return src->cond != 0; +} + +static gboolean +fd_source_dispatch(GSource *source, GSourceFunc callback, + gpointer user_data) +{ + FDSource *src = (FDSource *)source; + FEWatchFunc func = (FEWatchFunc)callback; + gboolean ret = G_SOURCE_CONTINUE; + + if (src->cond) { + ret = func(NULL, src->cond, user_data); + src->cond = 0; + } + + return ret; +} + +static GSourceFuncs fd_source_funcs = { + fd_source_prepare, + fd_source_check, + fd_source_dispatch, + NULL, NULL, NULL +}; + +static GSource *fd_source_new(FDChardev *chr) +{ + return g_source_new(&fd_source_funcs, sizeof(FDSource)); +} + +static gboolean child_func(GIOChannel *source, + GIOCondition condition, + gpointer data) +{ + FDSource *parent = data; + + parent->cond |= condition; + + return G_SOURCE_CONTINUE; +} + 
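The chardev hunk that follows replaces char-fd.c's single-channel watch: since ioc_in and ioc_out may now be two different QIOChannels (or one of them may be missing), fd_chr_add_watch() builds a parent GSource, attaches one child source per channel, and child_func() only accumulates the ready condition while the parent's dispatch hands the merged bits to the front-end callback once. The standalone sketch below shows the same GLib child-source pattern outside QEMU; it is only an approximation, and every name in it (AggSource, agg_source_new, child_ready) plus the use of plain POSIX file descriptors instead of QIOChannels is invented for the example.

/* Illustrative sketch only -- not part of the patch above; POSIX-only
 * because it relies on g_unix_fd_source_new(). */
#include <glib.h>
#include <glib-unix.h>

typedef struct {
    GSource parent;
    GIOCondition cond;              /* conditions reported by the children */
} AggSource;

typedef gboolean (*AggFunc)(GIOCondition cond, gpointer user_data);

static gboolean agg_prepare(GSource *source, gint *timeout)
{
    *timeout = -1;
    return ((AggSource *)source)->cond != 0;
}

static gboolean agg_check(GSource *source)
{
    return ((AggSource *)source)->cond != 0;
}

static gboolean agg_dispatch(GSource *source, GSourceFunc cb, gpointer data)
{
    AggSource *src = (AggSource *)source;
    gboolean ret = G_SOURCE_CONTINUE;

    if (src->cond) {
        ret = ((AggFunc)cb)(src->cond, data);   /* one call, merged bits */
        src->cond = 0;                          /* wait for the next report */
    }
    return ret;
}

static GSourceFuncs agg_funcs = { agg_prepare, agg_check, agg_dispatch, NULL };

/* Child callback: remember what became ready, let the parent dispatch it. */
static gboolean child_ready(gint fd, GIOCondition cond, gpointer data)
{
    ((AggSource *)data)->cond |= cond;
    return G_SOURCE_CONTINUE;
}

/* One parent source watching fd_in for input and fd_out for output. */
static GSource *agg_source_new(int fd_in, int fd_out)
{
    GSource *source = g_source_new(&agg_funcs, sizeof(AggSource));
    GSource *in = g_unix_fd_source_new(fd_in, G_IO_IN);
    GSource *out = g_unix_fd_source_new(fd_out, G_IO_OUT);

    g_source_set_callback(in, (GSourceFunc)child_ready, source, NULL);
    g_source_set_callback(out, (GSourceFunc)child_ready, source, NULL);
    g_source_add_child_source(source, in);
    g_source_add_child_source(source, out);
    g_source_unref(in);             /* the parent keeps its own reference */
    g_source_unref(out);
    return source;
}

Attaching the returned source to a main context with g_source_attach() and running a GMainLoop delivers a single callback per iteration with the merged condition, mirroring how fd_source_dispatch() in the patch reports src->cond once and then clears it.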
static GSource *fd_chr_add_watch(Chardev *chr, GIOCondition cond) { FDChardev *s = FD_CHARDEV(chr); - return qio_channel_create_watch(s->ioc_out, cond); + g_autoptr(GSource) source = fd_source_new(s); + + if (s->ioc_out) { + g_autoptr(GSource) child = qio_channel_create_watch(s->ioc_out, cond & ~G_IO_IN); + g_source_set_callback(child, (GSourceFunc)child_func, source, NULL); + g_source_add_child_source(source, child); + } + if (s->ioc_in) { + g_autoptr(GSource) child = qio_channel_create_watch(s->ioc_in, cond & ~G_IO_OUT); + g_source_set_callback(child, (GSourceFunc)child_func, source, NULL); + g_source_add_child_source(source, child); + } + + return g_steal_pointer(&source); } static void fd_chr_update_read_handler(Chardev *chr) @@ -131,17 +211,32 @@ void qemu_chr_open_fd(Chardev *chr, int fd_in, int fd_out) { FDChardev *s = FD_CHARDEV(chr); - char *name; - - s->ioc_in = QIO_CHANNEL(qio_channel_file_new_fd(fd_in)); - name = g_strdup_printf("chardev-file-in-%s", chr->label); - qio_channel_set_name(QIO_CHANNEL(s->ioc_in), name); - g_free(name); - s->ioc_out = QIO_CHANNEL(qio_channel_file_new_fd(fd_out)); - name = g_strdup_printf("chardev-file-out-%s", chr->label); - qio_channel_set_name(QIO_CHANNEL(s->ioc_out), name); - g_free(name); - qemu_set_nonblock(fd_out); + g_autofree char *name = NULL; + + if (fd_out >= 0) { + qemu_set_nonblock(fd_out); + } + + if (fd_out == fd_in && fd_in >= 0) { + s->ioc_in = QIO_CHANNEL(qio_channel_file_new_fd(fd_in)); + name = g_strdup_printf("chardev-file-%s", chr->label); + qio_channel_set_name(QIO_CHANNEL(s->ioc_in), name); + s->ioc_out = QIO_CHANNEL(object_ref(s->ioc_in)); + return; + } + + if (fd_in >= 0) { + s->ioc_in = QIO_CHANNEL(qio_channel_file_new_fd(fd_in)); + name = g_strdup_printf("chardev-file-in-%s", chr->label); + qio_channel_set_name(QIO_CHANNEL(s->ioc_in), name); + } + + if (fd_out >= 0) { + s->ioc_out = QIO_CHANNEL(qio_channel_file_new_fd(fd_out)); + g_free(name); + name = g_strdup_printf("chardev-file-out-%s", chr->label); + qio_channel_set_name(QIO_CHANNEL(s->ioc_out), name); + } } static void char_fd_class_init(ObjectClass *oc, void *data) diff --git a/chardev/char-fe.c b/chardev/char-fe.c index 474715c5a92..7789f7be9c8 100644 --- a/chardev/char-fe.c +++ b/chardev/char-fe.c @@ -354,7 +354,7 @@ void qemu_chr_fe_set_open(CharBackend *be, int fe_open) } guint qemu_chr_fe_add_watch(CharBackend *be, GIOCondition cond, - GIOFunc func, void *user_data) + FEWatchFunc func, void *user_data) { Chardev *s = be->chr; GSource *src; diff --git a/chardev/char-mux.c b/chardev/char-mux.c index 72beef29d21..ee2d47b20d9 100644 --- a/chardev/char-mux.c +++ b/chardev/char-mux.c @@ -28,7 +28,7 @@ #include "qemu/option.h" #include "chardev/char.h" #include "sysemu/block-backend.h" -#include "sysemu/sysemu.h" +#include "qapi/qapi-commands-control.h" #include "chardev-internal.h" /* MUX driver for serial I/O splitting */ @@ -158,7 +158,7 @@ static int mux_proc_byte(Chardev *chr, MuxChardev *d, int ch) { const char *term = "QEMU: Terminated\n\r"; qemu_chr_write_all(chr, (uint8_t *)term, strlen(term)); - exit(0); + qmp_quit(NULL); break; } case 's': @@ -387,10 +387,9 @@ void suspend_mux_open(void) static int chardev_options_parsed_cb(Object *child, void *opaque) { Chardev *chr = (Chardev *)child; - ChardevClass *class = CHARDEV_GET_CLASS(chr); - if (!chr->be_open && class->chr_options_parsed) { - class->chr_options_parsed(chr); + if (!chr->be_open && CHARDEV_IS_MUX(chr)) { + open_muxes(chr); } return 0; @@ -413,7 +412,6 @@ static void char_mux_class_init(ObjectClass 
*oc, void *data) cc->chr_accept_input = mux_chr_accept_input; cc->chr_add_watch = mux_chr_add_watch; cc->chr_be_event = mux_chr_be_event; - cc->chr_options_parsed = open_muxes; cc->chr_update_read_handler = mux_chr_update_read_handlers; } diff --git a/chardev/char-socket.c b/chardev/char-socket.c index daa89fe5d1d..836cfa0bc21 100644 --- a/chardev/char-socket.c +++ b/chardev/char-socket.c @@ -468,9 +468,9 @@ static char *qemu_chr_socket_address(SocketChardev *s, const char *prefix) #ifdef CONFIG_LINUX if (sa->has_abstract && sa->abstract) { - abstract = ",abstract"; + abstract = ",abstract=on"; if (sa->has_tight && sa->tight) { - tight = ",tight"; + tight = ",tight=on"; } } #endif @@ -1402,18 +1402,12 @@ static void qmp_chardev_open_socket(Chardev *chr, return; } object_ref(OBJECT(s->tls_creds)); - if (is_listen) { - if (s->tls_creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { - error_setg(errp, "%s", - "Expected TLS credentials for server endpoint"); - return; - } - } else { - if (s->tls_creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) { - error_setg(errp, "%s", - "Expected TLS credentials for client endpoint"); - return; - } + if (!qcrypto_tls_creds_check_endpoint(s->tls_creds, + is_listen + ? QCRYPTO_TLS_CREDS_ENDPOINT_SERVER + : QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT, + errp)) { + return; } } s->tls_authz = g_strdup(sock->tls_authz); @@ -1526,7 +1520,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend, addr = g_new0(SocketAddressLegacy, 1); if (path) { UnixSocketAddress *q_unix; - addr->type = SOCKET_ADDRESS_LEGACY_KIND_UNIX; + addr->type = SOCKET_ADDRESS_TYPE_UNIX; q_unix = addr->u.q_unix.data = g_new0(UnixSocketAddress, 1); q_unix->path = g_strdup(path); #ifdef CONFIG_LINUX @@ -1536,7 +1530,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend, q_unix->abstract = abstract; #endif } else if (host) { - addr->type = SOCKET_ADDRESS_LEGACY_KIND_INET; + addr->type = SOCKET_ADDRESS_TYPE_INET; addr->u.inet.data = g_new(InetSocketAddress, 1); *addr->u.inet.data = (InetSocketAddress) { .host = g_strdup(host), @@ -1549,7 +1543,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend, .ipv6 = qemu_opt_get_bool(opts, "ipv6", 0), }; } else if (fd) { - addr->type = SOCKET_ADDRESS_LEGACY_KIND_FD; + addr->type = SOCKET_ADDRESS_TYPE_FD; addr->u.fd.data = g_new(String, 1); addr->u.fd.data->str = g_strdup(fd); } else { diff --git a/chardev/char-udp.c b/chardev/char-udp.c index 16b5dbce582..6756e69924c 100644 --- a/chardev/char-udp.c +++ b/chardev/char-udp.c @@ -165,7 +165,7 @@ static void qemu_chr_parse_udp(QemuOpts *opts, ChardevBackend *backend, qemu_chr_parse_common(opts, qapi_ChardevUdp_base(udp)); addr = g_new0(SocketAddressLegacy, 1); - addr->type = SOCKET_ADDRESS_LEGACY_KIND_INET; + addr->type = SOCKET_ADDRESS_TYPE_INET; addr->u.inet.data = g_new(InetSocketAddress, 1); *addr->u.inet.data = (InetSocketAddress) { .host = g_strdup(host), @@ -180,7 +180,7 @@ static void qemu_chr_parse_udp(QemuOpts *opts, ChardevBackend *backend, if (has_local) { udp->has_local = true; addr = g_new0(SocketAddressLegacy, 1); - addr->type = SOCKET_ADDRESS_LEGACY_KIND_INET; + addr->type = SOCKET_ADDRESS_TYPE_INET; addr->u.inet.data = g_new(InetSocketAddress, 1); *addr->u.inet.data = (InetSocketAddress) { .host = g_strdup(localaddr), diff --git a/chardev/char.c b/chardev/char.c index 398f09df19c..0169d8dde4b 100644 --- a/chardev/char.c +++ b/chardev/char.c @@ -25,7 +25,6 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" #include 
"monitor/monitor.h" -#include "sysemu/sysemu.h" #include "qemu/config-file.h" #include "qemu/error-report.h" #include "qemu/qemu-print.h" @@ -242,18 +241,15 @@ static void qemu_char_open(Chardev *chr, ChardevBackend *backend, ChardevCommon *common = backend ? backend->u.null.data : NULL; if (common && common->has_logfile) { - int flags = O_WRONLY | O_CREAT; + int flags = O_WRONLY; if (common->has_logappend && common->logappend) { flags |= O_APPEND; } else { flags |= O_TRUNC; } - chr->logfd = qemu_open_old(common->logfile, flags, 0666); + chr->logfd = qemu_create(common->logfile, flags, 0666, errp); if (chr->logfd < 0) { - error_setg_errno(errp, errno, - "Unable to open logfile %s", - common->logfile); return; } } @@ -932,6 +928,12 @@ QemuOptsList qemu_chardev_opts = { },{ .name = "logappend", .type = QEMU_OPT_BOOL, + },{ + .name = "mouse", + .type = QEMU_OPT_BOOL, + },{ + .name = "clipboard", + .type = QEMU_OPT_BOOL, #ifdef CONFIG_LINUX },{ .name = "tight", @@ -1026,27 +1028,31 @@ Chardev *qemu_chardev_new(const char *id, const char *typename, ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend, Error **errp) { + ERRP_GUARD(); const ChardevClass *cc; ChardevReturn *ret; - Chardev *chr; + g_autoptr(Chardev) chr = NULL; + + if (qemu_chr_find(id)) { + error_setg(errp, "Chardev with id '%s' already exists", id); + return NULL; + } cc = char_get_class(ChardevBackendKind_str(backend->type), errp); if (!cc) { - return NULL; + goto err; } chr = chardev_new(id, object_class_get_name(OBJECT_CLASS(cc)), backend, NULL, false, errp); if (!chr) { - return NULL; + goto err; } if (!object_property_try_add_child(get_chardevs_root(), id, OBJECT(chr), errp)) { - object_unref(OBJECT(chr)); - return NULL; + goto err; } - object_unref(OBJECT(chr)); ret = g_new0(ChardevReturn, 1); if (CHARDEV_IS_PTY(chr)) { @@ -1055,6 +1061,10 @@ ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend, } return ret; + +err: + error_prepend(errp, "Failed to add chardev '%s': ", id); + return NULL; } ChardevReturn *qmp_chardev_change(const char *id, ChardevBackend *backend, diff --git a/chardev/meson.build b/chardev/meson.build index 32377af383e..325ba2bdb97 100644 --- a/chardev/meson.build +++ b/chardev/meson.build @@ -35,7 +35,7 @@ if brlapi.found() chardev_modules += { 'baum': module_ss } endif -if config_host.has_key('CONFIG_SPICE') +if spice.found() module_ss = ss.source_set() module_ss.add(when: [spice], if_true: files('spice.c')) chardev_modules += { 'spice': module_ss } diff --git a/chardev/spice.c b/chardev/spice.c index 1104426e3a1..bbffef49136 100644 --- a/chardev/spice.c +++ b/chardev/spice.c @@ -366,6 +366,7 @@ static const TypeInfo char_spice_type_info = { .class_init = char_spice_class_init, .abstract = true, }; +module_obj(TYPE_CHARDEV_SPICE); static void char_spicevmc_class_init(ObjectClass *oc, void *data) { @@ -381,6 +382,7 @@ static const TypeInfo char_spicevmc_type_info = { .parent = TYPE_CHARDEV_SPICE, .class_init = char_spicevmc_class_init, }; +module_obj(TYPE_CHARDEV_SPICEVMC); static void char_spiceport_class_init(ObjectClass *oc, void *data) { @@ -396,6 +398,7 @@ static const TypeInfo char_spiceport_type_info = { .parent = TYPE_CHARDEV_SPICE, .class_init = char_spiceport_class_init, }; +module_obj(TYPE_CHARDEV_SPICEPORT); static void register_types(void) { @@ -405,3 +408,5 @@ static void register_types(void) } type_init(register_types); + +module_dep("ui-spice-core"); diff --git a/chardev/trace-events b/chardev/trace-events index 5ea44082073..027107b0c10 100644 --- 
a/chardev/trace-events +++ b/chardev/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # wctablet.c wct_init(void) "" diff --git a/chardev/wctablet.c b/chardev/wctablet.c index 95e005f5a56..e8b292c43ca 100644 --- a/chardev/wctablet.c +++ b/chardev/wctablet.c @@ -320,7 +320,6 @@ static void wctablet_chr_finalize(Object *obj) TabletChardev *tablet = WCTABLET_CHARDEV(obj); qemu_input_handler_unregister(tablet->hs); - g_free(tablet); } static void wctablet_chr_open(Chardev *chr, diff --git a/configs/devices/aarch64-softmmu/default.mak b/configs/devices/aarch64-softmmu/default.mak new file mode 100644 index 00000000000..cf43ac8da11 --- /dev/null +++ b/configs/devices/aarch64-softmmu/default.mak @@ -0,0 +1,8 @@ +# Default configuration for aarch64-softmmu + +# We support all the 32 bit boards so need all their config +include ../arm-softmmu/default.mak + +CONFIG_XLNX_ZYNQMP_ARM=y +CONFIG_XLNX_VERSAL=y +CONFIG_SBSA_REF=y diff --git a/configs/devices/aarch64-softmmu/minimal.mak b/configs/devices/aarch64-softmmu/minimal.mak new file mode 100644 index 00000000000..0ebc1dca561 --- /dev/null +++ b/configs/devices/aarch64-softmmu/minimal.mak @@ -0,0 +1,9 @@ +# +# A minimal version of the config that only supports only a few +# virtual machines. This avoids bringing in any of numerous legacy +# features from the 32bit platform (although virt still supports 32bit +# itself) +# + +CONFIG_ARM_VIRT=y +CONFIG_SBSA_REF=y diff --git a/default-configs/devices/alpha-softmmu.mak b/configs/devices/alpha-softmmu/default.mak similarity index 100% rename from default-configs/devices/alpha-softmmu.mak rename to configs/devices/alpha-softmmu/default.mak diff --git a/configs/devices/arm-softmmu/default.mak b/configs/devices/arm-softmmu/default.mak new file mode 100644 index 00000000000..6985a25377a --- /dev/null +++ b/configs/devices/arm-softmmu/default.mak @@ -0,0 +1,44 @@ +# Default configuration for arm-softmmu + +# CONFIG_PCI_DEVICES=n +# CONFIG_TEST_DEVICES=n + +CONFIG_ARM_VIRT=y +CONFIG_CUBIEBOARD=y +CONFIG_EXYNOS4=y +CONFIG_HIGHBANK=y +CONFIG_INTEGRATOR=y +CONFIG_FSL_IMX31=y +CONFIG_MUSICPAL=y +CONFIG_MUSCA=y +CONFIG_CHEETAH=y +CONFIG_SX1=y +CONFIG_NSERIES=y +CONFIG_STELLARIS=y +CONFIG_STM32VLDISCOVERY=y +CONFIG_REALVIEW=y +CONFIG_VERSATILE=y +CONFIG_VEXPRESS=y +CONFIG_ZYNQ=y +CONFIG_MAINSTONE=y +CONFIG_GUMSTIX=y +CONFIG_SPITZ=y +CONFIG_TOSA=y +CONFIG_Z2=y +CONFIG_NPCM7XX=y +CONFIG_COLLIE=y +CONFIG_ASPEED_SOC=y +CONFIG_NETDUINO2=y +CONFIG_NETDUINOPLUS2=y +CONFIG_MPS2=y +CONFIG_RASPI=y +CONFIG_DIGIC=y +CONFIG_SABRELITE=y +CONFIG_EMCRAFT_SF2=y +CONFIG_MICROBIT=y +CONFIG_FSL_IMX25=y +CONFIG_FSL_IMX7=y +CONFIG_FSL_IMX6UL=y +CONFIG_SEMIHOSTING=y +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y +CONFIG_ALLWINNER_H3=y diff --git a/default-configs/devices/avr-softmmu.mak b/configs/devices/avr-softmmu/default.mak similarity index 100% rename from default-configs/devices/avr-softmmu.mak rename to configs/devices/avr-softmmu/default.mak diff --git a/default-configs/devices/cris-softmmu.mak b/configs/devices/cris-softmmu/default.mak similarity index 100% rename from default-configs/devices/cris-softmmu.mak rename to configs/devices/cris-softmmu/default.mak diff --git a/default-configs/devices/hppa-softmmu.mak b/configs/devices/hppa-softmmu/default.mak similarity index 100% rename from default-configs/devices/hppa-softmmu.mak rename to configs/devices/hppa-softmmu/default.mak diff --git a/configs/devices/i386-softmmu/default.mak 
b/configs/devices/i386-softmmu/default.mak new file mode 100644 index 00000000000..598c6646dfc --- /dev/null +++ b/configs/devices/i386-softmmu/default.mak @@ -0,0 +1,32 @@ +# Default configuration for i386-softmmu + +# Uncomment the following lines to disable these optional devices: +# +#CONFIG_AMD_IOMMU=n +#CONFIG_APPLESMC=n +#CONFIG_FDC=n +#CONFIG_HPET=n +#CONFIG_HYPERV=n +#CONFIG_ISA_DEBUG=n +#CONFIG_ISA_IPMI_BT=n +#CONFIG_ISA_IPMI_KCS=n +#CONFIG_PCI_IPMI_KCS=n +#CONFIG_PCI_IPMI_BT=n +#CONFIG_IPMI_SSIF=n +#CONFIG_PCI_DEVICES=n +#CONFIG_PVPANIC=n +#CONFIG_QXL=n +#CONFIG_SEV=n +#CONFIG_SGA=n +#CONFIG_TEST_DEVICES=n +#CONFIG_TPM_CRB=n +#CONFIG_TPM_TIS_ISA=n +#CONFIG_VTD=n +#CONFIG_SGX=n + +# Boards: +# +CONFIG_ISAPC=y +CONFIG_I440FX=y +CONFIG_Q35=y +CONFIG_MICROVM=y diff --git a/default-configs/devices/m68k-softmmu.mak b/configs/devices/m68k-softmmu/default.mak similarity index 100% rename from default-configs/devices/m68k-softmmu.mak rename to configs/devices/m68k-softmmu/default.mak diff --git a/default-configs/devices/microblaze-softmmu.mak b/configs/devices/microblaze-softmmu/default.mak similarity index 100% rename from default-configs/devices/microblaze-softmmu.mak rename to configs/devices/microblaze-softmmu/default.mak diff --git a/configs/devices/microblazeel-softmmu/default.mak b/configs/devices/microblazeel-softmmu/default.mak new file mode 100644 index 00000000000..29f7f13816c --- /dev/null +++ b/configs/devices/microblazeel-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for microblazeel-softmmu + +include ../microblaze-softmmu/default.mak diff --git a/configs/devices/mips-softmmu/common.mak b/configs/devices/mips-softmmu/common.mak new file mode 100644 index 00000000000..4801b560511 --- /dev/null +++ b/configs/devices/mips-softmmu/common.mak @@ -0,0 +1,39 @@ +# Common mips*-softmmu CONFIG defines + +# CONFIG_SEMIHOSTING is always required on this architecture +CONFIG_SEMIHOSTING=y + +CONFIG_ISA_BUS=y +CONFIG_PCI=y +CONFIG_PCI_DEVICES=y +CONFIG_VGA_ISA=y +CONFIG_VGA_ISA_MM=y +CONFIG_VGA_CIRRUS=y +CONFIG_VMWARE_VGA=y +CONFIG_SERIAL=y +CONFIG_SERIAL_ISA=y +CONFIG_PARALLEL=y +CONFIG_I8254=y +CONFIG_PCSPK=y +CONFIG_PCKBD=y +CONFIG_FDC=y +CONFIG_ACPI=y +CONFIG_ACPI_PIIX4=y +CONFIG_APM=y +CONFIG_I8257=y +CONFIG_PIIX4=y +CONFIG_IDE_ISA=y +CONFIG_IDE_PIIX=y +CONFIG_PFLASH_CFI01=y +CONFIG_I8259=y +CONFIG_MC146818RTC=y +CONFIG_EMPTY_SLOT=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_ITU=y +CONFIG_MALTA=y +CONFIG_PCNET_PCI=y +CONFIG_MIPSSIM=y +CONFIG_ACPI_SMBUS=y +CONFIG_SMBUS_EEPROM=y +CONFIG_TEST_DEVICES=y +CONFIG_VIRTIO_MMIO=y diff --git a/configs/devices/mips-softmmu/default.mak b/configs/devices/mips-softmmu/default.mak new file mode 100644 index 00000000000..c23d95a83aa --- /dev/null +++ b/configs/devices/mips-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for mips-softmmu + +include common.mak diff --git a/configs/devices/mips64-softmmu/default.mak b/configs/devices/mips64-softmmu/default.mak new file mode 100644 index 00000000000..566672f3c22 --- /dev/null +++ b/configs/devices/mips64-softmmu/default.mak @@ -0,0 +1,4 @@ +# Default configuration for mips64-softmmu + +include ../mips-softmmu/common.mak +CONFIG_JAZZ=y diff --git a/configs/devices/mips64cheri128-softmmu/default.mak b/configs/devices/mips64cheri128-softmmu/default.mak new file mode 100644 index 00000000000..a4f5f83d993 --- /dev/null +++ b/configs/devices/mips64cheri128-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for cheri128-softmmu + +include ../mips64-softmmu/default.mak diff --git 
a/configs/devices/mips64el-softmmu/default.mak b/configs/devices/mips64el-softmmu/default.mak new file mode 100644 index 00000000000..c610749ac13 --- /dev/null +++ b/configs/devices/mips64el-softmmu/default.mak @@ -0,0 +1,11 @@ +# Default configuration for mips64el-softmmu + +include ../mips-softmmu/common.mak +CONFIG_IDE_VIA=y +CONFIG_FULOONG=y +CONFIG_LOONGSON3V=y +CONFIG_ATI_VGA=y +CONFIG_RTL8139_PCI=y +CONFIG_JAZZ=y +CONFIG_VT82C686=y +CONFIG_MIPS_BOSTON=y diff --git a/configs/devices/mipsel-softmmu/default.mak b/configs/devices/mipsel-softmmu/default.mak new file mode 100644 index 00000000000..009ccb0e2da --- /dev/null +++ b/configs/devices/mipsel-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for mipsel-softmmu + +include ../mips-softmmu/common.mak diff --git a/default-configs/devices/morello-softmmu.mak b/configs/devices/morello-softmmu/default.mak similarity index 100% rename from default-configs/devices/morello-softmmu.mak rename to configs/devices/morello-softmmu/default.mak diff --git a/default-configs/devices/nios2-softmmu.mak b/configs/devices/nios2-softmmu/default.mak similarity index 100% rename from default-configs/devices/nios2-softmmu.mak rename to configs/devices/nios2-softmmu/default.mak diff --git a/default-configs/devices/or1k-softmmu.mak b/configs/devices/or1k-softmmu/default.mak similarity index 100% rename from default-configs/devices/or1k-softmmu.mak rename to configs/devices/or1k-softmmu/default.mak diff --git a/configs/devices/ppc-softmmu/default.mak b/configs/devices/ppc-softmmu/default.mak new file mode 100644 index 00000000000..658a454426e --- /dev/null +++ b/configs/devices/ppc-softmmu/default.mak @@ -0,0 +1,19 @@ +# Default configuration for ppc-softmmu + +# For embedded PPCs: +CONFIG_E500=y +CONFIG_PPC405=y +CONFIG_PPC440=y +CONFIG_VIRTEX=y + +# For Sam460ex +CONFIG_SAM460EX=y + +# For Macs +CONFIG_MAC_OLDWORLD=y +CONFIG_MAC_NEWWORLD=y + +CONFIG_PEGASOS2=y + +# For PReP +CONFIG_PREP=y diff --git a/configs/devices/ppc64-softmmu/default.mak b/configs/devices/ppc64-softmmu/default.mak new file mode 100644 index 00000000000..b90e5bf4558 --- /dev/null +++ b/configs/devices/ppc64-softmmu/default.mak @@ -0,0 +1,10 @@ +# Default configuration for ppc64-softmmu + +# Include all 32-bit boards +include ../ppc-softmmu/default.mak + +# For PowerNV +CONFIG_POWERNV=y + +# For pSeries +CONFIG_PSERIES=y diff --git a/default-configs/devices/riscv32-softmmu.mak b/configs/devices/riscv32-softmmu/default.mak similarity index 100% rename from default-configs/devices/riscv32-softmmu.mak rename to configs/devices/riscv32-softmmu/default.mak diff --git a/configs/devices/riscv32cheri-softmmu/default.mak b/configs/devices/riscv32cheri-softmmu/default.mak new file mode 100644 index 00000000000..c830babc49d --- /dev/null +++ b/configs/devices/riscv32cheri-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for riscv32cheri-softmmu + +include ../riscv32-softmmu/default.mak diff --git a/configs/devices/riscv64-softmmu/default.mak b/configs/devices/riscv64-softmmu/default.mak new file mode 100644 index 00000000000..bc69301fa4a --- /dev/null +++ b/configs/devices/riscv64-softmmu/default.mak @@ -0,0 +1,16 @@ +# Default configuration for riscv64-softmmu + +# Uncomment the following lines to disable these optional devices: +# +#CONFIG_PCI_DEVICES=n +CONFIG_SEMIHOSTING=y +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y + +# Boards: +# +CONFIG_SPIKE=y +CONFIG_SIFIVE_E=y +CONFIG_SIFIVE_U=y +CONFIG_RISCV_VIRT=y +CONFIG_MICROCHIP_PFSOC=y +CONFIG_SHAKTI_C=y diff --git 
a/configs/devices/riscv64cheri-softmmu/default.mak b/configs/devices/riscv64cheri-softmmu/default.mak new file mode 100644 index 00000000000..57993434da3 --- /dev/null +++ b/configs/devices/riscv64cheri-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for riscv64cheri-softmmu + +include ../riscv64-softmmu/default.mak diff --git a/default-configs/devices/rx-softmmu.mak b/configs/devices/rx-softmmu/default.mak similarity index 100% rename from default-configs/devices/rx-softmmu.mak rename to configs/devices/rx-softmmu/default.mak diff --git a/default-configs/devices/s390x-softmmu.mak b/configs/devices/s390x-softmmu/default.mak similarity index 100% rename from default-configs/devices/s390x-softmmu.mak rename to configs/devices/s390x-softmmu/default.mak diff --git a/default-configs/devices/sh4-softmmu.mak b/configs/devices/sh4-softmmu/default.mak similarity index 100% rename from default-configs/devices/sh4-softmmu.mak rename to configs/devices/sh4-softmmu/default.mak diff --git a/configs/devices/sh4eb-softmmu/default.mak b/configs/devices/sh4eb-softmmu/default.mak new file mode 100644 index 00000000000..f18d1f65199 --- /dev/null +++ b/configs/devices/sh4eb-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for sh4eb-softmmu + +include ../sh4-softmmu/default.mak diff --git a/default-configs/devices/sparc-softmmu.mak b/configs/devices/sparc-softmmu/default.mak similarity index 100% rename from default-configs/devices/sparc-softmmu.mak rename to configs/devices/sparc-softmmu/default.mak diff --git a/default-configs/devices/sparc64-softmmu.mak b/configs/devices/sparc64-softmmu/default.mak similarity index 100% rename from default-configs/devices/sparc64-softmmu.mak rename to configs/devices/sparc64-softmmu/default.mak diff --git a/configs/devices/tricore-softmmu/default.mak b/configs/devices/tricore-softmmu/default.mak new file mode 100644 index 00000000000..cb8fc286eb2 --- /dev/null +++ b/configs/devices/tricore-softmmu/default.mak @@ -0,0 +1,2 @@ +CONFIG_TRICORE_TESTBOARD=y +CONFIG_TRIBOARD=y diff --git a/configs/devices/x86_64-softmmu/default.mak b/configs/devices/x86_64-softmmu/default.mak new file mode 100644 index 00000000000..ddfc2ea6266 --- /dev/null +++ b/configs/devices/x86_64-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for x86_64-softmmu + +include ../i386-softmmu/default.mak diff --git a/default-configs/devices/xtensa-softmmu.mak b/configs/devices/xtensa-softmmu/default.mak similarity index 100% rename from default-configs/devices/xtensa-softmmu.mak rename to configs/devices/xtensa-softmmu/default.mak diff --git a/configs/devices/xtensaeb-softmmu/default.mak b/configs/devices/xtensaeb-softmmu/default.mak new file mode 100644 index 00000000000..00eafcc292e --- /dev/null +++ b/configs/devices/xtensaeb-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for Xtensa + +include ../xtensa-softmmu/default.mak diff --git a/configs/targets/aarch64-linux-user.mak b/configs/targets/aarch64-linux-user.mak new file mode 100644 index 00000000000..d0c603c54ec --- /dev/null +++ b/configs/targets/aarch64-linux-user.mak @@ -0,0 +1,5 @@ +TARGET_ARCH=aarch64 +TARGET_BASE_ARCH=arm +TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml +TARGET_HAS_BFLT=y +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/configs/targets/aarch64-softmmu.mak b/configs/targets/aarch64-softmmu.mak new file mode 100644 index 00000000000..d489e6da830 --- /dev/null +++ b/configs/targets/aarch64-softmmu.mak @@ -0,0 +1,5 @@ +TARGET_ARCH=aarch64 +TARGET_BASE_ARCH=arm 
+TARGET_SUPPORTS_MTTCG=y +TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-vfp-sysregs.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml gdb-xml/arm-m-profile-mve.xml +TARGET_NEED_FDT=y diff --git a/configs/targets/aarch64_be-linux-user.mak b/configs/targets/aarch64_be-linux-user.mak new file mode 100644 index 00000000000..d3ee10c00f3 --- /dev/null +++ b/configs/targets/aarch64_be-linux-user.mak @@ -0,0 +1,6 @@ +TARGET_ARCH=aarch64 +TARGET_BASE_ARCH=arm +TARGET_WORDS_BIGENDIAN=y +TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml +TARGET_HAS_BFLT=y +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/default-configs/targets/alpha-linux-user.mak b/configs/targets/alpha-linux-user.mak similarity index 100% rename from default-configs/targets/alpha-linux-user.mak rename to configs/targets/alpha-linux-user.mak diff --git a/default-configs/targets/alpha-softmmu.mak b/configs/targets/alpha-softmmu.mak similarity index 100% rename from default-configs/targets/alpha-softmmu.mak rename to configs/targets/alpha-softmmu.mak diff --git a/configs/targets/arm-linux-user.mak b/configs/targets/arm-linux-user.mak new file mode 100644 index 00000000000..3e10d6b15d5 --- /dev/null +++ b/configs/targets/arm-linux-user.mak @@ -0,0 +1,6 @@ +TARGET_ARCH=arm +TARGET_SYSTBL_ABI=common,oabi +TARGET_SYSTBL=syscall.tbl +TARGET_XML_FILES= gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-vfp-sysregs.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml gdb-xml/arm-m-profile-mve.xml +TARGET_HAS_BFLT=y +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/configs/targets/arm-softmmu.mak b/configs/targets/arm-softmmu.mak new file mode 100644 index 00000000000..92c8349b964 --- /dev/null +++ b/configs/targets/arm-softmmu.mak @@ -0,0 +1,4 @@ +TARGET_ARCH=arm +TARGET_SUPPORTS_MTTCG=y +TARGET_XML_FILES= gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-vfp-sysregs.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml gdb-xml/arm-m-profile-mve.xml +TARGET_NEED_FDT=y diff --git a/configs/targets/armeb-linux-user.mak b/configs/targets/armeb-linux-user.mak new file mode 100644 index 00000000000..f81e5bf1fe4 --- /dev/null +++ b/configs/targets/armeb-linux-user.mak @@ -0,0 +1,7 @@ +TARGET_ARCH=arm +TARGET_SYSTBL_ABI=common,oabi +TARGET_SYSTBL=syscall.tbl +TARGET_WORDS_BIGENDIAN=y +TARGET_XML_FILES= gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-vfp-sysregs.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml gdb-xml/arm-m-profile-mve.xml +TARGET_HAS_BFLT=y +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/default-configs/targets/avr-softmmu.mak b/configs/targets/avr-softmmu.mak similarity index 100% rename from default-configs/targets/avr-softmmu.mak rename to configs/targets/avr-softmmu.mak diff --git a/default-configs/targets/cris-linux-user.mak b/configs/targets/cris-linux-user.mak similarity index 100% rename from default-configs/targets/cris-linux-user.mak rename to configs/targets/cris-linux-user.mak diff --git a/default-configs/targets/cris-softmmu.mak b/configs/targets/cris-softmmu.mak similarity index 100% rename from default-configs/targets/cris-softmmu.mak rename to configs/targets/cris-softmmu.mak diff --git a/default-configs/targets/hexagon-linux-user.mak b/configs/targets/hexagon-linux-user.mak similarity index 100% rename from default-configs/targets/hexagon-linux-user.mak rename to configs/targets/hexagon-linux-user.mak diff --git 
a/default-configs/targets/hppa-linux-user.mak b/configs/targets/hppa-linux-user.mak similarity index 100% rename from default-configs/targets/hppa-linux-user.mak rename to configs/targets/hppa-linux-user.mak diff --git a/default-configs/targets/hppa-softmmu.mak b/configs/targets/hppa-softmmu.mak similarity index 100% rename from default-configs/targets/hppa-softmmu.mak rename to configs/targets/hppa-softmmu.mak diff --git a/default-configs/targets/i386-bsd-user.mak b/configs/targets/i386-bsd-user.mak similarity index 100% rename from default-configs/targets/i386-bsd-user.mak rename to configs/targets/i386-bsd-user.mak diff --git a/default-configs/targets/i386-linux-user.mak b/configs/targets/i386-linux-user.mak similarity index 100% rename from default-configs/targets/i386-linux-user.mak rename to configs/targets/i386-linux-user.mak diff --git a/configs/targets/i386-softmmu.mak b/configs/targets/i386-softmmu.mak new file mode 100644 index 00000000000..6b3c99fc86c --- /dev/null +++ b/configs/targets/i386-softmmu.mak @@ -0,0 +1,4 @@ +TARGET_ARCH=i386 +TARGET_SUPPORTS_MTTCG=y +TARGET_NEED_FDT=y +TARGET_XML_FILES= gdb-xml/i386-32bit.xml diff --git a/default-configs/targets/m68k-linux-user.mak b/configs/targets/m68k-linux-user.mak similarity index 100% rename from default-configs/targets/m68k-linux-user.mak rename to configs/targets/m68k-linux-user.mak diff --git a/default-configs/targets/m68k-softmmu.mak b/configs/targets/m68k-softmmu.mak similarity index 100% rename from default-configs/targets/m68k-softmmu.mak rename to configs/targets/m68k-softmmu.mak diff --git a/default-configs/targets/microblaze-linux-user.mak b/configs/targets/microblaze-linux-user.mak similarity index 100% rename from default-configs/targets/microblaze-linux-user.mak rename to configs/targets/microblaze-linux-user.mak diff --git a/default-configs/targets/microblaze-softmmu.mak b/configs/targets/microblaze-softmmu.mak similarity index 100% rename from default-configs/targets/microblaze-softmmu.mak rename to configs/targets/microblaze-softmmu.mak diff --git a/default-configs/targets/microblazeel-linux-user.mak b/configs/targets/microblazeel-linux-user.mak similarity index 100% rename from default-configs/targets/microblazeel-linux-user.mak rename to configs/targets/microblazeel-linux-user.mak diff --git a/default-configs/targets/microblazeel-softmmu.mak b/configs/targets/microblazeel-softmmu.mak similarity index 100% rename from default-configs/targets/microblazeel-softmmu.mak rename to configs/targets/microblazeel-softmmu.mak diff --git a/default-configs/targets/mips-linux-user.mak b/configs/targets/mips-linux-user.mak similarity index 100% rename from default-configs/targets/mips-linux-user.mak rename to configs/targets/mips-linux-user.mak diff --git a/default-configs/targets/mips-softmmu.mak b/configs/targets/mips-softmmu.mak similarity index 100% rename from default-configs/targets/mips-softmmu.mak rename to configs/targets/mips-softmmu.mak diff --git a/default-configs/targets/mips64-linux-user.mak b/configs/targets/mips64-linux-user.mak similarity index 100% rename from default-configs/targets/mips64-linux-user.mak rename to configs/targets/mips64-linux-user.mak diff --git a/default-configs/targets/mips64-softmmu.mak b/configs/targets/mips64-softmmu.mak similarity index 100% rename from default-configs/targets/mips64-softmmu.mak rename to configs/targets/mips64-softmmu.mak diff --git a/default-configs/targets/mips64cheri-softmmu-common.mak b/configs/targets/mips64cheri-softmmu-common.mak similarity index 100% rename 
from default-configs/targets/mips64cheri-softmmu-common.mak rename to configs/targets/mips64cheri-softmmu-common.mak diff --git a/default-configs/targets/mips64cheri128-softmmu.mak b/configs/targets/mips64cheri128-softmmu.mak similarity index 100% rename from default-configs/targets/mips64cheri128-softmmu.mak rename to configs/targets/mips64cheri128-softmmu.mak diff --git a/default-configs/targets/mips64el-linux-user.mak b/configs/targets/mips64el-linux-user.mak similarity index 100% rename from default-configs/targets/mips64el-linux-user.mak rename to configs/targets/mips64el-linux-user.mak diff --git a/default-configs/targets/mips64el-softmmu.mak b/configs/targets/mips64el-softmmu.mak similarity index 100% rename from default-configs/targets/mips64el-softmmu.mak rename to configs/targets/mips64el-softmmu.mak diff --git a/default-configs/targets/mipsel-linux-user.mak b/configs/targets/mipsel-linux-user.mak similarity index 100% rename from default-configs/targets/mipsel-linux-user.mak rename to configs/targets/mipsel-linux-user.mak diff --git a/default-configs/targets/mipsel-softmmu.mak b/configs/targets/mipsel-softmmu.mak similarity index 100% rename from default-configs/targets/mipsel-softmmu.mak rename to configs/targets/mipsel-softmmu.mak diff --git a/default-configs/targets/mipsn32-linux-user.mak b/configs/targets/mipsn32-linux-user.mak similarity index 100% rename from default-configs/targets/mipsn32-linux-user.mak rename to configs/targets/mipsn32-linux-user.mak diff --git a/default-configs/targets/mipsn32el-linux-user.mak b/configs/targets/mipsn32el-linux-user.mak similarity index 100% rename from default-configs/targets/mipsn32el-linux-user.mak rename to configs/targets/mipsn32el-linux-user.mak diff --git a/default-configs/targets/morello-softmmu.mak b/configs/targets/morello-softmmu.mak similarity index 100% rename from default-configs/targets/morello-softmmu.mak rename to configs/targets/morello-softmmu.mak diff --git a/default-configs/targets/nios2-linux-user.mak b/configs/targets/nios2-linux-user.mak similarity index 100% rename from default-configs/targets/nios2-linux-user.mak rename to configs/targets/nios2-linux-user.mak diff --git a/default-configs/targets/nios2-softmmu.mak b/configs/targets/nios2-softmmu.mak similarity index 100% rename from default-configs/targets/nios2-softmmu.mak rename to configs/targets/nios2-softmmu.mak diff --git a/default-configs/targets/or1k-linux-user.mak b/configs/targets/or1k-linux-user.mak similarity index 100% rename from default-configs/targets/or1k-linux-user.mak rename to configs/targets/or1k-linux-user.mak diff --git a/default-configs/targets/or1k-softmmu.mak b/configs/targets/or1k-softmmu.mak similarity index 100% rename from default-configs/targets/or1k-softmmu.mak rename to configs/targets/or1k-softmmu.mak diff --git a/default-configs/targets/ppc-linux-user.mak b/configs/targets/ppc-linux-user.mak similarity index 100% rename from default-configs/targets/ppc-linux-user.mak rename to configs/targets/ppc-linux-user.mak diff --git a/default-configs/targets/ppc-softmmu.mak b/configs/targets/ppc-softmmu.mak similarity index 100% rename from default-configs/targets/ppc-softmmu.mak rename to configs/targets/ppc-softmmu.mak diff --git a/default-configs/targets/ppc64-linux-user.mak b/configs/targets/ppc64-linux-user.mak similarity index 100% rename from default-configs/targets/ppc64-linux-user.mak rename to configs/targets/ppc64-linux-user.mak diff --git a/default-configs/targets/ppc64-softmmu.mak b/configs/targets/ppc64-softmmu.mak 
similarity index 100% rename from default-configs/targets/ppc64-softmmu.mak rename to configs/targets/ppc64-softmmu.mak diff --git a/default-configs/targets/ppc64abi32-linux-user.mak b/configs/targets/ppc64abi32-linux-user.mak similarity index 100% rename from default-configs/targets/ppc64abi32-linux-user.mak rename to configs/targets/ppc64abi32-linux-user.mak diff --git a/default-configs/targets/ppc64le-linux-user.mak b/configs/targets/ppc64le-linux-user.mak similarity index 100% rename from default-configs/targets/ppc64le-linux-user.mak rename to configs/targets/ppc64le-linux-user.mak diff --git a/default-configs/targets/riscv32-linux-user.mak b/configs/targets/riscv32-linux-user.mak similarity index 100% rename from default-configs/targets/riscv32-linux-user.mak rename to configs/targets/riscv32-linux-user.mak diff --git a/default-configs/targets/riscv32-softmmu.mak b/configs/targets/riscv32-softmmu.mak similarity index 100% rename from default-configs/targets/riscv32-softmmu.mak rename to configs/targets/riscv32-softmmu.mak diff --git a/default-configs/targets/riscv32cheri-softmmu.mak b/configs/targets/riscv32cheri-softmmu.mak similarity index 100% rename from default-configs/targets/riscv32cheri-softmmu.mak rename to configs/targets/riscv32cheri-softmmu.mak diff --git a/default-configs/targets/riscv64-linux-user.mak b/configs/targets/riscv64-linux-user.mak similarity index 100% rename from default-configs/targets/riscv64-linux-user.mak rename to configs/targets/riscv64-linux-user.mak diff --git a/default-configs/targets/riscv64-softmmu.mak b/configs/targets/riscv64-softmmu.mak similarity index 100% rename from default-configs/targets/riscv64-softmmu.mak rename to configs/targets/riscv64-softmmu.mak diff --git a/default-configs/targets/riscv64cheri-softmmu.mak b/configs/targets/riscv64cheri-softmmu.mak similarity index 100% rename from default-configs/targets/riscv64cheri-softmmu.mak rename to configs/targets/riscv64cheri-softmmu.mak diff --git a/default-configs/targets/rx-softmmu.mak b/configs/targets/rx-softmmu.mak similarity index 100% rename from default-configs/targets/rx-softmmu.mak rename to configs/targets/rx-softmmu.mak diff --git a/default-configs/targets/s390x-linux-user.mak b/configs/targets/s390x-linux-user.mak similarity index 100% rename from default-configs/targets/s390x-linux-user.mak rename to configs/targets/s390x-linux-user.mak diff --git a/default-configs/targets/s390x-softmmu.mak b/configs/targets/s390x-softmmu.mak similarity index 100% rename from default-configs/targets/s390x-softmmu.mak rename to configs/targets/s390x-softmmu.mak diff --git a/default-configs/targets/sh4-linux-user.mak b/configs/targets/sh4-linux-user.mak similarity index 100% rename from default-configs/targets/sh4-linux-user.mak rename to configs/targets/sh4-linux-user.mak diff --git a/default-configs/targets/sh4-softmmu.mak b/configs/targets/sh4-softmmu.mak similarity index 100% rename from default-configs/targets/sh4-softmmu.mak rename to configs/targets/sh4-softmmu.mak diff --git a/default-configs/targets/sh4eb-linux-user.mak b/configs/targets/sh4eb-linux-user.mak similarity index 100% rename from default-configs/targets/sh4eb-linux-user.mak rename to configs/targets/sh4eb-linux-user.mak diff --git a/default-configs/targets/sh4eb-softmmu.mak b/configs/targets/sh4eb-softmmu.mak similarity index 100% rename from default-configs/targets/sh4eb-softmmu.mak rename to configs/targets/sh4eb-softmmu.mak diff --git a/default-configs/targets/sparc-linux-user.mak b/configs/targets/sparc-linux-user.mak 
similarity index 100% rename from default-configs/targets/sparc-linux-user.mak rename to configs/targets/sparc-linux-user.mak diff --git a/default-configs/targets/sparc-softmmu.mak b/configs/targets/sparc-softmmu.mak similarity index 100% rename from default-configs/targets/sparc-softmmu.mak rename to configs/targets/sparc-softmmu.mak diff --git a/default-configs/targets/sparc32plus-linux-user.mak b/configs/targets/sparc32plus-linux-user.mak similarity index 100% rename from default-configs/targets/sparc32plus-linux-user.mak rename to configs/targets/sparc32plus-linux-user.mak diff --git a/default-configs/targets/sparc64-linux-user.mak b/configs/targets/sparc64-linux-user.mak similarity index 87% rename from default-configs/targets/sparc64-linux-user.mak rename to configs/targets/sparc64-linux-user.mak index 846924201ae..9d23ab4a266 100644 --- a/default-configs/targets/sparc64-linux-user.mak +++ b/configs/targets/sparc64-linux-user.mak @@ -1,5 +1,6 @@ TARGET_ARCH=sparc64 TARGET_BASE_ARCH=sparc +TARGET_ABI_DIR=sparc TARGET_SYSTBL_ABI=common,64 TARGET_SYSTBL=syscall.tbl TARGET_ALIGNED_ONLY=y diff --git a/default-configs/targets/sparc64-softmmu.mak b/configs/targets/sparc64-softmmu.mak similarity index 100% rename from default-configs/targets/sparc64-softmmu.mak rename to configs/targets/sparc64-softmmu.mak diff --git a/default-configs/targets/tricore-softmmu.mak b/configs/targets/tricore-softmmu.mak similarity index 100% rename from default-configs/targets/tricore-softmmu.mak rename to configs/targets/tricore-softmmu.mak diff --git a/default-configs/targets/x86_64-bsd-user.mak b/configs/targets/x86_64-bsd-user.mak similarity index 100% rename from default-configs/targets/x86_64-bsd-user.mak rename to configs/targets/x86_64-bsd-user.mak diff --git a/default-configs/targets/x86_64-linux-user.mak b/configs/targets/x86_64-linux-user.mak similarity index 100% rename from default-configs/targets/x86_64-linux-user.mak rename to configs/targets/x86_64-linux-user.mak diff --git a/configs/targets/x86_64-softmmu.mak b/configs/targets/x86_64-softmmu.mak new file mode 100644 index 00000000000..197817c9434 --- /dev/null +++ b/configs/targets/x86_64-softmmu.mak @@ -0,0 +1,5 @@ +TARGET_ARCH=x86_64 +TARGET_BASE_ARCH=i386 +TARGET_SUPPORTS_MTTCG=y +TARGET_NEED_FDT=y +TARGET_XML_FILES= gdb-xml/i386-64bit.xml diff --git a/default-configs/targets/xtensa-linux-user.mak b/configs/targets/xtensa-linux-user.mak similarity index 80% rename from default-configs/targets/xtensa-linux-user.mak rename to configs/targets/xtensa-linux-user.mak index fc95cc60f58..420b30a68d9 100644 --- a/default-configs/targets/xtensa-linux-user.mak +++ b/configs/targets/xtensa-linux-user.mak @@ -1,5 +1,4 @@ TARGET_ARCH=xtensa TARGET_SYSTBL_ABI=common TARGET_SYSTBL=syscall.tbl -TARGET_ALIGNED_ONLY=y TARGET_HAS_BFLT=y diff --git a/configs/targets/xtensa-softmmu.mak b/configs/targets/xtensa-softmmu.mak new file mode 100644 index 00000000000..f075557bfa9 --- /dev/null +++ b/configs/targets/xtensa-softmmu.mak @@ -0,0 +1,2 @@ +TARGET_ARCH=xtensa +TARGET_SUPPORTS_MTTCG=y diff --git a/default-configs/targets/xtensaeb-linux-user.mak b/configs/targets/xtensaeb-linux-user.mak similarity index 83% rename from default-configs/targets/xtensaeb-linux-user.mak rename to configs/targets/xtensaeb-linux-user.mak index cfc3518118c..1ea0f1ba915 100644 --- a/default-configs/targets/xtensaeb-linux-user.mak +++ b/configs/targets/xtensaeb-linux-user.mak @@ -1,6 +1,5 @@ TARGET_ARCH=xtensa TARGET_SYSTBL_ABI=common TARGET_SYSTBL=syscall.tbl -TARGET_ALIGNED_ONLY=y 
TARGET_WORDS_BIGENDIAN=y TARGET_HAS_BFLT=y diff --git a/configs/targets/xtensaeb-softmmu.mak b/configs/targets/xtensaeb-softmmu.mak new file mode 100644 index 00000000000..405cf5acbb4 --- /dev/null +++ b/configs/targets/xtensaeb-softmmu.mak @@ -0,0 +1,3 @@ +TARGET_ARCH=xtensa +TARGET_WORDS_BIGENDIAN=y +TARGET_SUPPORTS_MTTCG=y diff --git a/configure b/configure index cc203001f9d..3ffd89689a4 100755 --- a/configure +++ b/configure @@ -142,11 +142,11 @@ lines: ${BASH_LINENO[*]}" } do_cc() { - do_compiler "$cc" "$@" + do_compiler "$cc" $CPU_CFLAGS "$@" } do_cxx() { - do_compiler "$cxx" "$@" + do_compiler "$cxx" $CPU_CFLAGS "$@" } # Append $2 to the variable named $1, with space separation @@ -158,8 +158,8 @@ update_cxxflags() { # Set QEMU_CXXFLAGS from QEMU_CFLAGS by filtering out those # options which some versions of GCC's C++ compiler complain about # because they only make sense for C programs. - QEMU_CXXFLAGS="$QEMU_CXXFLAGS -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS" - CONFIGURE_CXXFLAGS=$(echo "$CONFIGURE_CFLAGS" | sed s/-std=gnu99/-std=gnu++11/) + QEMU_CXXFLAGS="-D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS" + CONFIGURE_CXXFLAGS=$(echo "$CONFIGURE_CFLAGS" | sed s/-std=gnu11/-std=gnu++11/) for arg in $QEMU_CFLAGS; do case $arg in -Wstrict-prototypes|-Wmissing-prototypes|-Wnested-externs|\ @@ -174,14 +174,14 @@ update_cxxflags() { compile_object() { local_cflags="$1" - do_cc $CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags -c -o $TMPO $TMPC + do_cc $CFLAGS $EXTRA_CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags -c -o $TMPO $TMPC } compile_prog() { local_cflags="$1" local_ldflags="$2" - do_cc $CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags -o $TMPE $TMPC \ - $LDFLAGS $CONFIGURE_LDFLAGS $QEMU_LDFLAGS $local_ldflags + do_cc $CFLAGS $EXTRA_CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags -o $TMPE $TMPC \ + $LDFLAGS $EXTRA_LDFLAGS $CONFIGURE_LDFLAGS $QEMU_LDFLAGS $local_ldflags } # symbolically link $1 to $2. Portable version of "ln -sf". @@ -216,10 +216,6 @@ version_ge () { done } -have_backend () { - echo "$trace_backends" | grep "$1" >/dev/null -} - glob() { eval test -z '"${1#'"$2"'}"' } @@ -240,11 +236,11 @@ interp_prefix="/usr/gnemul/qemu-%M" static="no" cross_compile="no" cross_prefix="" -audio_drv_list="" +audio_drv_list="default" block_drv_rw_whitelist="" block_drv_ro_whitelist="" +block_drv_whitelist_tools="no" host_cc="cc" -audio_win_int="" libs_qga="" debug_info="yes" lto="false" @@ -256,31 +252,11 @@ gdb_bin=$(command -v "gdb-multiarch" || command -v "gdb") if test -e "$source_path/.git" then git_submodules_action="update" - git_submodules="ui/keycodemapdb" - git_submodules="$git_submodules tests/fp/berkeley-testfloat-3" - git_submodules="$git_submodules tests/fp/berkeley-softfloat-3" else git_submodules_action="ignore" - git_submodules="" - - if ! test -f "$source_path/ui/keycodemapdb/README" - then - echo - echo "ERROR: missing file $source_path/ui/keycodemapdb/README" - echo - echo "This is not a GIT checkout but module content appears to" - echo "be missing. Do not use 'git archive' or GitHub download links" - echo "to acquire QEMU source archives. Non-GIT builds are only" - echo "supported with source archives linked from:" - echo - echo " https://www.qemu.org/download/#source" - echo - echo "Developers working with GIT can use scripts/archive-source.sh" - echo "if they need to create valid source archives." 
- echo - exit 1 - fi fi + +git_submodules="ui/keycodemapdb" git="git" # Don't accept a target_list environment variable. @@ -310,48 +286,21 @@ for opt do esac done -brlapi="auto" -curl="auto" -iconv="auto" -curses="auto" -docs="auto" -fdt="auto" -netmap="no" -sdl="auto" -sdl_image="auto" -coreaudio="auto" -virtiofsd="auto" -virtfs="auto" -libudev="auto" -mpath="auto" -vnc="enabled" -sparse="auto" -vde="$default_feature" -vnc_sasl="auto" -vnc_jpeg="auto" -vnc_png="auto" -xkbcommon="auto" -xen="$default_feature" +EXTRA_CFLAGS="" +EXTRA_CXXFLAGS="" +EXTRA_LDFLAGS="" + xen_ctrl_version="$default_feature" -xen_pci_passthrough="auto" -linux_aio="$default_feature" -linux_io_uring="$default_feature" -cap_ng="auto" -attr="auto" xfs="$default_feature" -tcg="enabled" membarrier="$default_feature" +vhost_kernel="$default_feature" vhost_net="$default_feature" vhost_crypto="$default_feature" vhost_scsi="$default_feature" vhost_vsock="$default_feature" vhost_user="no" -vhost_user_blk_server="auto" vhost_user_fs="$default_feature" -kvm="auto" -hax="auto" -hvf="auto" -whpx="auto" +vhost_vdpa="$default_feature" rdma="$default_feature" pvrdma="$default_feature" gprof="no" @@ -363,82 +312,43 @@ sanitizers="no" tsan="no" fortify_source="$default_feature" strip_opt="yes" -tcg_interpreter="false" -bigendian="no" mingw32="no" gcov="no" EXESUF="" -HOST_DSOSUF=".so" modules="no" module_upgrades="no" prefix="/usr/local" qemu_suffix="qemu" -slirp="auto" -oss_lib="" bsd="no" linux="no" solaris="no" profiler="no" -cocoa="auto" softmmu="yes" linux_user="no" bsd_user="no" -blobs="true" pkgversion="" pie="" qom_cast_debug="yes" trace_backends="log" trace_file="trace" -spice="$default_feature" -rbd="auto" -smartcard="$default_feature" -u2f="auto" -libusb="$default_feature" -usb_redir="$default_feature" opengl="$default_feature" cpuid_h="no" avx2_opt="$default_feature" -capstone="auto" -lzo="auto" -snappy="auto" -bzip2="auto" -lzfse="auto" -zstd="auto" guest_agent="$default_feature" guest_agent_with_vss="no" guest_agent_ntddscsi="no" -guest_agent_msi="auto" vss_win32_sdk="$default_feature" win_sdk="no" want_tools="$default_feature" -libiscsi="auto" -libnfs="auto" coroutine="" coroutine_pool="$default_feature" debug_stack_usage="no" crypto_afalg="no" -cfi="false" -cfi_debug="false" -seccomp="auto" -glusterfs="auto" -gtk="auto" tls_priority="NORMAL" -gnutls="$default_feature" -nettle="$default_feature" -nettle_xts="no" -gcrypt="$default_feature" -gcrypt_hmac="no" -gcrypt_xts="no" -qemu_private_xts="yes" -auth_pam="$default_feature" -vte="$default_feature" -virglrenderer="$default_feature" tpm="$default_feature" libssh="$default_feature" live_block_migration=${default_feature:-yes} numa="$default_feature" -tcmalloc="no" -jemalloc="no" replication=${default_feature:-yes} bochs=${default_feature:-yes} cloop=${default_feature:-yes} @@ -448,26 +358,34 @@ vdi=${default_feature:-yes} vvfat=${default_feature:-yes} qed=${default_feature:-yes} parallels=${default_feature:-yes} -sheepdog="no" -libxml2="$default_feature" debug_mutex="no" -libpmem="$default_feature" -default_devices="true" -plugins="no" -fuzzing="no" +plugins="$default_feature" rng_none="no" secret_keyring="$default_feature" -libdaxctl="$default_feature" meson="" +meson_args="" ninja="" +gio="$default_feature" skip_meson=no -gettext="auto" -fuse="auto" -fuse_lseek="auto" -multiprocess="auto" +slirp_smbd="$default_feature" -malloc_trim="auto" -gio="$default_feature" +# The following Meson options are handled manually (still they +# are included in the automatically 
generated help message) + +# 1. Track which submodules are needed +capstone="auto" +fdt="auto" +slirp="auto" + +# 2. Support --with/--without option +default_devices="true" + +# 3. Automatically enable/disable other options +tcg="enabled" +cfi="false" + +# 4. Detection partly done in configure +xen=${default_feature:+disabled} # parse CC options second for opt do @@ -482,13 +400,13 @@ for opt do ;; --cpu=*) cpu="$optarg" ;; - --extra-cflags=*) QEMU_CFLAGS="$QEMU_CFLAGS $optarg" - QEMU_LDFLAGS="$QEMU_LDFLAGS $optarg" - ;; - --extra-cxxflags=*) QEMU_CXXFLAGS="$QEMU_CXXFLAGS $optarg" + --extra-cflags=*) + EXTRA_CFLAGS="$EXTRA_CFLAGS $optarg" + EXTRA_CXXFLAGS="$EXTRA_CXXFLAGS $optarg" + ;; + --extra-cxxflags=*) EXTRA_CXXFLAGS="$EXTRA_CXXFLAGS $optarg" ;; - --extra-ldflags=*) QEMU_LDFLAGS="$QEMU_LDFLAGS $optarg" - EXTRA_LDFLAGS="$optarg" + --extra-ldflags=*) EXTRA_LDFLAGS="$EXTRA_LDFLAGS $optarg" ;; --enable-debug-info) debug_info="yes" ;; @@ -544,21 +462,20 @@ query_pkg_config() { pkg_config=query_pkg_config sdl2_config="${SDL2_CONFIG-${cross_prefix}sdl2-config}" -# If the user hasn't specified ARFLAGS, default to 'rv', just as make does. -ARFLAGS="${ARFLAGS-rv}" - # default flags for all hosts # We use -fwrapv to tell the compiler that we require a C dialect where # left shift of signed integers is well defined and has the expected # 2s-complement style results. (Both clang and gcc agree that it # provides these semantics.) -QEMU_CFLAGS="-fno-strict-aliasing -fno-common -fwrapv $QEMU_CFLAGS" +QEMU_CFLAGS="-fno-strict-aliasing -fno-common -fwrapv" QEMU_CFLAGS="-Wundef -Wwrite-strings -Wmissing-prototypes $QEMU_CFLAGS" QEMU_CFLAGS="-Wstrict-prototypes -Wredundant-decls $QEMU_CFLAGS" QEMU_CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE $QEMU_CFLAGS" +QEMU_LDFLAGS= + # Flags that are needed during configure but later taken care of by Meson -CONFIGURE_CFLAGS="-std=gnu99 -Wall" +CONFIGURE_CFLAGS="-std=gnu11 -Wall" CONFIGURE_LDFLAGS= @@ -586,15 +503,6 @@ int main(void) { return 0; } EOF } -write_c_fuzzer_skeleton() { - cat > $TMPC < -#include -int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); -int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { return 0; } -EOF -} - if check_define __linux__ ; then targetos="Linux" elif check_define _WIN32 ; then @@ -627,9 +535,6 @@ fi # cross-compiling to one of these OSes then you'll need to specify # the correct CPU with the --cpu option. case $targetos in -Darwin) - HOST_DSOSUF=".dylib" - ;; SunOS) # $(uname -m) returns i86pc even on an x86_64 box, so default based on isainfo if test -z "$cpu" && test "$(isainfo -k)" = "amd64"; then @@ -673,11 +578,7 @@ elif check_define __s390__ ; then cpu="s390" fi elif check_define __riscv ; then - if check_define _LP64 ; then - cpu="riscv64" - else - cpu="riscv32" - fi + cpu="riscv" elif check_define __arm__ ; then cpu="arm" elif check_define __aarch64__ ; then @@ -690,7 +591,7 @@ ARCH= # Normalise host CPU name and set ARCH. # Note that this case should only have supported host CPUs, not guests. 
case "$cpu" in - ppc|ppc64|s390x|sparc64|x32|riscv32|riscv64) + ppc|ppc64|s390x|sparc64|x32|riscv) ;; ppc64le) ARCH="ppc64" @@ -727,52 +628,34 @@ fi case $targetos in MINGW32*) mingw32="yes" - audio_possible_drivers="dsound sdl" - if check_include dsound.h; then - audio_drv_list="dsound" - else - audio_drv_list="" - fi supported_os="yes" + plugins="no" pie="no" ;; GNU/kFreeBSD) bsd="yes" - audio_drv_list="oss try-sdl" - audio_possible_drivers="oss sdl pa" ;; FreeBSD) bsd="yes" + bsd_user="yes" make="${MAKE-gmake}" - audio_drv_list="oss try-sdl" - audio_possible_drivers="oss sdl pa" # needed for kinfo_getvmmap(3) in libutil.h - netmap="" # enable netmap autodetect ;; DragonFly) bsd="yes" make="${MAKE-gmake}" - audio_drv_list="oss try-sdl" - audio_possible_drivers="oss sdl pa" ;; NetBSD) bsd="yes" make="${MAKE-gmake}" - audio_drv_list="oss try-sdl" - audio_possible_drivers="oss sdl" - oss_lib="-lossaudio" ;; OpenBSD) bsd="yes" make="${MAKE-gmake}" - audio_drv_list="try-sdl" - audio_possible_drivers="sdl" ;; Darwin) bsd="yes" darwin="yes" - audio_drv_list="try-coreaudio try-sdl" - audio_possible_drivers="coreaudio sdl" # Disable attempts to use ObjectiveC features in os/object.h since they # won't work when we're compiling with gcc as a C compiler. QEMU_CFLAGS="-DOS_OBJECT_USE_OBJC=0 $QEMU_CFLAGS" @@ -781,10 +664,6 @@ SunOS) solaris="yes" make="${MAKE-gmake}" smbd="${SMBD-/usr/sfw/sbin/smbd}" - if test -f /usr/include/sys/soundcard.h ; then - audio_drv_list="oss try-sdl" - fi - audio_possible_drivers="oss sdl" # needed for CMSG_ macros in sys/socket.h QEMU_CFLAGS="-D_XOPEN_SOURCE=600 $QEMU_CFLAGS" # needed for TIOCWIN* defines in termios.h @@ -792,23 +671,16 @@ SunOS) ;; Haiku) haiku="yes" - QEMU_CFLAGS="-DB_USE_POSITIVE_POSIX_ERRORS -D_BSD_SOURCE $QEMU_CFLAGS" + pie="no" + QEMU_CFLAGS="-DB_USE_POSITIVE_POSIX_ERRORS -D_BSD_SOURCE -fPIC $QEMU_CFLAGS" ;; Linux) - audio_drv_list="try-pa oss" - audio_possible_drivers="oss alsa sdl pa" linux="yes" linux_user="yes" vhost_user=${default_feature:-yes} ;; esac -if [ "$bsd" = "yes" ] ; then - if [ "$darwin" != "yes" ] ; then - bsd_user="yes" - fi -fi - : ${make=${MAKE-make}} # We prefer python 3.x. A bare 'python' is traditionally @@ -837,8 +709,6 @@ do fi done -: ${smbd=${SMBD-/usr/sbin/smbd}} - # Default objcc to clang if available, otherwise use CC if has clang; then objcc=clang @@ -848,7 +718,6 @@ fi if test "$mingw32" = "yes" ; then EXESUF=".exe" - HOST_DSOSUF=".dll" # MinGW needs -mthreads for TLS and macro _MT. CONFIGURE_CFLAGS="-mthreads $CONFIGURE_CFLAGS" write_c_skeleton; @@ -859,6 +728,18 @@ fi werror="" +. $source_path/scripts/meson-buildoptions.sh + +meson_options= +meson_option_parse() { + meson_options="$meson_options $(_meson_option_parse "$@")" + if test $? 
-eq 1; then + echo "ERROR: unknown option $1" + echo "Try '$0 --help' for more information" + exit 1 + fi +} + for opt do optarg=$(expr "x$opt" : 'x[^=]*=\(.*\)') case "$opt" in @@ -932,17 +813,24 @@ for opt do error_exit "Can't mix --target-list-exclude with --target-list" fi ;; - --enable-trace-backends=*) trace_backends="$optarg" - ;; - # XXX: backwards compatibility - --enable-trace-backend=*) trace_backends="$optarg" - ;; --with-trace-file=*) trace_file="$optarg" ;; --with-default-devices) default_devices="true" ;; --without-default-devices) default_devices="false" ;; + --with-devices-*[!a-zA-Z0-9_-]*=*) error_exit "Passed bad --with-devices-FOO option" + ;; + --with-devices-*) device_arch=${opt#--with-devices-}; + device_arch=${device_arch%%=*} + cf=$source_path/configs/devices/$device_arch-softmmu/$optarg.mak + if test -f "$cf"; then + device_archs="$device_archs $device_arch" + eval "devices_${device_arch}=\$optarg" + else + error_exit "File $cf does not exist" + fi + ;; --without-default-features) # processed above ;; --enable-gprof) gprof="yes" @@ -987,50 +875,20 @@ for opt do # configure to be used by RPM and similar macros that set # lots of directory switches by default. ;; - --disable-sdl) sdl="disabled" - ;; - --enable-sdl) sdl="enabled" - ;; - --disable-sdl-image) sdl_image="disabled" - ;; - --enable-sdl-image) sdl_image="enabled" - ;; --disable-qom-cast-debug) qom_cast_debug="no" ;; --enable-qom-cast-debug) qom_cast_debug="yes" ;; - --disable-virtfs) virtfs="disabled" - ;; - --enable-virtfs) virtfs="enabled" - ;; - --disable-libudev) libudev="disabled" - ;; - --enable-libudev) libudev="enabled" - ;; - --disable-virtiofsd) virtiofsd="disabled" - ;; - --enable-virtiofsd) virtiofsd="enabled" - ;; - --disable-mpath) mpath="disabled" - ;; - --enable-mpath) mpath="enabled" - ;; - --disable-vnc) vnc="disabled" - ;; - --enable-vnc) vnc="enabled" - ;; - --disable-gettext) gettext="disabled" - ;; - --enable-gettext) gettext="enabled" - ;; - --oss-lib=*) oss_lib="$optarg" - ;; --audio-drv-list=*) audio_drv_list="$optarg" ;; --block-drv-rw-whitelist=*|--block-drv-whitelist=*) block_drv_rw_whitelist=$(echo "$optarg" | sed -e 's/,/ /g') ;; --block-drv-ro-whitelist=*) block_drv_ro_whitelist=$(echo "$optarg" | sed -e 's/,/ /g') ;; + --enable-block-drv-whitelist-in-tools) block_drv_whitelist_tools="yes" + ;; + --disable-block-drv-whitelist-in-tools) block_drv_whitelist_tools="no" + ;; --enable-debug-tcg) debug_tcg="yes" ;; --disable-debug-tcg) debug_tcg="no" @@ -1061,102 +919,27 @@ for opt do ;; --disable-tsan) tsan="no" ;; - --enable-sparse) sparse="enabled" - ;; - --disable-sparse) sparse="disabled" - ;; --disable-strip) strip_opt="no" ;; - --disable-vnc-sasl) vnc_sasl="disabled" - ;; - --enable-vnc-sasl) vnc_sasl="enabled" - ;; - --disable-vnc-jpeg) vnc_jpeg="disabled" - ;; - --enable-vnc-jpeg) vnc_jpeg="enabled" - ;; - --disable-vnc-png) vnc_png="disabled" - ;; - --enable-vnc-png) vnc_png="enabled" - ;; --disable-slirp) slirp="disabled" ;; --enable-slirp) slirp="enabled" ;; --enable-slirp=git) slirp="internal" ;; - --enable-slirp=system) slirp="system" - ;; - --disable-vde) vde="no" - ;; - --enable-vde) vde="yes" - ;; - --disable-netmap) netmap="no" - ;; - --enable-netmap) netmap="yes" + --enable-slirp=*) slirp="$optarg" ;; --disable-xen) xen="disabled" ;; --enable-xen) xen="enabled" ;; - --disable-xen-pci-passthrough) xen_pci_passthrough="disabled" - ;; - --enable-xen-pci-passthrough) xen_pci_passthrough="enabled" - ;; - --disable-brlapi) brlapi="disabled" - ;; - --enable-brlapi) 
brlapi="enabled" - ;; - --disable-kvm) kvm="disabled" - ;; - --enable-kvm) kvm="enabled" - ;; - --disable-hax) hax="disabled" - ;; - --enable-hax) hax="enabled" - ;; - --disable-hvf) hvf="disabled" - ;; - --enable-hvf) hvf="enabled" - ;; - --disable-whpx) whpx="disabled" - ;; - --enable-whpx) whpx="enabled" - ;; - --disable-tcg-interpreter) tcg_interpreter="false" - ;; - --enable-tcg-interpreter) tcg_interpreter="true" - ;; - --disable-cap-ng) cap_ng="disabled" - ;; - --enable-cap-ng) cap_ng="enabled" - ;; --disable-tcg) tcg="disabled" + plugins="no" ;; --enable-tcg) tcg="enabled" ;; - --disable-malloc-trim) malloc_trim="disabled" - ;; - --enable-malloc-trim) malloc_trim="enabled" - ;; - --disable-spice) spice="no" - ;; - --enable-spice) spice="yes" - ;; - --disable-libiscsi) libiscsi="disabled" - ;; - --enable-libiscsi) libiscsi="enabled" - ;; - --disable-libnfs) libnfs="disabled" - ;; - --enable-libnfs) libnfs="enabled" - ;; --enable-profiler) profiler="yes" ;; - --disable-cocoa) cocoa="disabled" - ;; - --enable-cocoa) cocoa="enabled" - ;; --disable-system) softmmu="no" ;; --enable-system) softmmu="yes" @@ -1200,48 +983,18 @@ for opt do ;; --disable-cfi) cfi="false" ;; - --enable-cfi-debug) cfi_debug="true" - ;; - --disable-cfi-debug) cfi_debug="false" - ;; - --disable-curses) curses="disabled" - ;; - --enable-curses) curses="enabled" - ;; - --disable-iconv) iconv="disabled" - ;; - --enable-iconv) iconv="enabled" - ;; - --disable-curl) curl="disabled" - ;; - --enable-curl) curl="enabled" - ;; --disable-fdt) fdt="disabled" ;; --enable-fdt) fdt="enabled" ;; --enable-fdt=git) fdt="internal" ;; - --enable-fdt=system) fdt="system" - ;; - --disable-linux-aio) linux_aio="no" - ;; - --enable-linux-aio) linux_aio="yes" - ;; - --disable-linux-io-uring) linux_io_uring="no" - ;; - --enable-linux-io-uring) linux_io_uring="yes" - ;; - --disable-attr) attr="disabled" - ;; - --enable-attr) attr="enabled" + --enable-fdt=*) fdt="$optarg" ;; --disable-membarrier) membarrier="no" ;; --enable-membarrier) membarrier="yes" ;; - --disable-blobs) blobs="false" - ;; --with-pkgversion=*) pkgversion="$optarg" ;; --with-coroutine=*) coroutine="$optarg" @@ -1256,10 +1009,6 @@ for opt do ;; --disable-crypto-afalg) crypto_afalg="no" ;; - --disable-docs) docs="disabled" - ;; - --enable-docs) docs="enabled" - ;; --disable-vhost-net) vhost_net="no" ;; --enable-vhost-net) vhost_net="yes" @@ -1276,10 +1025,6 @@ for opt do ;; --enable-vhost-vsock) vhost_vsock="yes" ;; - --disable-vhost-user-blk-server) vhost_user_blk_server="disabled" - ;; - --enable-vhost-user-blk-server) vhost_user_blk_server="enabled" - ;; --disable-vhost-user-fs) vhost_user_fs="no" ;; --enable-vhost-user-fs) vhost_user_fs="yes" @@ -1288,60 +1033,16 @@ for opt do ;; --enable-opengl) opengl="yes" ;; - --disable-rbd) rbd="disabled" - ;; - --enable-rbd) rbd="enabled" - ;; --disable-xfsctl) xfs="no" ;; --enable-xfsctl) xfs="yes" ;; - --disable-smartcard) smartcard="no" - ;; - --enable-smartcard) smartcard="yes" - ;; - --disable-u2f) u2f="disabled" - ;; - --enable-u2f) u2f="enabled" - ;; - --disable-libusb) libusb="no" - ;; - --enable-libusb) libusb="yes" - ;; - --disable-usb-redir) usb_redir="no" - ;; - --enable-usb-redir) usb_redir="yes" - ;; --disable-zlib-test) ;; - --disable-lzo) lzo="disabled" - ;; - --enable-lzo) lzo="enabled" - ;; - --disable-snappy) snappy="disabled" - ;; - --enable-snappy) snappy="enabled" - ;; - --disable-bzip2) bzip2="disabled" - ;; - --enable-bzip2) bzip2="enabled" - ;; - --enable-lzfse) lzfse="enabled" - ;; - --disable-lzfse) 
lzfse="disabled" - ;; - --disable-zstd) zstd="disabled" - ;; - --enable-zstd) zstd="enabled" - ;; --enable-guest-agent) guest_agent="yes" ;; --disable-guest-agent) guest_agent="no" ;; - --enable-guest-agent-msi) guest_agent_msi="enabled" - ;; - --disable-guest-agent-msi) guest_agent_msi="disabled" - ;; --with-vss-sdk) vss_win32_sdk="" ;; --with-vss-sdk=*) vss_win32_sdk="$optarg" @@ -1358,12 +1059,6 @@ for opt do ;; --disable-tools) want_tools="no" ;; - --enable-seccomp) seccomp="enabled" - ;; - --disable-seccomp) seccomp="disabled" - ;; - --disable-glusterfs) glusterfs="disabled" - ;; --disable-avx2) avx2_opt="no" ;; --enable-avx2) avx2_opt="yes" @@ -1372,9 +1067,6 @@ for opt do ;; --enable-avx512f) avx512f_opt="yes" ;; - - --enable-glusterfs) glusterfs="enabled" - ;; --disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane) echo "$0: $opt is obsolete, virtio-blk data-plane is always on" >&2 ;; @@ -1384,28 +1076,8 @@ for opt do --enable-uuid|--disable-uuid) echo "$0: $opt is obsolete, UUID support is always built" >&2 ;; - --disable-gtk) gtk="disabled" - ;; - --enable-gtk) gtk="enabled" - ;; --tls-priority=*) tls_priority="$optarg" ;; - --disable-gnutls) gnutls="no" - ;; - --enable-gnutls) gnutls="yes" - ;; - --disable-nettle) nettle="no" - ;; - --enable-nettle) nettle="yes" - ;; - --disable-gcrypt) gcrypt="no" - ;; - --enable-gcrypt) gcrypt="yes" - ;; - --disable-auth-pam) auth_pam="no" - ;; - --enable-auth-pam) auth_pam="yes" - ;; --enable-rdma) rdma="yes" ;; --disable-rdma) rdma="no" @@ -1414,14 +1086,6 @@ for opt do ;; --disable-pvrdma) pvrdma="no" ;; - --disable-vte) vte="no" - ;; - --enable-vte) vte="yes" - ;; - --disable-virglrenderer) virglrenderer="no" - ;; - --enable-virglrenderer) virglrenderer="yes" - ;; --disable-tpm) tpm="no" ;; --enable-tpm) tpm="yes" @@ -1438,18 +1102,6 @@ for opt do ;; --enable-numa) numa="yes" ;; - --disable-libxml2) libxml2="no" - ;; - --enable-libxml2) libxml2="yes" - ;; - --disable-tcmalloc) tcmalloc="no" - ;; - --enable-tcmalloc) tcmalloc="yes" - ;; - --disable-jemalloc) jemalloc="no" - ;; - --enable-jemalloc) jemalloc="yes" - ;; --disable-replication) replication="no" ;; --enable-replication) replication="yes" @@ -1486,10 +1138,6 @@ for opt do ;; --enable-parallels) parallels="yes" ;; - --disable-sheepdog) sheepdog="no" - ;; - --enable-sheepdog) sheepdog="yes" - ;; --disable-vhost-user) vhost_user="no" ;; --enable-vhost-user) vhost_user="yes" @@ -1508,18 +1156,10 @@ for opt do ;; --enable-capstone=git) capstone="internal" ;; - --enable-capstone=system) capstone="system" + --enable-capstone=*) capstone="$optarg" ;; --with-git=*) git="$optarg" ;; - --enable-git-update) - git_submodules_action="update" - echo "--enable-git-update deprecated, use --with-git-submodules=update" - ;; - --disable-git-update) - git_submodules_action="validate" - echo "--disable-git-update deprecated, use --with-git-submodules=validate" - ;; --with-git-submodules=*) git_submodules_action="$optarg" ;; @@ -1527,15 +1167,11 @@ for opt do ;; --disable-debug-mutex) debug_mutex=no ;; - --enable-libpmem) libpmem=yes - ;; - --disable-libpmem) libpmem=no - ;; - --enable-xkbcommon) xkbcommon="enabled" - ;; - --disable-xkbcommon) xkbcommon="disabled" - ;; - --enable-plugins) plugins="yes" + --enable-plugins) if test "$mingw32" = "yes"; then + error_exit "TCG plugins not currently supported on Windows platforms" + else + plugins="yes" + fi ;; --disable-plugins) plugins="no" ;; @@ -1543,10 +1179,6 @@ for opt do ;; --disable-containers) use_containers="no" ;; - --enable-fuzzing) 
fuzzing=yes - ;; - --disable-fuzzing) fuzzing=no - ;; --gdb=*) gdb_bin="$optarg" ;; --enable-rng-none) rng_none=yes @@ -1557,25 +1189,25 @@ for opt do ;; --disable-keyring) secret_keyring="no" ;; - --enable-libdaxctl) libdaxctl=yes - ;; - --disable-libdaxctl) libdaxctl=no + --enable-gio) gio=yes ;; - --enable-fuse) fuse="enabled" + --disable-gio) gio=no ;; - --disable-fuse) fuse="disabled" + --enable-slirp-smbd) slirp_smbd=yes ;; - --enable-fuse-lseek) fuse_lseek="enabled" + --disable-slirp-smbd) slirp_smbd=no ;; - --disable-fuse-lseek) fuse_lseek="disabled" + # backwards compatibility options + --enable-trace-backend=*) meson_option_parse "--enable-trace-backends=$optarg" "$optarg" ;; - --enable-multiprocess) multiprocess="enabled" + --disable-blobs) meson_option_parse --disable-install-blobs "" ;; - --disable-multiprocess) multiprocess="disabled" + --enable-tcmalloc) meson_option_parse --enable-malloc=tcmalloc tcmalloc ;; - --enable-gio) gio=yes + --enable-jemalloc) meson_option_parse --enable-malloc=jemalloc jemalloc ;; - --disable-gio) gio=no + # everything else has the same name in configure and meson + --enable-* | --disable-*) meson_option_parse "$opt" "$optarg" ;; *) echo "ERROR: unknown option $opt" @@ -1585,6 +1217,11 @@ for opt do esac done +# test for any invalid configuration combinations +if test "$plugins" = "yes" -a "$tcg" = "disabled"; then + error_exit "Can't enable plugins on non-TCG builds" +fi + case $git_submodules_action in update|validate) if test ! -e "$source_path/.git"; then @@ -1593,6 +1230,28 @@ case $git_submodules_action in fi ;; ignore) + if ! test -f "$source_path/ui/keycodemapdb/README" + then + echo + echo "ERROR: missing GIT submodules" + echo + if test -e "$source_path/.git"; then + echo "--with-git-submodules=ignore specified but submodules were not" + echo "checked out. Please initialize and update submodules." + else + echo "This is not a GIT checkout but module content appears to" + echo "be missing. Do not use 'git archive' or GitHub download links" + echo "to acquire QEMU source archives. Non-GIT builds are only" + echo "supported with source archives linked from:" + echo + echo " https://www.qemu.org/download/#source" + echo + echo "Developers working with GIT can use scripts/archive-source.sh" + echo "if they need to create valid source archives." + fi + echo + exit 1 + fi ;; *) echo "ERROR: invalid --with-git-submodules= value '$git_submodules_action'" @@ -1618,51 +1277,27 @@ firmwarepath="${firmwarepath:-$datadir/qemu-firmware}" localedir="${localedir:-$datadir/locale}" case "$cpu" in - ppc) - CPU_CFLAGS="-m32" - QEMU_LDFLAGS="-m32 $QEMU_LDFLAGS" - ;; - ppc64) - CPU_CFLAGS="-m64" - QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS" - ;; - sparc) - CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc" - QEMU_LDFLAGS="-m32 -mv8plus $QEMU_LDFLAGS" - ;; - sparc64) - CPU_CFLAGS="-m64 -mcpu=ultrasparc" - QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS" - ;; - s390) - CPU_CFLAGS="-m31" - QEMU_LDFLAGS="-m31 $QEMU_LDFLAGS" - ;; - s390x) - CPU_CFLAGS="-m64" - QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS" - ;; - i386) - CPU_CFLAGS="-m32" - QEMU_LDFLAGS="-m32 $QEMU_LDFLAGS" - ;; - x86_64) - # ??? Only extremely old AMD cpus do not have cmpxchg16b. - # If we truly care, we should simply detect this case at - # runtime and generate the fallback to serial emulation. 
- CPU_CFLAGS="-m64 -mcx16" - QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS" - ;; - x32) - CPU_CFLAGS="-mx32" - QEMU_LDFLAGS="-mx32 $QEMU_LDFLAGS" - ;; + ppc) CPU_CFLAGS="-m32" ;; + ppc64) CPU_CFLAGS="-m64" ;; + sparc) CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc" ;; + sparc64) CPU_CFLAGS="-m64 -mcpu=ultrasparc" ;; + s390) CPU_CFLAGS="-m31" ;; + s390x) CPU_CFLAGS="-m64" ;; + i386) CPU_CFLAGS="-m32" ;; + x32) CPU_CFLAGS="-mx32" ;; + + # ??? Only extremely old AMD cpus do not have cmpxchg16b. + # If we truly care, we should simply detect this case at + # runtime and generate the fallback to serial emulation. + x86_64) CPU_CFLAGS="-m64 -mcx16" ;; + # No special flags required for other host CPUs esac -eval "cross_cc_${cpu}=\$host_cc" -cross_cc_vars="$cross_cc_vars cross_cc_${cpu}" -QEMU_CFLAGS="$CPU_CFLAGS $QEMU_CFLAGS" +if eval test -z "\${cross_cc_$cpu}"; then + eval "cross_cc_${cpu}=\$cc" + cross_cc_vars="$cross_cc_vars cross_cc_${cpu}" +fi # For user-mode emulation the host arch has to be one we explicitly # support, even if we're using TCI. @@ -1672,18 +1307,18 @@ if [ "$ARCH" = "unknown" ]; then fi default_target_list="" -deprecated_targets_list=ppc64abi32-linux-user,lm32-softmmu,unicore32-softmmu +deprecated_targets_list=ppc64abi32-linux-user deprecated_features="" mak_wilds="" if [ "$softmmu" = "yes" ]; then - mak_wilds="${mak_wilds} $source_path/default-configs/targets/*-softmmu.mak" + mak_wilds="${mak_wilds} $source_path/configs/targets/*-softmmu.mak" fi if [ "$linux_user" = "yes" ]; then - mak_wilds="${mak_wilds} $source_path/default-configs/targets/*-linux-user.mak" + mak_wilds="${mak_wilds} $source_path/configs/targets/*-linux-user.mak" fi if [ "$bsd_user" = "yes" ]; then - mak_wilds="${mak_wilds} $source_path/default-configs/targets/*-bsd-user.mak" + mak_wilds="${mak_wilds} $source_path/configs/targets/*-bsd-user.mak" fi # If the user doesn't explicitly specify a deprecated target we will @@ -1703,9 +1338,6 @@ for config in $mak_wilds; do fi done -# Enumerate public trace backends for --help output -trace_backend_list=$(echo $(grep -le '^PUBLIC = True$' "$source_path"/scripts/tracetool/backend/*.py | sed -e 's/^.*\/\(.*\)\.py$/\1/')) - if test x"$show_help" = x"yes" ; then cat << EOF @@ -1732,8 +1364,8 @@ Advanced options (experts only): build time --cxx=CXX use C++ compiler CXX [$cxx] --objcc=OBJCC use Objective-C compiler OBJCC [$objcc] - --extra-cflags=CFLAGS append extra C compiler flags QEMU_CFLAGS - --extra-cxxflags=CXXFLAGS append extra C++ compiler flags QEMU_CXXFLAGS + --extra-cflags=CFLAGS append extra C compiler flags CFLAGS + --extra-cxxflags=CXXFLAGS append extra C++ compiler flags CXXFLAGS --extra-ldflags=LDFLAGS append extra linker flags LDFLAGS --cross-cc-ARCH=CC use compiler when building ARCH guest test cases --cross-cc-flags-ARCH= use compiler flags when building ARCH guest tests @@ -1764,35 +1396,30 @@ Advanced options (experts only): --without-default-features default all --enable-* options to "disabled" --without-default-devices do not include any device that is not needed to start the emulator (only use if you are including - desired devices in default-configs/devices/) + desired devices in configs/devices/) + --with-devices-ARCH=NAME override default configs/devices --enable-debug enable common debug build options --enable-sanitizers enable default sanitizers --enable-tsan enable thread sanitizer --disable-strip disable stripping binaries --disable-werror disable compilation abort on warning --disable-stack-protector disable compiler-provided stack protection - 
--audio-drv-list=LIST set audio drivers list: - Available drivers: $audio_possible_drivers + --audio-drv-list=LIST set audio drivers to try if -audiodev is not used --block-drv-whitelist=L Same as --block-drv-rw-whitelist=L --block-drv-rw-whitelist=L set block driver read-write whitelist - (affects only QEMU, not qemu-img) + (by default affects only QEMU, not tools like qemu-img) --block-drv-ro-whitelist=L set block driver read-only whitelist - (affects only QEMU, not qemu-img) - --enable-trace-backends=B Set trace backend - Available backends: $trace_backend_list + (by default affects only QEMU, not tools like qemu-img) + --enable-block-drv-whitelist-in-tools + use block whitelist also in tools instead of only QEMU --with-trace-file=NAME Full PATH,NAME of file to store traces Default:trace- - --disable-slirp disable SLIRP userspace network connectivity - --enable-tcg-interpreter enable TCI (TCG with bytecode interpreter, experimental and slow) - --enable-malloc-trim enable libc malloc_trim() for memory optimization - --oss-lib path to OSS library --cpu=CPU Build for host CPU [$cpu] --with-coroutine=BACKEND coroutine backend. Supported options: ucontext, sigaltstack, windows --enable-gcov enable test coverage analysis with gcov - --disable-blobs disable installing provided firmware blobs --with-vss-sdk=SDK-path enable Windows VSS support in QEMU Guest Agent --with-win-sdk=SDK-path path to Windows Platform SDK (to build VSS .tlb) --tls-priority default TLS protocol/cipher priority string @@ -1804,110 +1431,41 @@ Advanced options (experts only): enable plugins via shared library loading --disable-containers don't use containers for cross-building --gdb=GDB-path gdb to use for gdbstub tests [$gdb_bin] - -Optional features, enabled with --enable-FEATURE and -disabled with --disable-FEATURE, default is enabled if available -(unless built with --without-default-features): - +EOF + meson_options_help +cat << EOF system all system emulation targets user supported user emulation targets linux-user all linux usermode emulation targets bsd-user all BSD usermode emulation targets - docs build documentation guest-agent build the QEMU Guest Agent - guest-agent-msi build guest agent Windows MSI installation package pie Position Independent Executables modules modules support (non-Windows) module-upgrades try to load modules from alternate paths for upgrades debug-tcg TCG debugging (default is disabled) debug-info debugging information lto Enable Link-Time Optimization. - sparse sparse checker safe-stack SafeStack Stack Smash Protection. Depends on clang/llvm >= 3.7 and requires coroutine backend ucontext. - cfi Enable Control-Flow Integrity for indirect function calls. - In case of a cfi violation, QEMU is terminated with SIGILL - Depends on lto and is incompatible with modules - Automatically enables Link-Time Optimization (lto) - cfi-debug In case of a cfi violation, a message containing the line that - triggered the error is written to stderr. 
After the error, - QEMU is still terminated with SIGILL - gnutls GNUTLS cryptography support - nettle nettle cryptography support - gcrypt libgcrypt cryptography support - auth-pam PAM access control - sdl SDL UI - sdl-image SDL Image support for icons - gtk gtk UI - vte vte support for the gtk UI - curses curses UI - iconv font glyph conversion support - vnc VNC UI support - vnc-sasl SASL encryption for VNC server - vnc-jpeg JPEG lossy compression for VNC server - vnc-png PNG compression for VNC server - cocoa Cocoa UI (Mac OS X only) - virtfs VirtFS - virtiofsd build virtiofs daemon (virtiofsd) - libudev Use libudev to enumerate host devices - mpath Multipath persistent reservation passthrough - xen xen backend driver support - xen-pci-passthrough PCI passthrough support for Xen - brlapi BrlAPI (Braile) - curl curl connectivity membarrier membarrier system call (for Linux 4.14+ or Windows) - fdt fdt device tree - kvm KVM acceleration support - hax HAX acceleration support - hvf Hypervisor.framework acceleration support - whpx Windows Hypervisor Platform acceleration support rdma Enable RDMA-based migration pvrdma Enable PVRDMA support - vde support for vde network - netmap support for netmap network - linux-aio Linux AIO support - linux-io-uring Linux io_uring support - cap-ng libcap-ng support - attr attr and xattr support vhost-net vhost-net kernel acceleration support vhost-vsock virtio sockets device support vhost-scsi vhost-scsi kernel target support vhost-crypto vhost-user-crypto backend support vhost-kernel vhost kernel backend support vhost-user vhost-user backend support - vhost-user-blk-server vhost-user-blk server support vhost-vdpa vhost-vdpa kernel backend support - spice spice - rbd rados block device (rbd) - libiscsi iscsi support - libnfs nfs support - smartcard smartcard support (libcacard) - u2f U2F support (u2f-emu) - libusb libusb (for usb passthrough) live-block-migration Block migration in the main migration stream - usb-redir usb network redirection support - lzo support of lzo compression library - snappy support of snappy compression library - bzip2 support of bzip2 compression library - (for reading bzip2-compressed dmg images) - lzfse support of lzfse compression library - (for reading lzfse-compressed dmg images) - zstd support for zstd compression library - (for migration compression and qcow2 cluster compression) - seccomp seccomp support coroutine-pool coroutine freelist (better performance) - glusterfs GlusterFS backend tpm TPM support libssh ssh block device support numa libnuma support - libxml2 for Parallels image format - tcmalloc tcmalloc support - jemalloc jemalloc support avx2 AVX2 optimization support avx512f AVX512F optimization support replication replication support opengl opengl support - virglrenderer virgl rendering support xfsctl xfsctl support qom-cast-debug cast debugging support tools build qemu-io, qemu-nbd and qemu-img tools @@ -1919,18 +1477,11 @@ disabled with --disable-FEATURE, default is enabled if available vvfat vvfat image format support qed qed image format support parallels parallels image format support - sheepdog sheepdog block driver support (deprecated) crypto-afalg Linux AF_ALG crypto backend driver - capstone capstone disassembler support debug-mutex mutex debugging support - libpmem libpmem support - xkbcommon xkbcommon support rng-none dummy RNG, avoid using /dev/(u)random and getrandom() - libdaxctl libdaxctl support - fuse FUSE block device export - fuse-lseek SEEK_HOLE/SEEK_DATA support for FUSE exports - 
multiprocess Out of process device emulation support gio libgio support + slirp-smbd use smbd (at path --smbd=*) in slirp networking NOTE: The object files are built at the place where configure is launched EOF @@ -1963,7 +1514,7 @@ python_version=$($python -c 'import sys; print("%d.%d.%d" % (sys.version_info[0] python="$python -B" if test -z "$meson"; then - if test "$explicit_python" = no && has meson && version_ge "$(meson --version)" 0.55.3; then + if test "$explicit_python" = no && has meson && version_ge "$(meson --version)" 0.59.3; then meson=meson elif test $git_submodules_action != 'ignore' ; then meson=git @@ -2053,17 +1604,17 @@ fi cat > $TMPC << EOF #if defined(__clang_major__) && defined(__clang_minor__) # ifdef __apple_build_version__ -# if __clang_major__ < 5 || (__clang_major__ == 5 && __clang_minor__ < 1) -# error You need at least XCode Clang v5.1 to compile QEMU +# if __clang_major__ < 10 || (__clang_major__ == 10 && __clang_minor__ < 0) +# error You need at least XCode Clang v10.0 to compile QEMU # endif # else -# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ < 4) -# error You need at least Clang v3.4 to compile QEMU +# if __clang_major__ < 6 || (__clang_major__ == 6 && __clang_minor__ < 0) +# error You need at least Clang v6.0 to compile QEMU # endif # endif #elif defined(__GNUC__) && defined(__GNUC_MINOR__) -# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8) -# error You need at least GCC v4.8 to compile QEMU +# if __GNUC__ < 7 || (__GNUC__ == 7 && __GNUC_MINOR__ < 4) +# error You need at least GCC v7.4.0 to compile QEMU # endif #else # error You either need GCC or Clang to compiler QEMU @@ -2071,7 +1622,7 @@ cat > $TMPC << EOF int main (void) { return 0; } EOF if ! compile_prog "" "" ; then - error_exit "You need at least GCC v4.8 or Clang v3.4 (or XCode Clang v5.1)" + error_exit "You need at least GCC v7.4 or Clang v6.0 (or XCode Clang v10.0)" fi # Accumulate -Wfoo and -Wno-bar separately. @@ -2175,22 +1726,16 @@ if test "$modules" = "no" && test "$module_upgrades" = "yes" ; then error_exit "Can't enable module-upgrades as Modules are not enabled" fi -# Static linking is not possible with modules or PIE +# Static linking is not possible with plugins, modules or PIE if test "$static" = "yes" ; then if test "$modules" = "yes" ; then error_exit "static and modules are mutually incompatible" fi -fi - -# Unconditional check for compiler __thread support - cat > $TMPC << EOF -static __thread int tls_var; -int main(void) { return tls_var; } -EOF - -if ! compile_prog "-Werror" "" ; then - error_exit "Your compiler does not support the __thread specifier for " \ - "Thread-Local Storage (TLS). Please upgrade to a version that does." 
+ if test "$plugins" = "yes"; then + error_exit "static and plugins are mutually incompatible" + else + plugins="no" + fi fi cat > $TMPC << EOF @@ -2267,19 +1812,9 @@ EOF fi fi -######################################### -# Solaris specific configure tool chain decisions - -if test "$solaris" = "yes" ; then - if has ar; then - : - else - if test -f /usr/ccs/bin/ar ; then - error_exit "No path includes ar" \ - "Add /usr/ccs/bin to your path and rerun configure" - fi - error_exit "No path includes ar" - fi +if test "$tcg" = "enabled"; then + git_submodules="$git_submodules tests/fp/berkeley-testfloat-3" + git_submodules="$git_submodules tests/fp/berkeley-softfloat-3" fi if test -z "${target_list+xxx}" ; then @@ -2346,24 +1881,27 @@ feature_not_found() { # --- # big/little endian test cat > $TMPC << EOF +#include short big_endian[] = { 0x4269, 0x4765, 0x4e64, 0x4961, 0x4e00, 0, }; short little_endian[] = { 0x694c, 0x7454, 0x654c, 0x6e45, 0x6944, 0x6e41, 0, }; -extern int foo(short *, short *); -int main(int argc, char *argv[]) { - return foo(big_endian, little_endian); +int main(int argc, char *argv[]) +{ + return printf("%s %s\n", (char *)big_endian, (char *)little_endian); } EOF -if compile_object ; then - if strings -a $TMPO | grep -q BiGeNdIaN ; then +if compile_prog ; then + if strings -a $TMPE | grep -q BiGeNdIaN ; then bigendian="yes" - elif strings -a $TMPO | grep -q LiTtLeEnDiAn ; then + elif strings -a $TMPE | grep -q LiTtLeEnDiAn ; then bigendian="no" else - echo "big/little test failed (won't work with -flto=thin)" + echo big/little test failed + exit 1 fi else - echo "big/little test failed (failed to compile test binary)" + echo big/little test failed + exit 1 fi ########################################## @@ -2376,66 +1914,20 @@ if test -z "$want_tools"; then fi fi -########################################## -# Disable features only meaningful for system-mode emulation -if test "$softmmu" = "no"; then - audio_drv_list="" -fi +######################################### +# vhost interdependencies and host support -########################################## -# Some versions of Mac OS X incorrectly define SIZE_MAX -cat > $TMPC << EOF -#include -#include -int main(int argc, char *argv[]) { - return printf("%zu", SIZE_MAX); -} -EOF -have_broken_size_max=no -if ! 
compile_object -Werror ; then - have_broken_size_max=yes -fi - -########################################## -# L2TPV3 probe - -cat > $TMPC < -#include -int main(void) { return sizeof(struct mmsghdr); } -EOF -if compile_prog "" "" ; then - l2tpv3=yes -else - l2tpv3=no -fi - -cat > $TMPC < -int main(int argc, char *argv[]) { - return mlockall(MCL_FUTURE); -} -EOF -if compile_prog "" "" ; then - have_mlockall=yes -else - have_mlockall=no -fi - -######################################### -# vhost interdependencies and host support - -# vhost backends -if test "$vhost_user" = "yes" && test "$linux" != "yes"; then - error_exit "vhost-user is only available on Linux" -fi -test "$vhost_vdpa" = "" && vhost_vdpa=$linux -if test "$vhost_vdpa" = "yes" && test "$linux" != "yes"; then - error_exit "vhost-vdpa is only available on Linux" -fi -test "$vhost_kernel" = "" && vhost_kernel=$linux -if test "$vhost_kernel" = "yes" && test "$linux" != "yes"; then - error_exit "vhost-kernel is only available on Linux" +# vhost backends +if test "$vhost_user" = "yes" && test "$linux" != "yes"; then + error_exit "vhost-user is only available on Linux" +fi +test "$vhost_vdpa" = "" && vhost_vdpa=$linux +if test "$vhost_vdpa" = "yes" && test "$linux" != "yes"; then + error_exit "vhost-vdpa is only available on Linux" +fi +test "$vhost_kernel" = "" && vhost_kernel=$linux +if test "$vhost_kernel" = "yes" && test "$linux" != "yes"; then + error_exit "vhost-kernel is only available on Linux" fi # vhost-kernel devices @@ -2481,25 +1973,6 @@ if ! has "$pkg_config_exe"; then error_exit "pkg-config binary '$pkg_config_exe' not found" fi -########################################## -# NPTL probe - -if test "$linux_user" = "yes"; then - cat > $TMPC < -#include -int main(void) { -#if !defined(CLONE_SETTLS) || !defined(FUTEX_WAIT) -#error bork -#endif - return 0; -} -EOF - if ! compile_object ; then - feature_not_found "nptl" "Install glibc and linux kernel headers." - fi -fi - ########################################## # xen probe @@ -2806,234 +2279,6 @@ EOF fi fi -########################################## -# GNUTLS probe - -if test "$gnutls" != "no"; then - pass="no" - if $pkg_config --exists "gnutls >= 3.1.18"; then - gnutls_cflags=$($pkg_config --cflags gnutls) - gnutls_libs=$($pkg_config --libs gnutls) - # Packaging for the static libraries is not always correct. - # At least ubuntu 18.04 ships only shared libraries. - write_c_skeleton - if compile_prog "" "$gnutls_libs" ; then - pass="yes" - fi - fi - if test "$pass" = "no" && test "$gnutls" = "yes"; then - feature_not_found "gnutls" "Install gnutls devel >= 3.1.18" - else - gnutls="$pass" - fi -fi - - -# If user didn't give a --disable/enable-gcrypt flag, -# then mark as disabled if user requested nettle -# explicitly -if test -z "$gcrypt" -then - if test "$nettle" = "yes" - then - gcrypt="no" - fi -fi - -# If user didn't give a --disable/enable-nettle flag, -# then mark as disabled if user requested gcrypt -# explicitly -if test -z "$nettle" -then - if test "$gcrypt" = "yes" - then - nettle="no" - fi -fi - -has_libgcrypt() { - if ! has "libgcrypt-config" - then - return 1 - fi - - if test -n "$cross_prefix" - then - host=$(libgcrypt-config --host) - if test "$host-" != $cross_prefix - then - return 1 - fi - fi - - maj=`libgcrypt-config --version | awk -F . '{print $1}'` - min=`libgcrypt-config --version | awk -F . 
'{print $2}'` - - if test $maj != 1 || test $min -lt 5 - then - return 1 - fi - - return 0 -} - - -if test "$nettle" != "no"; then - pass="no" - if $pkg_config --exists "nettle >= 2.7.1"; then - nettle_cflags=$($pkg_config --cflags nettle) - nettle_libs=$($pkg_config --libs nettle) - nettle_version=$($pkg_config --modversion nettle) - # Link test to make sure the given libraries work (e.g for static). - write_c_skeleton - if compile_prog "" "$nettle_libs" ; then - if test -z "$gcrypt"; then - gcrypt="no" - fi - pass="yes" - fi - fi - if test "$pass" = "yes" - then - cat > $TMPC << EOF -#include -int main(void) { - return 0; -} -EOF - if compile_prog "$nettle_cflags" "$nettle_libs" ; then - nettle_xts=yes - qemu_private_xts=no - fi - fi - if test "$pass" = "no" && test "$nettle" = "yes"; then - feature_not_found "nettle" "Install nettle devel >= 2.7.1" - else - nettle="$pass" - fi -fi - -if test "$gcrypt" != "no"; then - pass="no" - if has_libgcrypt; then - gcrypt_cflags=$(libgcrypt-config --cflags) - gcrypt_libs=$(libgcrypt-config --libs) - # Debian has removed -lgpg-error from libgcrypt-config - # as it "spreads unnecessary dependencies" which in - # turn breaks static builds... - if test "$static" = "yes" - then - gcrypt_libs="$gcrypt_libs -lgpg-error" - fi - - # Link test to make sure the given libraries work (e.g for static). - write_c_skeleton - if compile_prog "" "$gcrypt_libs" ; then - pass="yes" - fi - fi - if test "$pass" = "yes"; then - gcrypt="yes" - cat > $TMPC << EOF -#include -int main(void) { - gcry_mac_hd_t handle; - gcry_mac_open(&handle, GCRY_MAC_HMAC_MD5, - GCRY_MAC_FLAG_SECURE, NULL); - return 0; -} -EOF - if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then - gcrypt_hmac=yes - fi - cat > $TMPC << EOF -#include -int main(void) { - gcry_cipher_hd_t handle; - gcry_cipher_open(&handle, GCRY_CIPHER_AES, GCRY_CIPHER_MODE_XTS, 0); - return 0; -} -EOF - if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then - gcrypt_xts=yes - qemu_private_xts=no - fi - elif test "$gcrypt" = "yes"; then - feature_not_found "gcrypt" "Install gcrypt devel >= 1.5.0" - else - gcrypt="no" - fi -fi - - -if test "$gcrypt" = "yes" && test "$nettle" = "yes" -then - error_exit "Only one of gcrypt & nettle can be enabled" -fi - -########################################## -# libtasn1 - only for the TLS creds/session test suite - -tasn1=yes -tasn1_cflags="" -tasn1_libs="" -if $pkg_config --exists "libtasn1"; then - tasn1_cflags=$($pkg_config --cflags libtasn1) - tasn1_libs=$($pkg_config --libs libtasn1) -else - tasn1=no -fi - - -########################################## -# PAM probe - -if test "$auth_pam" != "no"; then - cat > $TMPC < -#include -int main(void) { - const char *service_name = "qemu"; - const char *user = "frank"; - const struct pam_conv pam_conv = { 0 }; - pam_handle_t *pamh = NULL; - pam_start(service_name, user, &pam_conv, &pamh); - return 0; -} -EOF - if compile_prog "" "-lpam" ; then - auth_pam=yes - else - if test "$auth_pam" = "yes"; then - feature_not_found "PAM" "Install PAM development package" - else - auth_pam=no - fi - fi -fi - -########################################## -# VTE probe - -if test "$vte" != "no"; then - vteminversion="0.32.0" - if $pkg_config --exists "vte-2.91"; then - vtepackage="vte-2.91" - else - vtepackage="vte-2.90" - fi - if $pkg_config --exists "$vtepackage >= $vteminversion"; then - vte_cflags=$($pkg_config --cflags $vtepackage) - vte_libs=$($pkg_config --libs $vtepackage) - vteversion=$($pkg_config --modversion $vtepackage) - vte="yes" - elif test "$vte" 
= "yes"; then - feature_not_found "vte" "Install libvte-2.90/2.91 devel" - else - vte="no" - fi -fi - ########################################## # RDMA needs OpenFabrics libraries if test "$rdma" != "no" ; then @@ -3151,192 +2396,77 @@ EOF fi ########################################## -# vde libraries probe -if test "$vde" != "no" ; then - vde_libs="-lvdeplug" - cat > $TMPC << EOF -#include -int main(void) +# plugin linker support probe + +if test "$plugins" != "no"; then + + ######################################### + # See if --dynamic-list is supported by the linker + + ld_dynamic_list="no" + cat > $TMPTXT <=11). The upper bound (15) is meant to simulate -# a minor/major version number. Minor new features will be marked with values up -# to 15, and if something happens that requires a change to the backend we will -# move above 15, submit the backend fixes and modify this two bounds. -if test "$netmap" != "no" ; then - cat > $TMPC << EOF -#include -#include -#include -#include -#if (NETMAP_API < 11) || (NETMAP_API > 15) -#error -#endif -int main(void) { return 0; } -EOF - if compile_prog "" "" ; then - netmap=yes - else - if test "$netmap" = "yes" ; then - feature_not_found "netmap" - fi - netmap=no - fi -fi + cat > $TMPC < +void foo(void); + +void foo(void) +{ + printf("foo\n"); +} -########################################## -# detect CoreAudio -if test "$coreaudio" != "no" ; then - coreaudio_libs="-framework CoreAudio" - cat > $TMPC << EOF -#include int main(void) { - return (int)AudioGetCurrentHostTime(); + foo(); + return 0; } EOF - if compile_prog "" "$coreaudio_libs" ; then - coreaudio=yes - else - coreaudio=no - fi -fi - -########################################## -# Sound support libraries probe - -audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/,/ /g') -for drv in $audio_drv_list; do - case $drv in - alsa | try-alsa) - if $pkg_config alsa --exists; then - alsa_libs=$($pkg_config alsa --libs) - alsa_cflags=$($pkg_config alsa --cflags) - alsa=yes - if test "$drv" = "try-alsa"; then - audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-alsa/alsa/') - fi - else - if test "$drv" = "try-alsa"; then - audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-alsa//') - else - error_exit "$drv check failed" \ - "Make sure to have the $drv libs and headers installed." - fi - fi - ;; - pa | try-pa) - if $pkg_config libpulse --exists; then - libpulse=yes - pulse_libs=$($pkg_config libpulse --libs) - pulse_cflags=$($pkg_config libpulse --cflags) - if test "$drv" = "try-pa"; then - audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-pa/pa/') - fi - else - if test "$drv" = "try-pa"; then - audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-pa//') - else - error_exit "$drv check failed" \ - "Make sure to have the $drv libs and headers installed." 
- fi + if compile_prog "" "-Wl,--dynamic-list=$TMPTXT" ; then + ld_dynamic_list="yes" fi - ;; - sdl) - if test "$sdl" = "no"; then - error_exit "sdl not found or disabled, can not use sdl audio driver" - fi - ;; + ######################################### + # See if -exported_symbols_list is supported by the linker - try-sdl) - if test "$sdl" = "no"; then - audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-sdl//') - else - audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-sdl/sdl/') - fi - ;; + ld_exported_symbols_list="no" + cat > $TMPTXT <" || { - error_exit "Unknown driver '$drv' selected" \ - "Possible drivers are: $audio_possible_drivers" - } - ;; - esac -done +fi ########################################## # glib support probe -glib_req_ver=2.48 +glib_req_ver=2.56 glib_modules=gthread-2.0 if test "$modules" = yes; then glib_modules="$glib_modules gmodule-export-2.0" -fi -if test "$plugins" = yes; then - glib_modules="$glib_modules gmodule-2.0" +elif test "$plugins" = "yes"; then + glib_modules="$glib_modules gmodule-no-export-2.0" fi for i in $glib_modules; do @@ -3361,7 +2491,7 @@ if ! test "$gio" = "no"; then gio_cflags=$($pkg_config --cflags gio-2.0) gio_libs=$($pkg_config --libs gio-2.0) gdbus_codegen=$($pkg_config --variable=gdbus_codegen gio-2.0) - if [ ! -x "$gdbus_codegen" ]; then + if ! has "$gdbus_codegen"; then gdbus_codegen= fi # Check that the libraries actually work -- Ubuntu 18.04 ships @@ -3423,18 +2553,6 @@ if ! compile_prog "$glib_cflags" "$glib_libs" ; then "build target" fi -# Silence clang 3.5.0 warnings about glib attribute __alloc_size__ usage -cat > $TMPC << EOF -#include -int main(void) { return 0; } -EOF -if ! compile_prog "$glib_cflags -Werror" "$glib_libs" ; then - if cc_has_warning_flag "-Wno-unknown-attributes"; then - glib_cflags="-Wno-unknown-attributes $glib_cflags" - CONFIGURE_CFLAGS="-Wno-unknown-attributes $CONFIGURE_CFLAGS" - fi -fi - # Silence clang warnings triggered by glib < 2.57.2 cat > $TMPC << EOF #include @@ -3471,137 +2589,20 @@ if test "$modules" = yes; then fi ########################################## -# pthread probe -PTHREADLIBS_LIST="-pthread -lpthread -lpthreadGC2" - -pthread=no -cat > $TMPC << EOF -#include -static void *f(void *p) { return NULL; } -int main(void) { - pthread_t thread; - pthread_create(&thread, 0, f, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - pthread=yes -else - for pthread_lib in $PTHREADLIBS_LIST; do - if compile_prog "" "$pthread_lib" ; then - pthread=yes - break - fi - done -fi - -if test "$mingw32" != yes && test "$pthread" = no; then - error_exit "pthread check failed" \ - "Make sure to have the pthread libs and headers installed." 
-fi - -# check for pthread_setname_np with thread id -pthread_setname_np_w_tid=no -cat > $TMPC << EOF -#include - -static void *f(void *p) { return NULL; } -int main(void) -{ - pthread_t thread; - pthread_create(&thread, 0, f, 0); - pthread_setname_np(thread, "QEMU"); - return 0; -} -EOF -if compile_prog "" "$pthread_lib" ; then - pthread_setname_np_w_tid=yes -fi - -# check for pthread_setname_np without thread id -pthread_setname_np_wo_tid=no -cat > $TMPC << EOF -#include - -static void *f(void *p) { pthread_setname_np("QEMU"); return NULL; } -int main(void) -{ - pthread_t thread; - pthread_create(&thread, 0, f, 0); - return 0; -} -EOF -if compile_prog "" "$pthread_lib" ; then - pthread_setname_np_wo_tid=yes -fi - -########################################## -# libssh probe -if test "$libssh" != "no" ; then - if $pkg_config --exists libssh; then - libssh_cflags=$($pkg_config libssh --cflags) - libssh_libs=$($pkg_config libssh --libs) - libssh=yes - else - if test "$libssh" = "yes" ; then - error_exit "libssh required for --enable-libssh" +# libssh probe +if test "$libssh" != "no" ; then + if $pkg_config --exists "libssh >= 0.8.7"; then + libssh_cflags=$($pkg_config libssh --cflags) + libssh_libs=$($pkg_config libssh --libs) + libssh=yes + else + if test "$libssh" = "yes" ; then + error_exit "libssh required for --enable-libssh" fi libssh=no fi fi -########################################## -# Check for libssh 0.8 -# This is done like this instead of using the LIBSSH_VERSION_* and -# SSH_VERSION_* macros because some distributions in the past shipped -# snapshots of the future 0.8 from Git, and those snapshots did not -# have updated version numbers (still referring to 0.7.0). - -if test "$libssh" = "yes"; then - cat > $TMPC < -int main(void) { return ssh_get_server_publickey(NULL, NULL); } -EOF - if compile_prog "$libssh_cflags" "$libssh_libs"; then - libssh_cflags="-DHAVE_LIBSSH_0_8 $libssh_cflags" - fi -fi - -########################################## -# linux-aio probe - -if test "$linux_aio" != "no" ; then - cat > $TMPC < -#include -#include -int main(void) { io_setup(0, NULL); io_set_eventfd(NULL, 0); eventfd(0, 0); return 0; } -EOF - if compile_prog "" "-laio" ; then - linux_aio=yes - else - if test "$linux_aio" = "yes" ; then - feature_not_found "linux AIO" "Install libaio devel" - fi - linux_aio=no - fi -fi -########################################## -# linux-io-uring probe - -if test "$linux_io_uring" != "no" ; then - if $pkg_config liburing; then - linux_io_uring_cflags=$($pkg_config --cflags liburing) - linux_io_uring_libs=$($pkg_config --libs liburing) - linux_io_uring=yes - else - if test "$linux_io_uring" = "yes" ; then - feature_not_found "linux io_uring" "Install liburing devel" - fi - linux_io_uring=no - fi -fi - ########################################## # TPM emulation is only on POSIX @@ -3617,40 +2618,18 @@ elif test "$tpm" = "yes"; then fi fi -########################################## -# iovec probe -cat > $TMPC < -#include -#include -int main(void) { return sizeof(struct iovec); } -EOF -iovec=no -if compile_prog "" "" ; then - iovec=yes -fi - ########################################## # fdt probe case "$fdt" in auto | enabled | internal) # Simpler to always update submodule, even if not needed. 
- if test "$git_submodules_action" != "ignore"; then - git_submodules="${git_submodules} dtc" - fi + git_submodules="${git_submodules} dtc" ;; esac ########################################## -# opengl probe (for sdl2, gtk, milkymist-tmu2) - -gbm="no" -if $pkg_config gbm; then - gbm_cflags="$($pkg_config --cflags gbm)" - gbm_libs="$($pkg_config --libs gbm)" - gbm="yes" -fi +# opengl probe (for sdl2, gtk) if test "$opengl" != "no" ; then epoxy=no @@ -3679,566 +2658,47 @@ EOF fi ########################################## -# libxml2 probe -if test "$libxml2" != "no" ; then - if $pkg_config --exists libxml-2.0; then - libxml2="yes" - libxml2_cflags=$($pkg_config --cflags libxml-2.0) - libxml2_libs=$($pkg_config --libs libxml-2.0) - else - if test "$libxml2" = "yes"; then - feature_not_found "libxml2" "Install libxml2 devel" - fi - libxml2="no" - fi -fi - -# Check for inotify functions when we are building linux-user -# emulator. This is done because older glibc versions don't -# have syscall stubs for these implemented. In that case we -# don't provide them even if kernel supports them. -# -inotify=no -cat > $TMPC << EOF -#include - -int -main(void) -{ - /* try to start inotify */ - return inotify_init(); -} -EOF -if compile_prog "" "" ; then - inotify=yes -fi - -inotify1=no -cat > $TMPC << EOF -#include - -int -main(void) -{ - /* try to start inotify */ - return inotify_init1(0); -} -EOF -if compile_prog "" "" ; then - inotify1=yes -fi - -# check if pipe2 is there -pipe2=no -cat > $TMPC << EOF -#include -#include - -int main(void) -{ - int pipefd[2]; - return pipe2(pipefd, O_CLOEXEC); -} -EOF -if compile_prog "" "" ; then - pipe2=yes -fi - -# check if accept4 is there -accept4=no -cat > $TMPC << EOF -#include -#include - -int main(void) -{ - accept4(0, NULL, NULL, SOCK_CLOEXEC); - return 0; -} -EOF -if compile_prog "" "" ; then - accept4=yes -fi - -# check if tee/splice is there. vmsplice was added same time. 
-splice=no -cat > $TMPC << EOF -#include -#include -#include - -int main(void) -{ - int len, fd = 0; - len = tee(STDIN_FILENO, STDOUT_FILENO, INT_MAX, SPLICE_F_NONBLOCK); - splice(STDIN_FILENO, NULL, fd, NULL, len, SPLICE_F_MOVE); - return 0; -} -EOF -if compile_prog "" "" ; then - splice=yes -fi - -########################################## -# libnuma probe - -if test "$numa" != "no" ; then - cat > $TMPC << EOF -#include -int main(void) { return numa_available(); } -EOF - - if compile_prog "" "-lnuma" ; then - numa=yes - numa_libs="-lnuma" - else - if test "$numa" = "yes" ; then - feature_not_found "numa" "install numactl devel" - fi - numa=no - fi -fi - -malloc=system -if test "$tcmalloc" = "yes" && test "$jemalloc" = "yes" ; then - echo "ERROR: tcmalloc && jemalloc can't be used at the same time" - exit 1 -elif test "$tcmalloc" = "yes" ; then - malloc=tcmalloc -elif test "$jemalloc" = "yes" ; then - malloc=jemalloc -fi - -########################################## -# signalfd probe -signalfd="no" -cat > $TMPC << EOF -#include -#include -#include -int main(void) { return syscall(SYS_signalfd, -1, NULL, _NSIG / 8); } -EOF - -if compile_prog "" "" ; then - signalfd=yes -fi - -# check if optreset global is declared by -optreset="no" -cat > $TMPC << EOF -#include -int main(void) { return optreset; } -EOF - -if compile_prog "" "" ; then - optreset=yes -fi - -# check if eventfd is supported -eventfd=no -cat > $TMPC << EOF -#include - -int main(void) -{ - return eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); -} -EOF -if compile_prog "" "" ; then - eventfd=yes -fi - -# check if memfd is supported -memfd=no -cat > $TMPC << EOF -#include - -int main(void) -{ - return memfd_create("foo", MFD_ALLOW_SEALING); -} -EOF -if compile_prog "" "" ; then - memfd=yes -fi - -# check for usbfs -have_usbfs=no -if test "$linux_user" = "yes"; then - cat > $TMPC << EOF -#include - -#ifndef USBDEVFS_GET_CAPABILITIES -#error "USBDEVFS_GET_CAPABILITIES undefined" -#endif - -#ifndef USBDEVFS_DISCONNECT_CLAIM -#error "USBDEVFS_DISCONNECT_CLAIM undefined" -#endif - -int main(void) -{ - return 0; -} -EOF - if compile_prog "" ""; then - have_usbfs=yes - fi -fi - -# check for fallocate -fallocate=no -cat > $TMPC << EOF -#include - -int main(void) -{ - fallocate(0, 0, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - fallocate=yes -fi - -# check for fallocate hole punching -fallocate_punch_hole=no -cat > $TMPC << EOF -#include -#include - -int main(void) -{ - fallocate(0, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - fallocate_punch_hole=yes -fi - -# check that fallocate supports range zeroing inside the file -fallocate_zero_range=no -cat > $TMPC << EOF -#include -#include - -int main(void) -{ - fallocate(0, FALLOC_FL_ZERO_RANGE, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - fallocate_zero_range=yes -fi - -# check for posix_fallocate -posix_fallocate=no -cat > $TMPC << EOF -#include - -int main(void) -{ - posix_fallocate(0, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - posix_fallocate=yes -fi - -# check for sync_file_range -sync_file_range=no -cat > $TMPC << EOF -#include - -int main(void) -{ - sync_file_range(0, 0, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - sync_file_range=yes -fi - -# check for linux/fiemap.h and FS_IOC_FIEMAP -fiemap=no -cat > $TMPC << EOF -#include -#include -#include - -int main(void) -{ - ioctl(0, FS_IOC_FIEMAP, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - fiemap=yes -fi - -# check for dup3 
-dup3=no -cat > $TMPC << EOF -#include - -int main(void) -{ - dup3(0, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - dup3=yes -fi - -# check for ppoll support -ppoll=no -cat > $TMPC << EOF -#include - -int main(void) -{ - struct pollfd pfd = { .fd = 0, .events = 0, .revents = 0 }; - ppoll(&pfd, 1, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - ppoll=yes -fi - -# check for prctl(PR_SET_TIMERSLACK , ... ) support -prctl_pr_set_timerslack=no -cat > $TMPC << EOF -#include - -int main(void) -{ - prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - prctl_pr_set_timerslack=yes -fi - -# check for epoll support -epoll=no -cat > $TMPC << EOF -#include - -int main(void) -{ - epoll_create(0); - return 0; -} -EOF -if compile_prog "" "" ; then - epoll=yes -fi - -# epoll_create1 is a later addition -# so we must check separately for its presence -epoll_create1=no -cat > $TMPC << EOF -#include - -int main(void) -{ - /* Note that we use epoll_create1 as a value, not as - * a function being called. This is necessary so that on - * old SPARC glibc versions where the function was present in - * the library but not declared in the header file we will - * fail the configure check. (Otherwise we will get a compiler - * warning but not an error, and will proceed to fail the - * qemu compile where we compile with -Werror.) - */ - return (int)(uintptr_t)&epoll_create1; -} -EOF -if compile_prog "" "" ; then - epoll_create1=yes -fi - -# check for sendfile support -sendfile=no -cat > $TMPC << EOF -#include - -int main(void) -{ - return sendfile(0, 0, 0, 0); -} -EOF -if compile_prog "" "" ; then - sendfile=yes -fi - -# check for timerfd support (glibc 2.8 and newer) -timerfd=no -cat > $TMPC << EOF -#include - -int main(void) -{ - return(timerfd_create(CLOCK_REALTIME, 0)); -} -EOF -if compile_prog "" "" ; then - timerfd=yes -fi - -# check for setns and unshare support -setns=no -cat > $TMPC << EOF -#include - -int main(void) -{ - int ret; - ret = setns(0, 0); - ret = unshare(0); - return ret; -} -EOF -if compile_prog "" "" ; then - setns=yes -fi - -# clock_adjtime probe -clock_adjtime=no -cat > $TMPC < -#include - -int main(void) -{ - return clock_adjtime(0, 0); -} -EOF -clock_adjtime=no -if compile_prog "" "" ; then - clock_adjtime=yes -fi - -# syncfs probe -syncfs=no -cat > $TMPC < - -int main(void) -{ - return syncfs(0); -} -EOF -syncfs=no -if compile_prog "" "" ; then - syncfs=yes -fi - -# Search for bswap_32 function -byteswap_h=no -cat > $TMPC << EOF -#include -int main(void) { return bswap_32(0); } -EOF -if compile_prog "" "" ; then - byteswap_h=yes -fi - -# Search for bswap32 function -bswap_h=no -cat > $TMPC << EOF -#include -#include -#include -int main(void) { return bswap32(0); } -EOF -if compile_prog "" "" ; then - bswap_h=yes -fi - -# Check whether we have openpty() in either libc or libutil -cat > $TMPC << EOF -extern int openpty(int *am, int *as, char *name, void *termp, void *winp); -int main(void) { return openpty(0, 0, 0, 0, 0); } -EOF - -have_openpty="no" -if compile_prog "" "" ; then - have_openpty="yes" -else - if compile_prog "" "-lutil" ; then - have_openpty="yes" - fi -fi - -########################################## -# spice probe -if test "$spice" != "no" ; then +# libnuma probe + +if test "$numa" != "no" ; then cat > $TMPC << EOF -#include -int main(void) { spice_server_new(); return 0; } +#include +int main(void) { return numa_available(); } EOF - spice_cflags=$($pkg_config --cflags spice-protocol spice-server 2>/dev/null) - 
spice_libs=$($pkg_config --libs spice-protocol spice-server 2>/dev/null) - if $pkg_config --atleast-version=0.12.5 spice-server && \ - $pkg_config --atleast-version=0.12.3 spice-protocol && \ - compile_prog "$spice_cflags" "$spice_libs" ; then - spice="yes" + + if compile_prog "" "-lnuma" ; then + numa=yes + numa_libs="-lnuma" else - if test "$spice" = "yes" ; then - feature_not_found "spice" \ - "Install spice-server(>=0.12.5) and spice-protocol(>=0.12.3) devel" + if test "$numa" = "yes" ; then + feature_not_found "numa" "install numactl devel" fi - spice="no" + numa=no fi fi -# check for smartcard support -if test "$smartcard" != "no"; then - if $pkg_config --atleast-version=2.5.1 libcacard; then - libcacard_cflags=$($pkg_config --cflags libcacard) - libcacard_libs=$($pkg_config --libs libcacard) - smartcard="yes" - else - if test "$smartcard" = "yes"; then - feature_not_found "smartcard" "Install libcacard devel" - fi - smartcard="no" - fi -fi +# check for usbfs +have_usbfs=no +if test "$linux_user" = "yes"; then + cat > $TMPC << EOF +#include -# check for libusb -if test "$libusb" != "no" ; then - if $pkg_config --atleast-version=1.0.13 libusb-1.0; then - libusb="yes" - libusb_cflags=$($pkg_config --cflags libusb-1.0) - libusb_libs=$($pkg_config --libs libusb-1.0) - else - if test "$libusb" = "yes"; then - feature_not_found "libusb" "Install libusb devel >= 1.0.13" - fi - libusb="no" - fi -fi +#ifndef USBDEVFS_GET_CAPABILITIES +#error "USBDEVFS_GET_CAPABILITIES undefined" +#endif -# check for usbredirparser for usb network redirection support -if test "$usb_redir" != "no" ; then - if $pkg_config --atleast-version=0.6 libusbredirparser-0.5; then - usb_redir="yes" - usb_redir_cflags=$($pkg_config --cflags libusbredirparser-0.5) - usb_redir_libs=$($pkg_config --libs libusbredirparser-0.5) - else - if test "$usb_redir" = "yes"; then - feature_not_found "usb-redir" "Install usbredir devel" - fi - usb_redir="no" - fi +#ifndef USBDEVFS_DISCONNECT_CLAIM +#error "USBDEVFS_DISCONNECT_CLAIM undefined" +#endif + +int main(void) +{ + return 0; +} +EOF + if compile_prog "" ""; then + have_usbfs=yes + fi fi ########################################## @@ -4319,203 +2779,16 @@ EOF fi fi -########################################## -# virgl renderer probe - -if test "$virglrenderer" != "no" ; then - cat > $TMPC << EOF -#include -int main(void) { virgl_renderer_poll(); return 0; } -EOF - virgl_cflags=$($pkg_config --cflags virglrenderer 2>/dev/null) - virgl_libs=$($pkg_config --libs virglrenderer 2>/dev/null) - virgl_version=$($pkg_config --modversion virglrenderer 2>/dev/null) - if $pkg_config virglrenderer >/dev/null 2>&1 && \ - compile_prog "$virgl_cflags" "$virgl_libs" ; then - virglrenderer="yes" - else - if test "$virglrenderer" = "yes" ; then - feature_not_found "virglrenderer" - fi - virglrenderer="no" - fi -fi - ########################################## # capstone case "$capstone" in auto | enabled | internal) # Simpler to always update submodule, even if not needed. 
- if test "$git_submodules_action" != "ignore"; then - git_submodules="${git_submodules} capstone" - fi + git_submodules="${git_submodules} capstone" ;; esac -########################################## -# check if we have fdatasync - -fdatasync=no -cat > $TMPC << EOF -#include -int main(void) { -#if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO > 0 -return fdatasync(0); -#else -#error Not supported -#endif -} -EOF -if compile_prog "" "" ; then - fdatasync=yes -fi - -########################################## -# check if we have madvise - -madvise=no -cat > $TMPC << EOF -#include -#include -#include -int main(void) { return madvise(NULL, 0, MADV_DONTNEED); } -EOF -if compile_prog "" "" ; then - madvise=yes -fi - -########################################## -# check if we have posix_madvise - -posix_madvise=no -cat > $TMPC << EOF -#include -#include -int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); } -EOF -if compile_prog "" "" ; then - posix_madvise=yes -fi - -########################################## -# check if we have posix_memalign() - -posix_memalign=no -cat > $TMPC << EOF -#include -int main(void) { - void *p; - return posix_memalign(&p, 8, 8); -} -EOF -if compile_prog "" "" ; then - posix_memalign=yes -fi - -########################################## -# check if we have posix_syslog - -posix_syslog=no -cat > $TMPC << EOF -#include -int main(void) { openlog("qemu", LOG_PID, LOG_DAEMON); syslog(LOG_INFO, "configure"); return 0; } -EOF -if compile_prog "" "" ; then - posix_syslog=yes -fi - -########################################## -# check if we have sem_timedwait - -sem_timedwait=no -cat > $TMPC << EOF -#include -int main(void) { sem_t s; struct timespec t = {0}; return sem_timedwait(&s, &t); } -EOF -if compile_prog "" "" ; then - sem_timedwait=yes -fi - -########################################## -# check if we have strchrnul - -strchrnul=no -cat > $TMPC << EOF -#include -int main(void); -// Use a haystack that the compiler shouldn't be able to constant fold -char *haystack = (char*)&main; -int main(void) { return strchrnul(haystack, 'x') != &haystack[6]; } -EOF -if compile_prog "" "" ; then - strchrnul=yes -fi - -######################################### -# check if we have st_atim - -st_atim=no -cat > $TMPC << EOF -#include -#include -int main(void) { return offsetof(struct stat, st_atim); } -EOF -if compile_prog "" "" ; then - st_atim=yes -fi - -########################################## -# check if trace backend exists - -$python "$source_path/scripts/tracetool.py" "--backends=$trace_backends" --check-backends > /dev/null 2> /dev/null -if test "$?" -ne 0 ; then - error_exit "invalid trace backends" \ - "Please choose supported trace backends." -fi - -########################################## -# For 'ust' backend, test if ust headers are present -if have_backend "ust"; then - cat > $TMPC << EOF -#include -int main(void) { return 0; } -EOF - if compile_prog "" "-Wl,--no-as-needed -ldl" ; then - if $pkg_config lttng-ust --exists; then - lttng_ust_libs=$($pkg_config --libs lttng-ust) - else - lttng_ust_libs="-llttng-ust -ldl" - fi - if $pkg_config liburcu-bp --exists; then - urcu_bp_libs=$($pkg_config --libs liburcu-bp) - else - urcu_bp_libs="-lurcu-bp" - fi - else - error_exit "Trace backend 'ust' missing lttng-ust header files" - fi -fi - -########################################## -# For 'dtrace' backend, test if 'dtrace' command is present -if have_backend "dtrace"; then - if ! 
has 'dtrace' ; then - error_exit "dtrace command is not found in PATH $PATH" - fi - trace_backend_stap="no" - if has 'stap' ; then - trace_backend_stap="yes" - - # Workaround to avoid dtrace(1) producing a file with 'hidden' symbol - # visibility. Define STAP_SDT_V2 to produce 'default' symbol visibility - # instead. QEMU --enable-modules depends on this because the SystemTap - # semaphores are linked into the main binary and not the module's shared - # object. - QEMU_CFLAGS="$QEMU_CFLAGS -DSTAP_SDT_V2" - fi -fi - ########################################## # check and set a backend for coroutine @@ -4644,65 +2917,6 @@ else # "$safe_stack" = "" fi fi -########################################## -# check if we have open_by_handle_at - -open_by_handle_at=no -cat > $TMPC << EOF -#include -#if !defined(AT_EMPTY_PATH) -# error missing definition -#else -int main(void) { struct file_handle fh; return open_by_handle_at(0, &fh, 0); } -#endif -EOF -if compile_prog "" "" ; then - open_by_handle_at=yes -fi - -######################################## -# check if we have linux/magic.h - -linux_magic_h=no -cat > $TMPC << EOF -#include -int main(void) { - return 0; -} -EOF -if compile_prog "" "" ; then - linux_magic_h=yes -fi - -######################################## -# check if we have valgrind/valgrind.h - -valgrind_h=no -cat > $TMPC << EOF -#include -int main(void) { - return 0; -} -EOF -if compile_prog "" "" ; then - valgrind_h=yes -fi - -######################################## -# check if environ is declared - -has_environ=no -cat > $TMPC << EOF -#include -int main(void) { - environ = 0; - return 0; -} -EOF -if compile_prog "" "" ; then - has_environ=yes -fi - ######################################## # check if cpuid.h is usable. @@ -4747,7 +2961,7 @@ static int bar(void *a) { int main(int argc, char *argv[]) { return bar(argv[0]); } #pragma clang attribute pop EOF - if compile_object "" ; then + if compile_object "-Werror" ; then avx2_opt="yes" else avx2_opt="no" @@ -4777,7 +2991,7 @@ int main(int argc, char *argv[]) return bar(argv[0]); } EOF - if ! compile_object "" ; then + if ! compile_object "-Werror" ; then avx512f_opt="no" fi else @@ -4821,118 +3035,20 @@ EOF atomic128=yes fi fi - -cmpxchg128=no -if test "$int128" = yes && test "$atomic128" = no; then - cat > $TMPC << EOF -int main(void) -{ - unsigned __int128 x = 0, y = 0; - __sync_val_compare_and_swap_16(&x, y, x); - return 0; -} -EOF - if compile_prog "" "" ; then - cmpxchg128=yes - fi -fi - -######################################### -# See if 64-bit atomic operations are supported. -# Note that without __atomic builtins, we can only -# assume atomic loads/stores max at pointer size. - -cat > $TMPC << EOF -#include -int main(void) -{ - uint64_t x = 0, y = 0; -#ifdef __ATOMIC_RELAXED - y = __atomic_load_n(&x, __ATOMIC_RELAXED); - __atomic_store_n(&x, y, __ATOMIC_RELAXED); - __atomic_compare_exchange_n(&x, &y, x, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); - __atomic_exchange_n(&x, y, __ATOMIC_RELAXED); - __atomic_fetch_add(&x, y, __ATOMIC_RELAXED); -#else - typedef char is_host64[sizeof(void *) >= sizeof(uint64_t) ? 
1 : -1]; - __sync_lock_test_and_set(&x, y); - __sync_val_compare_and_swap(&x, y, 0); - __sync_fetch_and_add(&x, y); -#endif - return 0; -} -EOF -if compile_prog "" "" ; then - atomic64=yes -fi - -######################################### -# See if --dynamic-list is supported by the linker -ld_dynamic_list="no" -if test "$static" = "no" ; then - cat > $TMPTXT < $TMPC < -void foo(void); - -void foo(void) -{ - printf("foo\n"); -} - -int main(void) -{ - foo(); - return 0; -} -EOF - - if compile_prog "" "-Wl,--dynamic-list=$TMPTXT" ; then - ld_dynamic_list="yes" - fi -fi - -######################################### -# See if -exported_symbols_list is supported by the linker - -ld_exported_symbols_list="no" -if test "$static" = "no" ; then - cat > $TMPTXT < $TMPC << EOF -#include -int main(void) { - return getauxval(AT_HWCAP) == 0; + +cmpxchg128=no +if test "$int128" = yes && test "$atomic128" = no; then + cat > $TMPC << EOF +int main(void) +{ + unsigned __int128 x = 0, y = 0; + __sync_val_compare_and_swap_16(&x, y, x); + return 0; } EOF -if compile_prog "" "" ; then - getauxval=yes + if compile_prog "" "" ; then + cmpxchg128=yes + fi fi ######################################## @@ -4977,35 +3093,6 @@ if test "$fortify_source" != "no"; then fi fi -############################################### -# Check if copy_file_range is provided by glibc -have_copy_file_range=no -cat > $TMPC << EOF -#include -int main(void) { - copy_file_range(0, NULL, 0, NULL, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - have_copy_file_range=yes -fi - -########################################## -# check if struct fsxattr is available via linux/fs.h - -have_fsxattr=no -cat > $TMPC << EOF -#include -struct fsxattr foo; -int main(void) { - return 0; -} -EOF -if compile_prog "" "" ; then - have_fsxattr=yes -fi - ########################################## # check for usable membarrier system call if test "$membarrier" = "yes"; then @@ -5037,46 +3124,6 @@ else membarrier=no fi -########################################## -# check if rtnetlink.h exists and is useful -have_rtnetlink=no -cat > $TMPC << EOF -#include -int main(void) { - return IFLA_PROTO_DOWN; -} -EOF -if compile_prog "" "" ; then - have_rtnetlink=yes -fi - -########################################## -# check for usable AF_VSOCK environment -have_af_vsock=no -cat > $TMPC << EOF -#include -#include -#include -#if !defined(AF_VSOCK) -# error missing AF_VSOCK flag -#endif -#include -int main(void) { - int sock, ret; - struct sockaddr_vm svm; - socklen_t len = sizeof(svm); - sock = socket(AF_VSOCK, SOCK_STREAM, 0); - ret = getpeername(sock, (struct sockaddr *)&svm, &len); - if ((ret == -1) && (errno == ENOTCONN)) { - return 0; - } - return -1; -} -EOF -if compile_prog "" "" ; then - have_af_vsock=yes -fi - ########################################## # check for usable AF_ALG environment have_afalg=no @@ -5103,63 +3150,6 @@ then fi -################################################# -# check for sysmacros.h - -have_sysmacros=no -cat > $TMPC << EOF -#include -int main(void) { - return makedev(0, 0); -} -EOF -if compile_prog "" "" ; then - have_sysmacros=yes -fi - -########################################## -# check for _Static_assert() - -have_static_assert=no -cat > $TMPC << EOF -_Static_assert(1, "success"); -int main(void) { - return 0; -} -EOF -if compile_prog "" "" ; then - have_static_assert=yes -fi - -########################################## -# check for utmpx.h, it is missing e.g. 
on OpenBSD - -have_utmpx=no -cat > $TMPC << EOF -#include -struct utmpx user_info; -int main(void) { - return 0; -} -EOF -if compile_prog "" "" ; then - have_utmpx=yes -fi - -########################################## -# check for getrandom() - -have_getrandom=no -cat > $TMPC << EOF -#include -int main(void) { - return getrandom(0, 0, GRND_NONBLOCK); -} -EOF -if compile_prog "" "" ; then - have_getrandom=yes -fi - ########################################## # checks for sanitizers @@ -5207,18 +3197,6 @@ EOF fi fi -########################################## -# checks for fuzzer -if test "$fuzzing" = "yes" && test -z "${LIB_FUZZING_ENGINE+xxx}"; then - write_c_fuzzer_skeleton - if compile_prog "$CPU_CFLAGS -Werror -fsanitize=fuzzer" ""; then - have_fuzzer=yes - else - error_exit "Your compiler doesn't support -fsanitize=fuzzer" - exit 1 - fi -fi - # Thread sanitizer is, for now, much noisier than the other sanitizers; # keep it separate until that is not the case. if test "$tsan" = "yes" && test "$sanitizers" = "yes"; then @@ -5243,50 +3221,29 @@ EOF fi fi -########################################## -# check for libpmem - -if test "$libpmem" != "no"; then - if $pkg_config --exists "libpmem"; then - libpmem="yes" - libpmem_libs=$($pkg_config --libs libpmem) - libpmem_cflags=$($pkg_config --cflags libpmem) - else - if test "$libpmem" = "yes" ; then - feature_not_found "libpmem" "Install nvml or pmdk" - fi - libpmem="no" - fi -fi - -########################################## -# check for libdaxctl - -if test "$libdaxctl" != "no"; then - if $pkg_config --atleast-version=57 "libdaxctl"; then - libdaxctl="yes" - libdaxctl_libs=$($pkg_config --libs libdaxctl) - libdaxctl_cflags=$($pkg_config --cflags libdaxctl) - else - if test "$libdaxctl" = "yes" ; then - feature_not_found "libdaxctl" "Install libdaxctl" - fi - libdaxctl="no" - fi -fi - ########################################## # check for slirp case "$slirp" in auto | enabled | internal) # Simpler to always update submodule, even if not needed. - if test "$git_submodules_action" != "ignore"; then - git_submodules="${git_submodules} slirp" - fi + git_submodules="${git_submodules} slirp" ;; esac +# Check for slirp smbd dupport +: ${smbd=${SMBD-/usr/sbin/smbd}} +if test "$slirp_smbd" != "no" ; then + if test "$mingw32" = "yes" ; then + if test "$slirp_smbd" = "yes" ; then + error_exit "Host smbd not supported on this platform." + fi + slirp_smbd=no + else + slirp_smbd=yes + fi +fi + ########################################## # check for usable __NR_keyctl syscall @@ -5398,11 +3355,6 @@ if test "$mingw32" = "yes" ; then done fi -# We can only support ivshmem if we have eventfd -if [ "$eventfd" = "yes" ]; then - ivshmem=yes -fi - # Probe for guest agent support/options if [ "$guest_agent" != "no" ]; then @@ -5449,15 +3401,20 @@ if { test "$cpu" = "i386" || test "$cpu" = "x86_64"; } && \ fi # Only build s390-ccw bios if we're on s390x and the compiler has -march=z900 +# or -march=z10 (which is the lowest architecture level that Clang supports) if test "$cpu" = "s390x" ; then write_c_skeleton - if compile_prog "-march=z900" ""; then + compile_prog "-march=z900" "" + has_z900=$? + if [ $has_z900 = 0 ] || compile_object "-march=z10 -msoft-float -Werror"; then + if [ $has_z900 != 0 ]; then + echo "WARNING: Your compiler does not support the z900!" + echo " The s390-ccw bios will only work with guest CPUs >= z10." 
+ fi roms="$roms s390-ccw" # SLOF is required for building the s390-ccw firmware on s390x, # since it is using the libnet code from SLOF for network booting. - if test "$git_submodules_action" != "ignore"; then - git_submodules="${git_submodules} roms/SLOF" - fi + git_submodules="${git_submodules} roms/SLOF" fi fi @@ -5480,7 +3437,7 @@ EOF update_cxxflags - if do_cxx $CXXFLAGS $CONFIGURE_CXXFLAGS $QEMU_CXXFLAGS -o $TMPE $TMPCXX $TMPO $QEMU_LDFLAGS; then + if do_cxx $CXXFLAGS $EXTRA_CXXFLAGS $CONFIGURE_CXXFLAGS $QEMU_CXXFLAGS -o $TMPE $TMPCXX $TMPO $QEMU_LDFLAGS; then # C++ compiler $cxx works ok with C compiler $cc : else @@ -5521,9 +3478,6 @@ fi if test "$strip_opt" = "yes" ; then echo "STRIP=${strip}" >> $config_host_mak fi -if test "$bigendian" = "yes" ; then - echo "HOST_WORDS_BIGENDIAN=y" >> $config_host_mak -fi if test "$mingw32" = "yes" ; then echo "CONFIG_WIN32=y" >> $config_host_mak if test "$guest_agent_with_vss" = "yes" ; then @@ -5568,47 +3522,18 @@ fi if test "$guest_agent" = "yes" ; then echo "CONFIG_GUEST_AGENT=y" >> $config_host_mak fi -echo "CONFIG_SMBD_COMMAND=\"$smbd\"" >> $config_host_mak -if test "$vde" = "yes" ; then - echo "CONFIG_VDE=y" >> $config_host_mak - echo "VDE_LIBS=$vde_libs" >> $config_host_mak -fi -if test "$netmap" = "yes" ; then - echo "CONFIG_NETMAP=y" >> $config_host_mak -fi -if test "$l2tpv3" = "yes" ; then - echo "CONFIG_L2TPV3=y" >> $config_host_mak +if test "$slirp_smbd" = "yes" ; then + echo "CONFIG_SLIRP_SMBD=y" >> $config_host_mak + echo "CONFIG_SMBD_COMMAND=\"$smbd\"" >> $config_host_mak fi if test "$gprof" = "yes" ; then echo "CONFIG_GPROF=y" >> $config_host_mak fi -echo "CONFIG_AUDIO_DRIVERS=$audio_drv_list" >> $config_host_mak -for drv in $audio_drv_list; do - def=CONFIG_AUDIO_$(echo $drv | LC_ALL=C tr '[a-z]' '[A-Z]') - echo "$def=y" >> $config_host_mak -done -if test "$alsa" = "yes" ; then - echo "CONFIG_ALSA=y" >> $config_host_mak -fi -echo "ALSA_LIBS=$alsa_libs" >> $config_host_mak -echo "ALSA_CFLAGS=$alsa_cflags" >> $config_host_mak -if test "$libpulse" = "yes" ; then - echo "CONFIG_LIBPULSE=y" >> $config_host_mak -fi -echo "PULSE_LIBS=$pulse_libs" >> $config_host_mak -echo "PULSE_CFLAGS=$pulse_cflags" >> $config_host_mak -echo "COREAUDIO_LIBS=$coreaudio_libs" >> $config_host_mak -echo "DSOUND_LIBS=$dsound_libs" >> $config_host_mak -echo "OSS_LIBS=$oss_libs" >> $config_host_mak -if test "$libjack" = "yes" ; then - echo "CONFIG_LIBJACK=y" >> $config_host_mak -fi -echo "JACK_LIBS=$jack_libs" >> $config_host_mak -if test "$audio_win_int" = "yes" ; then - echo "CONFIG_AUDIO_WIN_INT=y" >> $config_host_mak -fi echo "CONFIG_BDRV_RW_WHITELIST=$block_drv_rw_whitelist" >> $config_host_mak echo "CONFIG_BDRV_RO_WHITELIST=$block_drv_ro_whitelist" >> $config_host_mak +if test "$block_drv_whitelist_tools" = "yes" ; then + echo "CONFIG_BDRV_WHITELIST_TOOLS=y" >> $config_host_mak +fi if test "$xfs" = "yes" ; then echo "CONFIG_XFS=y" >> $config_host_mak fi @@ -5625,169 +3550,25 @@ fi if test "$module_upgrades" = "yes"; then echo "CONFIG_MODULE_UPGRADES=y" >> $config_host_mak fi -if test "$pipe2" = "yes" ; then - echo "CONFIG_PIPE2=y" >> $config_host_mak -fi -if test "$accept4" = "yes" ; then - echo "CONFIG_ACCEPT4=y" >> $config_host_mak -fi -if test "$splice" = "yes" ; then - echo "CONFIG_SPLICE=y" >> $config_host_mak -fi -if test "$eventfd" = "yes" ; then - echo "CONFIG_EVENTFD=y" >> $config_host_mak -fi -if test "$memfd" = "yes" ; then - echo "CONFIG_MEMFD=y" >> $config_host_mak -fi if test "$have_usbfs" = "yes" ; then echo "CONFIG_USBFS=y" >> 
$config_host_mak fi -if test "$fallocate" = "yes" ; then - echo "CONFIG_FALLOCATE=y" >> $config_host_mak -fi -if test "$fallocate_punch_hole" = "yes" ; then - echo "CONFIG_FALLOCATE_PUNCH_HOLE=y" >> $config_host_mak -fi -if test "$fallocate_zero_range" = "yes" ; then - echo "CONFIG_FALLOCATE_ZERO_RANGE=y" >> $config_host_mak -fi -if test "$posix_fallocate" = "yes" ; then - echo "CONFIG_POSIX_FALLOCATE=y" >> $config_host_mak -fi -if test "$sync_file_range" = "yes" ; then - echo "CONFIG_SYNC_FILE_RANGE=y" >> $config_host_mak -fi -if test "$fiemap" = "yes" ; then - echo "CONFIG_FIEMAP=y" >> $config_host_mak -fi -if test "$dup3" = "yes" ; then - echo "CONFIG_DUP3=y" >> $config_host_mak -fi -if test "$ppoll" = "yes" ; then - echo "CONFIG_PPOLL=y" >> $config_host_mak -fi -if test "$prctl_pr_set_timerslack" = "yes" ; then - echo "CONFIG_PRCTL_PR_SET_TIMERSLACK=y" >> $config_host_mak -fi -if test "$epoll" = "yes" ; then - echo "CONFIG_EPOLL=y" >> $config_host_mak -fi -if test "$epoll_create1" = "yes" ; then - echo "CONFIG_EPOLL_CREATE1=y" >> $config_host_mak -fi -if test "$sendfile" = "yes" ; then - echo "CONFIG_SENDFILE=y" >> $config_host_mak -fi -if test "$timerfd" = "yes" ; then - echo "CONFIG_TIMERFD=y" >> $config_host_mak -fi -if test "$setns" = "yes" ; then - echo "CONFIG_SETNS=y" >> $config_host_mak -fi -if test "$clock_adjtime" = "yes" ; then - echo "CONFIG_CLOCK_ADJTIME=y" >> $config_host_mak -fi -if test "$syncfs" = "yes" ; then - echo "CONFIG_SYNCFS=y" >> $config_host_mak -fi -if test "$inotify" = "yes" ; then - echo "CONFIG_INOTIFY=y" >> $config_host_mak -fi -if test "$inotify1" = "yes" ; then - echo "CONFIG_INOTIFY1=y" >> $config_host_mak -fi -if test "$sem_timedwait" = "yes" ; then - echo "CONFIG_SEM_TIMEDWAIT=y" >> $config_host_mak -fi -if test "$strchrnul" = "yes" ; then - echo "HAVE_STRCHRNUL=y" >> $config_host_mak -fi -if test "$st_atim" = "yes" ; then - echo "HAVE_STRUCT_STAT_ST_ATIM=y" >> $config_host_mak -fi -if test "$byteswap_h" = "yes" ; then - echo "CONFIG_BYTESWAP_H=y" >> $config_host_mak -fi -if test "$bswap_h" = "yes" ; then - echo "CONFIG_MACHINE_BSWAP_H=y" >> $config_host_mak -fi if test "$gio" = "yes" ; then echo "CONFIG_GIO=y" >> $config_host_mak echo "GIO_CFLAGS=$gio_cflags" >> $config_host_mak echo "GIO_LIBS=$gio_libs" >> $config_host_mak +fi +if test "$gdbus_codegen" != "" ; then echo "GDBUS_CODEGEN=$gdbus_codegen" >> $config_host_mak fi echo "CONFIG_TLS_PRIORITY=\"$tls_priority\"" >> $config_host_mak -if test "$gnutls" = "yes" ; then - echo "CONFIG_GNUTLS=y" >> $config_host_mak - echo "GNUTLS_CFLAGS=$gnutls_cflags" >> $config_host_mak - echo "GNUTLS_LIBS=$gnutls_libs" >> $config_host_mak -fi -if test "$gcrypt" = "yes" ; then - echo "CONFIG_GCRYPT=y" >> $config_host_mak - if test "$gcrypt_hmac" = "yes" ; then - echo "CONFIG_GCRYPT_HMAC=y" >> $config_host_mak - fi - echo "GCRYPT_CFLAGS=$gcrypt_cflags" >> $config_host_mak - echo "GCRYPT_LIBS=$gcrypt_libs" >> $config_host_mak -fi -if test "$nettle" = "yes" ; then - echo "CONFIG_NETTLE=y" >> $config_host_mak - echo "CONFIG_NETTLE_VERSION_MAJOR=${nettle_version%%.*}" >> $config_host_mak - echo "NETTLE_CFLAGS=$nettle_cflags" >> $config_host_mak - echo "NETTLE_LIBS=$nettle_libs" >> $config_host_mak -fi -if test "$qemu_private_xts" = "yes" ; then - echo "CONFIG_QEMU_PRIVATE_XTS=y" >> $config_host_mak -fi -if test "$tasn1" = "yes" ; then - echo "CONFIG_TASN1=y" >> $config_host_mak -fi -if test "$auth_pam" = "yes" ; then - echo "CONFIG_AUTH_PAM=y" >> $config_host_mak -fi -if test "$have_broken_size_max" = "yes" ; then - 
echo "HAVE_BROKEN_SIZE_MAX=y" >> $config_host_mak -fi -if test "$have_openpty" = "yes" ; then - echo "HAVE_OPENPTY=y" >> $config_host_mak -fi -# Work around a system header bug with some kernel/XFS header -# versions where they both try to define 'struct fsxattr': -# xfs headers will not try to redefine structs from linux headers -# if this macro is set. -if test "$have_fsxattr" = "yes" ; then - echo "HAVE_FSXATTR=y" >> $config_host_mak -fi -if test "$have_copy_file_range" = "yes" ; then - echo "HAVE_COPY_FILE_RANGE=y" >> $config_host_mak -fi -if test "$vte" = "yes" ; then - echo "CONFIG_VTE=y" >> $config_host_mak - echo "VTE_CFLAGS=$vte_cflags" >> $config_host_mak - echo "VTE_LIBS=$vte_libs" >> $config_host_mak -fi -if test "$virglrenderer" = "yes" ; then - echo "CONFIG_VIRGL=y" >> $config_host_mak - echo "VIRGL_CFLAGS=$virgl_cflags" >> $config_host_mak - echo "VIRGL_LIBS=$virgl_libs" >> $config_host_mak -fi if test "$xen" = "enabled" ; then echo "CONFIG_XEN_BACKEND=y" >> $config_host_mak echo "CONFIG_XEN_CTRL_INTERFACE_VERSION=$xen_ctrl_version" >> $config_host_mak echo "XEN_CFLAGS=$xen_cflags" >> $config_host_mak echo "XEN_LIBS=$xen_libs" >> $config_host_mak fi -if test "$linux_aio" = "yes" ; then - echo "CONFIG_LINUX_AIO=y" >> $config_host_mak -fi -if test "$linux_io_uring" = "yes" ; then - echo "CONFIG_LINUX_IO_URING=y" >> $config_host_mak - echo "LINUX_IO_URING_CFLAGS=$linux_io_uring_cflags" >> $config_host_mak - echo "LINUX_IO_URING_LIBS=$linux_io_uring_libs" >> $config_host_mak -fi if test "$vhost_scsi" = "yes" ; then echo "CONFIG_VHOST_SCSI=y" >> $config_host_mak fi @@ -5821,52 +3602,11 @@ fi if test "$vhost_user_fs" = "yes" ; then echo "CONFIG_VHOST_USER_FS=y" >> $config_host_mak fi -if test "$iovec" = "yes" ; then - echo "CONFIG_IOVEC=y" >> $config_host_mak -fi if test "$membarrier" = "yes" ; then echo "CONFIG_MEMBARRIER=y" >> $config_host_mak fi -if test "$signalfd" = "yes" ; then - echo "CONFIG_SIGNALFD=y" >> $config_host_mak -fi -if test "$optreset" = "yes" ; then - echo "HAVE_OPTRESET=y" >> $config_host_mak -fi -if test "$fdatasync" = "yes" ; then - echo "CONFIG_FDATASYNC=y" >> $config_host_mak -fi -if test "$madvise" = "yes" ; then - echo "CONFIG_MADVISE=y" >> $config_host_mak -fi -if test "$posix_madvise" = "yes" ; then - echo "CONFIG_POSIX_MADVISE=y" >> $config_host_mak -fi -if test "$posix_memalign" = "yes" ; then - echo "CONFIG_POSIX_MEMALIGN=y" >> $config_host_mak -fi -if test "$spice" = "yes" ; then - echo "CONFIG_SPICE=y" >> $config_host_mak - echo "SPICE_CFLAGS=$spice_cflags" >> $config_host_mak - echo "SPICE_LIBS=$spice_libs" >> $config_host_mak -fi - -if test "$smartcard" = "yes" ; then - echo "CONFIG_SMARTCARD=y" >> $config_host_mak - echo "SMARTCARD_CFLAGS=$libcacard_cflags" >> $config_host_mak - echo "SMARTCARD_LIBS=$libcacard_libs" >> $config_host_mak -fi - -if test "$libusb" = "yes" ; then - echo "CONFIG_USB_LIBUSB=y" >> $config_host_mak - echo "LIBUSB_CFLAGS=$libusb_cflags" >> $config_host_mak - echo "LIBUSB_LIBS=$libusb_libs" >> $config_host_mak -fi - -if test "$usb_redir" = "yes" ; then - echo "CONFIG_USB_REDIR=y" >> $config_host_mak - echo "USB_REDIR_CFLAGS=$usb_redir_cflags" >> $config_host_mak - echo "USB_REDIR_LIBS=$usb_redir_libs" >> $config_host_mak +if test "$tcg" = "enabled" -a "$tcg_interpreter" = "true" ; then + echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak fi if test "$opengl" = "yes" ; then @@ -5875,13 +3615,6 @@ if test "$opengl" = "yes" ; then echo "OPENGL_LIBS=$opengl_libs" >> $config_host_mak fi -if test "$gbm" = "yes" ; then - echo 
"CONFIG_GBM=y" >> $config_host_mak - echo "GBM_LIBS=$gbm_libs" >> $config_host_mak - echo "GBM_CFLAGS=$gbm_cflags" >> $config_host_mak -fi - - if test "$avx2_opt" = "yes" ; then echo "CONFIG_AVX2_OPT=y" >> $config_host_mak fi @@ -5914,18 +3647,6 @@ if test "$crypto_afalg" = "yes" ; then echo "CONFIG_AF_ALG=y" >> $config_host_mak fi -if test "$open_by_handle_at" = "yes" ; then - echo "CONFIG_OPEN_BY_HANDLE=y" >> $config_host_mak -fi - -if test "$linux_magic_h" = "yes" ; then - echo "CONFIG_LINUX_MAGIC_H=y" >> $config_host_mak -fi - -if test "$valgrind_h" = "yes" ; then - echo "CONFIG_VALGRIND_H=y" >> $config_host_mak -fi - if test "$have_asan_iface_fiber" = "yes" ; then echo "CONFIG_ASAN_IFACE_FIBER=y" >> $config_host_mak fi @@ -5934,10 +3655,6 @@ if test "$have_tsan" = "yes" && test "$have_tsan_iface_fiber" = "yes" ; then echo "CONFIG_TSAN=y" >> $config_host_mak fi -if test "$has_environ" = "yes" ; then - echo "CONFIG_HAS_ENVIRON=y" >> $config_host_mak -fi - if test "$cpuid_h" = "yes" ; then echo "CONFIG_CPUID_H=y" >> $config_host_mak fi @@ -5954,14 +3671,6 @@ if test "$cmpxchg128" = "yes" ; then echo "CONFIG_CMPXCHG128=y" >> $config_host_mak fi -if test "$atomic64" = "yes" ; then - echo "CONFIG_ATOMIC64=y" >> $config_host_mak -fi - -if test "$getauxval" = "yes" ; then - echo "CONFIG_GETAUXVAL=y" >> $config_host_mak -fi - if test "$libssh" = "yes" ; then echo "CONFIG_LIBSSH=y" >> $config_host_mak echo "LIBSSH_CFLAGS=$libssh_cflags" >> $config_host_mak @@ -5976,45 +3685,6 @@ if test "$tpm" = "yes"; then echo 'CONFIG_TPM=y' >> $config_host_mak fi -echo "TRACE_BACKENDS=$trace_backends" >> $config_host_mak -if have_backend "nop"; then - echo "CONFIG_TRACE_NOP=y" >> $config_host_mak -fi -if have_backend "simple"; then - echo "CONFIG_TRACE_SIMPLE=y" >> $config_host_mak - # Set the appropriate trace file. 
- trace_file="\"$trace_file-\" FMT_pid" -fi -if have_backend "log"; then - echo "CONFIG_TRACE_LOG=y" >> $config_host_mak -fi -if have_backend "ust"; then - echo "CONFIG_TRACE_UST=y" >> $config_host_mak - echo "LTTNG_UST_LIBS=$lttng_ust_libs" >> $config_host_mak - echo "URCU_BP_LIBS=$urcu_bp_libs" >> $config_host_mak -fi -if have_backend "dtrace"; then - echo "CONFIG_TRACE_DTRACE=y" >> $config_host_mak - if test "$trace_backend_stap" = "yes" ; then - echo "CONFIG_TRACE_SYSTEMTAP=y" >> $config_host_mak - fi -fi -if have_backend "ftrace"; then - if test "$linux" = "yes" ; then - echo "CONFIG_TRACE_FTRACE=y" >> $config_host_mak - else - feature_not_found "ftrace(trace backend)" "ftrace requires Linux" - fi -fi -if have_backend "syslog"; then - if test "$posix_syslog" = "yes" ; then - echo "CONFIG_TRACE_SYSLOG=y" >> $config_host_mak - else - feature_not_found "syslog(trace backend)" "syslog not available" - fi -fi -echo "CONFIG_TRACE_FILE=$trace_file" >> $config_host_mak - if test "$rdma" = "yes" ; then echo "CONFIG_RDMA=y" >> $config_host_mak echo "RDMA_LIBS=$rdma_libs" >> $config_host_mak @@ -6024,69 +3694,14 @@ if test "$pvrdma" = "yes" ; then echo "CONFIG_PVRDMA=y" >> $config_host_mak fi -if test "$have_rtnetlink" = "yes" ; then - echo "CONFIG_RTNETLINK=y" >> $config_host_mak -fi - -if test "$libxml2" = "yes" ; then - echo "CONFIG_LIBXML2=y" >> $config_host_mak - echo "LIBXML2_CFLAGS=$libxml2_cflags" >> $config_host_mak - echo "LIBXML2_LIBS=$libxml2_libs" >> $config_host_mak -fi - if test "$replication" = "yes" ; then echo "CONFIG_REPLICATION=y" >> $config_host_mak fi -if test "$have_af_vsock" = "yes" ; then - echo "CONFIG_AF_VSOCK=y" >> $config_host_mak -fi - -if test "$have_sysmacros" = "yes" ; then - echo "CONFIG_SYSMACROS=y" >> $config_host_mak -fi - -if test "$have_static_assert" = "yes" ; then - echo "CONFIG_STATIC_ASSERT=y" >> $config_host_mak -fi - -if test "$have_utmpx" = "yes" ; then - echo "HAVE_UTMPX=y" >> $config_host_mak -fi -if test "$have_getrandom" = "yes" ; then - echo "CONFIG_GETRANDOM=y" >> $config_host_mak -fi -if test "$ivshmem" = "yes" ; then - echo "CONFIG_IVSHMEM=y" >> $config_host_mak -fi if test "$debug_mutex" = "yes" ; then echo "CONFIG_DEBUG_MUTEX=y" >> $config_host_mak fi -# Hold two types of flag: -# CONFIG_THREAD_SETNAME_BYTHREAD - we've got a way of setting the name on -# a thread we have a handle to -# CONFIG_PTHREAD_SETNAME_NP_W_TID - A way of doing it on a particular -# platform -if test "$pthread_setname_np_w_tid" = "yes" ; then - echo "CONFIG_THREAD_SETNAME_BYTHREAD=y" >> $config_host_mak - echo "CONFIG_PTHREAD_SETNAME_NP_W_TID=y" >> $config_host_mak -elif test "$pthread_setname_np_wo_tid" = "yes" ; then - echo "CONFIG_THREAD_SETNAME_BYTHREAD=y" >> $config_host_mak - echo "CONFIG_PTHREAD_SETNAME_NP_WO_TID=y" >> $config_host_mak -fi - -if test "$libpmem" = "yes" ; then - echo "CONFIG_LIBPMEM=y" >> $config_host_mak - echo "LIBPMEM_LIBS=$libpmem_libs" >> $config_host_mak - echo "LIBPMEM_CFLAGS=$libpmem_cflags" >> $config_host_mak -fi - -if test "$libdaxctl" = "yes" ; then - echo "CONFIG_LIBDAXCTL=y" >> $config_host_mak - echo "LIBDAXCTL_LIBS=$libdaxctl_libs" >> $config_host_mak -fi - if test "$bochs" = "yes" ; then echo "CONFIG_BOCHS=y" >> $config_host_mak fi @@ -6111,33 +3726,6 @@ fi if test "$parallels" = "yes" ; then echo "CONFIG_PARALLELS=y" >> $config_host_mak fi -if test "$sheepdog" = "yes" ; then - add_to deprecated_features "sheepdog" - echo "CONFIG_SHEEPDOG=y" >> $config_host_mak -fi -if test "$have_mlockall" = "yes" ; then - echo 
"HAVE_MLOCKALL=y" >> $config_host_mak -fi -if test "$fuzzing" = "yes" ; then - # If LIB_FUZZING_ENGINE is set, assume we are running on OSS-Fuzz, and the - # needed CFLAGS have already been provided - if test -z "${LIB_FUZZING_ENGINE+xxx}" ; then - # Add CFLAGS to tell clang to add fuzzer-related instrumentation to all the - # compiled code. - QEMU_CFLAGS="$QEMU_CFLAGS -fsanitize=fuzzer-no-link" - # To build non-fuzzer binaries with --enable-fuzzing, link everything with - # fsanitize=fuzzer-no-link. Otherwise, the linker will be unable to bind - # the fuzzer-related callbacks added by instrumentation. - QEMU_LDFLAGS="$QEMU_LDFLAGS -fsanitize=fuzzer-no-link" - # For the actual fuzzer binaries, we need to link against the libfuzzer - # library. Provide the flags for doing this in FUZZ_EXE_LDFLAGS. The meson - # rule for the fuzzer adds these to the link_args. They need to be - # configurable, to support OSS-Fuzz - FUZZ_EXE_LDFLAGS="-fsanitize=fuzzer" - else - FUZZ_EXE_LDFLAGS="$LIB_FUZZING_ENGINE" - fi -fi if test "$plugins" = "yes" ; then echo "CONFIG_PLUGIN=y" >> $config_host_mak @@ -6177,22 +3765,16 @@ echo "GENISOIMAGE=$genisoimage" >> $config_host_mak echo "MESON=$meson" >> $config_host_mak echo "NINJA=$ninja" >> $config_host_mak echo "CC=$cc" >> $config_host_mak +echo "HOST_CC=$host_cc" >> $config_host_mak if $iasl -h > /dev/null 2>&1; then echo "CONFIG_IASL=$iasl" >> $config_host_mak fi -echo "CXX=$cxx" >> $config_host_mak -echo "OBJCC=$objcc" >> $config_host_mak echo "AR=$ar" >> $config_host_mak -echo "ARFLAGS=$ARFLAGS" >> $config_host_mak echo "AS=$as" >> $config_host_mak echo "CCAS=$ccas" >> $config_host_mak echo "CPP=$cpp" >> $config_host_mak echo "OBJCOPY=$objcopy" >> $config_host_mak echo "LD=$ld" >> $config_host_mak -echo "RANLIB=$ranlib" >> $config_host_mak -echo "NM=$nm" >> $config_host_mak -echo "PKG_CONFIG=$pkg_config_exe" >> $config_host_mak -echo "WINDRES=$windres" >> $config_host_mak echo "CFLAGS_NOPIE=$CFLAGS_NOPIE" >> $config_host_mak echo "QEMU_CFLAGS=$QEMU_CFLAGS" >> $config_host_mak echo "QEMU_CXXFLAGS=$QEMU_CXXFLAGS" >> $config_host_mak @@ -6201,18 +3783,7 @@ echo "GLIB_LIBS=$glib_libs" >> $config_host_mak echo "QEMU_LDFLAGS=$QEMU_LDFLAGS" >> $config_host_mak echo "LD_I386_EMULATION=$ld_i386_emulation" >> $config_host_mak echo "EXESUF=$EXESUF" >> $config_host_mak -echo "HOST_DSOSUF=$HOST_DSOSUF" >> $config_host_mak echo "LIBS_QGA=$libs_qga" >> $config_host_mak -echo "TASN1_LIBS=$tasn1_libs" >> $config_host_mak -echo "TASN1_CFLAGS=$tasn1_cflags" >> $config_host_mak -if test "$gcov" = "yes" ; then - echo "CONFIG_GCOV=y" >> $config_host_mak -fi - -if test "$fuzzing" != "no"; then - echo "CONFIG_FUZZ=y" >> $config_host_mak -fi -echo "FUZZ_EXE_LDFLAGS=$FUZZ_EXE_LDFLAGS" >> $config_host_mak if test "$rng_none" = "yes"; then echo "CONFIG_RNG_NONE=y" >> $config_host_mak @@ -6289,21 +3860,20 @@ fi # UNLINK is used to remove symlinks from older development versions # that might get into the way when doing "git update" without doing # a "make distclean" in between. 
-DIRS="tests tests/tcg tests/tcg/lm32 tests/qapi-schema tests/qtest/libqos" +DIRS="tests tests/tcg tests/qapi-schema tests/qtest/libqos" DIRS="$DIRS tests/qtest tests/qemu-iotests tests/vm tests/fp tests/qgraph" DIRS="$DIRS docs docs/interop fsdev scsi" DIRS="$DIRS pc-bios/optionrom pc-bios/s390-ccw" DIRS="$DIRS roms/seabios" DIRS="$DIRS contrib/plugins/" LINKS="Makefile" -LINKS="$LINKS tests/tcg/lm32/Makefile" LINKS="$LINKS tests/tcg/Makefile.target" LINKS="$LINKS pc-bios/optionrom/Makefile" LINKS="$LINKS pc-bios/s390-ccw/Makefile" LINKS="$LINKS roms/seabios/Makefile" LINKS="$LINKS pc-bios/qemu-icon.bmp" LINKS="$LINKS .gdbinit scripts" # scripts needed by relative path in .gdbinit -LINKS="$LINKS tests/acceptance tests/data" +LINKS="$LINKS tests/avocado tests/data" LINKS="$LINKS tests/qemu-iotests/check" LINKS="$LINKS python" LINKS="$LINKS contrib/plugins/Makefile " @@ -6318,7 +3888,9 @@ for bios_file in \ $source_path/pc-bios/openbios-* \ $source_path/pc-bios/u-boot.* \ $source_path/pc-bios/edk2-*.fd.bz2 \ - $source_path/pc-bios/palcode-* + $source_path/pc-bios/palcode-* \ + $source_path/pc-bios/qemu_vga.ndrv + do LINKS="$LINKS pc-bios/$(basename $bios_file)" done @@ -6356,34 +3928,46 @@ for rom in seabios; do echo "RANLIB=$ranlib" >> $config_mak done +config_mak=pc-bios/optionrom/config.mak +echo "# Automatically generated by configure - do not modify" > $config_mak +echo "TOPSRC_DIR=$source_path" >> $config_mak + if test "$skip_meson" = no; then -cross="config-meson.cross.new" -meson_quote() { + cross="config-meson.cross.new" + meson_quote() { + test $# = 0 && return echo "'$(echo $* | sed "s/ /','/g")'" -} + } -echo "# Automatically generated by configure - do not modify" > $cross -echo "[properties]" >> $cross -test -z "$cxx" && echo "link_language = 'c'" >> $cross -echo "[built-in options]" >> $cross -echo "c_args = [${CFLAGS:+$(meson_quote $CFLAGS)}]" >> $cross -echo "cpp_args = [${CXXFLAGS:+$(meson_quote $CXXFLAGS)}]" >> $cross -echo "c_link_args = [${LDFLAGS:+$(meson_quote $LDFLAGS)}]" >> $cross -echo "cpp_link_args = [${LDFLAGS:+$(meson_quote $LDFLAGS)}]" >> $cross -echo "[binaries]" >> $cross -echo "c = [$(meson_quote $cc)]" >> $cross -test -n "$cxx" && echo "cpp = [$(meson_quote $cxx)]" >> $cross -test -n "$objcc" && echo "objc = [$(meson_quote $objcc)]" >> $cross -echo "ar = [$(meson_quote $ar)]" >> $cross -echo "nm = [$(meson_quote $nm)]" >> $cross -echo "pkgconfig = [$(meson_quote $pkg_config_exe)]" >> $cross -echo "ranlib = [$(meson_quote $ranlib)]" >> $cross -if has $sdl2_config; then - echo "sdl2-config = [$(meson_quote $sdl2_config)]" >> $cross -fi -echo "strip = [$(meson_quote $strip)]" >> $cross -echo "windres = [$(meson_quote $windres)]" >> $cross -if test "$cross_compile" = "yes"; then + echo "# Automatically generated by configure - do not modify" > $cross + echo "[properties]" >> $cross + + # unroll any custom device configs + for a in $device_archs; do + eval "c=\$devices_${a}" + echo "${a}-softmmu = '$c'" >> $cross + done + + test -z "$cxx" && echo "link_language = 'c'" >> $cross + echo "[built-in options]" >> $cross + echo "c_args = [$(meson_quote $CFLAGS $EXTRA_CFLAGS)]" >> $cross + echo "cpp_args = [$(meson_quote $CXXFLAGS $EXTRA_CXXFLAGS)]" >> $cross + echo "c_link_args = [$(meson_quote $CFLAGS $LDFLAGS $EXTRA_CFLAGS $EXTRA_LDFLAGS)]" >> $cross + echo "cpp_link_args = [$(meson_quote $CXXFLAGS $LDFLAGS $EXTRA_CXXFLAGS $EXTRA_LDFLAGS)]" >> $cross + echo "[binaries]" >> $cross + echo "c = [$(meson_quote $cc $CPU_CFLAGS)]" >> $cross + test -n "$cxx" && echo "cpp 
= [$(meson_quote $cxx $CPU_CFLAGS)]" >> $cross + test -n "$objcc" && echo "objc = [$(meson_quote $objcc $CPU_CFLAGS)]" >> $cross + echo "ar = [$(meson_quote $ar)]" >> $cross + echo "nm = [$(meson_quote $nm)]" >> $cross + echo "pkgconfig = [$(meson_quote $pkg_config_exe)]" >> $cross + echo "ranlib = [$(meson_quote $ranlib)]" >> $cross + if has $sdl2_config; then + echo "sdl2-config = [$(meson_quote $sdl2_config)]" >> $cross + fi + echo "strip = [$(meson_quote $strip)]" >> $cross + echo "windres = [$(meson_quote $windres)]" >> $cross + if test "$cross_compile" = "yes"; then cross_arg="--cross-file config-meson.cross" echo "[host_machine]" >> $cross if test "$mingw32" = "yes" ; then @@ -6399,7 +3983,7 @@ if test "$cross_compile" = "yes"; then i386) echo "cpu_family = 'x86'" >> $cross ;; - x86_64) + x86_64|x32) echo "cpu_family = 'x86_64'" >> $cross ;; ppc64le) @@ -6415,17 +3999,14 @@ if test "$cross_compile" = "yes"; then else echo "endian = 'little'" >> $cross fi -else + else cross_arg="--native-file config-meson.cross" -fi -mv $cross config-meson.cross + fi + mv $cross config-meson.cross -rm -rf meson-private meson-info meson-logs -unset staticpic -if ! version_ge "$($meson --version)" 0.56.0; then - staticpic=$(if test "$pie" = yes; then echo true; else echo false; fi) -fi -NINJA=$ninja $meson setup \ + rm -rf meson-private meson-info meson-logs + run_meson() { + NINJA=$ninja $meson setup \ --prefix "$prefix" \ --libdir "$libdir" \ --libexecdir "$libexecdir" \ @@ -6436,48 +4017,58 @@ NINJA=$ninja $meson setup \ --sysconfdir "$sysconfdir" \ --localedir "$localedir" \ --localstatedir "$local_statedir" \ + -Daudio_drv_list=$audio_drv_list \ + -Ddefault_devices=$default_devices \ -Ddocdir="$docdir" \ -Dqemu_firmwarepath="$firmwarepath" \ -Dqemu_suffix="$qemu_suffix" \ + -Dsphinx_build="$sphinx_build" \ + -Dtrace_file="$trace_file" \ -Doptimization=$(if test "$debug" = yes; then echo 0; else echo 2; fi) \ -Ddebug=$(if test "$debug_info" = yes; then echo true; else echo false; fi) \ -Dwerror=$(if test "$werror" = yes; then echo true; else echo false; fi) \ -Dstrip=$(if test "$strip_opt" = yes; then echo true; else echo false; fi) \ -Db_pie=$(if test "$pie" = yes; then echo true; else echo false; fi) \ - ${staticpic:+-Db_staticpic=$staticpic} \ -Db_coverage=$(if test "$gcov" = yes; then echo true; else echo false; fi) \ - -Db_lto=$lto -Dcfi=$cfi -Dcfi_debug=$cfi_debug \ - -Dmalloc=$malloc -Dmalloc_trim=$malloc_trim -Dsparse=$sparse \ - -Dkvm=$kvm -Dhax=$hax -Dwhpx=$whpx -Dhvf=$hvf \ - -Dxen=$xen -Dxen_pci_passthrough=$xen_pci_passthrough -Dtcg=$tcg \ - -Dcocoa=$cocoa -Dgtk=$gtk -Dmpath=$mpath -Dsdl=$sdl -Dsdl_image=$sdl_image \ - -Dvnc=$vnc -Dvnc_sasl=$vnc_sasl -Dvnc_jpeg=$vnc_jpeg -Dvnc_png=$vnc_png \ - -Dgettext=$gettext -Dxkbcommon=$xkbcommon -Du2f=$u2f -Dvirtiofsd=$virtiofsd \ - -Dcapstone=$capstone -Dslirp=$slirp -Dfdt=$fdt -Dbrlapi=$brlapi \ - -Dcurl=$curl -Dglusterfs=$glusterfs -Dbzip2=$bzip2 -Dlibiscsi=$libiscsi \ - -Dlibnfs=$libnfs -Diconv=$iconv -Dcurses=$curses -Dlibudev=$libudev\ - -Drbd=$rbd -Dlzo=$lzo -Dsnappy=$snappy -Dlzfse=$lzfse \ - -Dzstd=$zstd -Dseccomp=$seccomp -Dvirtfs=$virtfs -Dcap_ng=$cap_ng \ - -Dattr=$attr -Ddefault_devices=$default_devices \ - -Ddocs=$docs -Dsphinx_build=$sphinx_build -Dinstall_blobs=$blobs \ - -Dvhost_user_blk_server=$vhost_user_blk_server -Dmultiprocess=$multiprocess \ - -Dfuse=$fuse -Dfuse_lseek=$fuse_lseek -Dguest_agent_msi=$guest_agent_msi \ - $(if test "$default_features" = no; then echo "-Dauto_features=disabled"; fi) \ - 
-Dtcg_interpreter=$tcg_interpreter \ - $cross_arg \ - "$PWD" "$source_path" - -if test "$?" -ne 0 ; then - error_exit "meson setup failed" -fi + -Db_lto=$lto -Dcfi=$cfi -Dtcg=$tcg -Dxen=$xen \ + -Dcapstone=$capstone -Dfdt=$fdt -Dslirp=$slirp \ + $(test -n "${LIB_FUZZING_ENGINE+xxx}" && echo "-Dfuzzing_engine=$LIB_FUZZING_ENGINE") \ + $(if test "$default_feature" = no; then echo "-Dauto_features=disabled"; fi) \ + "$@" $cross_arg "$PWD" "$source_path" + } + eval run_meson $meson_options + if test "$?" -ne 0 ; then + error_exit "meson setup failed" + fi +else + if test -f meson-private/cmd_line.txt; then + # Adjust old command line options whose type was changed + # Avoids having to use "setup --wipe" when Meson is upgraded + perl -i -ne ' + s/^gettext = true$/gettext = auto/; + s/^gettext = false$/gettext = disabled/; + /^b_staticpic/ && next; + print;' meson-private/cmd_line.txt + fi fi if test -n "${deprecated_features}"; then echo "Warning, deprecated features enabled." - echo "Please see docs/system/deprecated.rst" + echo "Please see docs/about/deprecated.rst" echo " features: ${deprecated_features}" fi +# Create list of config switches that should be poisoned in common code... +# but filter out CONFIG_TCG and CONFIG_USER_ONLY which are special. +target_configs_h=$(ls *-config-devices.h *-config-target.h 2>/dev/null) +if test -n "$target_configs_h" ; then + sed -n -e '/CONFIG_TCG/d' -e '/CONFIG_USER_ONLY/d' \ + -e '/^#define / { s///; s/ .*//; s/^/#pragma GCC poison /p; }' \ + $target_configs_h | sort -u > config-poison.h +else + :> config-poison.h +fi + # Save the configure command line for later reuse. cat <config.status #!/bin/sh @@ -6507,9 +4098,12 @@ preserve_env AR preserve_env AS preserve_env CC preserve_env CPP +preserve_env CFLAGS preserve_env CXX +preserve_env CXXFLAGS preserve_env INSTALL preserve_env LD +preserve_env LDFLAGS preserve_env LD_LIBRARY_PATH preserve_env LIBTOOL preserve_env MAKE diff --git a/contrib/elf2dmp/download.c b/contrib/elf2dmp/download.c index d09e607431f..bd7650a7a27 100644 --- a/contrib/elf2dmp/download.c +++ b/contrib/elf2dmp/download.c @@ -25,21 +25,19 @@ int download_url(const char *name, const char *url) goto out_curl; } - curl_easy_setopt(curl, CURLOPT_URL, url); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, file); - curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); - curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0); - - if (curl_easy_perform(curl) != CURLE_OK) { - err = 1; - fclose(file); + if (curl_easy_setopt(curl, CURLOPT_URL, url) != CURLE_OK + || curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL) != CURLE_OK + || curl_easy_setopt(curl, CURLOPT_WRITEDATA, file) != CURLE_OK + || curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1) != CURLE_OK + || curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0) != CURLE_OK + || curl_easy_perform(curl) != CURLE_OK) { unlink(name); - goto out_curl; + fclose(file); + err = 1; + } else { + err = fclose(file); } - err = fclose(file); - out_curl: curl_easy_cleanup(curl); diff --git a/contrib/elf2dmp/pdb.c b/contrib/elf2dmp/pdb.c index b3a65470680..adcfa7e154c 100644 --- a/contrib/elf2dmp/pdb.c +++ b/contrib/elf2dmp/pdb.c @@ -215,6 +215,10 @@ static int pdb_init_symbols(struct pdb_reader *r) static int pdb_reader_ds_init(struct pdb_reader *r, PDB_DS_HEADER *hdr) { + if (hdr->block_size == 0) { + return 1; + } + memset(r->file_used, 0, sizeof(r->file_used)); r->ds.header = hdr; r->ds.toc = pdb_ds_read(hdr, (uint32_t *)((uint8_t *)hdr + diff --git a/contrib/gitdm/aliases 
b/contrib/gitdm/aliases index c1e744312f5..4792413ce78 100644 --- a/contrib/gitdm/aliases +++ b/contrib/gitdm/aliases @@ -31,6 +31,12 @@ pbrook@c046a42c-6fe2-441c-8c8c-71466251a162 paul@codesourcery.com ths@c046a42c-6fe2-441c-8c8c-71466251a162 ths@networkno.de malc@c046a42c-6fe2-441c-8c8c-71466251a162 av1474@comtv.ru +# canonical emails +liq3ea@163.com liq3ea@gmail.com + +# some broken tags +yuval.shaia.ml.gmail.com yuval.shaia.ml@gmail.com + # There is also a: # (no author) <(no author)@c046a42c-6fe2-441c-8c8c-71466251a162> # for the cvs2svn initialization commit e63c3dc74bf. diff --git a/contrib/gitdm/domain-map b/contrib/gitdm/domain-map index 0074da618f5..2800d9f986a 100644 --- a/contrib/gitdm/domain-map +++ b/contrib/gitdm/domain-map @@ -9,6 +9,8 @@ baidu.com Baidu bytedance.com ByteDance cmss.chinamobile.com China Mobile citrix.com Citrix +crudebyte.com Crudebyte +eldorado.org.br Instituto de Pesquisas Eldorado fujitsu.com Fujitsu google.com Google greensocs.com GreenSocs @@ -17,20 +19,25 @@ ibm.com IBM igalia.com Igalia intel.com Intel linaro.org Linaro +lwn.net LWN microsoft.com Microsoft +mvista.com MontaVista nokia.com Nokia nuviainc.com NUVIA +nvidia.com NVIDIA oracle.com Oracle proxmox.com Proxmox quicinc.com Qualcomm Innovation Center redhat.com Red Hat rt-rk.com RT-RK +samsung.com Samsung siemens.com Siemens sifive.com SiFive suse.com SUSE suse.de SUSE virtuozzo.com Virtuozzo wdc.com Western Digital +windriver.com Wind River xilinx.com Xilinx yadro.com YADRO yandex-team.ru Yandex diff --git a/contrib/gitdm/group-map-academics b/contrib/gitdm/group-map-academics index bf3c894821b..44745ca85b6 100644 --- a/contrib/gitdm/group-map-academics +++ b/contrib/gitdm/group-map-academics @@ -16,3 +16,6 @@ cota@braap.org uni-paderborn.de edu edu.cn + +# Boston University +bu.edu diff --git a/contrib/gitdm/group-map-individuals b/contrib/gitdm/group-map-individuals index 36bbb77c39a..f816aa87702 100644 --- a/contrib/gitdm/group-map-individuals +++ b/contrib/gitdm/group-map-individuals @@ -29,3 +29,8 @@ mrolnik@gmail.com huth@tuxfamily.org jhogan@kernel.org atar4qemu@gmail.com +minwoo.im.dev@gmail.com +bmeng.cn@gmail.com +liq3ea@gmail.com +chetan4windows@gmail.com +akihiko.odaki@gmail.com diff --git a/contrib/gitdm/group-map-interns b/contrib/gitdm/group-map-interns new file mode 100644 index 00000000000..fe33a3231ec --- /dev/null +++ b/contrib/gitdm/group-map-interns @@ -0,0 +1,13 @@ +# +# Group together everyone working as an intern via one of the various +# outreach programs. +# + +# GSoC 2020 Virtual FIDO/U2F security key +cesar.belley@lse.epita.fr + +# GSoC 2020 TCG performance +ahmedkhaledkaraman@gmail.com + +# GSoC 2021 TCG plugins +ma.mandourr@gmail.com diff --git a/contrib/gitdm/group-map-netflix b/contrib/gitdm/group-map-netflix new file mode 100644 index 00000000000..468f95dcb2e --- /dev/null +++ b/contrib/gitdm/group-map-netflix @@ -0,0 +1,5 @@ +# +# Netflix contributors using their personal emails +# + +imp@bsdimp.com diff --git a/contrib/gitdm/group-map-robots b/contrib/gitdm/group-map-robots new file mode 100644 index 00000000000..ffd956c2eb5 --- /dev/null +++ b/contrib/gitdm/group-map-robots @@ -0,0 +1,7 @@ +# +# There are various automatic robots that occasionally scan and report +# bugs. Let's group them together here. 
+# + +# Euler Robot +euler.robot@huawei.com diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile index b9d7935e5ef..54ac5ccd9ff 100644 --- a/contrib/plugins/Makefile +++ b/contrib/plugins/Makefile @@ -13,18 +13,20 @@ include $(BUILD_DIR)/config-host.mak VPATH += $(SRC_PATH)/contrib/plugins NAMES := +NAMES += execlog NAMES += hotblocks NAMES += hotpages NAMES += howvec NAMES += lockstep NAMES += hwprofile +NAMES += cache SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES))) # The main QEMU uses Glib extensively so it's perfectly fine to use it # in plugins (which many example do). CFLAGS = $(GLIB_CFLAGS) -CFLAGS += -fPIC +CFLAGS += -fPIC -Wall $(filter -W%, $(QEMU_CFLAGS)) CFLAGS += $(if $(findstring no-psabi,$(QEMU_CFLAGS)),-Wpsabi) CFLAGS += -I$(SRC_PATH)/include/qemu diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c new file mode 100644 index 00000000000..b9226e7c40b --- /dev/null +++ b/contrib/plugins/cache.c @@ -0,0 +1,860 @@ +/* + * Copyright (C) 2021, Mahmoud Mandour + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include +#include +#include + +#include + +#define STRTOLL(x) g_ascii_strtoll(x, NULL, 10) + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW; + +static GHashTable *miss_ht; + +static GMutex hashtable_lock; +static GRand *rng; + +static int limit; +static bool sys; + +enum EvictionPolicy { + LRU, + FIFO, + RAND, +}; + +enum EvictionPolicy policy; + +/* + * A CacheSet is a set of cache blocks. A memory block that maps to a set can be + * put in any of the blocks inside the set. The number of block per set is + * called the associativity (assoc). + * + * Each block contains the the stored tag and a valid bit. Since this is not + * a functional simulator, the data itself is not stored. We only identify + * whether a block is in the cache or not by searching for its tag. + * + * In order to search for memory data in the cache, the set identifier and tag + * are extracted from the address and the set is probed to see whether a tag + * match occur. + * + * An address is logically divided into three portions: The block offset, + * the set number, and the tag. + * + * The set number is used to identify the set in which the block may exist. + * The tag is compared against all the tags of a set to search for a match. If a + * match is found, then the access is a hit. + * + * The CacheSet also contains bookkeaping information about eviction details. 
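+ *
+ * As a concrete illustration (using the default L1 parameters configured
+ * further below: 64-byte blocks, 8-way associativity, 16 KiB per cache,
+ * i.e. 32 sets), bits [5:0] of an address are the block offset, bits
+ * [10:6] select the set, and the remaining upper bits form the tag.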
+ */ + +typedef struct { + uint64_t tag; + bool valid; +} CacheBlock; + +typedef struct { + CacheBlock *blocks; + uint64_t *lru_priorities; + uint64_t lru_gen_counter; + GQueue *fifo_queue; +} CacheSet; + +typedef struct { + CacheSet *sets; + int num_sets; + int cachesize; + int assoc; + int blksize_shift; + uint64_t set_mask; + uint64_t tag_mask; + uint64_t accesses; + uint64_t misses; +} Cache; + +typedef struct { + char *disas_str; + const char *symbol; + uint64_t addr; + uint64_t l1_dmisses; + uint64_t l1_imisses; + uint64_t l2_misses; +} InsnData; + +void (*update_hit)(Cache *cache, int set, int blk); +void (*update_miss)(Cache *cache, int set, int blk); + +void (*metadata_init)(Cache *cache); +void (*metadata_destroy)(Cache *cache); + +static int cores; +static Cache **l1_dcaches, **l1_icaches; + +static bool use_l2; +static Cache **l2_ucaches; + +static GMutex *l1_dcache_locks; +static GMutex *l1_icache_locks; +static GMutex *l2_ucache_locks; + +static uint64_t l1_dmem_accesses; +static uint64_t l1_imem_accesses; +static uint64_t l1_imisses; +static uint64_t l1_dmisses; + +static uint64_t l2_mem_accesses; +static uint64_t l2_misses; + +static int pow_of_two(int num) +{ + g_assert((num & (num - 1)) == 0); + int ret = 0; + while (num /= 2) { + ret++; + } + return ret; +} + +/* + * LRU evection policy: For each set, a generation counter is maintained + * alongside a priority array. + * + * On each set access, the generation counter is incremented. + * + * On a cache hit: The hit-block is assigned the current generation counter, + * indicating that it is the most recently used block. + * + * On a cache miss: The block with the least priority is searched and replaced + * with the newly-cached block, of which the priority is set to the current + * generation number. + */ + +static void lru_priorities_init(Cache *cache) +{ + int i; + + for (i = 0; i < cache->num_sets; i++) { + cache->sets[i].lru_priorities = g_new0(uint64_t, cache->assoc); + cache->sets[i].lru_gen_counter = 0; + } +} + +static void lru_update_blk(Cache *cache, int set_idx, int blk_idx) +{ + CacheSet *set = &cache->sets[set_idx]; + set->lru_priorities[blk_idx] = cache->sets[set_idx].lru_gen_counter; + set->lru_gen_counter++; +} + +static int lru_get_lru_block(Cache *cache, int set_idx) +{ + int i, min_idx, min_priority; + + min_priority = cache->sets[set_idx].lru_priorities[0]; + min_idx = 0; + + for (i = 1; i < cache->assoc; i++) { + if (cache->sets[set_idx].lru_priorities[i] < min_priority) { + min_priority = cache->sets[set_idx].lru_priorities[i]; + min_idx = i; + } + } + return min_idx; +} + +static void lru_priorities_destroy(Cache *cache) +{ + int i; + + for (i = 0; i < cache->num_sets; i++) { + g_free(cache->sets[i].lru_priorities); + } +} + +/* + * FIFO eviction policy: a FIFO queue is maintained for each CacheSet that + * stores accesses to the cache. + * + * On a compulsory miss: The block index is enqueued to the fifo_queue to + * indicate that it's the latest cached block. + * + * On a conflict miss: The first-in block is removed from the cache and the new + * block is put in its place and enqueued to the FIFO queue. 
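+ *
+ * For example, in a 2-way set that already caches blocks A and then B, a
+ * miss on block C evicts A (the first block queued), even if A was
+ * accessed more recently than B.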
+ */ + +static void fifo_init(Cache *cache) +{ + int i; + + for (i = 0; i < cache->num_sets; i++) { + cache->sets[i].fifo_queue = g_queue_new(); + } +} + +static int fifo_get_first_block(Cache *cache, int set) +{ + GQueue *q = cache->sets[set].fifo_queue; + return GPOINTER_TO_INT(g_queue_pop_tail(q)); +} + +static void fifo_update_on_miss(Cache *cache, int set, int blk_idx) +{ + GQueue *q = cache->sets[set].fifo_queue; + g_queue_push_head(q, GINT_TO_POINTER(blk_idx)); +} + +static void fifo_destroy(Cache *cache) +{ + int i; + + for (i = 0; i < cache->num_sets; i++) { + g_queue_free(cache->sets[i].fifo_queue); + } +} + +static inline uint64_t extract_tag(Cache *cache, uint64_t addr) +{ + return addr & cache->tag_mask; +} + +static inline uint64_t extract_set(Cache *cache, uint64_t addr) +{ + return (addr & cache->set_mask) >> cache->blksize_shift; +} + +static const char *cache_config_error(int blksize, int assoc, int cachesize) +{ + if (cachesize % blksize != 0) { + return "cache size must be divisible by block size"; + } else if (cachesize % (blksize * assoc) != 0) { + return "cache size must be divisible by set size (assoc * block size)"; + } else { + return NULL; + } +} + +static bool bad_cache_params(int blksize, int assoc, int cachesize) +{ + return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0); +} + +static Cache *cache_init(int blksize, int assoc, int cachesize) +{ + Cache *cache; + int i; + uint64_t blk_mask; + + /* + * This function shall not be called directly, and hence expects suitable + * parameters. + */ + g_assert(!bad_cache_params(blksize, assoc, cachesize)); + + cache = g_new(Cache, 1); + cache->assoc = assoc; + cache->cachesize = cachesize; + cache->num_sets = cachesize / (blksize * assoc); + cache->sets = g_new(CacheSet, cache->num_sets); + cache->blksize_shift = pow_of_two(blksize); + cache->accesses = 0; + cache->misses = 0; + + for (i = 0; i < cache->num_sets; i++) { + cache->sets[i].blocks = g_new0(CacheBlock, assoc); + } + + blk_mask = blksize - 1; + cache->set_mask = ((cache->num_sets - 1) << cache->blksize_shift); + cache->tag_mask = ~(cache->set_mask | blk_mask); + + if (metadata_init) { + metadata_init(cache); + } + + return cache; +} + +static Cache **caches_init(int blksize, int assoc, int cachesize) +{ + Cache **caches; + int i; + + if (bad_cache_params(blksize, assoc, cachesize)) { + return NULL; + } + + caches = g_new(Cache *, cores); + + for (i = 0; i < cores; i++) { + caches[i] = cache_init(blksize, assoc, cachesize); + } + + return caches; +} + +static int get_invalid_block(Cache *cache, uint64_t set) +{ + int i; + + for (i = 0; i < cache->assoc; i++) { + if (!cache->sets[set].blocks[i].valid) { + return i; + } + } + + return -1; +} + +static int get_replaced_block(Cache *cache, int set) +{ + switch (policy) { + case RAND: + return g_rand_int_range(rng, 0, cache->assoc); + case LRU: + return lru_get_lru_block(cache, set); + case FIFO: + return fifo_get_first_block(cache, set); + default: + g_assert_not_reached(); + } +} + +static int in_cache(Cache *cache, uint64_t addr) +{ + int i; + uint64_t tag, set; + + tag = extract_tag(cache, addr); + set = extract_set(cache, addr); + + for (i = 0; i < cache->assoc; i++) { + if (cache->sets[set].blocks[i].tag == tag && + cache->sets[set].blocks[i].valid) { + return i; + } + } + + return -1; +} + +/** + * access_cache(): Simulate a cache access + * @cache: The cache under simulation + * @addr: The address of the requested memory location + * + * Returns true if the requsted data is hit in the 
cache and false when missed. + * The cache is updated on miss for the next access. + */ +static bool access_cache(Cache *cache, uint64_t addr) +{ + int hit_blk, replaced_blk; + uint64_t tag, set; + + tag = extract_tag(cache, addr); + set = extract_set(cache, addr); + + hit_blk = in_cache(cache, addr); + if (hit_blk != -1) { + if (update_hit) { + update_hit(cache, set, hit_blk); + } + return true; + } + + replaced_blk = get_invalid_block(cache, set); + + if (replaced_blk == -1) { + replaced_blk = get_replaced_block(cache, set); + } + + if (update_miss) { + update_miss(cache, set, replaced_blk); + } + + cache->sets[set].blocks[replaced_blk].tag = tag; + cache->sets[set].blocks[replaced_blk].valid = true; + + return false; +} + +static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info, + uint64_t vaddr, void *userdata) +{ + uint64_t effective_addr; + struct qemu_plugin_hwaddr *hwaddr; + int cache_idx; + InsnData *insn; + bool hit_in_l1; + + hwaddr = qemu_plugin_get_hwaddr(info, vaddr); + if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) { + return; + } + + effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr; + cache_idx = vcpu_index % cores; + + g_mutex_lock(&l1_dcache_locks[cache_idx]); + hit_in_l1 = access_cache(l1_dcaches[cache_idx], effective_addr); + if (!hit_in_l1) { + insn = (InsnData *) userdata; + __atomic_fetch_add(&insn->l1_dmisses, 1, __ATOMIC_SEQ_CST); + l1_dcaches[cache_idx]->misses++; + } + l1_dcaches[cache_idx]->accesses++; + g_mutex_unlock(&l1_dcache_locks[cache_idx]); + + if (hit_in_l1 || !use_l2) { + /* No need to access L2 */ + return; + } + + g_mutex_lock(&l2_ucache_locks[cache_idx]); + if (!access_cache(l2_ucaches[cache_idx], effective_addr)) { + insn = (InsnData *) userdata; + __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST); + l2_ucaches[cache_idx]->misses++; + } + l2_ucaches[cache_idx]->accesses++; + g_mutex_unlock(&l2_ucache_locks[cache_idx]); +} + +static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata) +{ + uint64_t insn_addr; + InsnData *insn; + int cache_idx; + bool hit_in_l1; + + insn_addr = ((InsnData *) userdata)->addr; + + cache_idx = vcpu_index % cores; + g_mutex_lock(&l1_icache_locks[cache_idx]); + hit_in_l1 = access_cache(l1_icaches[cache_idx], insn_addr); + if (!hit_in_l1) { + insn = (InsnData *) userdata; + __atomic_fetch_add(&insn->l1_imisses, 1, __ATOMIC_SEQ_CST); + l1_icaches[cache_idx]->misses++; + } + l1_icaches[cache_idx]->accesses++; + g_mutex_unlock(&l1_icache_locks[cache_idx]); + + if (hit_in_l1 || !use_l2) { + /* No need to access L2 */ + return; + } + + g_mutex_lock(&l2_ucache_locks[cache_idx]); + if (!access_cache(l2_ucaches[cache_idx], insn_addr)) { + insn = (InsnData *) userdata; + __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST); + l2_ucaches[cache_idx]->misses++; + } + l2_ucaches[cache_idx]->accesses++; + g_mutex_unlock(&l2_ucache_locks[cache_idx]); +} + +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + size_t n_insns; + size_t i; + InsnData *data; + + n_insns = qemu_plugin_tb_n_insns(tb); + for (i = 0; i < n_insns; i++) { + struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i); + uint64_t effective_addr; + + if (sys) { + effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn); + } else { + effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn); + } + + /* + * Instructions might get translated multiple times, we do not create + * new entries for those instructions. 
Instead, we fetch the same + * entry from the hash table and register it for the callback again. + */ + g_mutex_lock(&hashtable_lock); + data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr)); + if (data == NULL) { + data = g_new0(InsnData, 1); + data->disas_str = qemu_plugin_insn_disas(insn); + data->symbol = qemu_plugin_insn_symbol(insn); + data->addr = effective_addr; + g_hash_table_insert(miss_ht, GUINT_TO_POINTER(effective_addr), + (gpointer) data); + } + g_mutex_unlock(&hashtable_lock); + + qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access, + QEMU_PLUGIN_CB_NO_REGS, + rw, data); + + qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec, + QEMU_PLUGIN_CB_NO_REGS, data); + } +} + +static void insn_free(gpointer data) +{ + InsnData *insn = (InsnData *) data; + g_free(insn->disas_str); + g_free(insn); +} + +static void cache_free(Cache *cache) +{ + for (int i = 0; i < cache->num_sets; i++) { + g_free(cache->sets[i].blocks); + } + + if (metadata_destroy) { + metadata_destroy(cache); + } + + g_free(cache->sets); + g_free(cache); +} + +static void caches_free(Cache **caches) +{ + int i; + + for (i = 0; i < cores; i++) { + cache_free(caches[i]); + } +} + +static void append_stats_line(GString *line, uint64_t l1_daccess, + uint64_t l1_dmisses, uint64_t l1_iaccess, + uint64_t l1_imisses, uint64_t l2_access, + uint64_t l2_misses) +{ + double l1_dmiss_rate, l1_imiss_rate, l2_miss_rate; + + l1_dmiss_rate = ((double) l1_dmisses) / (l1_daccess) * 100.0; + l1_imiss_rate = ((double) l1_imisses) / (l1_iaccess) * 100.0; + + g_string_append_printf(line, "%-14lu %-12lu %9.4lf%% %-14lu %-12lu" + " %9.4lf%%", + l1_daccess, + l1_dmisses, + l1_daccess ? l1_dmiss_rate : 0.0, + l1_iaccess, + l1_imisses, + l1_iaccess ? l1_imiss_rate : 0.0); + + if (use_l2) { + l2_miss_rate = ((double) l2_misses) / (l2_access) * 100.0; + g_string_append_printf(line, " %-12lu %-11lu %10.4lf%%", + l2_access, + l2_misses, + l2_access ? l2_miss_rate : 0.0); + } + + g_string_append(line, "\n"); +} + +static void sum_stats(void) +{ + int i; + + g_assert(cores > 1); + for (i = 0; i < cores; i++) { + l1_imisses += l1_icaches[i]->misses; + l1_dmisses += l1_dcaches[i]->misses; + l1_imem_accesses += l1_icaches[i]->accesses; + l1_dmem_accesses += l1_dcaches[i]->accesses; + + if (use_l2) { + l2_misses += l2_ucaches[i]->misses; + l2_mem_accesses += l2_ucaches[i]->accesses; + } + } +} + +static int dcmp(gconstpointer a, gconstpointer b) +{ + InsnData *insn_a = (InsnData *) a; + InsnData *insn_b = (InsnData *) b; + + return insn_a->l1_dmisses < insn_b->l1_dmisses ? 1 : -1; +} + +static int icmp(gconstpointer a, gconstpointer b) +{ + InsnData *insn_a = (InsnData *) a; + InsnData *insn_b = (InsnData *) b; + + return insn_a->l1_imisses < insn_b->l1_imisses ? 1 : -1; +} + +static int l2_cmp(gconstpointer a, gconstpointer b) +{ + InsnData *insn_a = (InsnData *) a; + InsnData *insn_b = (InsnData *) b; + + return insn_a->l2_misses < insn_b->l2_misses ? 1 : -1; +} + +static void log_stats(void) +{ + int i; + Cache *icache, *dcache, *l2_cache; + + g_autoptr(GString) rep = g_string_new("core #, data accesses, data misses," + " dmiss rate, insn accesses," + " insn misses, imiss rate"); + + if (use_l2) { + g_string_append(rep, ", l2 accesses, l2 misses, l2 miss rate"); + } + + g_string_append(rep, "\n"); + + for (i = 0; i < cores; i++) { + g_string_append_printf(rep, "%-8d", i); + dcache = l1_dcaches[i]; + icache = l1_icaches[i]; + l2_cache = use_l2 ? 
l2_ucaches[i] : NULL; + append_stats_line(rep, dcache->accesses, dcache->misses, + icache->accesses, icache->misses, + l2_cache ? l2_cache->accesses : 0, + l2_cache ? l2_cache->misses : 0); + } + + if (cores > 1) { + sum_stats(); + g_string_append_printf(rep, "%-8s", "sum"); + append_stats_line(rep, l1_dmem_accesses, l1_dmisses, + l1_imem_accesses, l1_imisses, + l2_cache ? l2_mem_accesses : 0, l2_cache ? l2_misses : 0); + } + + g_string_append(rep, "\n"); + qemu_plugin_outs(rep->str); +} + +static void log_top_insns(void) +{ + int i; + GList *curr, *miss_insns; + InsnData *insn; + + miss_insns = g_hash_table_get_values(miss_ht); + miss_insns = g_list_sort(miss_insns, dcmp); + g_autoptr(GString) rep = g_string_new(""); + g_string_append_printf(rep, "%s", "address, data misses, instruction\n"); + + for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) { + insn = (InsnData *) curr->data; + g_string_append_printf(rep, "0x%" PRIx64, insn->addr); + if (insn->symbol) { + g_string_append_printf(rep, " (%s)", insn->symbol); + } + g_string_append_printf(rep, ", %ld, %s\n", insn->l1_dmisses, + insn->disas_str); + } + + miss_insns = g_list_sort(miss_insns, icmp); + g_string_append_printf(rep, "%s", "\naddress, fetch misses, instruction\n"); + + for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) { + insn = (InsnData *) curr->data; + g_string_append_printf(rep, "0x%" PRIx64, insn->addr); + if (insn->symbol) { + g_string_append_printf(rep, " (%s)", insn->symbol); + } + g_string_append_printf(rep, ", %ld, %s\n", insn->l1_imisses, + insn->disas_str); + } + + if (!use_l2) { + goto finish; + } + + miss_insns = g_list_sort(miss_insns, l2_cmp); + g_string_append_printf(rep, "%s", "\naddress, L2 misses, instruction\n"); + + for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) { + insn = (InsnData *) curr->data; + g_string_append_printf(rep, "0x%" PRIx64, insn->addr); + if (insn->symbol) { + g_string_append_printf(rep, " (%s)", insn->symbol); + } + g_string_append_printf(rep, ", %ld, %s\n", insn->l2_misses, + insn->disas_str); + } + +finish: + qemu_plugin_outs(rep->str); + g_list_free(miss_insns); +} + +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ + log_stats(); + log_top_insns(); + + caches_free(l1_dcaches); + caches_free(l1_icaches); + + g_free(l1_dcache_locks); + g_free(l1_icache_locks); + + if (use_l2) { + caches_free(l2_ucaches); + g_free(l2_ucache_locks); + } + + g_hash_table_destroy(miss_ht); +} + +static void policy_init(void) +{ + switch (policy) { + case LRU: + update_hit = lru_update_blk; + update_miss = lru_update_blk; + metadata_init = lru_priorities_init; + metadata_destroy = lru_priorities_destroy; + break; + case FIFO: + update_miss = fifo_update_on_miss; + metadata_init = fifo_init; + metadata_destroy = fifo_destroy; + break; + case RAND: + rng = g_rand_new(); + break; + default: + g_assert_not_reached(); + } +} + +QEMU_PLUGIN_EXPORT +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, + int argc, char **argv) +{ + int i; + int l1_iassoc, l1_iblksize, l1_icachesize; + int l1_dassoc, l1_dblksize, l1_dcachesize; + int l2_assoc, l2_blksize, l2_cachesize; + + limit = 32; + sys = info->system_emulation; + + l1_dassoc = 8; + l1_dblksize = 64; + l1_dcachesize = l1_dblksize * l1_dassoc * 32; + + l1_iassoc = 8; + l1_iblksize = 64; + l1_icachesize = l1_iblksize * l1_iassoc * 32; + + l2_assoc = 16; + l2_blksize = 64; + l2_cachesize = l2_assoc * l2_blksize * 2048; + + policy = LRU; + + cores = sys ? 
qemu_plugin_n_vcpus() : 1; + + for (i = 0; i < argc; i++) { + char *opt = argv[i]; + g_autofree char **tokens = g_strsplit(opt, "=", 2); + + if (g_strcmp0(tokens[0], "iblksize") == 0) { + l1_iblksize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "iassoc") == 0) { + l1_iassoc = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "icachesize") == 0) { + l1_icachesize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "dblksize") == 0) { + l1_dblksize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "dassoc") == 0) { + l1_dassoc = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "dcachesize") == 0) { + l1_dcachesize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "limit") == 0) { + limit = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "cores") == 0) { + cores = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2cachesize") == 0) { + use_l2 = true; + l2_cachesize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2blksize") == 0) { + use_l2 = true; + l2_blksize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2assoc") == 0) { + use_l2 = true; + l2_assoc = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &use_l2)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else if (g_strcmp0(tokens[0], "evict") == 0) { + if (g_strcmp0(tokens[1], "rand") == 0) { + policy = RAND; + } else if (g_strcmp0(tokens[1], "lru") == 0) { + policy = LRU; + } else if (g_strcmp0(tokens[1], "fifo") == 0) { + policy = FIFO; + } else { + fprintf(stderr, "invalid eviction policy: %s\n", opt); + return -1; + } + } else { + fprintf(stderr, "option parsing failed: %s\n", opt); + return -1; + } + } + + policy_init(); + + l1_dcaches = caches_init(l1_dblksize, l1_dassoc, l1_dcachesize); + if (!l1_dcaches) { + const char *err = cache_config_error(l1_dblksize, l1_dassoc, l1_dcachesize); + fprintf(stderr, "dcache cannot be constructed from given parameters\n"); + fprintf(stderr, "%s\n", err); + return -1; + } + + l1_icaches = caches_init(l1_iblksize, l1_iassoc, l1_icachesize); + if (!l1_icaches) { + const char *err = cache_config_error(l1_iblksize, l1_iassoc, l1_icachesize); + fprintf(stderr, "icache cannot be constructed from given parameters\n"); + fprintf(stderr, "%s\n", err); + return -1; + } + + l2_ucaches = use_l2 ? caches_init(l2_blksize, l2_assoc, l2_cachesize) : NULL; + if (!l2_ucaches && use_l2) { + const char *err = cache_config_error(l2_blksize, l2_assoc, l2_cachesize); + fprintf(stderr, "L2 cache cannot be constructed from given parameters\n"); + fprintf(stderr, "%s\n", err); + return -1; + } + + l1_dcache_locks = g_new0(GMutex, cores); + l1_icache_locks = g_new0(GMutex, cores); + l2_ucache_locks = use_l2 ? g_new0(GMutex, cores) : NULL; + + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + + miss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, insn_free); + + return 0; +} diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c new file mode 100644 index 00000000000..a5275dcc15c --- /dev/null +++ b/contrib/plugins/execlog.c @@ -0,0 +1,153 @@ +/* + * Copyright (C) 2021, Alexandre Iooss + * + * Log instruction execution with memory access. + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
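+ *
+ * Example usage (illustrative, not part of this patch): once built as
+ * contrib/plugins/libexeclog.so, the plugin can be loaded with e.g.
+ *   qemu-x86_64 -plugin ./contrib/plugins/libexeclog.so -d plugin ./a.out
+ * so that the per-instruction log is emitted via the plugin log channel.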
+ */
+#include <glib.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+/* Store last executed instruction on each vCPU as a GString */
+GArray *last_exec;
+
+/**
+ * Add memory read or write information to current instruction log
+ */
+static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info,
+                     uint64_t vaddr, void *udata)
+{
+    GString *s;
+
+    /* Find vCPU in array */
+    g_assert(cpu_index < last_exec->len);
+    s = g_array_index(last_exec, GString *, cpu_index);
+
+    /* Indicate type of memory access */
+    if (qemu_plugin_mem_is_store(info)) {
+        g_string_append(s, ", store");
+    } else {
+        g_string_append(s, ", load");
+    }
+
+    /* If running full system emulation, log physical address and device name */
+    struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
+    if (hwaddr) {
+        uint64_t addr = qemu_plugin_hwaddr_phys_addr(hwaddr);
+        const char *name = qemu_plugin_hwaddr_device_name(hwaddr);
+        g_string_append_printf(s, ", 0x%08"PRIx64", %s", addr, name);
+    } else {
+        g_string_append_printf(s, ", 0x%08"PRIx64, vaddr);
+    }
+}
+
+/**
+ * Log instruction execution
+ */
+static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
+{
+    GString *s;
+
+    /* Find or create vCPU in array */
+    while (cpu_index >= last_exec->len) {
+        s = g_string_new(NULL);
+        g_array_append_val(last_exec, s);
+    }
+    s = g_array_index(last_exec, GString *, cpu_index);
+
+    /* Print previous instruction in cache */
+    if (s->len) {
+        qemu_plugin_outs(s->str);
+        qemu_plugin_outs("\n");
+    }
+
+    /* Store new instruction in cache */
+    /* vcpu_mem will add memory access information to last_exec */
+    g_string_printf(s, "%u, ", cpu_index);
+    g_string_append(s, (char *)udata);
+}
+
+/**
+ * On translation of a new translation block
+ *
+ * QEMU translates code one translation block (TB) at a time. By hooking here
+ * we can register a callback on each instruction and memory access.
+ */
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    struct qemu_plugin_insn *insn;
+    uint64_t insn_vaddr;
+    uint32_t insn_opcode;
+    char *insn_disas;
+
+    size_t n = qemu_plugin_tb_n_insns(tb);
+    for (size_t i = 0; i < n; i++) {
+        /*
+         * `insn` is shared between translations in QEMU, copy needed data here.
+         * `output` is never freed as it might be used multiple times during
+         * the emulation lifetime.
+         * We only consider the first 32 bits of the instruction, this may be
+         * a limitation for CISC architectures.
+ */ + insn = qemu_plugin_tb_get_insn(tb, i); + insn_vaddr = qemu_plugin_insn_vaddr(insn); + insn_opcode = *((uint32_t *)qemu_plugin_insn_data(insn)); + insn_disas = qemu_plugin_insn_disas(insn); + char *output = g_strdup_printf("0x%"PRIx64", 0x%"PRIx32", \"%s\"", + insn_vaddr, insn_opcode, insn_disas); + + /* Register callback on memory read or write */ + qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem, + QEMU_PLUGIN_CB_NO_REGS, + QEMU_PLUGIN_MEM_RW, NULL); + + /* Register callback on instruction */ + qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec, + QEMU_PLUGIN_CB_NO_REGS, output); + } +} + +/** + * On plugin exit, print last instruction in cache + */ +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ + guint i; + GString *s; + for (i = 0; i < last_exec->len; i++) { + s = g_array_index(last_exec, GString *, i); + if (s->str) { + qemu_plugin_outs(s->str); + qemu_plugin_outs("\n"); + } + } +} + +/** + * Install the plugin + */ +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, + const qemu_info_t *info, int argc, + char **argv) +{ + /* + * Initialize dynamic array to cache vCPU instruction. In user mode + * we don't know the size before emulation. + */ + last_exec = g_array_new(FALSE, FALSE, sizeof(GString *)); + + /* Register translation block and exit callbacks */ + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + + return 0; +} diff --git a/contrib/plugins/hotblocks.c b/contrib/plugins/hotblocks.c index 4b083401432..062200a7a42 100644 --- a/contrib/plugins/hotblocks.c +++ b/contrib/plugins/hotblocks.c @@ -133,8 +133,18 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, int argc, char **argv) { - if (argc && strcmp(argv[0], "inline") == 0) { - do_inline = true; + for (int i = 0; i < argc; i++) { + char *opt = argv[i]; + g_autofree char **tokens = g_strsplit(opt, "=", 2); + if (g_strcmp0(tokens[0], "inline") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else { + fprintf(stderr, "option parsing failed: %s\n", opt); + return -1; + } } plugin_init(); diff --git a/contrib/plugins/hotpages.c b/contrib/plugins/hotpages.c index bf532675328..0d12910af69 100644 --- a/contrib/plugins/hotpages.c +++ b/contrib/plugins/hotpages.c @@ -169,16 +169,26 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, for (i = 0; i < argc; i++) { char *opt = argv[i]; - if (g_strcmp0(opt, "reads") == 0) { - sort_by = SORT_R; - } else if (g_strcmp0(opt, "writes") == 0) { - sort_by = SORT_W; - } else if (g_strcmp0(opt, "address") == 0) { - sort_by = SORT_A; - } else if (g_strcmp0(opt, "io") == 0) { - track_io = true; - } else if (g_str_has_prefix(opt, "pagesize=")) { - page_size = g_ascii_strtoull(opt + 9, NULL, 10); + g_autofree char **tokens = g_strsplit(opt, "=", -1); + + if (g_strcmp0(tokens[0], "sortby") == 0) { + if (g_strcmp0(tokens[1], "reads") == 0) { + sort_by = SORT_R; + } else if (g_strcmp0(tokens[1], "writes") == 0) { + sort_by = SORT_W; + } else if (g_strcmp0(tokens[1], "address") == 0) { + sort_by = SORT_A; + } else { + fprintf(stderr, "invalid value to sortby: %s\n", tokens[1]); + return -1; + } + } else if (g_strcmp0(tokens[0], "io") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &track_io)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else if (g_strcmp0(tokens[0], "pagesize") 
== 0) { + page_size = g_ascii_strtoull(tokens[1], NULL, 10); } else { fprintf(stderr, "option parsing failed: %s\n", opt); return -1; diff --git a/contrib/plugins/howvec.c b/contrib/plugins/howvec.c index 600f7facc1e..4a5ec3d936a 100644 --- a/contrib/plugins/howvec.c +++ b/contrib/plugins/howvec.c @@ -333,23 +333,34 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, for (i = 0; i < argc; i++) { char *p = argv[i]; - if (strcmp(p, "inline") == 0) { - do_inline = true; - } else if (strcmp(p, "verbose") == 0) { - verbose = true; - } else { + g_autofree char **tokens = g_strsplit(p, "=", -1); + if (g_strcmp0(tokens[0], "inline") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", p); + return -1; + } + } else if (g_strcmp0(tokens[0], "verbose") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", p); + return -1; + } + } else if (g_strcmp0(tokens[0], "count") == 0) { + char *value = tokens[1]; int j; CountType type = COUNT_INDIVIDUAL; - if (*p == '!') { + if (*value == '!') { type = COUNT_NONE; - p++; + value++; } for (j = 0; j < class_table_sz; j++) { - if (strcmp(p, class_table[j].opt) == 0) { + if (strcmp(value, class_table[j].opt) == 0) { class_table[j].what = type; break; } } + } else { + fprintf(stderr, "option parsing failed: %s\n", p); + return -1; } } diff --git a/contrib/plugins/hwprofile.c b/contrib/plugins/hwprofile.c index faf216ac002..691d4edb0c6 100644 --- a/contrib/plugins/hwprofile.c +++ b/contrib/plugins/hwprofile.c @@ -259,27 +259,42 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, int argc, char **argv) { int i; + g_autoptr(GString) matches_raw = g_string_new(""); for (i = 0; i < argc; i++) { char *opt = argv[i]; - if (g_strcmp0(opt, "read") == 0) { - rw = QEMU_PLUGIN_MEM_R; - } else if (g_strcmp0(opt, "write") == 0) { - rw = QEMU_PLUGIN_MEM_W; - } else if (g_strcmp0(opt, "pattern") == 0) { - pattern = true; - } else if (g_strcmp0(opt, "source") == 0) { - source = true; - } else if (g_str_has_prefix(opt, "match")) { - gchar **parts = g_strsplit(opt, "=", 2); + g_autofree char **tokens = g_strsplit(opt, "=", 2); + + if (g_strcmp0(tokens[0], "track") == 0) { + if (g_strcmp0(tokens[1], "read") == 0) { + rw = QEMU_PLUGIN_MEM_R; + } else if (g_strcmp0(tokens[1], "write") == 0) { + rw = QEMU_PLUGIN_MEM_W; + } else { + fprintf(stderr, "invalid value for track: %s\n", tokens[1]); + return -1; + } + } else if (g_strcmp0(tokens[0], "pattern") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &pattern)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else if (g_strcmp0(tokens[0], "source") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &source)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else if (g_strcmp0(tokens[0], "match") == 0) { check_match = true; - matches = g_strsplit(parts[1], ",", -1); - g_strfreev(parts); + g_string_append_printf(matches_raw, "%s,", tokens[1]); } else { fprintf(stderr, "option parsing failed: %s\n", opt); return -1; } } + if (check_match) { + matches = g_strsplit(matches_raw->str, ",", -1); + } if (source && pattern) { fprintf(stderr, "can only currently track either source or pattern.\n"); diff --git a/contrib/plugins/lockstep.c b/contrib/plugins/lockstep.c index 7fd35eb6692..a41ffe83fa6 100644 --- a/contrib/plugins/lockstep.c +++ 
b/contrib/plugins/lockstep.c @@ -319,22 +319,35 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, int argc, char **argv) { int i; - - if (!argc || !argv[0]) { - qemu_plugin_outs("Need a socket path to talk to other instance."); - return -1; - } + g_autofree char *sock_path = NULL; for (i = 0; i < argc; i++) { char *p = argv[i]; - if (strcmp(p, "verbose") == 0) { - verbose = true; - } else if (!setup_unix_socket(argv[0])) { - qemu_plugin_outs("Failed to setup socket for communications."); + g_autofree char **tokens = g_strsplit(p, "=", 2); + + if (g_strcmp0(tokens[0], "verbose") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", p); + return -1; + } + } else if (g_strcmp0(tokens[0], "sockpath") == 0) { + sock_path = tokens[1]; + } else { + fprintf(stderr, "option parsing failed: %s\n", p); return -1; } } + if (sock_path == NULL) { + fprintf(stderr, "Need a socket path to talk to other instance.\n"); + return -1; + } + + if (!setup_unix_socket(sock_path)) { + fprintf(stderr, "Failed to setup socket for communications.\n"); + return -1; + } + our_id = id; qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); diff --git a/contrib/vhost-user-gpu/meson.build b/contrib/vhost-user-gpu/meson.build index 0ce1515a10e..92c8f3a86a7 100644 --- a/contrib/vhost-user-gpu/meson.build +++ b/contrib/vhost-user-gpu/meson.build @@ -1,6 +1,5 @@ -if 'CONFIG_TOOLS' in config_host and 'CONFIG_VIRGL' in config_host \ - and 'CONFIG_GBM' in config_host and 'CONFIG_LINUX' in config_host \ - and pixman.found() +if 'CONFIG_TOOLS' in config_host and virgl.found() and gbm.found() \ + and 'CONFIG_LINUX' in config_host and pixman.found() executable('vhost-user-gpu', files('vhost-user-gpu.c', 'virgl.c', 'vugbm.c'), dependencies: [qemuutil, pixman, gbm, virgl, vhost_user, opengl], install: true, diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c index f73f292c9f7..611360e6b47 100644 --- a/contrib/vhost-user-gpu/vhost-user-gpu.c +++ b/contrib/vhost-user-gpu/vhost-user-gpu.c @@ -49,6 +49,8 @@ static char *opt_render_node; static gboolean opt_virgl; static void vg_handle_ctrl(VuDev *dev, int qidx); +static void vg_cleanup_mapping(VuGpu *g, + struct virtio_gpu_simple_resource *res); static const char * vg_cmd_to_string(int cmd) @@ -348,6 +350,7 @@ vg_resource_create_2d(VuGpu *g, if (!res->image) { g_critical("%s: resource creation failed %d %d %d", __func__, c2d.resource_id, c2d.width, c2d.height); + vugbm_buffer_destroy(&res->buffer); g_free(res); cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; return; @@ -399,6 +402,7 @@ vg_resource_destroy(VuGpu *g, } vugbm_buffer_destroy(&res->buffer); + vg_cleanup_mapping(g, res); pixman_image_unref(res->image); QTAILQ_REMOVE(&g->reslist, res, next); g_free(res); @@ -488,6 +492,11 @@ vg_resource_attach_backing(VuGpu *g, return; } + if (res->iov) { + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; + return; + } + ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); if (ret != 0) { cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; @@ -497,6 +506,22 @@ vg_resource_attach_backing(VuGpu *g, res->iov_cnt = ab.nr_entries; } +/* Though currently only free iov, maybe later will do more work. 
*/ +void vg_cleanup_mapping_iov(VuGpu *g, + struct iovec *iov, uint32_t count) +{ + g_free(iov); +} + +static void +vg_cleanup_mapping(VuGpu *g, + struct virtio_gpu_simple_resource *res) +{ + vg_cleanup_mapping_iov(g, res->iov, res->iov_cnt); + res->iov = NULL; + res->iov_cnt = 0; +} + static void vg_resource_detach_backing(VuGpu *g, struct virtio_gpu_ctrl_command *cmd) @@ -515,9 +540,7 @@ vg_resource_detach_backing(VuGpu *g, return; } - g_free(res->iov); - res->iov = NULL; - res->iov_cnt = 0; + vg_cleanup_mapping(g, res); } static void diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c index 9e6660c7ab8..3e45e1bd336 100644 --- a/contrib/vhost-user-gpu/virgl.c +++ b/contrib/vhost-user-gpu/virgl.c @@ -108,9 +108,17 @@ virgl_cmd_resource_unref(VuGpu *g, struct virtio_gpu_ctrl_command *cmd) { struct virtio_gpu_resource_unref unref; + struct iovec *res_iovs = NULL; + int num_iovs = 0; VUGPU_FILL_CMD(unref); + virgl_renderer_resource_detach_iov(unref.resource_id, + &res_iovs, + &num_iovs); + if (res_iovs != NULL && num_iovs != 0) { + vg_cleanup_mapping_iov(g, res_iovs, num_iovs); + } virgl_renderer_resource_unref(unref.resource_id); } @@ -128,6 +136,7 @@ virgl_cmd_get_capset_info(VuGpu *g, VUGPU_FILL_CMD(info); + memset(&resp, 0, sizeof(resp)); if (info.capset_index == 0) { resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL; virgl_renderer_get_cap_set(resp.capset_id, @@ -169,6 +178,10 @@ virgl_cmd_get_capset(VuGpu *g, virgl_renderer_get_cap_set(gc.capset_id, &max_ver, &max_size); + if (!max_size) { + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } resp = g_malloc0(sizeof(*resp) + max_size); resp->hdr.type = VIRTIO_GPU_RESP_OK_CAPSET; @@ -279,8 +292,11 @@ virgl_resource_attach_backing(VuGpu *g, return; } - virgl_renderer_resource_attach_iov(att_rb.resource_id, + ret = virgl_renderer_resource_attach_iov(att_rb.resource_id, res_iovs, att_rb.nr_entries); + if (ret != 0) { + vg_cleanup_mapping_iov(g, res_iovs, att_rb.nr_entries); + } } static void @@ -299,7 +315,7 @@ virgl_resource_detach_backing(VuGpu *g, if (res_iovs == NULL || num_iovs == 0) { return; } - g_free(res_iovs); + vg_cleanup_mapping_iov(g, res_iovs, num_iovs); } static void diff --git a/contrib/vhost-user-gpu/vugpu.h b/contrib/vhost-user-gpu/vugpu.h index 04d56158123..e2864bba68e 100644 --- a/contrib/vhost-user-gpu/vugpu.h +++ b/contrib/vhost-user-gpu/vugpu.h @@ -169,7 +169,7 @@ int vg_create_mapping_iov(VuGpu *g, struct virtio_gpu_resource_attach_backing *ab, struct virtio_gpu_ctrl_command *cmd, struct iovec **iov); - +void vg_cleanup_mapping_iov(VuGpu *g, struct iovec *iov, uint32_t count); void vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd); void vg_wait_ok(VuGpu *g); diff --git a/contrib/vhost-user-input/main.c b/contrib/vhost-user-input/main.c index c15d18c33f0..081230da548 100644 --- a/contrib/vhost-user-input/main.c +++ b/contrib/vhost-user-input/main.c @@ -6,12 +6,13 @@ #include "qemu/osdep.h" -#include +#include #include "qemu/iov.h" #include "qemu/bswap.h" #include "qemu/sockets.h" #include "libvhost-user-glib.h" +#include "standard-headers/linux/input.h" #include "standard-headers/linux/virtio_input.h" #include "qapi/error.h" @@ -113,13 +114,16 @@ vi_evdev_watch(VuDev *dev, int condition, void *data) static void vi_handle_status(VuInput *vi, virtio_input_event *event) { struct input_event evdev; + struct timeval tval; int rc; - if (gettimeofday(&evdev.time, NULL)) { + if (gettimeofday(&tval, NULL)) { perror("vi_handle_status: gettimeofday"); return; } + 
evdev.input_event_sec = tval.tv_sec; + evdev.input_event_usec = tval.tv_usec; evdev.type = le16toh(event->type); evdev.code = le16toh(event->code); evdev.value = le32toh(event->value); diff --git a/cpu.c b/cpu.c index bfbe5a66f95..9bce67ef556 100644 --- a/cpu.c +++ b/cpu.c @@ -29,6 +29,7 @@ #ifdef CONFIG_USER_ONLY #include "qemu.h" #else +#include "hw/core/sysemu-cpu-ops.h" #include "exec/address-spaces.h" #endif #include "sysemu/tcg.h" @@ -36,6 +37,8 @@ #include "sysemu/replay.h" #include "exec/translate-all.h" #include "exec/log.h" +#include "hw/core/accel-cpu.h" +#include "trace/trace-root.h" uintptr_t qemu_host_page_size; intptr_t qemu_host_page_mask; @@ -126,10 +129,14 @@ const VMStateDescription vmstate_cpu_common = { void cpu_exec_realizefn(CPUState *cpu, Error **errp) { +#ifndef CONFIG_USER_ONLY CPUClass *cc = CPU_GET_CLASS(cpu); +#endif cpu_list_add(cpu); - + if (!accel_cpu_realizefn(cpu, errp)) { + return; + } #ifdef CONFIG_TCG /* NB: errp parameter is unused currently */ if (tcg_enabled()) { @@ -138,26 +145,25 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp) #endif /* CONFIG_TCG */ #ifdef CONFIG_USER_ONLY - assert(cc->vmsd == NULL); + assert(qdev_get_vmsd(DEVICE(cpu)) == NULL || + qdev_get_vmsd(DEVICE(cpu))->unmigratable); #else if (qdev_get_vmsd(DEVICE(cpu)) == NULL) { vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu); } - if (cc->vmsd != NULL) { - vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu); + if (cc->sysemu_ops->legacy_vmsd != NULL) { + vmstate_register(NULL, cpu->cpu_index, cc->sysemu_ops->legacy_vmsd, cpu); } #endif /* CONFIG_USER_ONLY */ } void cpu_exec_unrealizefn(CPUState *cpu) { +#ifndef CONFIG_USER_ONLY CPUClass *cc = CPU_GET_CLASS(cpu); -#ifdef CONFIG_USER_ONLY - assert(cc->vmsd == NULL); -#else - if (cc->vmsd != NULL) { - vmstate_unregister(NULL, cc->vmsd, cpu); + if (cc->sysemu_ops->legacy_vmsd != NULL) { + vmstate_unregister(NULL, cc->sysemu_ops->legacy_vmsd, cpu); } if (qdev_get_vmsd(DEVICE(cpu)) == NULL) { vmstate_unregister(NULL, &vmstate_cpu_common, cpu); @@ -173,6 +179,27 @@ void cpu_exec_unrealizefn(CPUState *cpu) cpu_list_remove(cpu); } +static Property cpu_common_props[] = { +#ifndef CONFIG_USER_ONLY + /* + * Create a memory property for softmmu CPU object, + * so users can wire up its memory. (This can't go in hw/core/cpu.c + * because that file is compiled only once for both user-mode + * and system builds.) The default if no link is set up is to use + * the system address space. 
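+     *
+     * For example (illustrative, not part of this change), a board model
+     * could wire a CPU to its own container region with something like:
+     *   object_property_set_link(OBJECT(cpu), "memory", OBJECT(mr),
+     *                            &error_abort);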
+ */ + DEFINE_PROP_LINK("memory", CPUState, memory, TYPE_MEMORY_REGION, + MemoryRegion *), +#endif + DEFINE_PROP_BOOL("start-powered-off", CPUState, start_powered_off, false), + DEFINE_PROP_END_OF_LIST(), +}; + +void cpu_class_init_props(DeviceClass *dc) +{ + device_class_set_props(dc, cpu_common_props); +} + void cpu_exec_initfn(CPUState *cpu) { cpu->as = NULL; @@ -219,11 +246,6 @@ void tb_invalidate_phys_addr(target_ulong addr) tb_invalidate_phys_page_range(addr, addr + 1); mmap_unlock(); } - -static void breakpoint_invalidate(CPUState *cpu, target_ulong pc) -{ - tb_invalidate_phys_addr(pc); -} #else void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs) { @@ -244,25 +266,19 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs) ram_addr = memory_region_get_ram_addr(mr) + addr; tb_invalidate_phys_page_range(ram_addr, ram_addr + 1); } - -static void breakpoint_invalidate(CPUState *cpu, target_ulong pc) -{ - /* - * There may not be a virtual to physical translation for the pc - * right now, but there may exist cached TB for this pc. - * Flush the whole TB cache to force re-translation of such TBs. - * This is heavyweight, but we're debugging anyway. - */ - tb_flush(cpu); -} #endif /* Add a breakpoint. */ int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags, CPUBreakpoint **breakpoint) { + CPUClass *cc = CPU_GET_CLASS(cpu); CPUBreakpoint *bp; + if (cc->gdb_adjust_breakpoint) { + pc = cc->gdb_adjust_breakpoint(cpu, pc); + } + bp = g_malloc(sizeof(*bp)); bp->pc = pc; @@ -275,19 +291,24 @@ int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags, QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry); } - breakpoint_invalidate(cpu, pc); - if (breakpoint) { *breakpoint = bp; } + + trace_breakpoint_insert(cpu->cpu_index, pc, flags); return 0; } /* Remove a specific breakpoint. */ int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags) { + CPUClass *cc = CPU_GET_CLASS(cpu); CPUBreakpoint *bp; + if (cc->gdb_adjust_breakpoint) { + pc = cc->gdb_adjust_breakpoint(cpu, pc); + } + QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) { if (bp->pc == pc && bp->flags == flags) { cpu_breakpoint_remove_by_ref(cpu, bp); @@ -298,13 +319,12 @@ int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags) } /* Remove a specific breakpoint by reference. */ -void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint) +void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *bp) { - QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry); + QTAILQ_REMOVE(&cpu->breakpoints, bp, entry); - breakpoint_invalidate(cpu, breakpoint->pc); - - g_free(breakpoint); + trace_breakpoint_remove(cpu->cpu_index, bp->pc, bp->flags); + g_free(bp); } /* Remove all matching breakpoints. 
*/ @@ -327,11 +347,8 @@ void cpu_single_step(CPUState *cpu, int enabled) cpu->singlestep_enabled = enabled; if (kvm_enabled()) { kvm_update_guest_debug(cpu, 0); - } else { - /* must flush all the translated code to avoid inconsistencies */ - /* XXX: only flush what is necessary */ - tb_flush(cpu); } + trace_breakpoint_singlestep(cpu->cpu_index, enabled); } } diff --git a/crypto/cipher-builtin.c.inc b/crypto/cipher-builtin.c.inc index 7597cf4a10f..b409089095c 100644 --- a/crypto/cipher-builtin.c.inc +++ b/crypto/cipher-builtin.c.inc @@ -19,8 +19,6 @@ */ #include "crypto/aes.h" -#include "crypto/desrfb.h" -#include "crypto/xts.h" typedef struct QCryptoCipherBuiltinAESContext QCryptoCipherBuiltinAESContext; struct QCryptoCipherBuiltinAESContext { @@ -32,7 +30,6 @@ typedef struct QCryptoCipherBuiltinAES QCryptoCipherBuiltinAES; struct QCryptoCipherBuiltinAES { QCryptoCipher base; QCryptoCipherBuiltinAESContext key; - QCryptoCipherBuiltinAESContext key_tweak; uint8_t iv[AES_BLOCK_SIZE]; }; @@ -194,39 +191,6 @@ static int qcrypto_cipher_aes_decrypt_cbc(QCryptoCipher *cipher, return 0; } -static int qcrypto_cipher_aes_encrypt_xts(QCryptoCipher *cipher, - const void *in, void *out, - size_t len, Error **errp) -{ - QCryptoCipherBuiltinAES *ctx - = container_of(cipher, QCryptoCipherBuiltinAES, base); - - if (!qcrypto_length_check(len, AES_BLOCK_SIZE, errp)) { - return -1; - } - xts_encrypt(&ctx->key, &ctx->key_tweak, - do_aes_encrypt_ecb, do_aes_decrypt_ecb, - ctx->iv, len, out, in); - return 0; -} - -static int qcrypto_cipher_aes_decrypt_xts(QCryptoCipher *cipher, - const void *in, void *out, - size_t len, Error **errp) -{ - QCryptoCipherBuiltinAES *ctx - = container_of(cipher, QCryptoCipherBuiltinAES, base); - - if (!qcrypto_length_check(len, AES_BLOCK_SIZE, errp)) { - return -1; - } - xts_decrypt(&ctx->key, &ctx->key_tweak, - do_aes_encrypt_ecb, do_aes_decrypt_ecb, - ctx->iv, len, out, in); - return 0; -} - - static int qcrypto_cipher_aes_setiv(QCryptoCipher *cipher, const uint8_t *iv, size_t niv, Error **errp) { @@ -257,84 +221,16 @@ static const struct QCryptoCipherDriver qcrypto_cipher_aes_driver_cbc = { .cipher_free = qcrypto_cipher_ctx_free, }; -static const struct QCryptoCipherDriver qcrypto_cipher_aes_driver_xts = { - .cipher_encrypt = qcrypto_cipher_aes_encrypt_xts, - .cipher_decrypt = qcrypto_cipher_aes_decrypt_xts, - .cipher_setiv = qcrypto_cipher_aes_setiv, - .cipher_free = qcrypto_cipher_ctx_free, -}; - - -typedef struct QCryptoCipherBuiltinDESRFB QCryptoCipherBuiltinDESRFB; -struct QCryptoCipherBuiltinDESRFB { - QCryptoCipher base; - - /* C.f. 
alg_key_len[QCRYPTO_CIPHER_ALG_DES_RFB] */ - uint8_t key[8]; -}; - -static int qcrypto_cipher_encrypt_des_rfb(QCryptoCipher *cipher, - const void *in, void *out, - size_t len, Error **errp) -{ - QCryptoCipherBuiltinDESRFB *ctx - = container_of(cipher, QCryptoCipherBuiltinDESRFB, base); - size_t i; - - if (!qcrypto_length_check(len, 8, errp)) { - return -1; - } - - deskey(ctx->key, EN0); - - for (i = 0; i < len; i += 8) { - des((void *)in + i, out + i); - } - - return 0; -} - -static int qcrypto_cipher_decrypt_des_rfb(QCryptoCipher *cipher, - const void *in, void *out, - size_t len, Error **errp) -{ - QCryptoCipherBuiltinDESRFB *ctx - = container_of(cipher, QCryptoCipherBuiltinDESRFB, base); - size_t i; - - if (!qcrypto_length_check(len, 8, errp)) { - return -1; - } - - deskey(ctx->key, DE1); - - for (i = 0; i < len; i += 8) { - des((void *)in + i, out + i); - } - - return 0; -} - -static const struct QCryptoCipherDriver qcrypto_cipher_des_rfb_driver = { - .cipher_encrypt = qcrypto_cipher_encrypt_des_rfb, - .cipher_decrypt = qcrypto_cipher_decrypt_des_rfb, - .cipher_setiv = qcrypto_cipher_no_setiv, - .cipher_free = qcrypto_cipher_ctx_free, -}; - bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, QCryptoCipherMode mode) { switch (alg) { - case QCRYPTO_CIPHER_ALG_DES_RFB: - return mode == QCRYPTO_CIPHER_MODE_ECB; case QCRYPTO_CIPHER_ALG_AES_128: case QCRYPTO_CIPHER_ALG_AES_192: case QCRYPTO_CIPHER_ALG_AES_256: switch (mode) { case QCRYPTO_CIPHER_MODE_ECB: case QCRYPTO_CIPHER_MODE_CBC: - case QCRYPTO_CIPHER_MODE_XTS: return true; default: return false; @@ -356,18 +252,6 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, } switch (alg) { - case QCRYPTO_CIPHER_ALG_DES_RFB: - if (mode == QCRYPTO_CIPHER_MODE_ECB) { - QCryptoCipherBuiltinDESRFB *ctx; - - ctx = g_new0(QCryptoCipherBuiltinDESRFB, 1); - ctx->base.driver = &qcrypto_cipher_des_rfb_driver; - memcpy(ctx->key, key, sizeof(ctx->key)); - - return &ctx->base; - } - goto bad_mode; - case QCRYPTO_CIPHER_ALG_AES_128: case QCRYPTO_CIPHER_ALG_AES_192: case QCRYPTO_CIPHER_ALG_AES_256: @@ -382,9 +266,6 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, case QCRYPTO_CIPHER_MODE_CBC: drv = &qcrypto_cipher_aes_driver_cbc; break; - case QCRYPTO_CIPHER_MODE_XTS: - drv = &qcrypto_cipher_aes_driver_xts; - break; default: goto bad_mode; } @@ -392,19 +273,6 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, ctx = g_new0(QCryptoCipherBuiltinAES, 1); ctx->base.driver = drv; - if (mode == QCRYPTO_CIPHER_MODE_XTS) { - nkey /= 2; - if (AES_set_encrypt_key(key + nkey, nkey * 8, - &ctx->key_tweak.enc)) { - error_setg(errp, "Failed to set encryption key"); - goto error; - } - if (AES_set_decrypt_key(key + nkey, nkey * 8, - &ctx->key_tweak.dec)) { - error_setg(errp, "Failed to set decryption key"); - goto error; - } - } if (AES_set_encrypt_key(key, nkey * 8, &ctx->key.enc)) { error_setg(errp, "Failed to set encryption key"); goto error; diff --git a/crypto/cipher-gcrypt.c.inc b/crypto/cipher-gcrypt.c.inc index 42d4137534f..a6a0117717f 100644 --- a/crypto/cipher-gcrypt.c.inc +++ b/crypto/cipher-gcrypt.c.inc @@ -18,17 +18,13 @@ * */ -#ifdef CONFIG_QEMU_PRIVATE_XTS -#include "crypto/xts.h" -#endif - #include bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, QCryptoCipherMode mode) { switch (alg) { - case QCRYPTO_CIPHER_ALG_DES_RFB: + case QCRYPTO_CIPHER_ALG_DES: case QCRYPTO_CIPHER_ALG_3DES: case QCRYPTO_CIPHER_ALG_AES_128: case QCRYPTO_CIPHER_ALG_AES_192: @@ -59,10 +55,6 @@ typedef 
struct QCryptoCipherGcrypt { QCryptoCipher base; gcry_cipher_hd_t handle; size_t blocksize; -#ifdef CONFIG_QEMU_PRIVATE_XTS - gcry_cipher_hd_t tweakhandle; - uint8_t iv[XTS_BLOCK_SIZE]; -#endif } QCryptoCipherGcrypt; @@ -178,90 +170,6 @@ static const struct QCryptoCipherDriver qcrypto_gcrypt_ctr_driver = { .cipher_free = qcrypto_gcrypt_ctx_free, }; -#ifdef CONFIG_QEMU_PRIVATE_XTS -static void qcrypto_gcrypt_xts_ctx_free(QCryptoCipher *cipher) -{ - QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); - - gcry_cipher_close(ctx->tweakhandle); - qcrypto_gcrypt_ctx_free(cipher); -} - -static void qcrypto_gcrypt_xts_wrape(const void *ctx, size_t length, - uint8_t *dst, const uint8_t *src) -{ - gcry_error_t err; - err = gcry_cipher_encrypt((gcry_cipher_hd_t)ctx, dst, length, src, length); - g_assert(err == 0); -} - -static void qcrypto_gcrypt_xts_wrapd(const void *ctx, size_t length, - uint8_t *dst, const uint8_t *src) -{ - gcry_error_t err; - err = gcry_cipher_decrypt((gcry_cipher_hd_t)ctx, dst, length, src, length); - g_assert(err == 0); -} - -static int qcrypto_gcrypt_xts_encrypt(QCryptoCipher *cipher, const void *in, - void *out, size_t len, Error **errp) -{ - QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); - - if (len & (ctx->blocksize - 1)) { - error_setg(errp, "Length %zu must be a multiple of block size %zu", - len, ctx->blocksize); - return -1; - } - - xts_encrypt(ctx->handle, ctx->tweakhandle, - qcrypto_gcrypt_xts_wrape, qcrypto_gcrypt_xts_wrapd, - ctx->iv, len, out, in); - return 0; -} - -static int qcrypto_gcrypt_xts_decrypt(QCryptoCipher *cipher, const void *in, - void *out, size_t len, Error **errp) -{ - QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); - - if (len & (ctx->blocksize - 1)) { - error_setg(errp, "Length %zu must be a multiple of block size %zu", - len, ctx->blocksize); - return -1; - } - - xts_decrypt(ctx->handle, ctx->tweakhandle, - qcrypto_gcrypt_xts_wrape, qcrypto_gcrypt_xts_wrapd, - ctx->iv, len, out, in); - return 0; -} - -static int qcrypto_gcrypt_xts_setiv(QCryptoCipher *cipher, - const uint8_t *iv, size_t niv, - Error **errp) -{ - QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); - - if (niv != ctx->blocksize) { - error_setg(errp, "Expected IV size %zu not %zu", - ctx->blocksize, niv); - return -1; - } - - memcpy(ctx->iv, iv, niv); - return 0; -} - -static const struct QCryptoCipherDriver qcrypto_gcrypt_xts_driver = { - .cipher_encrypt = qcrypto_gcrypt_xts_encrypt, - .cipher_decrypt = qcrypto_gcrypt_xts_decrypt, - .cipher_setiv = qcrypto_gcrypt_xts_setiv, - .cipher_free = qcrypto_gcrypt_xts_ctx_free, -}; -#endif /* CONFIG_QEMU_PRIVATE_XTS */ - - static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, QCryptoCipherMode mode, const uint8_t *key, @@ -278,7 +186,7 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, } switch (alg) { - case QCRYPTO_CIPHER_ALG_DES_RFB: + case QCRYPTO_CIPHER_ALG_DES: gcryalg = GCRY_CIPHER_DES; break; case QCRYPTO_CIPHER_ALG_3DES: @@ -323,12 +231,7 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, gcrymode = GCRY_CIPHER_MODE_ECB; break; case QCRYPTO_CIPHER_MODE_XTS: -#ifdef CONFIG_QEMU_PRIVATE_XTS - drv = &qcrypto_gcrypt_xts_driver; - gcrymode = GCRY_CIPHER_MODE_ECB; -#else gcrymode = GCRY_CIPHER_MODE_XTS; -#endif break; case QCRYPTO_CIPHER_MODE_CBC: gcrymode = GCRY_CIPHER_MODE_CBC; @@ -354,44 +257,7 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm 
alg, } ctx->blocksize = gcry_cipher_get_algo_blklen(gcryalg); -#ifdef CONFIG_QEMU_PRIVATE_XTS - if (mode == QCRYPTO_CIPHER_MODE_XTS) { - if (ctx->blocksize != XTS_BLOCK_SIZE) { - error_setg(errp, - "Cipher block size %zu must equal XTS block size %d", - ctx->blocksize, XTS_BLOCK_SIZE); - goto error; - } - err = gcry_cipher_open(&ctx->tweakhandle, gcryalg, gcrymode, 0); - if (err != 0) { - error_setg(errp, "Cannot initialize cipher: %s", - gcry_strerror(err)); - goto error; - } - } -#endif - - if (alg == QCRYPTO_CIPHER_ALG_DES_RFB) { - /* We're using standard DES cipher from gcrypt, so we need - * to munge the key so that the results are the same as the - * bizarre RFB variant of DES :-) - */ - uint8_t *rfbkey = qcrypto_cipher_munge_des_rfb_key(key, nkey); - err = gcry_cipher_setkey(ctx->handle, rfbkey, nkey); - g_free(rfbkey); - } else { -#ifdef CONFIG_QEMU_PRIVATE_XTS - if (mode == QCRYPTO_CIPHER_MODE_XTS) { - nkey /= 2; - err = gcry_cipher_setkey(ctx->tweakhandle, key + nkey, nkey); - if (err != 0) { - error_setg(errp, "Cannot set key: %s", gcry_strerror(err)); - goto error; - } - } -#endif - err = gcry_cipher_setkey(ctx->handle, key, nkey); - } + err = gcry_cipher_setkey(ctx->handle, key, nkey); if (err != 0) { error_setg(errp, "Cannot set key: %s", gcry_strerror(err)); goto error; @@ -400,9 +266,6 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, return &ctx->base; error: -#ifdef CONFIG_QEMU_PRIVATE_XTS - gcry_cipher_close(ctx->tweakhandle); -#endif gcry_cipher_close(ctx->handle); g_free(ctx); return NULL; diff --git a/crypto/cipher-gnutls.c.inc b/crypto/cipher-gnutls.c.inc new file mode 100644 index 00000000000..501e4e07a5b --- /dev/null +++ b/crypto/cipher-gnutls.c.inc @@ -0,0 +1,335 @@ +/* + * QEMU Crypto cipher gnutls algorithms + * + * Copyright (c) 2021 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ * + */ + +#include "qemu/osdep.h" +#include "cipherpriv.h" + +#include + +#if GNUTLS_VERSION_NUMBER >= 0x030608 +#define QEMU_GNUTLS_XTS +#endif + +bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode) +{ + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + case QCRYPTO_CIPHER_MODE_CBC: + switch (alg) { + case QCRYPTO_CIPHER_ALG_AES_128: + case QCRYPTO_CIPHER_ALG_AES_192: + case QCRYPTO_CIPHER_ALG_AES_256: + case QCRYPTO_CIPHER_ALG_DES: + case QCRYPTO_CIPHER_ALG_3DES: + return true; + default: + return false; + } +#ifdef QEMU_GNUTLS_XTS + case QCRYPTO_CIPHER_MODE_XTS: + switch (alg) { + case QCRYPTO_CIPHER_ALG_AES_128: + case QCRYPTO_CIPHER_ALG_AES_256: + return true; + default: + return false; + } +#endif + default: + return false; + } +} + +typedef struct QCryptoCipherGnutls QCryptoCipherGnutls; +struct QCryptoCipherGnutls { + QCryptoCipher base; + gnutls_cipher_hd_t handle; /* XTS & CBC mode */ + gnutls_cipher_algorithm_t galg; /* ECB mode */ + guint8 *key; /* ECB mode */ + size_t nkey; /* ECB mode */ + size_t blocksize; +}; + + +static void +qcrypto_gnutls_cipher_free(QCryptoCipher *cipher) +{ + QCryptoCipherGnutls *ctx = container_of(cipher, QCryptoCipherGnutls, base); + + g_free(ctx->key); + if (ctx->handle) { + gnutls_cipher_deinit(ctx->handle); + } + g_free(ctx); +} + + +static int +qcrypto_gnutls_cipher_encrypt(QCryptoCipher *cipher, + const void *in, + void *out, + size_t len, + Error **errp) +{ + QCryptoCipherGnutls *ctx = container_of(cipher, QCryptoCipherGnutls, base); + int err; + + if (len % ctx->blocksize) { + error_setg(errp, "Length %zu must be a multiple of block size %zu", + len, ctx->blocksize); + return -1; + } + + if (ctx->handle) { /* CBC / XTS mode */ + err = gnutls_cipher_encrypt2(ctx->handle, + in, len, + out, len); + if (err != 0) { + error_setg(errp, "Cannot encrypt data: %s", + gnutls_strerror(err)); + return -1; + } + } else { /* ECB mode very inefficiently faked with CBC */ + g_autofree unsigned char *iv = g_new0(unsigned char, ctx->blocksize); + while (len) { + gnutls_cipher_hd_t handle; + gnutls_datum_t gkey = { (unsigned char *)ctx->key, ctx->nkey }; + int err = gnutls_cipher_init(&handle, ctx->galg, &gkey, NULL); + if (err != 0) { + error_setg(errp, "Cannot initialize cipher: %s", + gnutls_strerror(err)); + return -1; + } + + gnutls_cipher_set_iv(handle, iv, ctx->blocksize); + + err = gnutls_cipher_encrypt2(handle, + in, ctx->blocksize, + out, ctx->blocksize); + if (err != 0) { + gnutls_cipher_deinit(handle); + error_setg(errp, "Cannot encrypt data: %s", + gnutls_strerror(err)); + return -1; + } + gnutls_cipher_deinit(handle); + + len -= ctx->blocksize; + in += ctx->blocksize; + out += ctx->blocksize; + } + } + + return 0; +} + + +static int +qcrypto_gnutls_cipher_decrypt(QCryptoCipher *cipher, + const void *in, + void *out, + size_t len, + Error **errp) +{ + QCryptoCipherGnutls *ctx = container_of(cipher, QCryptoCipherGnutls, base); + int err; + + if (len % ctx->blocksize) { + error_setg(errp, "Length %zu must be a multiple of block size %zu", + len, ctx->blocksize); + return -1; + } + + if (ctx->handle) { /* CBC / XTS mode */ + err = gnutls_cipher_decrypt2(ctx->handle, + in, len, + out, len); + + if (err != 0) { + error_setg(errp, "Cannot decrypt data: %s", + gnutls_strerror(err)); + return -1; + } + } else { /* ECB mode very inefficiently faked with CBC */ + g_autofree unsigned char *iv = g_new0(unsigned char, ctx->blocksize); + while (len) { + gnutls_cipher_hd_t handle; + gnutls_datum_t gkey = { (unsigned char 
*)ctx->key, ctx->nkey }; + int err = gnutls_cipher_init(&handle, ctx->galg, &gkey, NULL); + if (err != 0) { + error_setg(errp, "Cannot initialize cipher: %s", + gnutls_strerror(err)); + return -1; + } + + gnutls_cipher_set_iv(handle, iv, ctx->blocksize); + + err = gnutls_cipher_decrypt2(handle, + in, ctx->blocksize, + out, ctx->blocksize); + if (err != 0) { + gnutls_cipher_deinit(handle); + error_setg(errp, "Cannot encrypt data: %s", + gnutls_strerror(err)); + return -1; + } + gnutls_cipher_deinit(handle); + + len -= ctx->blocksize; + in += ctx->blocksize; + out += ctx->blocksize; + } + } + + return 0; +} + +static int +qcrypto_gnutls_cipher_setiv(QCryptoCipher *cipher, + const uint8_t *iv, size_t niv, + Error **errp) +{ + QCryptoCipherGnutls *ctx = container_of(cipher, QCryptoCipherGnutls, base); + + if (niv != ctx->blocksize) { + error_setg(errp, "Expected IV size %zu not %zu", + ctx->blocksize, niv); + return -1; + } + + gnutls_cipher_set_iv(ctx->handle, (unsigned char *)iv, niv); + + return 0; +} + + +static struct QCryptoCipherDriver gnutls_driver = { + .cipher_encrypt = qcrypto_gnutls_cipher_encrypt, + .cipher_decrypt = qcrypto_gnutls_cipher_decrypt, + .cipher_setiv = qcrypto_gnutls_cipher_setiv, + .cipher_free = qcrypto_gnutls_cipher_free, +}; + +static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, + size_t nkey, + Error **errp) +{ + QCryptoCipherGnutls *ctx; + gnutls_datum_t gkey = { (unsigned char *)key, nkey }; + gnutls_cipher_algorithm_t galg = GNUTLS_CIPHER_UNKNOWN; + int err; + + switch (mode) { +#ifdef QEMU_GNUTLS_XTS + case QCRYPTO_CIPHER_MODE_XTS: + switch (alg) { + case QCRYPTO_CIPHER_ALG_AES_128: + galg = GNUTLS_CIPHER_AES_128_XTS; + break; + case QCRYPTO_CIPHER_ALG_AES_256: + galg = GNUTLS_CIPHER_AES_256_XTS; + break; + default: + break; + } + break; +#endif + + case QCRYPTO_CIPHER_MODE_ECB: + case QCRYPTO_CIPHER_MODE_CBC: + switch (alg) { + case QCRYPTO_CIPHER_ALG_AES_128: + galg = GNUTLS_CIPHER_AES_128_CBC; + break; + case QCRYPTO_CIPHER_ALG_AES_192: + galg = GNUTLS_CIPHER_AES_192_CBC; + break; + case QCRYPTO_CIPHER_ALG_AES_256: + galg = GNUTLS_CIPHER_AES_256_CBC; + break; + case QCRYPTO_CIPHER_ALG_DES: + galg = GNUTLS_CIPHER_DES_CBC; + break; + case QCRYPTO_CIPHER_ALG_3DES: + galg = GNUTLS_CIPHER_3DES_CBC; + break; + default: + break; + } + break; + default: + break; + } + + if (galg == GNUTLS_CIPHER_UNKNOWN) { + error_setg(errp, "Unsupported cipher algorithm %s with %s mode", + QCryptoCipherAlgorithm_str(alg), + QCryptoCipherMode_str(mode)); + return NULL; + } + + if (!qcrypto_cipher_validate_key_length(alg, mode, nkey, errp)) { + return NULL; + } + + ctx = g_new0(QCryptoCipherGnutls, 1); + ctx->base.driver = &gnutls_driver; + + if (mode == QCRYPTO_CIPHER_MODE_ECB) { + ctx->key = g_new0(guint8, nkey); + memcpy(ctx->key, key, nkey); + ctx->nkey = nkey; + ctx->galg = galg; + } else { + err = gnutls_cipher_init(&ctx->handle, galg, &gkey, NULL); + if (err != 0) { + error_setg(errp, "Cannot initialize cipher: %s", + gnutls_strerror(err)); + goto error; + } + } + + if (alg == QCRYPTO_CIPHER_ALG_DES || + alg == QCRYPTO_CIPHER_ALG_3DES) + ctx->blocksize = 8; + else + ctx->blocksize = 16; + + /* + * Our API contract for requires iv to be optional + * but nettle gets unhappy when called by gnutls + * in this case, so we just force set a default + * all-zeros IV, to match behaviour of other backends. 
+ */ + if (mode != QCRYPTO_CIPHER_MODE_ECB) { + g_autofree unsigned char *iv = g_new0(unsigned char, ctx->blocksize); + gnutls_cipher_set_iv(ctx->handle, iv, ctx->blocksize); + } + + return &ctx->base; + + error: + qcrypto_gnutls_cipher_free(&ctx->base); + return NULL; +} diff --git a/crypto/cipher-nettle.c.inc b/crypto/cipher-nettle.c.inc index cac771e4ff7..24cc61f87bf 100644 --- a/crypto/cipher-nettle.c.inc +++ b/crypto/cipher-nettle.c.inc @@ -34,47 +34,6 @@ #include #endif -typedef void (*QCryptoCipherNettleFuncWrapper)(const void *ctx, - size_t length, - uint8_t *dst, - const uint8_t *src); - -#if CONFIG_NETTLE_VERSION_MAJOR < 3 -typedef nettle_crypt_func * QCryptoCipherNettleFuncNative; -typedef void * cipher_ctx_t; -typedef unsigned cipher_length_t; -#define CONST_CTX - -#define cast5_set_key cast128_set_key - -#define aes128_ctx aes_ctx -#define aes192_ctx aes_ctx -#define aes256_ctx aes_ctx -#define aes128_set_encrypt_key(c, k) \ - aes_set_encrypt_key(c, 16, k) -#define aes192_set_encrypt_key(c, k) \ - aes_set_encrypt_key(c, 24, k) -#define aes256_set_encrypt_key(c, k) \ - aes_set_encrypt_key(c, 32, k) -#define aes128_set_decrypt_key(c, k) \ - aes_set_decrypt_key(c, 16, k) -#define aes192_set_decrypt_key(c, k) \ - aes_set_decrypt_key(c, 24, k) -#define aes256_set_decrypt_key(c, k) \ - aes_set_decrypt_key(c, 32, k) -#define aes128_encrypt aes_encrypt -#define aes192_encrypt aes_encrypt -#define aes256_encrypt aes_encrypt -#define aes128_decrypt aes_decrypt -#define aes192_decrypt aes_decrypt -#define aes256_decrypt aes_decrypt -#else -typedef nettle_cipher_func * QCryptoCipherNettleFuncNative; -typedef const void * cipher_ctx_t; -typedef size_t cipher_length_t; -#define CONST_CTX const -#endif - static inline bool qcrypto_length_check(size_t len, size_t blocksize, Error **errp) { @@ -197,12 +156,12 @@ static const struct QCryptoCipherDriver NAME##_driver_ctr = { \ static void NAME##_xts_wrape(const void *ctx, size_t length, \ uint8_t *dst, const uint8_t *src) \ { \ - ENCRYPT((cipher_ctx_t)ctx, length, dst, src); \ + ENCRYPT((const void *)ctx, length, dst, src); \ } \ static void NAME##_xts_wrapd(const void *ctx, size_t length, \ uint8_t *dst, const uint8_t *src) \ { \ - DECRYPT((cipher_ctx_t)ctx, length, dst, src); \ + DECRYPT((const void *)ctx, length, dst, src); \ } \ static int NAME##_encrypt_xts(QCryptoCipher *cipher, const void *in, \ void *out, size_t len, Error **errp) \ @@ -276,25 +235,25 @@ static const struct QCryptoCipherDriver NAME##_driver_xts = { \ DEFINE_XTS(NAME, TYPE, BLEN, ENCRYPT, DECRYPT) -typedef struct QCryptoNettleDESRFB { +typedef struct QCryptoNettleDES { QCryptoCipher base; struct des_ctx key; uint8_t iv[DES_BLOCK_SIZE]; -} QCryptoNettleDESRFB; +} QCryptoNettleDES; -static void des_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void des_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { des_encrypt(ctx, length, dst, src); } -static void des_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void des_decrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { des_decrypt(ctx, length, dst, src); } -DEFINE_ECB_CBC_CTR(qcrypto_nettle_des_rfb, QCryptoNettleDESRFB, +DEFINE_ECB_CBC_CTR(qcrypto_nettle_des, QCryptoNettleDES, DES_BLOCK_SIZE, des_encrypt_native, des_decrypt_native) @@ -304,13 +263,13 @@ typedef struct QCryptoNettleDES3 { uint8_t iv[DES3_BLOCK_SIZE]; } QCryptoNettleDES3; -static void des3_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void 
des3_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { des3_encrypt(ctx, length, dst, src); } -static void des3_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void des3_decrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { des3_decrypt(ctx, length, dst, src); @@ -327,17 +286,17 @@ typedef struct QCryptoNettleAES128 { struct aes128_ctx key[2], key_xts[2]; } QCryptoNettleAES128; -static void aes128_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void aes128_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { - CONST_CTX struct aes128_ctx *keys = ctx; + const struct aes128_ctx *keys = ctx; aes128_encrypt(&keys[0], length, dst, src); } -static void aes128_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void aes128_decrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { - CONST_CTX struct aes128_ctx *keys = ctx; + const struct aes128_ctx *keys = ctx; aes128_decrypt(&keys[1], length, dst, src); } @@ -353,17 +312,17 @@ typedef struct QCryptoNettleAES192 { struct aes192_ctx key[2], key_xts[2]; } QCryptoNettleAES192; -static void aes192_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void aes192_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { - CONST_CTX struct aes192_ctx *keys = ctx; + const struct aes192_ctx *keys = ctx; aes192_encrypt(&keys[0], length, dst, src); } -static void aes192_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void aes192_decrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { - CONST_CTX struct aes192_ctx *keys = ctx; + const struct aes192_ctx *keys = ctx; aes192_decrypt(&keys[1], length, dst, src); } @@ -379,17 +338,17 @@ typedef struct QCryptoNettleAES256 { struct aes256_ctx key[2], key_xts[2]; } QCryptoNettleAES256; -static void aes256_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void aes256_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { - CONST_CTX struct aes256_ctx *keys = ctx; + const struct aes256_ctx *keys = ctx; aes256_encrypt(&keys[0], length, dst, src); } -static void aes256_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, - uint8_t *dst, const uint8_t *src) +static void aes256_decrypt_native(const void *ctx, size_t length, + uint8_t *dst, const uint8_t *src) { - CONST_CTX struct aes256_ctx *keys = ctx; + const struct aes256_ctx *keys = ctx; aes256_decrypt(&keys[1], length, dst, src); } @@ -404,13 +363,13 @@ typedef struct QCryptoNettleCAST128 { struct cast128_ctx key, key_xts; } QCryptoNettleCAST128; -static void cast128_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void cast128_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { cast128_encrypt(ctx, length, dst, src); } -static void cast128_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void cast128_decrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { cast128_decrypt(ctx, length, dst, src); @@ -428,13 +387,13 @@ typedef struct QCryptoNettleSerpent { } QCryptoNettleSerpent; -static void serpent_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void serpent_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { serpent_encrypt(ctx, length, dst, src); } -static void serpent_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void 
serpent_decrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { serpent_decrypt(ctx, length, dst, src); @@ -451,13 +410,13 @@ typedef struct QCryptoNettleTwofish { struct twofish_ctx key, key_xts; } QCryptoNettleTwofish; -static void twofish_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void twofish_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { twofish_encrypt(ctx, length, dst, src); } -static void twofish_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void twofish_decrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { twofish_decrypt(ctx, length, dst, src); @@ -472,7 +431,7 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, QCryptoCipherMode mode) { switch (alg) { - case QCRYPTO_CIPHER_ALG_DES_RFB: + case QCRYPTO_CIPHER_ALG_DES: case QCRYPTO_CIPHER_ALG_3DES: case QCRYPTO_CIPHER_ALG_AES_128: case QCRYPTO_CIPHER_ALG_AES_192: @@ -521,32 +480,28 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, } switch (alg) { - case QCRYPTO_CIPHER_ALG_DES_RFB: + case QCRYPTO_CIPHER_ALG_DES: { - QCryptoNettleDESRFB *ctx; + QCryptoNettleDES *ctx; const QCryptoCipherDriver *drv; - uint8_t *rfbkey; switch (mode) { case QCRYPTO_CIPHER_MODE_ECB: - drv = &qcrypto_nettle_des_rfb_driver_ecb; + drv = &qcrypto_nettle_des_driver_ecb; break; case QCRYPTO_CIPHER_MODE_CBC: - drv = &qcrypto_nettle_des_rfb_driver_cbc; + drv = &qcrypto_nettle_des_driver_cbc; break; case QCRYPTO_CIPHER_MODE_CTR: - drv = &qcrypto_nettle_des_rfb_driver_ctr; + drv = &qcrypto_nettle_des_driver_ctr; break; default: goto bad_cipher_mode; } - ctx = g_new0(QCryptoNettleDESRFB, 1); + ctx = g_new0(QCryptoNettleDES, 1); ctx->base.driver = drv; - - rfbkey = qcrypto_cipher_munge_des_rfb_key(key, nkey); - des_set_key(&ctx->key, rfbkey); - g_free(rfbkey); + des_set_key(&ctx->key, key); return &ctx->base; } diff --git a/crypto/cipher.c b/crypto/cipher.c index 068b2fb867c..74b09a5b261 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -29,7 +29,7 @@ static const size_t alg_key_len[QCRYPTO_CIPHER_ALG__MAX] = { [QCRYPTO_CIPHER_ALG_AES_128] = 16, [QCRYPTO_CIPHER_ALG_AES_192] = 24, [QCRYPTO_CIPHER_ALG_AES_256] = 32, - [QCRYPTO_CIPHER_ALG_DES_RFB] = 8, + [QCRYPTO_CIPHER_ALG_DES] = 8, [QCRYPTO_CIPHER_ALG_3DES] = 24, [QCRYPTO_CIPHER_ALG_CAST5_128] = 16, [QCRYPTO_CIPHER_ALG_SERPENT_128] = 16, @@ -44,7 +44,7 @@ static const size_t alg_block_len[QCRYPTO_CIPHER_ALG__MAX] = { [QCRYPTO_CIPHER_ALG_AES_128] = 16, [QCRYPTO_CIPHER_ALG_AES_192] = 16, [QCRYPTO_CIPHER_ALG_AES_256] = 16, - [QCRYPTO_CIPHER_ALG_DES_RFB] = 8, + [QCRYPTO_CIPHER_ALG_DES] = 8, [QCRYPTO_CIPHER_ALG_3DES] = 8, [QCRYPTO_CIPHER_ALG_CAST5_128] = 8, [QCRYPTO_CIPHER_ALG_SERPENT_128] = 16, @@ -107,9 +107,9 @@ qcrypto_cipher_validate_key_length(QCryptoCipherAlgorithm alg, } if (mode == QCRYPTO_CIPHER_MODE_XTS) { - if (alg == QCRYPTO_CIPHER_ALG_DES_RFB - || alg == QCRYPTO_CIPHER_ALG_3DES) { - error_setg(errp, "XTS mode not compatible with DES-RFB/3DES"); + if (alg == QCRYPTO_CIPHER_ALG_DES || + alg == QCRYPTO_CIPHER_ALG_3DES) { + error_setg(errp, "XTS mode not compatible with DES/3DES"); return false; } if (nkey % 2) { @@ -132,28 +132,12 @@ qcrypto_cipher_validate_key_length(QCryptoCipherAlgorithm alg, return true; } -#if defined(CONFIG_GCRYPT) || defined(CONFIG_NETTLE) -static uint8_t * -qcrypto_cipher_munge_des_rfb_key(const uint8_t *key, - size_t nkey) -{ - uint8_t *ret = g_new0(uint8_t, nkey); - size_t i; - for (i = 0; i < nkey; i++) { - uint8_t 
r = key[i]; - r = (r & 0xf0) >> 4 | (r & 0x0f) << 4; - r = (r & 0xcc) >> 2 | (r & 0x33) << 2; - r = (r & 0xaa) >> 1 | (r & 0x55) << 1; - ret[i] = r; - } - return ret; -} -#endif /* CONFIG_GCRYPT || CONFIG_NETTLE */ - #ifdef CONFIG_GCRYPT #include "cipher-gcrypt.c.inc" #elif defined CONFIG_NETTLE #include "cipher-nettle.c.inc" +#elif defined CONFIG_GNUTLS_CRYPTO +#include "cipher-gnutls.c.inc" #else #include "cipher-builtin.c.inc" #endif diff --git a/crypto/desrfb.c b/crypto/desrfb.c deleted file mode 100644 index b2a105ebbcb..00000000000 --- a/crypto/desrfb.c +++ /dev/null @@ -1,416 +0,0 @@ -/* - * This is D3DES (V5.09) by Richard Outerbridge with the double and - * triple-length support removed for use in VNC. Also the bytebit[] array - * has been reversed so that the most significant bit in each byte of the - * key is ignored, not the least significant. - * - * These changes are: - * Copyright (C) 1999 AT&T Laboratories Cambridge. All Rights Reserved. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ - -/* D3DES (V5.09) - - * - * A portable, public domain, version of the Data Encryption Standard. - * - * Written with Symantec's THINK (Lightspeed) C by Richard Outerbridge. - * Thanks to: Dan Hoey for his excellent Initial and Inverse permutation - * code; Jim Gillogly & Phil Karn for the DES key schedule code; Dennis - * Ferguson, Eric Young and Dana How for comparing notes; and Ray Lau, - * for humouring me on. - * - * Copyright (c) 1988,1989,1990,1991,1992 by Richard Outerbridge. - * (GEnie : OUTER; CIS : [71755,204]) Graven Imagery, 1992. - */ - -#include "qemu/osdep.h" -#include "crypto/desrfb.h" - -static void scrunch(unsigned char *, unsigned long *); -static void unscrun(unsigned long *, unsigned char *); -static void desfunc(unsigned long *, unsigned long *); -static void cookey(unsigned long *); - -static unsigned long KnL[32] = { 0L }; - -static const unsigned short bytebit[8] = { - 01, 02, 04, 010, 020, 040, 0100, 0200 }; - -static const unsigned long bigbyte[24] = { - 0x800000L, 0x400000L, 0x200000L, 0x100000L, - 0x80000L, 0x40000L, 0x20000L, 0x10000L, - 0x8000L, 0x4000L, 0x2000L, 0x1000L, - 0x800L, 0x400L, 0x200L, 0x100L, - 0x80L, 0x40L, 0x20L, 0x10L, - 0x8L, 0x4L, 0x2L, 0x1L }; - -/* Use the key schedule specified in the Standard (ANSI X3.92-1981). */ - -static const unsigned char pc1[56] = { - 56, 48, 40, 32, 24, 16, 8, 0, 57, 49, 41, 33, 25, 17, - 9, 1, 58, 50, 42, 34, 26, 18, 10, 2, 59, 51, 43, 35, - 62, 54, 46, 38, 30, 22, 14, 6, 61, 53, 45, 37, 29, 21, - 13, 5, 60, 52, 44, 36, 28, 20, 12, 4, 27, 19, 11, 3 }; - -static const unsigned char totrot[16] = { - 1, 2, 4, 6, 8, 10, 12, 14, 15, 17, 19, 21, 23, 25, 27, 28 }; - -static const unsigned char pc2[48] = { - 13, 16, 10, 23, 0, 4, 2, 27, 14, 5, 20, 9, - 22, 18, 11, 3, 25, 7, 15, 6, 26, 19, 12, 1, - 40, 51, 30, 36, 46, 54, 29, 39, 50, 44, 32, 47, - 43, 48, 38, 55, 33, 52, 45, 41, 49, 35, 28, 31 }; - -/* Thanks to James Gillogly & Phil Karn! */ -void deskey(unsigned char *key, int edf) -{ - register int i, j, l, m, n; - unsigned char pc1m[56], pcr[56]; - unsigned long kn[32]; - - for ( j = 0; j < 56; j++ ) { - l = pc1[j]; - m = l & 07; - pc1m[j] = (key[l >> 3] & bytebit[m]) ? 
1 : 0; - } - for( i = 0; i < 16; i++ ) { - if( edf == DE1 ) m = (15 - i) << 1; - else m = i << 1; - n = m + 1; - kn[m] = kn[n] = 0L; - for( j = 0; j < 28; j++ ) { - l = j + totrot[i]; - if( l < 28 ) pcr[j] = pc1m[l]; - else pcr[j] = pc1m[l - 28]; - } - for( j = 28; j < 56; j++ ) { - l = j + totrot[i]; - if( l < 56 ) pcr[j] = pc1m[l]; - else pcr[j] = pc1m[l - 28]; - } - for( j = 0; j < 24; j++ ) { - if( pcr[pc2[j]] ) kn[m] |= bigbyte[j]; - if( pcr[pc2[j + 24]] ) kn[n] |= bigbyte[j]; - } - } - cookey(kn); - return; - } - -static void cookey(register unsigned long *raw1) -{ - register unsigned long *cook, *raw0; - unsigned long dough[32]; - register int i; - - cook = dough; - for( i = 0; i < 16; i++, raw1++ ) { - raw0 = raw1++; - *cook = (*raw0 & 0x00fc0000L) << 6; - *cook |= (*raw0 & 0x00000fc0L) << 10; - *cook |= (*raw1 & 0x00fc0000L) >> 10; - *cook++ |= (*raw1 & 0x00000fc0L) >> 6; - *cook = (*raw0 & 0x0003f000L) << 12; - *cook |= (*raw0 & 0x0000003fL) << 16; - *cook |= (*raw1 & 0x0003f000L) >> 4; - *cook++ |= (*raw1 & 0x0000003fL); - } - usekey(dough); - return; - } - -void usekey(register unsigned long *from) -{ - register unsigned long *to, *endp; - - to = KnL, endp = &KnL[32]; - while( to < endp ) *to++ = *from++; - return; - } - -void des(unsigned char *inblock, unsigned char *outblock) -{ - unsigned long work[2]; - - scrunch(inblock, work); - desfunc(work, KnL); - unscrun(work, outblock); - return; - } - -static void scrunch(register unsigned char *outof, register unsigned long *into) -{ - *into = (*outof++ & 0xffL) << 24; - *into |= (*outof++ & 0xffL) << 16; - *into |= (*outof++ & 0xffL) << 8; - *into++ |= (*outof++ & 0xffL); - *into = (*outof++ & 0xffL) << 24; - *into |= (*outof++ & 0xffL) << 16; - *into |= (*outof++ & 0xffL) << 8; - *into |= (*outof & 0xffL); - return; - } - -static void unscrun(register unsigned long *outof, register unsigned char *into) -{ - *into++ = (unsigned char)((*outof >> 24) & 0xffL); - *into++ = (unsigned char)((*outof >> 16) & 0xffL); - *into++ = (unsigned char)((*outof >> 8) & 0xffL); - *into++ = (unsigned char)(*outof++ & 0xffL); - *into++ = (unsigned char)((*outof >> 24) & 0xffL); - *into++ = (unsigned char)((*outof >> 16) & 0xffL); - *into++ = (unsigned char)((*outof >> 8) & 0xffL); - *into = (unsigned char)(*outof & 0xffL); - return; - } - -static const unsigned long SP1[64] = { - 0x01010400L, 0x00000000L, 0x00010000L, 0x01010404L, - 0x01010004L, 0x00010404L, 0x00000004L, 0x00010000L, - 0x00000400L, 0x01010400L, 0x01010404L, 0x00000400L, - 0x01000404L, 0x01010004L, 0x01000000L, 0x00000004L, - 0x00000404L, 0x01000400L, 0x01000400L, 0x00010400L, - 0x00010400L, 0x01010000L, 0x01010000L, 0x01000404L, - 0x00010004L, 0x01000004L, 0x01000004L, 0x00010004L, - 0x00000000L, 0x00000404L, 0x00010404L, 0x01000000L, - 0x00010000L, 0x01010404L, 0x00000004L, 0x01010000L, - 0x01010400L, 0x01000000L, 0x01000000L, 0x00000400L, - 0x01010004L, 0x00010000L, 0x00010400L, 0x01000004L, - 0x00000400L, 0x00000004L, 0x01000404L, 0x00010404L, - 0x01010404L, 0x00010004L, 0x01010000L, 0x01000404L, - 0x01000004L, 0x00000404L, 0x00010404L, 0x01010400L, - 0x00000404L, 0x01000400L, 0x01000400L, 0x00000000L, - 0x00010004L, 0x00010400L, 0x00000000L, 0x01010004L }; - -static const unsigned long SP2[64] = { - 0x80108020L, 0x80008000L, 0x00008000L, 0x00108020L, - 0x00100000L, 0x00000020L, 0x80100020L, 0x80008020L, - 0x80000020L, 0x80108020L, 0x80108000L, 0x80000000L, - 0x80008000L, 0x00100000L, 0x00000020L, 0x80100020L, - 0x00108000L, 0x00100020L, 0x80008020L, 0x00000000L, - 0x80000000L, 
0x00008000L, 0x00108020L, 0x80100000L, - 0x00100020L, 0x80000020L, 0x00000000L, 0x00108000L, - 0x00008020L, 0x80108000L, 0x80100000L, 0x00008020L, - 0x00000000L, 0x00108020L, 0x80100020L, 0x00100000L, - 0x80008020L, 0x80100000L, 0x80108000L, 0x00008000L, - 0x80100000L, 0x80008000L, 0x00000020L, 0x80108020L, - 0x00108020L, 0x00000020L, 0x00008000L, 0x80000000L, - 0x00008020L, 0x80108000L, 0x00100000L, 0x80000020L, - 0x00100020L, 0x80008020L, 0x80000020L, 0x00100020L, - 0x00108000L, 0x00000000L, 0x80008000L, 0x00008020L, - 0x80000000L, 0x80100020L, 0x80108020L, 0x00108000L }; - -static const unsigned long SP3[64] = { - 0x00000208L, 0x08020200L, 0x00000000L, 0x08020008L, - 0x08000200L, 0x00000000L, 0x00020208L, 0x08000200L, - 0x00020008L, 0x08000008L, 0x08000008L, 0x00020000L, - 0x08020208L, 0x00020008L, 0x08020000L, 0x00000208L, - 0x08000000L, 0x00000008L, 0x08020200L, 0x00000200L, - 0x00020200L, 0x08020000L, 0x08020008L, 0x00020208L, - 0x08000208L, 0x00020200L, 0x00020000L, 0x08000208L, - 0x00000008L, 0x08020208L, 0x00000200L, 0x08000000L, - 0x08020200L, 0x08000000L, 0x00020008L, 0x00000208L, - 0x00020000L, 0x08020200L, 0x08000200L, 0x00000000L, - 0x00000200L, 0x00020008L, 0x08020208L, 0x08000200L, - 0x08000008L, 0x00000200L, 0x00000000L, 0x08020008L, - 0x08000208L, 0x00020000L, 0x08000000L, 0x08020208L, - 0x00000008L, 0x00020208L, 0x00020200L, 0x08000008L, - 0x08020000L, 0x08000208L, 0x00000208L, 0x08020000L, - 0x00020208L, 0x00000008L, 0x08020008L, 0x00020200L }; - -static const unsigned long SP4[64] = { - 0x00802001L, 0x00002081L, 0x00002081L, 0x00000080L, - 0x00802080L, 0x00800081L, 0x00800001L, 0x00002001L, - 0x00000000L, 0x00802000L, 0x00802000L, 0x00802081L, - 0x00000081L, 0x00000000L, 0x00800080L, 0x00800001L, - 0x00000001L, 0x00002000L, 0x00800000L, 0x00802001L, - 0x00000080L, 0x00800000L, 0x00002001L, 0x00002080L, - 0x00800081L, 0x00000001L, 0x00002080L, 0x00800080L, - 0x00002000L, 0x00802080L, 0x00802081L, 0x00000081L, - 0x00800080L, 0x00800001L, 0x00802000L, 0x00802081L, - 0x00000081L, 0x00000000L, 0x00000000L, 0x00802000L, - 0x00002080L, 0x00800080L, 0x00800081L, 0x00000001L, - 0x00802001L, 0x00002081L, 0x00002081L, 0x00000080L, - 0x00802081L, 0x00000081L, 0x00000001L, 0x00002000L, - 0x00800001L, 0x00002001L, 0x00802080L, 0x00800081L, - 0x00002001L, 0x00002080L, 0x00800000L, 0x00802001L, - 0x00000080L, 0x00800000L, 0x00002000L, 0x00802080L }; - -static const unsigned long SP5[64] = { - 0x00000100L, 0x02080100L, 0x02080000L, 0x42000100L, - 0x00080000L, 0x00000100L, 0x40000000L, 0x02080000L, - 0x40080100L, 0x00080000L, 0x02000100L, 0x40080100L, - 0x42000100L, 0x42080000L, 0x00080100L, 0x40000000L, - 0x02000000L, 0x40080000L, 0x40080000L, 0x00000000L, - 0x40000100L, 0x42080100L, 0x42080100L, 0x02000100L, - 0x42080000L, 0x40000100L, 0x00000000L, 0x42000000L, - 0x02080100L, 0x02000000L, 0x42000000L, 0x00080100L, - 0x00080000L, 0x42000100L, 0x00000100L, 0x02000000L, - 0x40000000L, 0x02080000L, 0x42000100L, 0x40080100L, - 0x02000100L, 0x40000000L, 0x42080000L, 0x02080100L, - 0x40080100L, 0x00000100L, 0x02000000L, 0x42080000L, - 0x42080100L, 0x00080100L, 0x42000000L, 0x42080100L, - 0x02080000L, 0x00000000L, 0x40080000L, 0x42000000L, - 0x00080100L, 0x02000100L, 0x40000100L, 0x00080000L, - 0x00000000L, 0x40080000L, 0x02080100L, 0x40000100L }; - -static const unsigned long SP6[64] = { - 0x20000010L, 0x20400000L, 0x00004000L, 0x20404010L, - 0x20400000L, 0x00000010L, 0x20404010L, 0x00400000L, - 0x20004000L, 0x00404010L, 0x00400000L, 0x20000010L, - 0x00400010L, 0x20004000L, 0x20000000L, 
0x00004010L, - 0x00000000L, 0x00400010L, 0x20004010L, 0x00004000L, - 0x00404000L, 0x20004010L, 0x00000010L, 0x20400010L, - 0x20400010L, 0x00000000L, 0x00404010L, 0x20404000L, - 0x00004010L, 0x00404000L, 0x20404000L, 0x20000000L, - 0x20004000L, 0x00000010L, 0x20400010L, 0x00404000L, - 0x20404010L, 0x00400000L, 0x00004010L, 0x20000010L, - 0x00400000L, 0x20004000L, 0x20000000L, 0x00004010L, - 0x20000010L, 0x20404010L, 0x00404000L, 0x20400000L, - 0x00404010L, 0x20404000L, 0x00000000L, 0x20400010L, - 0x00000010L, 0x00004000L, 0x20400000L, 0x00404010L, - 0x00004000L, 0x00400010L, 0x20004010L, 0x00000000L, - 0x20404000L, 0x20000000L, 0x00400010L, 0x20004010L }; - -static const unsigned long SP7[64] = { - 0x00200000L, 0x04200002L, 0x04000802L, 0x00000000L, - 0x00000800L, 0x04000802L, 0x00200802L, 0x04200800L, - 0x04200802L, 0x00200000L, 0x00000000L, 0x04000002L, - 0x00000002L, 0x04000000L, 0x04200002L, 0x00000802L, - 0x04000800L, 0x00200802L, 0x00200002L, 0x04000800L, - 0x04000002L, 0x04200000L, 0x04200800L, 0x00200002L, - 0x04200000L, 0x00000800L, 0x00000802L, 0x04200802L, - 0x00200800L, 0x00000002L, 0x04000000L, 0x00200800L, - 0x04000000L, 0x00200800L, 0x00200000L, 0x04000802L, - 0x04000802L, 0x04200002L, 0x04200002L, 0x00000002L, - 0x00200002L, 0x04000000L, 0x04000800L, 0x00200000L, - 0x04200800L, 0x00000802L, 0x00200802L, 0x04200800L, - 0x00000802L, 0x04000002L, 0x04200802L, 0x04200000L, - 0x00200800L, 0x00000000L, 0x00000002L, 0x04200802L, - 0x00000000L, 0x00200802L, 0x04200000L, 0x00000800L, - 0x04000002L, 0x04000800L, 0x00000800L, 0x00200002L }; - -static const unsigned long SP8[64] = { - 0x10001040L, 0x00001000L, 0x00040000L, 0x10041040L, - 0x10000000L, 0x10001040L, 0x00000040L, 0x10000000L, - 0x00040040L, 0x10040000L, 0x10041040L, 0x00041000L, - 0x10041000L, 0x00041040L, 0x00001000L, 0x00000040L, - 0x10040000L, 0x10000040L, 0x10001000L, 0x00001040L, - 0x00041000L, 0x00040040L, 0x10040040L, 0x10041000L, - 0x00001040L, 0x00000000L, 0x00000000L, 0x10040040L, - 0x10000040L, 0x10001000L, 0x00041040L, 0x00040000L, - 0x00041040L, 0x00040000L, 0x10041000L, 0x00001000L, - 0x00000040L, 0x10040040L, 0x00001000L, 0x00041040L, - 0x10001000L, 0x00000040L, 0x10000040L, 0x10040000L, - 0x10040040L, 0x10000000L, 0x00040000L, 0x10001040L, - 0x00000000L, 0x10041040L, 0x00040040L, 0x10000040L, - 0x10040000L, 0x10001000L, 0x10001040L, 0x00000000L, - 0x10041040L, 0x00041000L, 0x00041000L, 0x00001040L, - 0x00001040L, 0x00040040L, 0x10000000L, 0x10041000L }; - -static void desfunc(register unsigned long *block, register unsigned long *keys) -{ - register unsigned long fval, work, right, leftt; - register int round; - - leftt = block[0]; - right = block[1]; - work = ((leftt >> 4) ^ right) & 0x0f0f0f0fL; - right ^= work; - leftt ^= (work << 4); - work = ((leftt >> 16) ^ right) & 0x0000ffffL; - right ^= work; - leftt ^= (work << 16); - work = ((right >> 2) ^ leftt) & 0x33333333L; - leftt ^= work; - right ^= (work << 2); - work = ((right >> 8) ^ leftt) & 0x00ff00ffL; - leftt ^= work; - right ^= (work << 8); - right = ((right << 1) | ((right >> 31) & 1L)) & 0xffffffffL; - work = (leftt ^ right) & 0xaaaaaaaaL; - leftt ^= work; - right ^= work; - leftt = ((leftt << 1) | ((leftt >> 31) & 1L)) & 0xffffffffL; - - for( round = 0; round < 8; round++ ) { - work = (right << 28) | (right >> 4); - work ^= *keys++; - fval = SP7[ work & 0x3fL]; - fval |= SP5[(work >> 8) & 0x3fL]; - fval |= SP3[(work >> 16) & 0x3fL]; - fval |= SP1[(work >> 24) & 0x3fL]; - work = right ^ *keys++; - fval |= SP8[ work & 0x3fL]; - fval |= SP6[(work >> 
8) & 0x3fL]; - fval |= SP4[(work >> 16) & 0x3fL]; - fval |= SP2[(work >> 24) & 0x3fL]; - leftt ^= fval; - work = (leftt << 28) | (leftt >> 4); - work ^= *keys++; - fval = SP7[ work & 0x3fL]; - fval |= SP5[(work >> 8) & 0x3fL]; - fval |= SP3[(work >> 16) & 0x3fL]; - fval |= SP1[(work >> 24) & 0x3fL]; - work = leftt ^ *keys++; - fval |= SP8[ work & 0x3fL]; - fval |= SP6[(work >> 8) & 0x3fL]; - fval |= SP4[(work >> 16) & 0x3fL]; - fval |= SP2[(work >> 24) & 0x3fL]; - right ^= fval; - } - - right = (right << 31) | (right >> 1); - work = (leftt ^ right) & 0xaaaaaaaaL; - leftt ^= work; - right ^= work; - leftt = (leftt << 31) | (leftt >> 1); - work = ((leftt >> 8) ^ right) & 0x00ff00ffL; - right ^= work; - leftt ^= (work << 8); - work = ((leftt >> 2) ^ right) & 0x33333333L; - right ^= work; - leftt ^= (work << 2); - work = ((right >> 16) ^ leftt) & 0x0000ffffL; - leftt ^= work; - right ^= (work << 16); - work = ((right >> 4) ^ leftt) & 0x0f0f0f0fL; - leftt ^= work; - right ^= (work << 4); - *block++ = right; - *block = leftt; - return; - } - -/* Validation sets: - * - * Single-length key, single-length plaintext - - * Key : 0123 4567 89ab cdef - * Plain : 0123 4567 89ab cde7 - * Cipher : c957 4425 6a5e d31d - * - * Double-length key, single-length plaintext - - * Key : 0123 4567 89ab cdef fedc ba98 7654 3210 - * Plain : 0123 4567 89ab cde7 - * Cipher : 7f1d 0a77 826b 8aff - * - * Double-length key, double-length plaintext - - * Key : 0123 4567 89ab cdef fedc ba98 7654 3210 - * Plain : 0123 4567 89ab cdef 0123 4567 89ab cdff - * Cipher : 27a0 8440 406a df60 278f 47cf 42d6 15d7 - * - * Triple-length key, single-length plaintext - - * Key : 0123 4567 89ab cdef fedc ba98 7654 3210 89ab cdef 0123 4567 - * Plain : 0123 4567 89ab cde7 - * Cipher : de0b 7c06 ae5e 0ed5 - * - * Triple-length key, double-length plaintext - - * Key : 0123 4567 89ab cdef fedc ba98 7654 3210 89ab cdef 0123 4567 - * Plain : 0123 4567 89ab cdef 0123 4567 89ab cdff - * Cipher : ad0d 1b30 ac17 cf07 0ed1 1c63 81e4 4de5 - * - * d3des V5.0a rwo 9208.07 18:44 Graven Imagery - **********************************************************************/ diff --git a/crypto/hash-gnutls.c b/crypto/hash-gnutls.c new file mode 100644 index 00000000000..17911ac5d1c --- /dev/null +++ b/crypto/hash-gnutls.c @@ -0,0 +1,104 @@ +/* + * QEMU Crypto hash algorithms + * + * Copyright (c) 2021 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ * + */ + +#include "qemu/osdep.h" +#include +#include "qapi/error.h" +#include "crypto/hash.h" +#include "hashpriv.h" + + +static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_MD5] = GNUTLS_DIG_MD5, + [QCRYPTO_HASH_ALG_SHA1] = GNUTLS_DIG_SHA1, + [QCRYPTO_HASH_ALG_SHA224] = GNUTLS_DIG_SHA224, + [QCRYPTO_HASH_ALG_SHA256] = GNUTLS_DIG_SHA256, + [QCRYPTO_HASH_ALG_SHA384] = GNUTLS_DIG_SHA384, + [QCRYPTO_HASH_ALG_SHA512] = GNUTLS_DIG_SHA512, + [QCRYPTO_HASH_ALG_RIPEMD160] = GNUTLS_DIG_RMD160, +}; + +gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg) +{ + size_t i; + const gnutls_digest_algorithm_t *algs; + if (alg >= G_N_ELEMENTS(qcrypto_hash_alg_map) || + qcrypto_hash_alg_map[alg] == GNUTLS_DIG_UNKNOWN) { + return false; + } + algs = gnutls_digest_list(); + for (i = 0; algs[i] != GNUTLS_DIG_UNKNOWN; i++) { + if (algs[i] == qcrypto_hash_alg_map[alg]) { + return true; + } + } + return false; +} + + +static int +qcrypto_gnutls_hash_bytesv(QCryptoHashAlgorithm alg, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp) +{ + int i, ret; + gnutls_hash_hd_t hash; + + if (!qcrypto_hash_supports(alg)) { + error_setg(errp, + "Unknown hash algorithm %d", + alg); + return -1; + } + + ret = gnutls_hash_get_len(qcrypto_hash_alg_map[alg]); + if (*resultlen == 0) { + *resultlen = ret; + *result = g_new0(uint8_t, *resultlen); + } else if (*resultlen != ret) { + error_setg(errp, + "Result buffer size %zu is smaller than hash %d", + *resultlen, ret); + return -1; + } + + ret = gnutls_hash_init(&hash, qcrypto_hash_alg_map[alg]); + if (ret < 0) { + error_setg(errp, + "Unable to initialize hash algorithm: %s", + gnutls_strerror(ret)); + return -1; + } + + for (i = 0; i < niov; i++) { + gnutls_hash(hash, iov[i].iov_base, iov[i].iov_len); + } + + gnutls_hash_deinit(hash, *result); + return 0; +} + + +QCryptoHashDriver qcrypto_hash_lib_driver = { + .hash_bytesv = qcrypto_gnutls_hash_bytesv, +}; diff --git a/crypto/hash-nettle.c b/crypto/hash-nettle.c index 2a6ee7c7d53..1ca1a410628 100644 --- a/crypto/hash-nettle.c +++ b/crypto/hash-nettle.c @@ -26,18 +26,12 @@ #include #include -#if CONFIG_NETTLE_VERSION_MAJOR < 3 -typedef unsigned int hash_length_t; -#else -typedef size_t hash_length_t; -#endif - typedef void (*qcrypto_nettle_init)(void *ctx); typedef void (*qcrypto_nettle_write)(void *ctx, - hash_length_t len, + size_t len, const uint8_t *buf); typedef void (*qcrypto_nettle_result)(void *ctx, - hash_length_t len, + size_t len, uint8_t *buf); union qcrypto_hash_ctx { diff --git a/crypto/hmac-gnutls.c b/crypto/hmac-gnutls.c new file mode 100644 index 00000000000..24db383322c --- /dev/null +++ b/crypto/hmac-gnutls.c @@ -0,0 +1,139 @@ +/* + * QEMU Crypto hmac algorithms + * + * Copyright (c) 2021 Red Hat, Inc. + * + * Derived from hmac-gcrypt.c: + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include + +#include "qapi/error.h" +#include "crypto/hmac.h" +#include "hmacpriv.h" + +static int qcrypto_hmac_alg_map[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_MD5] = GNUTLS_MAC_MD5, + [QCRYPTO_HASH_ALG_SHA1] = GNUTLS_MAC_SHA1, + [QCRYPTO_HASH_ALG_SHA224] = GNUTLS_MAC_SHA224, + [QCRYPTO_HASH_ALG_SHA256] = GNUTLS_MAC_SHA256, + [QCRYPTO_HASH_ALG_SHA384] = GNUTLS_MAC_SHA384, + [QCRYPTO_HASH_ALG_SHA512] = GNUTLS_MAC_SHA512, + [QCRYPTO_HASH_ALG_RIPEMD160] = GNUTLS_MAC_RMD160, +}; + +typedef struct QCryptoHmacGnutls QCryptoHmacGnutls; +struct QCryptoHmacGnutls { + gnutls_hmac_hd_t handle; +}; + +bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg) +{ + size_t i; + const gnutls_digest_algorithm_t *algs; + if (alg >= G_N_ELEMENTS(qcrypto_hmac_alg_map) || + qcrypto_hmac_alg_map[alg] == GNUTLS_DIG_UNKNOWN) { + return false; + } + algs = gnutls_digest_list(); + for (i = 0; algs[i] != GNUTLS_DIG_UNKNOWN; i++) { + if (algs[i] == qcrypto_hmac_alg_map[alg]) { + return true; + } + } + return false; +} + +void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg, + const uint8_t *key, size_t nkey, + Error **errp) +{ + QCryptoHmacGnutls *ctx; + int err; + + if (!qcrypto_hmac_supports(alg)) { + error_setg(errp, "Unsupported hmac algorithm %s", + QCryptoHashAlgorithm_str(alg)); + return NULL; + } + + ctx = g_new0(QCryptoHmacGnutls, 1); + + err = gnutls_hmac_init(&ctx->handle, + qcrypto_hmac_alg_map[alg], + (const void *)key, nkey); + if (err != 0) { + error_setg(errp, "Cannot initialize hmac: %s", + gnutls_strerror(err)); + goto error; + } + + return ctx; + +error: + g_free(ctx); + return NULL; +} + +static void +qcrypto_gnutls_hmac_ctx_free(QCryptoHmac *hmac) +{ + QCryptoHmacGnutls *ctx; + + ctx = hmac->opaque; + gnutls_hmac_deinit(ctx->handle, NULL); + + g_free(ctx); +} + +static int +qcrypto_gnutls_hmac_bytesv(QCryptoHmac *hmac, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp) +{ + QCryptoHmacGnutls *ctx; + uint32_t ret; + int i; + + ctx = hmac->opaque; + + for (i = 0; i < niov; i++) { + gnutls_hmac(ctx->handle, iov[i].iov_base, iov[i].iov_len); + } + + ret = gnutls_hmac_get_len(qcrypto_hmac_alg_map[hmac->alg]); + if (ret <= 0) { + error_setg(errp, "Unable to get hmac length: %s", + gnutls_strerror(ret)); + return -1; + } + + if (*resultlen == 0) { + *resultlen = ret; + *result = g_new0(uint8_t, *resultlen); + } else if (*resultlen != ret) { + error_setg(errp, "Result buffer size %zu is smaller than hmac %d", + *resultlen, ret); + return -1; + } + + gnutls_hmac_output(ctx->handle, *result); + + return 0; +} + +QCryptoHmacDriver qcrypto_hmac_lib_driver = { + .hmac_bytesv = qcrypto_gnutls_hmac_bytesv, + .hmac_free = qcrypto_gnutls_hmac_ctx_free, +}; diff --git a/crypto/hmac-nettle.c b/crypto/hmac-nettle.c index 1152b741fdc..1ad6c4f2530 100644 --- a/crypto/hmac-nettle.c +++ b/crypto/hmac-nettle.c @@ -18,22 +18,16 @@ #include "hmacpriv.h" #include -#if CONFIG_NETTLE_VERSION_MAJOR < 3 -typedef unsigned int hmac_length_t; -#else -typedef size_t hmac_length_t; -#endif - typedef void (*qcrypto_nettle_hmac_setkey)(void *ctx, - hmac_length_t key_length, + size_t key_length, const uint8_t *key); typedef void (*qcrypto_nettle_hmac_update)(void *ctx, - hmac_length_t length, + size_t length, const uint8_t *data); typedef void (*qcrypto_nettle_hmac_digest)(void *ctx, - hmac_length_t length, + size_t length, uint8_t *digest); typedef struct QCryptoHmacNettle QCryptoHmacNettle; diff --git a/crypto/init.c b/crypto/init.c index 
ea233b9192a..fb7f1bff105 100644 --- a/crypto/init.c +++ b/crypto/init.c @@ -35,21 +35,6 @@ #include "crypto/random.h" /* #define DEBUG_GNUTLS */ - -/* - * We need to init gcrypt threading if - * - * - gcrypt < 1.6.0 - * - */ - -#if (defined(CONFIG_GCRYPT) && \ - (GCRYPT_VERSION_NUMBER < 0x010600)) -#define QCRYPTO_INIT_GCRYPT_THREADS -#else -#undef QCRYPTO_INIT_GCRYPT_THREADS -#endif - #ifdef DEBUG_GNUTLS static void qcrypto_gnutls_log(int level, const char *str) { @@ -57,55 +42,8 @@ static void qcrypto_gnutls_log(int level, const char *str) } #endif -#ifdef QCRYPTO_INIT_GCRYPT_THREADS -static int qcrypto_gcrypt_mutex_init(void **priv) -{ \ - QemuMutex *lock = NULL; - lock = g_new0(QemuMutex, 1); - qemu_mutex_init(lock); - *priv = lock; - return 0; -} - -static int qcrypto_gcrypt_mutex_destroy(void **priv) -{ - QemuMutex *lock = *priv; - qemu_mutex_destroy(lock); - g_free(lock); - return 0; -} - -static int qcrypto_gcrypt_mutex_lock(void **priv) -{ - QemuMutex *lock = *priv; - qemu_mutex_lock(lock); - return 0; -} - -static int qcrypto_gcrypt_mutex_unlock(void **priv) -{ - QemuMutex *lock = *priv; - qemu_mutex_unlock(lock); - return 0; -} - -static struct gcry_thread_cbs qcrypto_gcrypt_thread_impl = { - (GCRY_THREAD_OPTION_PTHREAD | (GCRY_THREAD_OPTION_VERSION << 8)), - NULL, - qcrypto_gcrypt_mutex_init, - qcrypto_gcrypt_mutex_destroy, - qcrypto_gcrypt_mutex_lock, - qcrypto_gcrypt_mutex_unlock, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL -}; -#endif /* QCRYPTO_INIT_GCRYPT */ - int qcrypto_init(Error **errp) { -#ifdef QCRYPTO_INIT_GCRYPT_THREADS - gcry_control(GCRYCTL_SET_THREAD_CBS, &qcrypto_gcrypt_thread_impl); -#endif /* QCRYPTO_INIT_GCRYPT_THREADS */ - #ifdef CONFIG_GNUTLS int ret; ret = gnutls_global_init(); diff --git a/crypto/meson.build b/crypto/meson.build index 7f37b5d3354..95a6a835049 100644 --- a/crypto/meson.build +++ b/crypto/meson.build @@ -5,7 +5,6 @@ crypto_ss.add(files( 'block-qcow.c', 'block.c', 'cipher.c', - 'desrfb.c', 'hash.c', 'hmac.c', 'ivgen-essiv.c', @@ -22,52 +21,36 @@ crypto_ss.add(files( 'tlssession.c', )) -if 'CONFIG_NETTLE' in config_host - crypto_ss.add(files('hash-nettle.c', 'hmac-nettle.c', 'pbkdf-nettle.c')) -elif 'CONFIG_GCRYPT' in config_host - crypto_ss.add(files('hash-gcrypt.c', 'pbkdf-gcrypt.c')) - if 'CONFIG_GCRYPT_HMAC' in config_host - crypto_ss.add(files('hmac-gcrypt.c')) - else - crypto_ss.add(files('hmac-glib.c')) +if nettle.found() + crypto_ss.add(nettle, files('hash-nettle.c', 'hmac-nettle.c', 'pbkdf-nettle.c')) + if xts == 'private' + crypto_ss.add(files('xts.c')) endif +elif gcrypt.found() + crypto_ss.add(gcrypt, files('hash-gcrypt.c', 'hmac-gcrypt.c', 'pbkdf-gcrypt.c')) +elif gnutls_crypto.found() + crypto_ss.add(gnutls, files('hash-gnutls.c', 'hmac-gnutls.c', 'pbkdf-gnutls.c')) else crypto_ss.add(files('hash-glib.c', 'hmac-glib.c', 'pbkdf-stub.c')) endif crypto_ss.add(when: 'CONFIG_SECRET_KEYRING', if_true: files('secret_keyring.c')) -crypto_ss.add(when: 'CONFIG_QEMU_PRIVATE_XTS', if_true: files('xts.c')) crypto_ss.add(when: 'CONFIG_AF_ALG', if_true: files('afalg.c', 'cipher-afalg.c', 'hash-afalg.c')) -crypto_ss.add(when: 'CONFIG_GNUTLS', if_true: files('tls-cipher-suites.c')) - -if 'CONFIG_NETTLE' in config_host - crypto_ss.add(nettle) -elif 'CONFIG_GCRYPT' in config_host - crypto_ss.add(gcrypt) -endif - -if 'CONFIG_GNUTLS' in config_host - crypto_ss.add(gnutls) -endif - +crypto_ss.add(when: gnutls, if_true: files('tls-cipher-suites.c')) util_ss.add(files('aes.c')) util_ss.add(files('init.c')) +if gnutls.found() + util_ss.add(gnutls) 
+endif -if 'CONFIG_GCRYPT' in config_host - util_ss.add(files('random-gcrypt.c')) -elif 'CONFIG_GNUTLS' in config_host - util_ss.add(files('random-gnutls.c')) +if gcrypt.found() + util_ss.add(gcrypt, files('random-gcrypt.c')) +elif gnutls.found() + util_ss.add(gnutls, files('random-gnutls.c')) elif 'CONFIG_RNG_NONE' in config_host util_ss.add(files('random-none.c')) else util_ss.add(files('random-platform.c')) endif -if 'CONFIG_GCRYPT' in config_host - util_ss.add(gcrypt) -endif - -if 'CONFIG_GNUTLS' in config_host - util_ss.add(gnutls) -endif diff --git a/crypto/pbkdf-gnutls.c b/crypto/pbkdf-gnutls.c new file mode 100644 index 00000000000..2dfbbd382c2 --- /dev/null +++ b/crypto/pbkdf-gnutls.c @@ -0,0 +1,90 @@ +/* + * QEMU Crypto PBKDF support (Password-Based Key Derivation Function) + * + * Copyright (c) 2021 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include +#include "qapi/error.h" +#include "crypto/pbkdf.h" + +bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash) +{ + switch (hash) { + case QCRYPTO_HASH_ALG_MD5: + case QCRYPTO_HASH_ALG_SHA1: + case QCRYPTO_HASH_ALG_SHA224: + case QCRYPTO_HASH_ALG_SHA256: + case QCRYPTO_HASH_ALG_SHA384: + case QCRYPTO_HASH_ALG_SHA512: + case QCRYPTO_HASH_ALG_RIPEMD160: + return true; + default: + return false; + } +} + +int qcrypto_pbkdf2(QCryptoHashAlgorithm hash, + const uint8_t *key, size_t nkey, + const uint8_t *salt, size_t nsalt, + uint64_t iterations, + uint8_t *out, size_t nout, + Error **errp) +{ + static const int hash_map[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_MD5] = GNUTLS_DIG_MD5, + [QCRYPTO_HASH_ALG_SHA1] = GNUTLS_DIG_SHA1, + [QCRYPTO_HASH_ALG_SHA224] = GNUTLS_DIG_SHA224, + [QCRYPTO_HASH_ALG_SHA256] = GNUTLS_DIG_SHA256, + [QCRYPTO_HASH_ALG_SHA384] = GNUTLS_DIG_SHA384, + [QCRYPTO_HASH_ALG_SHA512] = GNUTLS_DIG_SHA512, + [QCRYPTO_HASH_ALG_RIPEMD160] = GNUTLS_DIG_RMD160, + }; + int ret; + const gnutls_datum_t gkey = { (unsigned char *)key, nkey }; + const gnutls_datum_t gsalt = { (unsigned char *)salt, nsalt }; + + if (iterations > ULONG_MAX) { + error_setg_errno(errp, ERANGE, + "PBKDF iterations %llu must be less than %lu", + (long long unsigned)iterations, ULONG_MAX); + return -1; + } + + if (hash >= G_N_ELEMENTS(hash_map) || + hash_map[hash] == GNUTLS_DIG_UNKNOWN) { + error_setg_errno(errp, ENOSYS, + "PBKDF does not support hash algorithm %s", + QCryptoHashAlgorithm_str(hash)); + return -1; + } + + ret = gnutls_pbkdf2(hash_map[hash], + &gkey, + &gsalt, + iterations, + out, + nout); + if (ret != 0) { + error_setg(errp, "Cannot derive password: %s", + gnutls_strerror(ret)); + return -1; + } + + return 0; +} diff --git a/crypto/tls-cipher-suites.c b/crypto/tls-cipher-suites.c index 55fb5f7c19d..5e4f5974645 100644 --- a/crypto/tls-cipher-suites.c +++ b/crypto/tls-cipher-suites.c @@ -14,8 +14,15 @@ #include "crypto/tlscreds.h" #include "crypto/tls-cipher-suites.h" #include 
"hw/nvram/fw_cfg.h" +#include "tlscredspriv.h" #include "trace.h" +struct QCryptoTLSCipherSuites { + /* */ + QCryptoTLSCreds parent_obj; + /* */ +}; + /* * IANA registered TLS ciphers: * https://www.iana.org/assignments/tls-parameters/tls-parameters.xhtml#tls-parameters-4 diff --git a/crypto/tlscreds.c b/crypto/tlscreds.c index b68735f06fe..084ce0d51ae 100644 --- a/crypto/tlscreds.c +++ b/crypto/tlscreds.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include "qapi-types-crypto.h" #include "qemu/module.h" #include "tlscredspriv.h" #include "trace.h" @@ -259,6 +260,17 @@ qcrypto_tls_creds_finalize(Object *obj) g_free(creds->priority); } +bool qcrypto_tls_creds_check_endpoint(QCryptoTLSCreds *creds, + QCryptoTLSCredsEndpoint endpoint, + Error **errp) +{ + if (creds->endpoint != endpoint) { + error_setg(errp, "Expected TLS credentials for a %s endpoint", + QCryptoTLSCredsEndpoint_str(endpoint)); + return false; + } + return true; +} static const TypeInfo qcrypto_tls_creds_info = { .parent = TYPE_OBJECT, diff --git a/crypto/tlscredsanon.c b/crypto/tlscredsanon.c index bea5f76c55d..6fb83639ecd 100644 --- a/crypto/tlscredsanon.c +++ b/crypto/tlscredsanon.c @@ -29,6 +29,8 @@ #ifdef CONFIG_GNUTLS +#include + static int qcrypto_tls_creds_anon_load(QCryptoTLSCredsAnon *creds, diff --git a/crypto/tlscredspriv.h b/crypto/tlscredspriv.h index 39f1a91c413..df9815a2863 100644 --- a/crypto/tlscredspriv.h +++ b/crypto/tlscredspriv.h @@ -23,6 +23,51 @@ #include "crypto/tlscreds.h" +#ifdef CONFIG_GNUTLS +#include +#endif + +struct QCryptoTLSCreds { + Object parent_obj; + char *dir; + QCryptoTLSCredsEndpoint endpoint; +#ifdef CONFIG_GNUTLS + gnutls_dh_params_t dh_params; +#endif + bool verifyPeer; + char *priority; +}; + +struct QCryptoTLSCredsAnon { + QCryptoTLSCreds parent_obj; +#ifdef CONFIG_GNUTLS + union { + gnutls_anon_server_credentials_t server; + gnutls_anon_client_credentials_t client; + } data; +#endif +}; + +struct QCryptoTLSCredsPSK { + QCryptoTLSCreds parent_obj; + char *username; +#ifdef CONFIG_GNUTLS + union { + gnutls_psk_server_credentials_t server; + gnutls_psk_client_credentials_t client; + } data; +#endif +}; + +struct QCryptoTLSCredsX509 { + QCryptoTLSCreds parent_obj; +#ifdef CONFIG_GNUTLS + gnutls_certificate_credentials_t data; +#endif + bool sanityCheck; + char *passwordid; +}; + #ifdef CONFIG_GNUTLS int qcrypto_tls_creds_get_path(QCryptoTLSCreds *creds, diff --git a/crypto/tlscredspsk.c b/crypto/tlscredspsk.c index f5a31108d15..752f2d92bee 100644 --- a/crypto/tlscredspsk.c +++ b/crypto/tlscredspsk.c @@ -29,6 +29,8 @@ #ifdef CONFIG_GNUTLS +#include + static int lookup_key(const char *pskfile, const char *username, gnutls_datum_t *key, Error **errp) diff --git a/crypto/tlscredsx509.c b/crypto/tlscredsx509.c index bc503bab558..32948a6bdc4 100644 --- a/crypto/tlscredsx509.c +++ b/crypto/tlscredsx509.c @@ -30,6 +30,7 @@ #ifdef CONFIG_GNUTLS +#include #include @@ -354,11 +355,9 @@ qcrypto_tls_creds_check_cert_pair(gnutls_x509_crt_t cert, reason = "The certificate has been revoked"; } -#ifndef GNUTLS_1_0_COMPAT if (status & GNUTLS_CERT_INSECURE_ALGORITHM) { reason = "The certificate uses an insecure algorithm"; } -#endif error_setg(errp, "Our own certificate %s failed validation against %s: %s", diff --git a/crypto/tlssession.c b/crypto/tlssession.c index 33203e8ca71..a8db8c76d13 100644 --- a/crypto/tlssession.c +++ b/crypto/tlssession.c @@ -25,6 +25,7 @@ #include "crypto/tlscredsx509.h" #include "qapi/error.h" #include "authz/base.h" +#include "tlscredspriv.h" 
#include "trace.h" #ifdef CONFIG_GNUTLS diff --git a/crypto/trace-events b/crypto/trace-events index 798b6067ab0..bccd0bbf291 100644 --- a/crypto/trace-events +++ b/crypto/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # tlscreds.c qcrypto_tls_creds_load_dh(void *creds, const char *filename) "TLS creds load DH creds=%p filename=%s" diff --git a/default-configs/devices/aarch64-softmmu.mak b/default-configs/devices/aarch64-softmmu.mak deleted file mode 100644 index 958b1e08e40..00000000000 --- a/default-configs/devices/aarch64-softmmu.mak +++ /dev/null @@ -1,8 +0,0 @@ -# Default configuration for aarch64-softmmu - -# We support all the 32 bit boards so need all their config -include arm-softmmu.mak - -CONFIG_XLNX_ZYNQMP_ARM=y -CONFIG_XLNX_VERSAL=y -CONFIG_SBSA_REF=y diff --git a/default-configs/devices/arm-softmmu.mak b/default-configs/devices/arm-softmmu.mak deleted file mode 100644 index 0500156a0c7..00000000000 --- a/default-configs/devices/arm-softmmu.mak +++ /dev/null @@ -1,46 +0,0 @@ -# Default configuration for arm-softmmu - -# TODO: ARM_V7M is currently always required - make this more flexible! -CONFIG_ARM_V7M=y - -# CONFIG_PCI_DEVICES=n -# CONFIG_TEST_DEVICES=n - -CONFIG_ARM_VIRT=y -CONFIG_CUBIEBOARD=y -CONFIG_EXYNOS4=y -CONFIG_HIGHBANK=y -CONFIG_INTEGRATOR=y -CONFIG_FSL_IMX31=y -CONFIG_MUSICPAL=y -CONFIG_MUSCA=y -CONFIG_CHEETAH=y -CONFIG_SX1=y -CONFIG_NSERIES=y -CONFIG_STELLARIS=y -CONFIG_REALVIEW=y -CONFIG_VERSATILE=y -CONFIG_VEXPRESS=y -CONFIG_ZYNQ=y -CONFIG_MAINSTONE=y -CONFIG_GUMSTIX=y -CONFIG_SPITZ=y -CONFIG_TOSA=y -CONFIG_Z2=y -CONFIG_NPCM7XX=y -CONFIG_COLLIE=y -CONFIG_ASPEED_SOC=y -CONFIG_NETDUINO2=y -CONFIG_NETDUINOPLUS2=y -CONFIG_MPS2=y -CONFIG_RASPI=y -CONFIG_DIGIC=y -CONFIG_SABRELITE=y -CONFIG_EMCRAFT_SF2=y -CONFIG_MICROBIT=y -CONFIG_FSL_IMX25=y -CONFIG_FSL_IMX7=y -CONFIG_FSL_IMX6UL=y -CONFIG_SEMIHOSTING=y -CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y -CONFIG_ALLWINNER_H3=y diff --git a/default-configs/devices/i386-softmmu.mak b/default-configs/devices/i386-softmmu.mak deleted file mode 100644 index 84d1a2487cd..00000000000 --- a/default-configs/devices/i386-softmmu.mak +++ /dev/null @@ -1,31 +0,0 @@ -# Default configuration for i386-softmmu - -# Uncomment the following lines to disable these optional devices: -# -#CONFIG_AMD_IOMMU=n -#CONFIG_APPLESMC=n -#CONFIG_FDC=n -#CONFIG_HPET=n -#CONFIG_HYPERV=n -#CONFIG_ISA_DEBUG=n -#CONFIG_ISA_IPMI_BT=n -#CONFIG_ISA_IPMI_KCS=n -#CONFIG_PCI_IPMI_KCS=n -#CONFIG_PCI_IPMI_BT=n -#CONFIG_IPMI_SSIF=n -#CONFIG_PCI_DEVICES=n -#CONFIG_PVPANIC=n -#CONFIG_QXL=n -#CONFIG_SEV=n -#CONFIG_SGA=n -#CONFIG_TEST_DEVICES=n -#CONFIG_TPM_CRB=n -#CONFIG_TPM_TIS_ISA=n -#CONFIG_VTD=n - -# Boards: -# -CONFIG_ISAPC=y -CONFIG_I440FX=y -CONFIG_Q35=y -CONFIG_MICROVM=y diff --git a/default-configs/devices/lm32-softmmu.mak b/default-configs/devices/lm32-softmmu.mak deleted file mode 100644 index 1bce3f6e8b6..00000000000 --- a/default-configs/devices/lm32-softmmu.mak +++ /dev/null @@ -1,12 +0,0 @@ -# Default configuration for lm32-softmmu - -# Uncomment the following lines to disable these optional devices: -# -#CONFIG_MILKYMIST_TMU2=n # disabling it actually causes compile-time failures - -CONFIG_SEMIHOSTING=y - -# Boards: -# -CONFIG_LM32_EVR=y -CONFIG_MILKYMIST=y diff --git a/default-configs/devices/microblazeel-softmmu.mak b/default-configs/devices/microblazeel-softmmu.mak deleted file mode 100644 index 2fcf442fc75..00000000000 --- 
a/default-configs/devices/microblazeel-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for microblazeel-softmmu - -include microblaze-softmmu.mak diff --git a/default-configs/devices/mips-softmmu-common.mak b/default-configs/devices/mips-softmmu-common.mak deleted file mode 100644 index a53bc898bfe..00000000000 --- a/default-configs/devices/mips-softmmu-common.mak +++ /dev/null @@ -1,42 +0,0 @@ -# Common mips*-softmmu CONFIG defines - -# CONFIG_SEMIHOSTING is always required on this architecture -CONFIG_SEMIHOSTING=y - -CONFIG_ISA_BUS=y -CONFIG_PCI=y -CONFIG_PCI_DEVICES=y -CONFIG_VGA_ISA=y -CONFIG_VGA_ISA_MM=y -CONFIG_VGA_CIRRUS=y -CONFIG_VMWARE_VGA=y -CONFIG_SERIAL=y -CONFIG_SERIAL_ISA=y -CONFIG_PARALLEL=y -CONFIG_I8254=y -CONFIG_PCSPK=y -CONFIG_PCKBD=y -CONFIG_FDC=y -CONFIG_ACPI=y -CONFIG_ACPI_X86=y -CONFIG_ACPI_MEMORY_HOTPLUG=y -CONFIG_ACPI_NVDIMM=y -CONFIG_ACPI_CPU_HOTPLUG=y -CONFIG_APM=y -CONFIG_I8257=y -CONFIG_PIIX4=y -CONFIG_IDE_ISA=y -CONFIG_IDE_PIIX=y -CONFIG_PFLASH_CFI01=y -CONFIG_I8259=y -CONFIG_MC146818RTC=y -CONFIG_EMPTY_SLOT=y -CONFIG_MIPS_CPS=y -CONFIG_MIPS_ITU=y -CONFIG_MALTA=y -CONFIG_PCNET_PCI=y -CONFIG_MIPSSIM=y -CONFIG_ACPI_SMBUS=y -CONFIG_SMBUS_EEPROM=y -CONFIG_TEST_DEVICES=y -CONFIG_VIRTIO_MMIO=y diff --git a/default-configs/devices/mips-softmmu.mak b/default-configs/devices/mips-softmmu.mak deleted file mode 100644 index 9fede6e00f8..00000000000 --- a/default-configs/devices/mips-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for mips-softmmu - -include mips-softmmu-common.mak diff --git a/default-configs/devices/mips64-softmmu.mak b/default-configs/devices/mips64-softmmu.mak deleted file mode 100644 index a169738635f..00000000000 --- a/default-configs/devices/mips64-softmmu.mak +++ /dev/null @@ -1,4 +0,0 @@ -# Default configuration for mips64-softmmu - -include mips-softmmu-common.mak -CONFIG_JAZZ=y diff --git a/default-configs/devices/mips64cheri128-softmmu.mak b/default-configs/devices/mips64cheri128-softmmu.mak deleted file mode 100644 index bbbc8cf747f..00000000000 --- a/default-configs/devices/mips64cheri128-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for cheri128-softmmu - -include mips64-softmmu.mak diff --git a/default-configs/devices/mips64el-softmmu.mak b/default-configs/devices/mips64el-softmmu.mak deleted file mode 100644 index 26c660a05c4..00000000000 --- a/default-configs/devices/mips64el-softmmu.mak +++ /dev/null @@ -1,15 +0,0 @@ -# Default configuration for mips64el-softmmu - -include mips-softmmu-common.mak -CONFIG_IDE_VIA=y -CONFIG_FULOONG=y -CONFIG_LOONGSON3V=y -CONFIG_ATI_VGA=y -CONFIG_RTL8139_PCI=y -CONFIG_JAZZ=y -CONFIG_VT82C686=y -CONFIG_AHCI=y -CONFIG_MIPS_BOSTON=y -CONFIG_FITLOADER=y -CONFIG_PCI_EXPRESS=y -CONFIG_PCI_EXPRESS_XILINX=y diff --git a/default-configs/devices/mipsel-softmmu.mak b/default-configs/devices/mipsel-softmmu.mak deleted file mode 100644 index a7f60594849..00000000000 --- a/default-configs/devices/mipsel-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for mipsel-softmmu - -include mips-softmmu-common.mak diff --git a/default-configs/devices/moxie-softmmu.mak b/default-configs/devices/moxie-softmmu.mak deleted file mode 100644 index bd50da3c58f..00000000000 --- a/default-configs/devices/moxie-softmmu.mak +++ /dev/null @@ -1,5 +0,0 @@ -# Default configuration for moxie-softmmu - -# Boards: -# -CONFIG_MOXIESIM=y diff --git a/default-configs/devices/ppc-softmmu.mak b/default-configs/devices/ppc-softmmu.mak deleted file mode 100644 index 
61b78b844dd..00000000000 --- a/default-configs/devices/ppc-softmmu.mak +++ /dev/null @@ -1,18 +0,0 @@ -# Default configuration for ppc-softmmu - -# For embedded PPCs: -CONFIG_DS1338=y -CONFIG_E500=y -CONFIG_PPC405=y -CONFIG_PPC440=y -CONFIG_VIRTEX=y - -# For Sam460ex -CONFIG_SAM460EX=y - -# For Macs -CONFIG_MAC_OLDWORLD=y -CONFIG_MAC_NEWWORLD=y - -# For PReP -CONFIG_PREP=y diff --git a/default-configs/devices/ppc64-softmmu.mak b/default-configs/devices/ppc64-softmmu.mak deleted file mode 100644 index ae0841fa3a1..00000000000 --- a/default-configs/devices/ppc64-softmmu.mak +++ /dev/null @@ -1,11 +0,0 @@ -# Default configuration for ppc64-softmmu - -# Include all 32-bit boards -include ppc-softmmu.mak - -# For PowerNV -CONFIG_POWERNV=y - -# For pSeries -CONFIG_PSERIES=y -CONFIG_NVDIMM=y diff --git a/default-configs/devices/riscv32cheri-softmmu.mak b/default-configs/devices/riscv32cheri-softmmu.mak deleted file mode 100644 index ede744e1fec..00000000000 --- a/default-configs/devices/riscv32cheri-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for riscv32cheri-softmmu - -include riscv32-softmmu.mak diff --git a/default-configs/devices/riscv64-softmmu.mak b/default-configs/devices/riscv64-softmmu.mak deleted file mode 100644 index d5eec75f05e..00000000000 --- a/default-configs/devices/riscv64-softmmu.mak +++ /dev/null @@ -1,15 +0,0 @@ -# Default configuration for riscv64-softmmu - -# Uncomment the following lines to disable these optional devices: -# -#CONFIG_PCI_DEVICES=n -CONFIG_SEMIHOSTING=y -CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y - -# Boards: -# -CONFIG_SPIKE=y -CONFIG_SIFIVE_E=y -CONFIG_SIFIVE_U=y -CONFIG_RISCV_VIRT=y -CONFIG_MICROCHIP_PFSOC=y diff --git a/default-configs/devices/riscv64cheri-softmmu.mak b/default-configs/devices/riscv64cheri-softmmu.mak deleted file mode 100644 index fd6e4b93792..00000000000 --- a/default-configs/devices/riscv64cheri-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for riscv64cheri-softmmu - -include riscv64-softmmu.mak diff --git a/default-configs/devices/sh4eb-softmmu.mak b/default-configs/devices/sh4eb-softmmu.mak deleted file mode 100644 index 522a7a50fab..00000000000 --- a/default-configs/devices/sh4eb-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for sh4eb-softmmu - -include sh4-softmmu.mak diff --git a/default-configs/devices/tricore-softmmu.mak b/default-configs/devices/tricore-softmmu.mak deleted file mode 100644 index 5cc91cebce7..00000000000 --- a/default-configs/devices/tricore-softmmu.mak +++ /dev/null @@ -1 +0,0 @@ -CONFIG_TRIBOARD=y diff --git a/default-configs/devices/unicore32-softmmu.mak b/default-configs/devices/unicore32-softmmu.mak deleted file mode 100644 index 899288e3d71..00000000000 --- a/default-configs/devices/unicore32-softmmu.mak +++ /dev/null @@ -1,6 +0,0 @@ -# Default configuration for unicore32-softmmu - -# Boards: -# -CONFIG_PUV3=y -CONFIG_SEMIHOSTING=y diff --git a/default-configs/devices/x86_64-softmmu.mak b/default-configs/devices/x86_64-softmmu.mak deleted file mode 100644 index 64b2ee2960e..00000000000 --- a/default-configs/devices/x86_64-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for x86_64-softmmu - -include i386-softmmu.mak diff --git a/default-configs/devices/xtensaeb-softmmu.mak b/default-configs/devices/xtensaeb-softmmu.mak deleted file mode 100644 index f7e48c750cd..00000000000 --- a/default-configs/devices/xtensaeb-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for Xtensa - -include xtensa-softmmu.mak diff --git 
a/default-configs/targets/aarch64-linux-user.mak b/default-configs/targets/aarch64-linux-user.mak deleted file mode 100644 index 4713253709f..00000000000 --- a/default-configs/targets/aarch64-linux-user.mak +++ /dev/null @@ -1,5 +0,0 @@ -TARGET_ARCH=aarch64 -TARGET_BASE_ARCH=arm -TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml -TARGET_HAS_BFLT=y -CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/default-configs/targets/aarch64-softmmu.mak b/default-configs/targets/aarch64-softmmu.mak deleted file mode 100644 index 7703127674e..00000000000 --- a/default-configs/targets/aarch64-softmmu.mak +++ /dev/null @@ -1,5 +0,0 @@ -TARGET_ARCH=aarch64 -TARGET_BASE_ARCH=arm -TARGET_SUPPORTS_MTTCG=y -TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml -TARGET_NEED_FDT=y diff --git a/default-configs/targets/aarch64_be-linux-user.mak b/default-configs/targets/aarch64_be-linux-user.mak deleted file mode 100644 index fae831558da..00000000000 --- a/default-configs/targets/aarch64_be-linux-user.mak +++ /dev/null @@ -1,6 +0,0 @@ -TARGET_ARCH=aarch64 -TARGET_BASE_ARCH=arm -TARGET_WORDS_BIGENDIAN=y -TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml -TARGET_HAS_BFLT=y -CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/default-configs/targets/arm-linux-user.mak b/default-configs/targets/arm-linux-user.mak deleted file mode 100644 index e741ffd4d30..00000000000 --- a/default-configs/targets/arm-linux-user.mak +++ /dev/null @@ -1,6 +0,0 @@ -TARGET_ARCH=arm -TARGET_SYSTBL_ABI=common,oabi -TARGET_SYSTBL=syscall.tbl -TARGET_XML_FILES= gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml -TARGET_HAS_BFLT=y -CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/default-configs/targets/arm-softmmu.mak b/default-configs/targets/arm-softmmu.mak deleted file mode 100644 index 84a98f48186..00000000000 --- a/default-configs/targets/arm-softmmu.mak +++ /dev/null @@ -1,4 +0,0 @@ -TARGET_ARCH=arm -TARGET_SUPPORTS_MTTCG=y -TARGET_XML_FILES= gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml -TARGET_NEED_FDT=y diff --git a/default-configs/targets/armeb-linux-user.mak b/default-configs/targets/armeb-linux-user.mak deleted file mode 100644 index 255e44e8b0a..00000000000 --- a/default-configs/targets/armeb-linux-user.mak +++ /dev/null @@ -1,7 +0,0 @@ -TARGET_ARCH=arm -TARGET_SYSTBL_ABI=common,oabi -TARGET_SYSTBL=syscall.tbl -TARGET_WORDS_BIGENDIAN=y -TARGET_XML_FILES= gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml -TARGET_HAS_BFLT=y -CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/default-configs/targets/i386-softmmu.mak b/default-configs/targets/i386-softmmu.mak deleted file mode 100644 index 5babf71895d..00000000000 --- a/default-configs/targets/i386-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -TARGET_ARCH=i386 -TARGET_SUPPORTS_MTTCG=y -TARGET_XML_FILES= gdb-xml/i386-32bit.xml diff --git a/default-configs/targets/lm32-softmmu.mak b/default-configs/targets/lm32-softmmu.mak deleted file mode 100644 index 55e7184a3db..00000000000 --- a/default-configs/targets/lm32-softmmu.mak +++ /dev/null @@ -1,2 +0,0 
@@ -TARGET_ARCH=lm32 -TARGET_WORDS_BIGENDIAN=y diff --git a/default-configs/targets/moxie-softmmu.mak b/default-configs/targets/moxie-softmmu.mak deleted file mode 100644 index 183e6b0ebda..00000000000 --- a/default-configs/targets/moxie-softmmu.mak +++ /dev/null @@ -1,2 +0,0 @@ -TARGET_ARCH=moxie -TARGET_WORDS_BIGENDIAN=y diff --git a/default-configs/targets/sparc-bsd-user.mak b/default-configs/targets/sparc-bsd-user.mak deleted file mode 100644 index 9ba3d7b07f1..00000000000 --- a/default-configs/targets/sparc-bsd-user.mak +++ /dev/null @@ -1,3 +0,0 @@ -TARGET_ARCH=sparc -TARGET_ALIGNED_ONLY=y -TARGET_WORDS_BIGENDIAN=y diff --git a/default-configs/targets/sparc64-bsd-user.mak b/default-configs/targets/sparc64-bsd-user.mak deleted file mode 100644 index 8dd32178004..00000000000 --- a/default-configs/targets/sparc64-bsd-user.mak +++ /dev/null @@ -1,4 +0,0 @@ -TARGET_ARCH=sparc64 -TARGET_BASE_ARCH=sparc -TARGET_ALIGNED_ONLY=y -TARGET_WORDS_BIGENDIAN=y diff --git a/default-configs/targets/unicore32-softmmu.mak b/default-configs/targets/unicore32-softmmu.mak deleted file mode 100644 index 57331e94fe2..00000000000 --- a/default-configs/targets/unicore32-softmmu.mak +++ /dev/null @@ -1 +0,0 @@ -TARGET_ARCH=unicore32 diff --git a/default-configs/targets/x86_64-softmmu.mak b/default-configs/targets/x86_64-softmmu.mak deleted file mode 100644 index 75e42bc8404..00000000000 --- a/default-configs/targets/x86_64-softmmu.mak +++ /dev/null @@ -1,4 +0,0 @@ -TARGET_ARCH=x86_64 -TARGET_BASE_ARCH=i386 -TARGET_SUPPORTS_MTTCG=y -TARGET_XML_FILES= gdb-xml/i386-64bit.xml diff --git a/default-configs/targets/xtensa-softmmu.mak b/default-configs/targets/xtensa-softmmu.mak deleted file mode 100644 index 26c0285655c..00000000000 --- a/default-configs/targets/xtensa-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -TARGET_ARCH=xtensa -TARGET_ALIGNED_ONLY=y -TARGET_SUPPORTS_MTTCG=y diff --git a/default-configs/targets/xtensaeb-softmmu.mak b/default-configs/targets/xtensaeb-softmmu.mak deleted file mode 100644 index 14cb9289a62..00000000000 --- a/default-configs/targets/xtensaeb-softmmu.mak +++ /dev/null @@ -1,4 +0,0 @@ -TARGET_ARCH=xtensa -TARGET_ALIGNED_ONLY=y -TARGET_WORDS_BIGENDIAN=y -TARGET_SUPPORTS_MTTCG=y diff --git a/disas.c b/disas.c index a0de01cc39a..29bc66bc9da 100644 --- a/disas.c +++ b/disas.c @@ -4,7 +4,6 @@ #include "elf.h" #include "qemu/qemu-print.h" -#include "cpu.h" #include "disas/disas.h" #include "disas/capstone.h" diff --git a/disas/arm-a64.cc b/disas/arm-a64.cc index 1cb15f6b823..4b3c91f2131 100644 --- a/disas/arm-a64.cc +++ b/disas/arm-a64.cc @@ -18,9 +18,7 @@ */ #include "qemu/osdep.h" -extern "C" { #include "disas/dis-asm.h" -} #include "vixl/aarch64/disasm-aarch64.h" diff --git a/disas/hexagon.c b/disas/hexagon.c index 3c24e2a94af..c1a4ffc5f6b 100644 --- a/disas/hexagon.c +++ b/disas/hexagon.c @@ -33,7 +33,7 @@ int print_insn_hexagon(bfd_vma memaddr, struct disassemble_info *info) { uint32_t words[PACKET_WORDS_MAX]; bool found_end = false; - GString *buf = g_string_sized_new(PACKET_BUFFER_LEN); + GString *buf; int i, len; for (i = 0; i < PACKET_WORDS_MAX && !found_end; i++) { @@ -57,6 +57,7 @@ int print_insn_hexagon(bfd_vma memaddr, struct disassemble_info *info) return PACKET_WORDS_MAX * sizeof(uint32_t); } + buf = g_string_sized_new(PACKET_BUFFER_LEN); len = disassemble_hexagon(words, i, memaddr, buf); (*info->fprintf_func)(info->stream, "%s", buf->str); g_string_free(buf, true); diff --git a/disas/libvixl/vixl/code-buffer-vixl.h b/disas/libvixl/vixl/code-buffer-vixl.h index 
a43c584367e..88138e4e327 100644 --- a/disas/libvixl/vixl/code-buffer-vixl.h +++ b/disas/libvixl/vixl/code-buffer-vixl.h @@ -28,9 +28,7 @@ #define VIXL_CODE_BUFFER_H #include - -#include "globals-vixl.h" -#include "utils-vixl.h" +#include "vixl/globals.h" namespace vixl { diff --git a/disas/libvixl/vixl/globals.h b/disas/libvixl/vixl/globals.h new file mode 100644 index 00000000000..3a71942f1e5 --- /dev/null +++ b/disas/libvixl/vixl/globals.h @@ -0,0 +1,155 @@ +// Copyright 2015, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_GLOBALS_H +#define VIXL_GLOBALS_H + +// Get standard C99 macros for integer types. +#ifndef __STDC_CONSTANT_MACROS +#define __STDC_CONSTANT_MACROS +#endif + +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS +#endif + +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif + +extern "C" { +#include +#include +} + +#include +#include +#include +#include +#include + +#include "vixl/platform.h" + + +typedef uint8_t byte; + +// Type for half-precision (16 bit) floating point numbers. +typedef uint16_t float16; + +const int KBytes = 1024; +const int MBytes = 1024 * KBytes; + +#define VIXL_ABORT() \ + do { printf("in %s, line %i", __FILE__, __LINE__); abort(); } while (false) +#ifdef VIXL_DEBUG + #define VIXL_ASSERT(condition) assert(condition) + #define VIXL_CHECK(condition) VIXL_ASSERT(condition) + #define VIXL_UNIMPLEMENTED() \ + do { fprintf(stderr, "UNIMPLEMENTED\t"); VIXL_ABORT(); } while (false) + #define VIXL_UNREACHABLE() \ + do { fprintf(stderr, "UNREACHABLE\t"); VIXL_ABORT(); } while (false) +#else + #define VIXL_ASSERT(condition) ((void) 0) + #define VIXL_CHECK(condition) assert(condition) + #define VIXL_UNIMPLEMENTED() ((void) 0) + #define VIXL_UNREACHABLE() ((void) 0) +#endif +// This is not as powerful as template based assertions, but it is simple. +// It assumes that the descriptions are unique. If this starts being a problem, +// we can switch to a different implemention. 
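The comment above describes the classic line-number-based static assertion used before C++11's static_assert: the condition is folded into an array bound, so a false condition produces a negative-sized typedef and the build fails on that line. A minimal standalone sketch of the same trick (hypothetical macro names, not taken from this patch):

    // Two concatenation levels so that __LINE__ is expanded before token pasting.
    #define MY_CONCAT_(a, b) a##b
    #define MY_CONCAT(a, b)  MY_CONCAT_(a, b)
    // Expands to e.g. "typedef char my_assert_12[1];" when cond holds,
    // and to an ill-formed negative-sized array type when it does not.
    #define MY_STATIC_ASSERT(cond) \
        typedef char MY_CONCAT(my_assert_, __LINE__)[(cond) ? 1 : -1] __attribute__((unused))

    MY_STATIC_ASSERT(sizeof(int) >= 4);     // compiles
    // MY_STATIC_ASSERT(sizeof(int) == 1);  // would fail to compile on this line

The "descriptions are unique" caveat simply means that two assertions on the same source line would collide on the generated typedef name.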
+#define VIXL_CONCAT(a, b) a##b +#define VIXL_STATIC_ASSERT_LINE(line, condition) \ + typedef char VIXL_CONCAT(STATIC_ASSERT_LINE_, line)[(condition) ? 1 : -1] \ + __attribute__((unused)) +#define VIXL_STATIC_ASSERT(condition) \ + VIXL_STATIC_ASSERT_LINE(__LINE__, condition) + +template +inline void USE(T1) {} + +template +inline void USE(T1, T2) {} + +template +inline void USE(T1, T2, T3) {} + +template +inline void USE(T1, T2, T3, T4) {} + +#define VIXL_ALIGNMENT_EXCEPTION() \ + do { fprintf(stderr, "ALIGNMENT EXCEPTION\t"); VIXL_ABORT(); } while (0) + +// The clang::fallthrough attribute is used along with the Wimplicit-fallthrough +// argument to annotate intentional fall-through between switch labels. +// For more information please refer to: +// http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough +#ifndef __has_warning + #define __has_warning(x) 0 +#endif + +// Fallthrough annotation for Clang and C++11(201103L). +#if __has_warning("-Wimplicit-fallthrough") && __cplusplus >= 201103L + #define VIXL_FALLTHROUGH() [[clang::fallthrough]] //NOLINT +// Fallthrough annotation for GCC >= 7. +#elif __GNUC__ >= 7 + #define VIXL_FALLTHROUGH() __attribute__((fallthrough)) +#else + #define VIXL_FALLTHROUGH() do {} while (0) +#endif + +#if __cplusplus >= 201103L + #define VIXL_NO_RETURN [[noreturn]] //NOLINT +#else + #define VIXL_NO_RETURN __attribute__((noreturn)) +#endif + +// Some functions might only be marked as "noreturn" for the DEBUG build. This +// macro should be used for such cases (for more details see what +// VIXL_UNREACHABLE expands to). +#ifdef VIXL_DEBUG + #define VIXL_DEBUG_NO_RETURN VIXL_NO_RETURN +#else + #define VIXL_DEBUG_NO_RETURN +#endif + +#ifdef VIXL_INCLUDE_SIMULATOR +#ifndef VIXL_GENERATE_SIMULATOR_INSTRUCTIONS_VALUE + #define VIXL_GENERATE_SIMULATOR_INSTRUCTIONS_VALUE 1 +#endif +#else +#ifndef VIXL_GENERATE_SIMULATOR_INSTRUCTIONS_VALUE + #define VIXL_GENERATE_SIMULATOR_INSTRUCTIONS_VALUE 0 +#endif +#if VIXL_GENERATE_SIMULATOR_INSTRUCTIONS_VALUE + #warning "Generating Simulator instructions without Simulator support." +#endif +#endif + +#ifdef USE_SIMULATOR + #error "Please see the release notes for USE_SIMULATOR." +#endif + +#endif // VIXL_GLOBALS_H diff --git a/disas/libvixl/vixl/utils.cc b/disas/libvixl/vixl/utils.cc new file mode 100644 index 00000000000..69304d266d7 --- /dev/null +++ b/disas/libvixl/vixl/utils.cc @@ -0,0 +1,142 @@ +// Copyright 2015, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "vixl/utils.h" +#include + +namespace vixl { + +uint32_t float_to_rawbits(float value) { + uint32_t bits = 0; + memcpy(&bits, &value, 4); + return bits; +} + + +uint64_t double_to_rawbits(double value) { + uint64_t bits = 0; + memcpy(&bits, &value, 8); + return bits; +} + + +float rawbits_to_float(uint32_t bits) { + float value = 0.0; + memcpy(&value, &bits, 4); + return value; +} + + +double rawbits_to_double(uint64_t bits) { + double value = 0.0; + memcpy(&value, &bits, 8); + return value; +} + + +uint32_t float_sign(float val) { + uint32_t rawbits = float_to_rawbits(val); + return unsigned_bitextract_32(31, 31, rawbits); +} + + +uint32_t float_exp(float val) { + uint32_t rawbits = float_to_rawbits(val); + return unsigned_bitextract_32(30, 23, rawbits); +} + + +uint32_t float_mantissa(float val) { + uint32_t rawbits = float_to_rawbits(val); + return unsigned_bitextract_32(22, 0, rawbits); +} + + +uint32_t double_sign(double val) { + uint64_t rawbits = double_to_rawbits(val); + return static_cast(unsigned_bitextract_64(63, 63, rawbits)); +} + + +uint32_t double_exp(double val) { + uint64_t rawbits = double_to_rawbits(val); + return static_cast(unsigned_bitextract_64(62, 52, rawbits)); +} + + +uint64_t double_mantissa(double val) { + uint64_t rawbits = double_to_rawbits(val); + return unsigned_bitextract_64(51, 0, rawbits); +} + + +float float_pack(uint32_t sign, uint32_t exp, uint32_t mantissa) { + uint32_t bits = (sign << 31) | (exp << 23) | mantissa; + return rawbits_to_float(bits); +} + + +double double_pack(uint64_t sign, uint64_t exp, uint64_t mantissa) { + uint64_t bits = (sign << 63) | (exp << 52) | mantissa; + return rawbits_to_double(bits); +} + + +int float16classify(float16 value) { + uint16_t exponent_max = (1 << 5) - 1; + uint16_t exponent_mask = exponent_max << 10; + uint16_t mantissa_mask = (1 << 10) - 1; + + uint16_t exponent = (value & exponent_mask) >> 10; + uint16_t mantissa = value & mantissa_mask; + if (exponent == 0) { + if (mantissa == 0) { + return FP_ZERO; + } + return FP_SUBNORMAL; + } else if (exponent == exponent_max) { + if (mantissa == 0) { + return FP_INFINITE; + } + return FP_NAN; + } + return FP_NORMAL; +} + + +unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size) { + VIXL_ASSERT((reg_size % 8) == 0); + int count = 0; + for (unsigned i = 0; i < (reg_size / 16); i++) { + if ((imm & 0xffff) == 0) { + count++; + } + imm >>= 16; + } + return count; +} + +} // namespace vixl diff --git a/disas/libvixl/vixl/utils.h b/disas/libvixl/vixl/utils.h new file mode 100644 index 00000000000..ecb0f1014ab --- /dev/null +++ b/disas/libvixl/vixl/utils.h @@ -0,0 +1,286 @@ +// Copyright 2015, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_UTILS_H +#define VIXL_UTILS_H + +#include +#include +#include "vixl/globals.h" +#include "vixl/compiler-intrinsics.h" + +namespace vixl { + +// Macros for compile-time format checking. +#if GCC_VERSION_OR_NEWER(4, 4, 0) +#define PRINTF_CHECK(format_index, varargs_index) \ + __attribute__((format(gnu_printf, format_index, varargs_index))) +#else +#define PRINTF_CHECK(format_index, varargs_index) +#endif + +// Check number width. +inline bool is_intn(unsigned n, int64_t x) { + VIXL_ASSERT((0 < n) && (n < 64)); + int64_t limit = INT64_C(1) << (n - 1); + return (-limit <= x) && (x < limit); +} + +inline bool is_uintn(unsigned n, int64_t x) { + VIXL_ASSERT((0 < n) && (n < 64)); + return !(x >> n); +} + +inline uint32_t truncate_to_intn(unsigned n, int64_t x) { + VIXL_ASSERT((0 < n) && (n < 64)); + return static_cast(x & ((INT64_C(1) << n) - 1)); +} + +#define INT_1_TO_63_LIST(V) \ +V(1) V(2) V(3) V(4) V(5) V(6) V(7) V(8) \ +V(9) V(10) V(11) V(12) V(13) V(14) V(15) V(16) \ +V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24) \ +V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32) \ +V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40) \ +V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48) \ +V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56) \ +V(57) V(58) V(59) V(60) V(61) V(62) V(63) + +#define DECLARE_IS_INT_N(N) \ +inline bool is_int##N(int64_t x) { return is_intn(N, x); } +#define DECLARE_IS_UINT_N(N) \ +inline bool is_uint##N(int64_t x) { return is_uintn(N, x); } +#define DECLARE_TRUNCATE_TO_INT_N(N) \ +inline uint32_t truncate_to_int##N(int x) { return truncate_to_intn(N, x); } +INT_1_TO_63_LIST(DECLARE_IS_INT_N) +INT_1_TO_63_LIST(DECLARE_IS_UINT_N) +INT_1_TO_63_LIST(DECLARE_TRUNCATE_TO_INT_N) +#undef DECLARE_IS_INT_N +#undef DECLARE_IS_UINT_N +#undef DECLARE_TRUNCATE_TO_INT_N + +// Bit field extraction. 
+inline uint32_t unsigned_bitextract_32(int msb, int lsb, uint32_t x) { + return (x >> lsb) & ((1 << (1 + msb - lsb)) - 1); +} + +inline uint64_t unsigned_bitextract_64(int msb, int lsb, uint64_t x) { + return (x >> lsb) & ((static_cast(1) << (1 + msb - lsb)) - 1); +} + +inline int32_t signed_bitextract_32(int msb, int lsb, int32_t x) { + return (x << (31 - msb)) >> (lsb + 31 - msb); +} + +inline int64_t signed_bitextract_64(int msb, int lsb, int64_t x) { + return (x << (63 - msb)) >> (lsb + 63 - msb); +} + +// Floating point representation. +uint32_t float_to_rawbits(float value); +uint64_t double_to_rawbits(double value); +float rawbits_to_float(uint32_t bits); +double rawbits_to_double(uint64_t bits); + +uint32_t float_sign(float val); +uint32_t float_exp(float val); +uint32_t float_mantissa(float val); +uint32_t double_sign(double val); +uint32_t double_exp(double val); +uint64_t double_mantissa(double val); + +float float_pack(uint32_t sign, uint32_t exp, uint32_t mantissa); +double double_pack(uint64_t sign, uint64_t exp, uint64_t mantissa); + +// An fpclassify() function for 16-bit half-precision floats. +int float16classify(float16 value); + +// NaN tests. +inline bool IsSignallingNaN(double num) { + const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000); + uint64_t raw = double_to_rawbits(num); + if (std::isnan(num) && ((raw & kFP64QuietNaNMask) == 0)) { + return true; + } + return false; +} + + +inline bool IsSignallingNaN(float num) { + const uint32_t kFP32QuietNaNMask = 0x00400000; + uint32_t raw = float_to_rawbits(num); + if (std::isnan(num) && ((raw & kFP32QuietNaNMask) == 0)) { + return true; + } + return false; +} + + +inline bool IsSignallingNaN(float16 num) { + const uint16_t kFP16QuietNaNMask = 0x0200; + return (float16classify(num) == FP_NAN) && + ((num & kFP16QuietNaNMask) == 0); +} + + +template +inline bool IsQuietNaN(T num) { + return std::isnan(num) && !IsSignallingNaN(num); +} + + +// Convert the NaN in 'num' to a quiet NaN. +inline double ToQuietNaN(double num) { + const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000); + VIXL_ASSERT(std::isnan(num)); + return rawbits_to_double(double_to_rawbits(num) | kFP64QuietNaNMask); +} + + +inline float ToQuietNaN(float num) { + const uint32_t kFP32QuietNaNMask = 0x00400000; + VIXL_ASSERT(std::isnan(num)); + return rawbits_to_float(float_to_rawbits(num) | kFP32QuietNaNMask); +} + + +// Fused multiply-add. 
+inline double FusedMultiplyAdd(double op1, double op2, double a) { + return fma(op1, op2, a); +} + + +inline float FusedMultiplyAdd(float op1, float op2, float a) { + return fmaf(op1, op2, a); +} + + +inline uint64_t LowestSetBit(uint64_t value) { + return value & -value; +} + + +template +inline int HighestSetBitPosition(T value) { + VIXL_ASSERT(value != 0); + return (sizeof(value) * 8 - 1) - CountLeadingZeros(value); +} + + +template +inline int WhichPowerOf2(V value) { + VIXL_ASSERT(IsPowerOf2(value)); + return CountTrailingZeros(value); +} + + +unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size); + + +template +T ReverseBits(T value) { + VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) || + (sizeof(value) == 4) || (sizeof(value) == 8)); + T result = 0; + for (unsigned i = 0; i < (sizeof(value) * 8); i++) { + result = (result << 1) | (value & 1); + value >>= 1; + } + return result; +} + + +template +T ReverseBytes(T value, int block_bytes_log2) { + VIXL_ASSERT((sizeof(value) == 4) || (sizeof(value) == 8)); + VIXL_ASSERT((1U << block_bytes_log2) <= sizeof(value)); + // Split the 64-bit value into an 8-bit array, where b[0] is the least + // significant byte, and b[7] is the most significant. + uint8_t bytes[8]; + uint64_t mask = UINT64_C(0xff00000000000000); + for (int i = 7; i >= 0; i--) { + bytes[i] = (static_cast(value) & mask) >> (i * 8); + mask >>= 8; + } + + // Permutation tables for REV instructions. + // permute_table[0] is used by REV16_x, REV16_w + // permute_table[1] is used by REV32_x, REV_w + // permute_table[2] is used by REV_x + VIXL_ASSERT((0 < block_bytes_log2) && (block_bytes_log2 < 4)); + static const uint8_t permute_table[3][8] = { {6, 7, 4, 5, 2, 3, 0, 1}, + {4, 5, 6, 7, 0, 1, 2, 3}, + {0, 1, 2, 3, 4, 5, 6, 7} }; + T result = 0; + for (int i = 0; i < 8; i++) { + result <<= 8; + result |= bytes[permute_table[block_bytes_log2 - 1][i]]; + } + return result; +} + + +// Pointer alignment +// TODO: rename/refactor to make it specific to instructions. +template +bool IsWordAligned(T pointer) { + VIXL_ASSERT(sizeof(pointer) == sizeof(intptr_t)); // NOLINT(runtime/sizeof) + return ((intptr_t)(pointer) & 3) == 0; +} + +// Increment a pointer (up to 64 bits) until it has the specified alignment. +template +T AlignUp(T pointer, size_t alignment) { + // Use C-style casts to get static_cast behaviour for integral types (T), and + // reinterpret_cast behaviour for other types. + + uint64_t pointer_raw = (uint64_t)pointer; + VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw)); + + size_t align_step = (alignment - pointer_raw) % alignment; + VIXL_ASSERT((pointer_raw + align_step) % alignment == 0); + + return (T)(pointer_raw + align_step); +} + +// Decrement a pointer (up to 64 bits) until it has the specified alignment. +template +T AlignDown(T pointer, size_t alignment) { + // Use C-style casts to get static_cast behaviour for integral types (T), and + // reinterpret_cast behaviour for other types. + + uint64_t pointer_raw = (uint64_t)pointer; + VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw)); + + size_t align_step = pointer_raw % alignment; + VIXL_ASSERT((pointer_raw - align_step) % alignment == 0); + + return (T)(pointer_raw - align_step); +} + +} // namespace vixl + +#endif // VIXL_UTILS_H diff --git a/disas/lm32.c b/disas/lm32.c deleted file mode 100644 index 4fbb1245348..00000000000 --- a/disas/lm32.c +++ /dev/null @@ -1,361 +0,0 @@ -/* - * Simple LatticeMico32 disassembler. 
- * - * Copyright (c) 2012 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - */ - -#include "qemu/osdep.h" -#include "disas/dis-asm.h" - -typedef enum { - LM32_OP_SRUI = 0, LM32_OP_NORI, LM32_OP_MULI, LM32_OP_SH, LM32_OP_LB, - LM32_OP_SRI, LM32_OP_XORI, LM32_OP_LH, LM32_OP_ANDI, LM32_OP_XNORI, - LM32_OP_LW, LM32_OP_LHU, LM32_OP_SB, LM32_OP_ADDI, LM32_OP_ORI, - LM32_OP_SLI, LM32_OP_LBU, LM32_OP_BE, LM32_OP_BG, LM32_OP_BGE, - LM32_OP_BGEU, LM32_OP_BGU, LM32_OP_SW, LM32_OP_BNE, LM32_OP_ANDHI, - LM32_OP_CMPEI, LM32_OP_CMPGI, LM32_OP_CMPGEI, LM32_OP_CMPGEUI, - LM32_OP_CMPGUI, LM32_OP_ORHI, LM32_OP_CMPNEI, LM32_OP_SRU, LM32_OP_NOR, - LM32_OP_MUL, LM32_OP_DIVU, LM32_OP_RCSR, LM32_OP_SR, LM32_OP_XOR, - LM32_OP_ILL0, LM32_OP_AND, LM32_OP_XNOR, LM32_OP_ILL1, LM32_OP_SCALL, - LM32_OP_SEXTB, LM32_OP_ADD, LM32_OP_OR, LM32_OP_SL, LM32_OP_B, - LM32_OP_MODU, LM32_OP_SUB, LM32_OP_ILL2, LM32_OP_WCSR, LM32_OP_ILL3, - LM32_OP_CALL, LM32_OP_SEXTH, LM32_OP_BI, LM32_OP_CMPE, LM32_OP_CMPG, - LM32_OP_CMPGE, LM32_OP_CMPGEU, LM32_OP_CMPGU, LM32_OP_CALLI, LM32_OP_CMPNE, -} Lm32Opcode; - -typedef enum { - FMT_INVALID = 0, FMT_RRI5, FMT_RRI16, FMT_IMM26, FMT_LOAD, FMT_STORE, - FMT_RRR, FMT_R, FMT_RNR, FMT_CRN, FMT_CNR, FMT_BREAK, -} Lm32OpcodeFmt; - -typedef enum { - LM32_CSR_IE = 0, LM32_CSR_IM, LM32_CSR_IP, LM32_CSR_ICC, LM32_CSR_DCC, - LM32_CSR_CC, LM32_CSR_CFG, LM32_CSR_EBA, LM32_CSR_DC, LM32_CSR_DEBA, - LM32_CSR_CFG2, LM32_CSR_JTX = 0xe, LM32_CSR_JRX, LM32_CSR_BP0, - LM32_CSR_BP1, LM32_CSR_BP2, LM32_CSR_BP3, LM32_CSR_WP0 = 0x18, - LM32_CSR_WP1, LM32_CSR_WP2, LM32_CSR_WP3, -} Lm32CsrNum; - -typedef struct { - int csr; - const char *name; -} Lm32CsrInfo; - -static const Lm32CsrInfo lm32_csr_info[] = { - {LM32_CSR_IE, "ie", }, - {LM32_CSR_IM, "im", }, - {LM32_CSR_IP, "ip", }, - {LM32_CSR_ICC, "icc", }, - {LM32_CSR_DCC, "dcc", }, - {LM32_CSR_CC, "cc", }, - {LM32_CSR_CFG, "cfg", }, - {LM32_CSR_EBA, "eba", }, - {LM32_CSR_DC, "dc", }, - {LM32_CSR_DEBA, "deba", }, - {LM32_CSR_CFG2, "cfg2", }, - {LM32_CSR_JTX, "jtx", }, - {LM32_CSR_JRX, "jrx", }, - {LM32_CSR_BP0, "bp0", }, - {LM32_CSR_BP1, "bp1", }, - {LM32_CSR_BP2, "bp2", }, - {LM32_CSR_BP3, "bp3", }, - {LM32_CSR_WP0, "wp0", }, - {LM32_CSR_WP1, "wp1", }, - {LM32_CSR_WP2, "wp2", }, - {LM32_CSR_WP3, "wp3", }, -}; - -static const Lm32CsrInfo *find_csr_info(int csr) -{ - const Lm32CsrInfo *info; - int i; - - for (i = 0; i < ARRAY_SIZE(lm32_csr_info); i++) { - info = &lm32_csr_info[i]; - if (csr == info->csr) { - return info; - } - } - - return NULL; -} - -typedef struct { - int reg; - const char *name; -} Lm32RegInfo; - -typedef enum { - LM32_REG_R0 = 0, LM32_REG_R1, LM32_REG_R2, LM32_REG_R3, LM32_REG_R4, - LM32_REG_R5, LM32_REG_R6, LM32_REG_R7, LM32_REG_R8, LM32_REG_R9, - LM32_REG_R10, LM32_REG_R11, LM32_REG_R12, LM32_REG_R13, LM32_REG_R14, - LM32_REG_R15, LM32_REG_R16, LM32_REG_R17, LM32_REG_R18, LM32_REG_R19, - LM32_REG_R20, LM32_REG_R21, LM32_REG_R22, 
LM32_REG_R23, LM32_REG_R24, - LM32_REG_R25, LM32_REG_GP, LM32_REG_FP, LM32_REG_SP, LM32_REG_RA, - LM32_REG_EA, LM32_REG_BA, -} Lm32RegNum; - -static const Lm32RegInfo lm32_reg_info[] = { - {LM32_REG_R0, "r0", }, - {LM32_REG_R1, "r1", }, - {LM32_REG_R2, "r2", }, - {LM32_REG_R3, "r3", }, - {LM32_REG_R4, "r4", }, - {LM32_REG_R5, "r5", }, - {LM32_REG_R6, "r6", }, - {LM32_REG_R7, "r7", }, - {LM32_REG_R8, "r8", }, - {LM32_REG_R9, "r9", }, - {LM32_REG_R10, "r10", }, - {LM32_REG_R11, "r11", }, - {LM32_REG_R12, "r12", }, - {LM32_REG_R13, "r13", }, - {LM32_REG_R14, "r14", }, - {LM32_REG_R15, "r15", }, - {LM32_REG_R16, "r16", }, - {LM32_REG_R17, "r17", }, - {LM32_REG_R18, "r18", }, - {LM32_REG_R19, "r19", }, - {LM32_REG_R20, "r20", }, - {LM32_REG_R21, "r21", }, - {LM32_REG_R22, "r22", }, - {LM32_REG_R23, "r23", }, - {LM32_REG_R24, "r24", }, - {LM32_REG_R25, "r25", }, - {LM32_REG_GP, "gp", }, - {LM32_REG_FP, "fp", }, - {LM32_REG_SP, "sp", }, - {LM32_REG_RA, "ra", }, - {LM32_REG_EA, "ea", }, - {LM32_REG_BA, "ba", }, -}; - -static const Lm32RegInfo *find_reg_info(int reg) -{ - assert(ARRAY_SIZE(lm32_reg_info) == 32); - return &lm32_reg_info[reg & 0x1f]; -} - -typedef struct { - struct { - uint32_t code; - uint32_t mask; - } op; - const char *name; - const char *args_fmt; -} Lm32OpcodeInfo; - -static const Lm32OpcodeInfo lm32_opcode_info[] = { - /* pseudo instructions */ - {{0x34000000, 0xffffffff}, "nop", NULL}, - {{0xac000002, 0xffffffff}, "break", NULL}, - {{0xac000003, 0xffffffff}, "scall", NULL}, - {{0xc3e00000, 0xffffffff}, "bret", NULL}, - {{0xc3c00000, 0xffffffff}, "eret", NULL}, - {{0xc3a00000, 0xffffffff}, "ret", NULL}, - {{0xa4000000, 0xfc1f07ff}, "not", "%2, %0"}, - {{0xb8000000, 0xfc1f07ff}, "mv", "%2, %0"}, - {{0x71e00000, 0xffe00000}, "mvhi", "%1, %u"}, - {{0x34000000, 0xffe00000}, "mvi", "%1, %s"}, - -#define _O(op) {op << 26, 0x3f << 26} - /* regular opcodes */ - {_O(LM32_OP_ADD), "add", "%2, %0, %1" }, - {_O(LM32_OP_ADDI), "addi", "%1, %0, %s" }, - {_O(LM32_OP_AND), "and", "%2, %0, %1" }, - {_O(LM32_OP_ANDHI), "andhi", "%1, %0, %u" }, - {_O(LM32_OP_ANDI), "andi", "%1, %0, %u" }, - {_O(LM32_OP_B), "b", "%0", }, - {_O(LM32_OP_BE), "be", "%1, %0, %r" }, - {_O(LM32_OP_BG), "bg", "%1, %0, %r" }, - {_O(LM32_OP_BGE), "bge", "%1, %0, %r" }, - {_O(LM32_OP_BGEU), "bgeu", "%1, %0, %r" }, - {_O(LM32_OP_BGU), "bgu", "%1, %0, %r" }, - {_O(LM32_OP_BI), "bi", "%R", }, - {_O(LM32_OP_BNE), "bne", "%1, %0, %r" }, - {_O(LM32_OP_CALL), "call", "%0", }, - {_O(LM32_OP_CALLI), "calli", "%R", }, - {_O(LM32_OP_CMPE), "cmpe", "%2, %0, %1" }, - {_O(LM32_OP_CMPEI), "cmpei", "%1, %0, %s" }, - {_O(LM32_OP_CMPG), "cmpg", "%2, %0, %1" }, - {_O(LM32_OP_CMPGE), "cmpge", "%2, %0, %1" }, - {_O(LM32_OP_CMPGEI), "cmpgei", "%1, %0, %s" }, - {_O(LM32_OP_CMPGEU), "cmpgeu", "%2, %0, %1" }, - {_O(LM32_OP_CMPGEUI), "cmpgeui", "%1, %0, %s" }, - {_O(LM32_OP_CMPGI), "cmpgi", "%1, %0, %s" }, - {_O(LM32_OP_CMPGU), "cmpgu", "%2, %0, %1" }, - {_O(LM32_OP_CMPGUI), "cmpgui", "%1, %0, %s" }, - {_O(LM32_OP_CMPNE), "cmpne", "%2, %0, %1" }, - {_O(LM32_OP_CMPNEI), "cmpnei", "%1, %0, %s" }, - {_O(LM32_OP_DIVU), "divu", "%2, %0, %1" }, - {_O(LM32_OP_LB), "lb", "%1, (%0+%s)" }, - {_O(LM32_OP_LBU), "lbu", "%1, (%0+%s)" }, - {_O(LM32_OP_LH), "lh", "%1, (%0+%s)" }, - {_O(LM32_OP_LHU), "lhu", "%1, (%0+%s)" }, - {_O(LM32_OP_LW), "lw", "%1, (%0+%s)" }, - {_O(LM32_OP_MODU), "modu", "%2, %0, %1" }, - {_O(LM32_OP_MULI), "muli", "%1, %0, %s" }, - {_O(LM32_OP_MUL), "mul", "%2, %0, %1" }, - {_O(LM32_OP_NORI), "nori", "%1, %0, %u" }, - {_O(LM32_OP_NOR), "nor", 
"%2, %0, %1" }, - {_O(LM32_OP_ORHI), "orhi", "%1, %0, %u" }, - {_O(LM32_OP_ORI), "ori", "%1, %0, %u" }, - {_O(LM32_OP_OR), "or", "%2, %0, %1" }, - {_O(LM32_OP_RCSR), "rcsr", "%2, %c", }, - {_O(LM32_OP_SB), "sb", "(%0+%s), %1" }, - {_O(LM32_OP_SEXTB), "sextb", "%2, %0", }, - {_O(LM32_OP_SEXTH), "sexth", "%2, %0", }, - {_O(LM32_OP_SH), "sh", "(%0+%s), %1" }, - {_O(LM32_OP_SLI), "sli", "%1, %0, %h" }, - {_O(LM32_OP_SL), "sl", "%2, %0, %1" }, - {_O(LM32_OP_SRI), "sri", "%1, %0, %h" }, - {_O(LM32_OP_SR), "sr", "%2, %0, %1" }, - {_O(LM32_OP_SRUI), "srui", "%1, %0, %d" }, - {_O(LM32_OP_SRU), "sru", "%2, %0, %s" }, - {_O(LM32_OP_SUB), "sub", "%2, %0, %s" }, - {_O(LM32_OP_SW), "sw", "(%0+%s), %1" }, - {_O(LM32_OP_WCSR), "wcsr", "%c, %1", }, - {_O(LM32_OP_XNORI), "xnori", "%1, %0, %u" }, - {_O(LM32_OP_XNOR), "xnor", "%2, %0, %1" }, - {_O(LM32_OP_XORI), "xori", "%1, %0, %u" }, - {_O(LM32_OP_XOR), "xor", "%2, %0, %1" }, -#undef _O -}; - -static const Lm32OpcodeInfo *find_opcode_info(uint32_t opcode) -{ - const Lm32OpcodeInfo *info; - int i; - for (i = 0; i < ARRAY_SIZE(lm32_opcode_info); i++) { - info = &lm32_opcode_info[i]; - if ((opcode & info->op.mask) == info->op.code) { - return info; - } - } - - return NULL; -} - -int print_insn_lm32(bfd_vma memaddr, struct disassemble_info *info) -{ - fprintf_function fprintf_fn = info->fprintf_func; - void *stream = info->stream; - int rc; - uint8_t insn[4]; - const Lm32OpcodeInfo *opc_info; - uint32_t op; - const char *args_fmt; - - rc = info->read_memory_func(memaddr, insn, 4, info); - if (rc != 0) { - info->memory_error_func(rc, memaddr, info); - return -1; - } - - fprintf_fn(stream, "%02x %02x %02x %02x ", - insn[0], insn[1], insn[2], insn[3]); - - op = bfd_getb32(insn); - opc_info = find_opcode_info(op); - if (opc_info) { - fprintf_fn(stream, "%-8s ", opc_info->name); - args_fmt = opc_info->args_fmt; - while (args_fmt && *args_fmt) { - if (*args_fmt == '%') { - switch (*(++args_fmt)) { - case '0': { - uint8_t r0; - const char *r0_name; - r0 = (op >> 21) & 0x1f; - r0_name = find_reg_info(r0)->name; - fprintf_fn(stream, "%s", r0_name); - break; - } - case '1': { - uint8_t r1; - const char *r1_name; - r1 = (op >> 16) & 0x1f; - r1_name = find_reg_info(r1)->name; - fprintf_fn(stream, "%s", r1_name); - break; - } - case '2': { - uint8_t r2; - const char *r2_name; - r2 = (op >> 11) & 0x1f; - r2_name = find_reg_info(r2)->name; - fprintf_fn(stream, "%s", r2_name); - break; - } - case 'c': { - uint8_t csr; - const Lm32CsrInfo *info; - csr = (op >> 21) & 0x1f; - info = find_csr_info(csr); - if (info) { - fprintf_fn(stream, "%s", info->name); - } else { - fprintf_fn(stream, "0x%x", csr); - } - break; - } - case 'u': { - uint16_t u16; - u16 = op & 0xffff; - fprintf_fn(stream, "0x%x", u16); - break; - } - case 's': { - int16_t s16; - s16 = (int16_t)(op & 0xffff); - fprintf_fn(stream, "%d", s16); - break; - } - case 'r': { - uint32_t rela; - rela = memaddr + (((int16_t)(op & 0xffff)) << 2); - fprintf_fn(stream, "%x", rela); - break; - } - case 'R': { - uint32_t rela; - int32_t imm26; - imm26 = (int32_t)((op & 0x3ffffff) << 6) >> 4; - rela = memaddr + imm26; - fprintf_fn(stream, "%x", rela); - break; - } - case 'h': { - uint8_t u5; - u5 = (op & 0x1f); - fprintf_fn(stream, "%d", u5); - break; - } - default: - break; - } - } else { - fprintf_fn(stream, "%c", *args_fmt); - } - args_fmt++; - } - } else { - fprintf_fn(stream, ".word 0x%x", op); - } - - return 4; -} diff --git a/disas/meson.build b/disas/meson.build index 4c8da018778..449f99e1de6 100644 --- a/disas/meson.build 
+++ b/disas/meson.build @@ -9,11 +9,9 @@ common_ss.add(when: 'CONFIG_CRIS_DIS', if_true: files('cris.c')) common_ss.add(when: 'CONFIG_HEXAGON_DIS', if_true: files('hexagon.c')) common_ss.add(when: 'CONFIG_HPPA_DIS', if_true: files('hppa.c')) common_ss.add(when: 'CONFIG_I386_DIS', if_true: files('i386.c')) -common_ss.add(when: 'CONFIG_LM32_DIS', if_true: files('lm32.c')) common_ss.add(when: 'CONFIG_M68K_DIS', if_true: files('m68k.c')) common_ss.add(when: 'CONFIG_MICROBLAZE_DIS', if_true: files('microblaze.c')) common_ss.add(when: 'CONFIG_MIPS_DIS', if_true: files('mips.c')) -common_ss.add(when: 'CONFIG_MOXIE_DIS', if_true: files('moxie.c')) common_ss.add(when: 'CONFIG_NANOMIPS_DIS', if_true: files('nanomips.cpp')) common_ss.add(when: 'CONFIG_NIOS2_DIS', if_true: files('nios2.c')) common_ss.add(when: 'CONFIG_PPC_DIS', if_true: files('ppc.c')) diff --git a/disas/moxie.c b/disas/moxie.c deleted file mode 100644 index e94ab4c33d8..00000000000 --- a/disas/moxie.c +++ /dev/null @@ -1,360 +0,0 @@ -/* Disassemble moxie instructions. - Copyright (c) 2009 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see . */ - -#include "qemu/osdep.h" -#define STATIC_TABLE -#define DEFINE_TABLE - -#include "disas/dis-asm.h" - -static void *stream; - -/* Form 1 instructions come in different flavors: - - Some have no arguments (MOXIE_F1_NARG) - Some only use the A operand (MOXIE_F1_A) - Some use A and B registers (MOXIE_F1_AB) - Some use A and consume a 4 byte immediate value (MOXIE_F1_A4) - Some use just a 4 byte immediate value (MOXIE_F1_4) - Some use just a 4 byte memory address (MOXIE_F1_M) - Some use B and an indirect A (MOXIE_F1_AiB) - Some use A and an indirect B (MOXIE_F1_ABi) - Some consume a 4 byte immediate value and use X (MOXIE_F1_4A) - Some use B and an indirect A plus 4 bytes (MOXIE_F1_AiB4) - Some use A and an indirect B plus 4 bytes (MOXIE_F1_ABi4) - - Form 2 instructions also come in different flavors: - - Some have no arguments (MOXIE_F2_NARG) - Some use the A register and an 8-bit value (MOXIE_F2_A8V) - - Form 3 instructions also come in different flavors: - - Some have no arguments (MOXIE_F3_NARG) - Some have a 10-bit PC relative operand (MOXIE_F3_PCREL). 
*/ - -#define MOXIE_F1_NARG 0x100 -#define MOXIE_F1_A 0x101 -#define MOXIE_F1_AB 0x102 -/* #define MOXIE_F1_ABC 0x103 */ -#define MOXIE_F1_A4 0x104 -#define MOXIE_F1_4 0x105 -#define MOXIE_F1_AiB 0x106 -#define MOXIE_F1_ABi 0x107 -#define MOXIE_F1_4A 0x108 -#define MOXIE_F1_AiB4 0x109 -#define MOXIE_F1_ABi4 0x10a -#define MOXIE_F1_M 0x10b - -#define MOXIE_F2_NARG 0x200 -#define MOXIE_F2_A8V 0x201 - -#define MOXIE_F3_NARG 0x300 -#define MOXIE_F3_PCREL 0x301 - -typedef struct moxie_opc_info_t { - short opcode; - unsigned itype; - const char * name; -} moxie_opc_info_t; - -extern const moxie_opc_info_t moxie_form1_opc_info[64]; -extern const moxie_opc_info_t moxie_form2_opc_info[4]; -extern const moxie_opc_info_t moxie_form3_opc_info[16]; - -/* The moxie processor's 16-bit instructions come in two forms: - - FORM 1 instructions start with a 0 bit... - - 0oooooooaaaabbbb - 0 F - - ooooooo - form 1 opcode number - aaaa - operand A - bbbb - operand B - - FORM 2 instructions start with bits "10"... - - 10ooaaaavvvvvvvv - 0 F - - oo - form 2 opcode number - aaaa - operand A - vvvvvvvv - 8-bit immediate value - - FORM 3 instructions start with a bits "11"... - - 11oooovvvvvvvvvv - 0 F - - oooo - form 3 opcode number - vvvvvvvvvv - 10-bit immediate value. */ - -const moxie_opc_info_t moxie_form1_opc_info[64] = - { - { 0x00, MOXIE_F1_NARG, "nop" }, - { 0x01, MOXIE_F1_A4, "ldi.l" }, - { 0x02, MOXIE_F1_AB, "mov" }, - { 0x03, MOXIE_F1_M, "jsra" }, - { 0x04, MOXIE_F1_NARG, "ret" }, - { 0x05, MOXIE_F1_AB, "add.l" }, - { 0x06, MOXIE_F1_AB, "push" }, - { 0x07, MOXIE_F1_AB, "pop" }, - { 0x08, MOXIE_F1_A4, "lda.l" }, - { 0x09, MOXIE_F1_4A, "sta.l" }, - { 0x0a, MOXIE_F1_ABi, "ld.l" }, - { 0x0b, MOXIE_F1_AiB, "st.l" }, - { 0x0c, MOXIE_F1_ABi4, "ldo.l" }, - { 0x0d, MOXIE_F1_AiB4, "sto.l" }, - { 0x0e, MOXIE_F1_AB, "cmp" }, - { 0x0f, MOXIE_F1_NARG, "bad" }, - { 0x10, MOXIE_F1_NARG, "bad" }, - { 0x11, MOXIE_F1_NARG, "bad" }, - { 0x12, MOXIE_F1_NARG, "bad" }, - { 0x13, MOXIE_F1_NARG, "bad" }, - { 0x14, MOXIE_F1_NARG, "bad" }, - { 0x15, MOXIE_F1_NARG, "bad" }, - { 0x16, MOXIE_F1_NARG, "bad" }, - { 0x17, MOXIE_F1_NARG, "bad" }, - { 0x18, MOXIE_F1_NARG, "bad" }, - { 0x19, MOXIE_F1_A, "jsr" }, - { 0x1a, MOXIE_F1_M, "jmpa" }, - { 0x1b, MOXIE_F1_A4, "ldi.b" }, - { 0x1c, MOXIE_F1_ABi, "ld.b" }, - { 0x1d, MOXIE_F1_A4, "lda.b" }, - { 0x1e, MOXIE_F1_AiB, "st.b" }, - { 0x1f, MOXIE_F1_4A, "sta.b" }, - { 0x20, MOXIE_F1_A4, "ldi.s" }, - { 0x21, MOXIE_F1_ABi, "ld.s" }, - { 0x22, MOXIE_F1_A4, "lda.s" }, - { 0x23, MOXIE_F1_AiB, "st.s" }, - { 0x24, MOXIE_F1_4A, "sta.s" }, - { 0x25, MOXIE_F1_A, "jmp" }, - { 0x26, MOXIE_F1_AB, "and" }, - { 0x27, MOXIE_F1_AB, "lshr" }, - { 0x28, MOXIE_F1_AB, "ashl" }, - { 0x29, MOXIE_F1_AB, "sub.l" }, - { 0x2a, MOXIE_F1_AB, "neg" }, - { 0x2b, MOXIE_F1_AB, "or" }, - { 0x2c, MOXIE_F1_AB, "not" }, - { 0x2d, MOXIE_F1_AB, "ashr" }, - { 0x2e, MOXIE_F1_AB, "xor" }, - { 0x2f, MOXIE_F1_AB, "mul.l" }, - { 0x30, MOXIE_F1_4, "swi" }, - { 0x31, MOXIE_F1_AB, "div.l" }, - { 0x32, MOXIE_F1_AB, "udiv.l" }, - { 0x33, MOXIE_F1_AB, "mod.l" }, - { 0x34, MOXIE_F1_AB, "umod.l" }, - { 0x35, MOXIE_F1_NARG, "brk" }, - { 0x36, MOXIE_F1_ABi4, "ldo.b" }, - { 0x37, MOXIE_F1_AiB4, "sto.b" }, - { 0x38, MOXIE_F1_ABi4, "ldo.s" }, - { 0x39, MOXIE_F1_AiB4, "sto.s" }, - { 0x3a, MOXIE_F1_NARG, "bad" }, - { 0x3b, MOXIE_F1_NARG, "bad" }, - { 0x3c, MOXIE_F1_NARG, "bad" }, - { 0x3d, MOXIE_F1_NARG, "bad" }, - { 0x3e, MOXIE_F1_NARG, "bad" }, - { 0x3f, MOXIE_F1_NARG, "bad" } - }; - -const moxie_opc_info_t moxie_form2_opc_info[4] = - { - { 0x00, 
MOXIE_F2_A8V, "inc" }, - { 0x01, MOXIE_F2_A8V, "dec" }, - { 0x02, MOXIE_F2_A8V, "gsr" }, - { 0x03, MOXIE_F2_A8V, "ssr" } - }; - -const moxie_opc_info_t moxie_form3_opc_info[16] = - { - { 0x00, MOXIE_F3_PCREL,"beq" }, - { 0x01, MOXIE_F3_PCREL,"bne" }, - { 0x02, MOXIE_F3_PCREL,"blt" }, - { 0x03, MOXIE_F3_PCREL,"bgt" }, - { 0x04, MOXIE_F3_PCREL,"bltu" }, - { 0x05, MOXIE_F3_PCREL,"bgtu" }, - { 0x06, MOXIE_F3_PCREL,"bge" }, - { 0x07, MOXIE_F3_PCREL,"ble" }, - { 0x08, MOXIE_F3_PCREL,"bgeu" }, - { 0x09, MOXIE_F3_PCREL,"bleu" }, - { 0x0a, MOXIE_F3_NARG, "bad" }, - { 0x0b, MOXIE_F3_NARG, "bad" }, - { 0x0c, MOXIE_F3_NARG, "bad" }, - { 0x0d, MOXIE_F3_NARG, "bad" }, - { 0x0e, MOXIE_F3_NARG, "bad" }, - { 0x0f, MOXIE_F3_NARG, "bad" } - }; - -/* Macros to extract operands from the instruction word. */ -#define OP_A(i) ((i >> 4) & 0xf) -#define OP_B(i) (i & 0xf) -#define INST2OFFSET(o) ((((signed short)((o & ((1<<10)-1))<<6))>>6)<<1) - -static const char * reg_names[16] = - { "$fp", "$sp", "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", - "$r6", "$r7", "$r8", "$r9", "$r10", "$r11", "$r12", "$r13" }; - -int -print_insn_moxie(bfd_vma addr, struct disassemble_info * info) -{ - int length = 2; - int status; - stream = info->stream; - const moxie_opc_info_t * opcode; - bfd_byte buffer[4]; - unsigned short iword; - fprintf_function fpr = info->fprintf_func; - - if ((status = info->read_memory_func(addr, buffer, 2, info))) - goto fail; - iword = (bfd_getb16(buffer) >> 16); - - /* Form 1 instructions have the high bit set to 0. */ - if ((iword & (1<<15)) == 0) { - /* Extract the Form 1 opcode. */ - opcode = &moxie_form1_opc_info[iword >> 8]; - switch (opcode->itype) { - case MOXIE_F1_NARG: - fpr(stream, "%s", opcode->name); - break; - case MOXIE_F1_A: - fpr(stream, "%s\t%s", opcode->name, - reg_names[OP_A(iword)]); - break; - case MOXIE_F1_AB: - fpr(stream, "%s\t%s, %s", opcode->name, - reg_names[OP_A(iword)], - reg_names[OP_B(iword)]); - break; - case MOXIE_F1_A4: - { - unsigned imm; - if ((status = info->read_memory_func(addr + 2, buffer, 4, info))) - goto fail; - imm = bfd_getb32(buffer); - fpr(stream, "%s\t%s, 0x%x", opcode->name, - reg_names[OP_A(iword)], imm); - length = 6; - } - break; - case MOXIE_F1_4: - { - unsigned imm; - if ((status = info->read_memory_func(addr + 2, buffer, 4, info))) - goto fail; - imm = bfd_getb32(buffer); - fpr(stream, "%s\t0x%x", opcode->name, imm); - length = 6; - } - break; - case MOXIE_F1_M: - { - unsigned imm; - if ((status = info->read_memory_func(addr + 2, buffer, 4, info))) - goto fail; - imm = bfd_getb32(buffer); - fpr(stream, "%s\t", opcode->name); - info->print_address_func((bfd_vma) imm, info); - length = 6; - } - break; - case MOXIE_F1_AiB: - fpr (stream, "%s\t(%s), %s", opcode->name, - reg_names[OP_A(iword)], reg_names[OP_B(iword)]); - break; - case MOXIE_F1_ABi: - fpr(stream, "%s\t%s, (%s)", opcode->name, - reg_names[OP_A(iword)], reg_names[OP_B(iword)]); - break; - case MOXIE_F1_4A: - { - unsigned imm; - if ((status = info->read_memory_func(addr + 2, buffer, 4, info))) - goto fail; - imm = bfd_getb32(buffer); - fpr(stream, "%s\t0x%x, %s", - opcode->name, imm, reg_names[OP_A(iword)]); - length = 6; - } - break; - case MOXIE_F1_AiB4: - { - unsigned imm; - if ((status = info->read_memory_func(addr+2, buffer, 4, info))) - goto fail; - imm = bfd_getb32(buffer); - fpr(stream, "%s\t0x%x(%s), %s", opcode->name, - imm, - reg_names[OP_A(iword)], - reg_names[OP_B(iword)]); - length = 6; - } - break; - case MOXIE_F1_ABi4: - { - unsigned imm; - if ((status = 
info->read_memory_func(addr+2, buffer, 4, info))) - goto fail; - imm = bfd_getb32(buffer); - fpr(stream, "%s\t%s, 0x%x(%s)", - opcode->name, - reg_names[OP_A(iword)], - imm, - reg_names[OP_B(iword)]); - length = 6; - } - break; - default: - abort(); - } - } - else if ((iword & (1<<14)) == 0) { - /* Extract the Form 2 opcode. */ - opcode = &moxie_form2_opc_info[(iword >> 12) & 3]; - switch (opcode->itype) { - case MOXIE_F2_A8V: - fpr(stream, "%s\t%s, 0x%x", - opcode->name, - reg_names[(iword >> 8) & 0xf], - iword & ((1 << 8) - 1)); - break; - case MOXIE_F2_NARG: - fpr(stream, "%s", opcode->name); - break; - default: - abort(); - } - } else { - /* Extract the Form 3 opcode. */ - opcode = &moxie_form3_opc_info[(iword >> 10) & 15]; - switch (opcode->itype) { - case MOXIE_F3_PCREL: - fpr(stream, "%s\t", opcode->name); - info->print_address_func((bfd_vma) (addr + INST2OFFSET(iword) + 2), - info); - break; - default: - abort(); - } - } - - return length; - - fail: - info->memory_error_func(status, addr, info); - return -1; -} diff --git a/disas/nanomips.cpp b/disas/nanomips.cpp index 8ddef897f0d..9be8df75dd6 100644 --- a/disas/nanomips.cpp +++ b/disas/nanomips.cpp @@ -28,9 +28,7 @@ */ #include "qemu/osdep.h" -extern "C" { #include "disas/dis-asm.h" -} #include #include diff --git a/disas/nios2.c b/disas/nios2.c index c3e82140c79..98ac07d72e9 100644 --- a/disas/nios2.c +++ b/disas/nios2.c @@ -3478,54 +3478,37 @@ nios2_disassemble (bfd_vma address, unsigned long opcode, instruction word at the address given, and prints the disassembled instruction on the stream info->stream using info->fprintf_func. */ -static int -print_insn_nios2 (bfd_vma address, disassemble_info *info, - enum bfd_endian endianness) +int print_insn_nios2(bfd_vma address, disassemble_info *info) { - bfd_byte buffer[INSNLEN]; - int status; - - status = (*info->read_memory_func) (address, buffer, INSNLEN, info); - if (status == 0) - { - unsigned long insn; - if (endianness == BFD_ENDIAN_BIG) - insn = (unsigned long) bfd_getb32 (buffer); - else - insn = (unsigned long) bfd_getl32 (buffer); - return nios2_disassemble (address, insn, info); + bfd_byte buffer[INSNLEN]; + int status; + + status = (*info->read_memory_func)(address, buffer, INSNLEN, info); + if (status == 0) { + unsigned long insn; + if (info->endian == BFD_ENDIAN_BIG) { + insn = (unsigned long) bfd_getb32(buffer); + } else { + insn = (unsigned long) bfd_getl32(buffer); + } + return nios2_disassemble(address, insn, info); } - /* We might have a 16-bit R2 instruction at the end of memory. Try that. */ - if (info->mach == bfd_mach_nios2r2) - { - status = (*info->read_memory_func) (address, buffer, 2, info); - if (status == 0) - { - unsigned long insn; - if (endianness == BFD_ENDIAN_BIG) - insn = (unsigned long) bfd_getb16 (buffer); - else - insn = (unsigned long) bfd_getl16 (buffer); - return nios2_disassemble (address, insn, info); - } + /* We might have a 16-bit R2 instruction at the end of memory. Try that. */ + if (info->mach == bfd_mach_nios2r2) { + status = (*info->read_memory_func)(address, buffer, 2, info); + if (status == 0) { + unsigned long insn; + if (info->endian == BFD_ENDIAN_BIG) { + insn = (unsigned long) bfd_getb16(buffer); + } else { + insn = (unsigned long) bfd_getl16(buffer); + } + return nios2_disassemble(address, insn, info); + } } - /* If we got here, we couldn't read anything. */ - (*info->memory_error_func) (status, address, info); - return -1; -} - -/* These two functions are the main entry points, accessed from - disassemble.c. 
*/ -int -print_insn_big_nios2 (bfd_vma address, disassemble_info *info) -{ - return print_insn_nios2 (address, info, BFD_ENDIAN_BIG); -} - -int -print_insn_little_nios2 (bfd_vma address, disassemble_info *info) -{ - return print_insn_nios2 (address, info, BFD_ENDIAN_LITTLE); + /* If we got here, we couldn't read anything. */ + (*info->memory_error_func)(status, address, info); + return -1; } diff --git a/disas/riscv.c b/disas/riscv.c index bccc2c1aac3..ba73dbaf70f 100644 --- a/disas/riscv.c +++ b/disas/riscv.c @@ -478,8 +478,51 @@ typedef enum { rv_op_fsflags = 316, rv_op_fsrmi = 317, rv_op_fsflagsi = 318, + rv_op_bseti = 319, + rv_op_bclri = 320, + rv_op_binvi = 321, + rv_op_bexti = 322, + rv_op_rori = 323, + rv_op_clz = 324, + rv_op_ctz = 325, + rv_op_cpop = 326, + rv_op_sext_h = 327, + rv_op_sext_b = 328, + rv_op_xnor = 329, + rv_op_orn = 330, + rv_op_andn = 331, + rv_op_rol = 332, + rv_op_ror = 333, + rv_op_sh1add = 334, + rv_op_sh2add = 335, + rv_op_sh3add = 336, + rv_op_sh1add_uw = 337, + rv_op_sh2add_uw = 338, + rv_op_sh3add_uw = 339, + rv_op_clmul = 340, + rv_op_clmulr = 341, + rv_op_clmulh = 342, + rv_op_min = 343, + rv_op_minu = 344, + rv_op_max = 345, + rv_op_maxu = 346, + rv_op_clzw = 347, + rv_op_ctzw = 348, + rv_op_cpopw = 349, + rv_op_slli_uw = 350, + rv_op_add_uw = 351, + rv_op_rolw = 352, + rv_op_rorw = 353, + rv_op_rev8 = 354, + rv_op_zext_h = 355, + rv_op_roriw = 356, + rv_op_orc_b = 357, + rv_op_bset = 358, + rv_op_bclr = 359, + rv_op_binv = 360, + rv_op_bext = 361, // CHERI: - rv_op_auipcc = 319, + rv_op_auipcc = 362, rv_op_lc, rv_op_clc, rv_op_clb, @@ -545,7 +588,6 @@ typedef enum { rv_op_cfsw, rv_op_cfld, rv_op_cfsd, - } rv_op; /* structures */ @@ -1209,6 +1251,49 @@ const rv_opcode_data opcode_data[] = { { "fsflags", rv_codec_i_csr, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, { "fsrmi", rv_codec_i_csr, rv_fmt_rd_zimm, NULL, 0, 0, 0 }, { "fsflagsi", rv_codec_i_csr, rv_fmt_rd_zimm, NULL, 0, 0, 0 }, + { "bseti", rv_codec_i_sh7, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 }, + { "bclri", rv_codec_i_sh7, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 }, + { "binvi", rv_codec_i_sh7, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 }, + { "bexti", rv_codec_i_sh7, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 }, + { "rori", rv_codec_i_sh7, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 }, + { "clz", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "ctz", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "cpop", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "sext.h", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "sext.b", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "xnor", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "orn", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "andn", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "rol", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "ror", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "sh1add", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "sh2add", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "sh3add", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "sh1add.uw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "sh2add.uw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "sh3add.uw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "clmul", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "clmulr", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "clmulh", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "min", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "minu", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "max", rv_codec_r, 
rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "maxu", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "clzw", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "clzw", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "cpopw", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "slli.uw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "add.uw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "rolw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "rorw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "rev8", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "zext.h", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "roriw", rv_codec_i_sh5, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 }, + { "orc.b", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "bset", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "bclr", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "binv", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "bext", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, // CHERI extensions [rv_op_auipcc] = { "auipcc", rv_codec_u, rv_fmt_cd_offset, NULL, 0, 0, 0 }, @@ -1746,7 +1831,20 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa, int flags) case 0: op = rv_op_addi; break; case 1: switch (((inst >> 27) & 0b11111)) { - case 0: op = rv_op_slli; break; + case 0b00000: op = rv_op_slli; break; + case 0b00101: op = rv_op_bseti; break; + case 0b01001: op = rv_op_bclri; break; + case 0b01101: op = rv_op_binvi; break; + case 0b01100: + switch (((inst >> 20) & 0b1111111)) { + case 0b0000000: op = rv_op_clz; break; + case 0b0000001: op = rv_op_ctz; break; + case 0b0000010: op = rv_op_cpop; break; + /* 0b0000011 */ + case 0b0000100: op = rv_op_sext_b; break; + case 0b0000101: op = rv_op_sext_h; break; + } + break; } break; case 2: op = rv_op_slti; break; @@ -1754,8 +1852,16 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa, int flags) case 4: op = rv_op_xori; break; case 5: switch (((inst >> 27) & 0b11111)) { - case 0: op = rv_op_srli; break; - case 8: op = rv_op_srai; break; + case 0b00000: op = rv_op_srli; break; + case 0b00101: op = rv_op_orc_b; break; + case 0b01000: op = rv_op_srai; break; + case 0b01001: op = rv_op_bexti; break; + case 0b01100: op = rv_op_rori; break; + case 0b01101: + switch ((inst >> 20) & 0b1111111) { + case 0b0111000: op = rv_op_rev8; break; + } + break; } break; case 6: op = rv_op_ori; break; @@ -1771,12 +1877,21 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa, int flags) case 1: switch (((inst >> 25) & 0b1111111)) { case 0: op = rv_op_slliw; break; + case 4: op = rv_op_slli_uw; break; + case 48: + switch ((inst >> 20) & 0b11111) { + case 0b00000: op = rv_op_clzw; break; + case 0b00001: op = rv_op_ctzw; break; + case 0b00010: op = rv_op_cpopw; break; + } + break; } break; case 5: switch (((inst >> 25) & 0b1111111)) { case 0: op = rv_op_srliw; break; case 32: op = rv_op_sraiw; break; + case 48: op = rv_op_roriw; break; } break; } @@ -1876,8 +1991,32 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa, int flags) case 13: op = rv_op_divu; break; case 14: op = rv_op_rem; break; case 15: op = rv_op_remu; break; + case 36: + switch ((inst >> 20) & 0b11111) { + case 0: op = rv_op_zext_h; break; + } + break; + case 41: op = rv_op_clmul; break; + case 42: op = rv_op_clmulr; break; + case 43: op = rv_op_clmulh; break; + case 44: op = rv_op_min; break; + case 45: op = rv_op_minu; break; + case 46: op = rv_op_max; break; + case 47: op = rv_op_maxu; break; + case 130: op = rv_op_sh1add; break; + case 132: op = rv_op_sh2add; break; + case 
134: op = rv_op_sh3add; break; + case 161: op = rv_op_bset; break; case 256: op = rv_op_sub; break; + case 260: op = rv_op_xnor; break; case 261: op = rv_op_sra; break; + case 262: op = rv_op_orn; break; + case 263: op = rv_op_andn; break; + case 289: op = rv_op_bclr; break; + case 293: op = rv_op_bext; break; + case 385: op = rv_op_rol; break; + case 386: op = rv_op_ror; break; + case 417: op = rv_op_binv; break; } break; case 13: op = rv_op_lui; break; @@ -1891,8 +2030,19 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa, int flags) case 13: op = rv_op_divuw; break; case 14: op = rv_op_remw; break; case 15: op = rv_op_remuw; break; + case 32: op = rv_op_add_uw; break; + case 36: + switch ((inst >> 20) & 0b11111) { + case 0: op = rv_op_zext_h; break; + } + break; + case 130: op = rv_op_sh1add_uw; break; + case 132: op = rv_op_sh2add_uw; break; + case 134: op = rv_op_sh3add_uw; break; case 256: op = rv_op_subw; break; case 261: op = rv_op_sraw; break; + case 385: op = rv_op_rolw; break; + case 389: op = rv_op_rorw; break; } break; case 16: diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt index 8d6d53a5a2c..8ec653f81cf 100644 --- a/docs/COLO-FT.txt +++ b/docs/COLO-FT.txt @@ -209,9 +209,9 @@ children.0=childs0 \ 3. On Secondary VM's QEMU monitor, issue command -{'execute':'qmp_capabilities'} -{'execute': 'nbd-server-start', 'arguments': {'addr': {'type': 'inet', 'data': {'host': '0.0.0.0', 'port': '9999'} } } } -{'execute': 'nbd-server-add', 'arguments': {'device': 'parent0', 'writable': true } } +{"execute":"qmp_capabilities"} +{"execute": "nbd-server-start", "arguments": {"addr": {"type": "inet", "data": {"host": "0.0.0.0", "port": "9999"} } } } +{"execute": "nbd-server-add", "arguments": {"device": "parent0", "writable": true } } Note: a. The qmp command nbd-server-start and nbd-server-add must be run @@ -222,11 +222,11 @@ Note: will be merged into the parent disk on failover. 4. On Primary VM's QEMU monitor, issue command: -{'execute':'qmp_capabilities'} -{'execute': 'human-monitor-command', 'arguments': {'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0'}} -{'execute': 'x-blockdev-change', 'arguments':{'parent': 'colo-disk0', 'node': 'replication0' } } -{'execute': 'migrate-set-capabilities', 'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } ] } } -{'execute': 'migrate', 'arguments': {'uri': 'tcp:127.0.0.2:9998' } } +{"execute":"qmp_capabilities"} +{"execute": "human-monitor-command", "arguments": {"command-line": "drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0"}} +{"execute": "x-blockdev-change", "arguments":{"parent": "colo-disk0", "node": "replication0" } } +{"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ {"capability": "x-colo", "state": true } ] } } +{"execute": "migrate", "arguments": {"uri": "tcp:127.0.0.2:9998" } } Note: a. There should be only one NBD Client for each primary disk. 
@@ -249,59 +249,59 @@ if you want to resume the replication, follow "Secondary resume replication" == Primary Failover == The Secondary died, resume on the Primary -{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'child': 'children.1'} } -{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_del replication0' } } -{'execute': 'object-del', 'arguments':{ 'id': 'comp0' } } -{'execute': 'object-del', 'arguments':{ 'id': 'iothread1' } } -{'execute': 'object-del', 'arguments':{ 'id': 'm0' } } -{'execute': 'object-del', 'arguments':{ 'id': 'redire0' } } -{'execute': 'object-del', 'arguments':{ 'id': 'redire1' } } -{'execute': 'x-colo-lost-heartbeat' } +{"execute": "x-blockdev-change", "arguments":{ "parent": "colo-disk0", "child": "children.1"} } +{"execute": "human-monitor-command", "arguments":{ "command-line": "drive_del replication0" } } +{"execute": "object-del", "arguments":{ "id": "comp0" } } +{"execute": "object-del", "arguments":{ "id": "iothread1" } } +{"execute": "object-del", "arguments":{ "id": "m0" } } +{"execute": "object-del", "arguments":{ "id": "redire0" } } +{"execute": "object-del", "arguments":{ "id": "redire1" } } +{"execute": "x-colo-lost-heartbeat" } == Secondary Failover == The Primary died, resume on the Secondary and prepare to become the new Primary -{'execute': 'nbd-server-stop'} -{'execute': 'x-colo-lost-heartbeat'} +{"execute": "nbd-server-stop"} +{"execute": "x-colo-lost-heartbeat"} -{'execute': 'object-del', 'arguments':{ 'id': 'f2' } } -{'execute': 'object-del', 'arguments':{ 'id': 'f1' } } -{'execute': 'chardev-remove', 'arguments':{ 'id': 'red1' } } -{'execute': 'chardev-remove', 'arguments':{ 'id': 'red0' } } +{"execute": "object-del", "arguments":{ "id": "f2" } } +{"execute": "object-del", "arguments":{ "id": "f1" } } +{"execute": "chardev-remove", "arguments":{ "id": "red1" } } +{"execute": "chardev-remove", "arguments":{ "id": "red0" } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'mirror0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '0.0.0.0', 'port': '9003' } }, 'server': true } } } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'compare1', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '0.0.0.0', 'port': '9004' } }, 'server': true } } } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'compare0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9001' } }, 'server': true } } } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'compare0-0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9001' } }, 'server': false } } } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'compare_out', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9005' } }, 'server': true } } } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'compare_out0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9005' } }, 'server': false } } } } +{"execute": "chardev-add", "arguments":{ "id": "mirror0", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "0.0.0.0", "port": "9003" } }, "server": true } } } } +{"execute": "chardev-add", "arguments":{ "id": "compare1", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "0.0.0.0", "port": "9004" } }, "server": true } } } } +{"execute": 
"chardev-add", "arguments":{ "id": "compare0", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "127.0.0.1", "port": "9001" } }, "server": true } } } } +{"execute": "chardev-add", "arguments":{ "id": "compare0-0", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "127.0.0.1", "port": "9001" } }, "server": false } } } } +{"execute": "chardev-add", "arguments":{ "id": "compare_out", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "127.0.0.1", "port": "9005" } }, "server": true } } } } +{"execute": "chardev-add", "arguments":{ "id": "compare_out0", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "127.0.0.1", "port": "9005" } }, "server": false } } } } == Primary resume replication == Resume replication after new Secondary is up. Start the new Secondary (Steps 2 and 3 above), then on the Primary: -{'execute': 'drive-mirror', 'arguments':{ 'device': 'colo-disk0', 'job-id': 'resync', 'target': 'nbd://127.0.0.2:9999/parent0', 'mode': 'existing', 'format': 'raw', 'sync': 'full'} } +{"execute": "drive-mirror", "arguments":{ "device": "colo-disk0", "job-id": "resync", "target": "nbd://127.0.0.2:9999/parent0", "mode": "existing", "format": "raw", "sync": "full"} } Wait until disk is synced, then: -{'execute': 'stop'} -{'execute': 'block-job-cancel', 'arguments':{ 'device': 'resync'} } +{"execute": "stop"} +{"execute": "block-job-cancel", "arguments":{ "device": "resync"} } -{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0'}} -{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'node': 'replication0' } } +{"execute": "human-monitor-command", "arguments":{ "command-line": "drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0"}} +{"execute": "x-blockdev-change", "arguments":{ "parent": "colo-disk0", "node": "replication0" } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-mirror', 'id': 'm0', 'props': { 'netdev': 'hn0', 'queue': 'tx', 'outdev': 'mirror0' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire0', 'props': { 'netdev': 'hn0', 'queue': 'rx', 'indev': 'compare_out' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire1', 'props': { 'netdev': 'hn0', 'queue': 'rx', 'outdev': 'compare0' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'iothread', 'id': 'iothread1' } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'colo-compare', 'id': 'comp0', 'props': { 'primary_in': 'compare0-0', 'secondary_in': 'compare1', 'outdev': 'compare_out0', 'iothread': 'iothread1' } } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-mirror", "id": "m0", "netdev": "hn0", "queue": "tx", "outdev": "mirror0" } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-redirector", "id": "redire0", "netdev": "hn0", "queue": "rx", "indev": "compare_out" } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-redirector", "id": "redire1", "netdev": "hn0", "queue": "rx", "outdev": "compare0" } } +{"execute": "object-add", "arguments":{ "qom-type": "iothread", "id": "iothread1" } } +{"execute": "object-add", "arguments":{ "qom-type": "colo-compare", "id": "comp0", "primary_in": 
"compare0-0", "secondary_in": "compare1", "outdev": "compare_out0", "iothread": "iothread1" } } -{'execute': 'migrate-set-capabilities', 'arguments':{ 'capabilities': [ {'capability': 'x-colo', 'state': true } ] } } -{'execute': 'migrate', 'arguments':{ 'uri': 'tcp:127.0.0.2:9998' } } +{"execute": "migrate-set-capabilities", "arguments":{ "capabilities": [ {"capability": "x-colo", "state": true } ] } } +{"execute": "migrate", "arguments":{ "uri": "tcp:127.0.0.2:9998" } } Note: If this Primary previously was a Secondary, then we need to insert the filters before the filter-rewriter by using the -"'insert': 'before', 'position': 'id=rew0'" Options. See below. +""insert": "before", "position": "id=rew0"" Options. See below. == Secondary resume replication == Become Primary and resume replication after new Secondary is up. Note @@ -309,23 +309,23 @@ that now 127.0.0.1 is the Secondary and 127.0.0.2 is the Primary. Start the new Secondary (Steps 2 and 3 above, but with primary_ip=127.0.0.2), then on the old Secondary: -{'execute': 'drive-mirror', 'arguments':{ 'device': 'colo-disk0', 'job-id': 'resync', 'target': 'nbd://127.0.0.1:9999/parent0', 'mode': 'existing', 'format': 'raw', 'sync': 'full'} } +{"execute": "drive-mirror", "arguments":{ "device": "colo-disk0", "job-id": "resync", "target": "nbd://127.0.0.1:9999/parent0", "mode": "existing", "format": "raw", "sync": "full"} } Wait until disk is synced, then: -{'execute': 'stop'} -{'execute': 'block-job-cancel', 'arguments':{ 'device': 'resync' } } +{"execute": "stop"} +{"execute": "block-job-cancel", "arguments":{ "device": "resync" } } -{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.1,file.port=9999,file.export=parent0,node-name=replication0'}} -{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'node': 'replication0' } } +{"execute": "human-monitor-command", "arguments":{ "command-line": "drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.1,file.port=9999,file.export=parent0,node-name=replication0"}} +{"execute": "x-blockdev-change", "arguments":{ "parent": "colo-disk0", "node": "replication0" } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-mirror', 'id': 'm0', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'tx', 'outdev': 'mirror0' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire0', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'rx', 'indev': 'compare_out' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire1', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'rx', 'outdev': 'compare0' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'iothread', 'id': 'iothread1' } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'colo-compare', 'id': 'comp0', 'props': { 'primary_in': 'compare0-0', 'secondary_in': 'compare1', 'outdev': 'compare_out0', 'iothread': 'iothread1' } } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-mirror", "id": "m0", "insert": "before", "position": "id=rew0", "netdev": "hn0", "queue": "tx", "outdev": "mirror0" } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-redirector", "id": "redire0", "insert": "before", "position": "id=rew0", "netdev": "hn0", "queue": "rx", "indev": "compare_out" } } +{"execute": "object-add", 
"arguments":{ "qom-type": "filter-redirector", "id": "redire1", "insert": "before", "position": "id=rew0", "netdev": "hn0", "queue": "rx", "outdev": "compare0" } } +{"execute": "object-add", "arguments":{ "qom-type": "iothread", "id": "iothread1" } } +{"execute": "object-add", "arguments":{ "qom-type": "colo-compare", "id": "comp0", "primary_in": "compare0-0", "secondary_in": "compare1", "outdev": "compare_out0", "iothread": "iothread1" } } -{'execute': 'migrate-set-capabilities', 'arguments':{ 'capabilities': [ {'capability': 'x-colo', 'state': true } ] } } -{'execute': 'migrate', 'arguments':{ 'uri': 'tcp:127.0.0.1:9998' } } +{"execute": "migrate-set-capabilities", "arguments":{ "capabilities": [ {"capability": "x-colo", "state": true } ] } } +{"execute": "migrate", "arguments":{ "uri": "tcp:127.0.0.1:9998" } } == TODO == 1. Support shared storage. diff --git a/docs/_templates/editpage.html b/docs/_templates/editpage.html deleted file mode 100644 index 4319b0f5ac8..00000000000 --- a/docs/_templates/editpage.html +++ /dev/null @@ -1,5 +0,0 @@ -
- -
diff --git a/docs/_templates/footer.html b/docs/_templates/footer.html new file mode 100644 index 00000000000..977053b5415 --- /dev/null +++ b/docs/_templates/footer.html @@ -0,0 +1,14 @@ +{% extends "!footer.html" %} +{% block extrafooter %} + + +

+ +

This documentation is for QEMU version {{ version }}.

+ +{% trans path=pathto('about/license') %} +

QEMU and this manual are released under the +GNU General Public License, version 2.

+{% endtrans %} +{{ super() }} +{% endblock %} diff --git a/docs/about/build-platforms.rst b/docs/about/build-platforms.rst new file mode 100644 index 00000000000..c29a4b8fe64 --- /dev/null +++ b/docs/about/build-platforms.rst @@ -0,0 +1,97 @@ +.. _Supported-build-platforms: + +Supported build platforms +========================= + +QEMU aims to support building and executing on multiple host OS +platforms. This appendix outlines which platforms are the major build +targets. These platforms are used as the basis for deciding upon the +minimum required versions of 3rd party software QEMU depends on. The +supported platforms are the targets for automated testing performed by +the project when patches are submitted for review, and tested before and +after merge. + +If a platform is not listed here, it does not imply that QEMU won't +work. If an unlisted platform has comparable software versions to a +listed platform, there is every expectation that it will work. Bug +reports are welcome for problems encountered on unlisted platforms +unless they are clearly older vintage than what is described here. + +Note that when considering software versions shipped in distros as +support targets, QEMU considers only the version number, and assumes the +features in that distro match the upstream release with the same +version. In other words, if a distro backports extra features to the +software in their distro, QEMU upstream code will not add explicit +support for those backports, unless the feature is auto-detectable in a +manner that works for the upstream releases too. + +The `Repology`_ site is a useful resource to identify +currently shipped versions of software in various operating systems, +though it does not cover all distros listed below. + +Supported host architectures +---------------------------- + +Those hosts are officially supported, with various accelerators: + + .. list-table:: + :header-rows: 1 + + * - CPU Architecture + - Accelerators + * - Arm + - kvm (64 bit only), tcg, xen + * - MIPS + - kvm, tcg + * - PPC + - kvm, tcg + * - RISC-V + - tcg + * - s390x + - kvm, tcg + * - SPARC + - tcg + * - x86 + - hax, hvf (64 bit only), kvm, nvmm, tcg, whpx (64 bit only), xen + +Other host architectures are not supported. It is possible to build QEMU system +emulation on an unsupported host architecture using the configure +``--enable-tcg-interpreter`` option to enable the TCI support, but note that +this is very slow and is not recommended for normal use. QEMU user emulation +requires host-specific support for signal handling, therefore TCI won't help +on unsupported host architectures. + +Non-supported architectures may be removed in the future following the +:ref:`deprecation process`. + +Linux OS, macOS, FreeBSD, NetBSD, OpenBSD +----------------------------------------- + +The project aims to support the most recent major version at all times. Support +for the previous major version will be dropped 2 years after the new major +version is released or when the vendor itself drops support, whichever comes +first. In this context, third-party efforts to extend the lifetime of a distro +are not considered, even when they are endorsed by the vendor (eg. Debian LTS). + +For the purposes of identifying supported software versions available on Linux, +the project will look at CentOS, Debian, Fedora, openSUSE, RHEL, SLES and +Ubuntu LTS. Other distros will be assumed to ship similar software versions. 
+ +For FreeBSD and OpenBSD, decisions will be made based on the contents of the +respective ports repository, while NetBSD will use the pkgsrc repository. + +For macOS, `HomeBrew`_ will be used, although `MacPorts`_ is expected to carry +similar versions. + +Windows +------- + +The project supports building with current versions of the MinGW toolchain, +hosted on Linux (Debian/Fedora). + +The version of the Windows API that's currently targeted is Vista / Server +2008. + +.. _HomeBrew: https://brew.sh/ +.. _MacPorts: https://www.macports.org/ +.. _Repology: https://repology.org/ diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst new file mode 100644 index 00000000000..ff7488cb63b --- /dev/null +++ b/docs/about/deprecated.rst @@ -0,0 +1,440 @@ +.. _Deprecated features: + +Deprecated features +=================== + +In general features are intended to be supported indefinitely once +introduced into QEMU. In the event that a feature needs to be removed, +it will be listed in this section. The feature will remain functional for the +release in which it was deprecated and one further release. After these two +releases, the feature is liable to be removed. Deprecated features may also +generate warnings on the console when QEMU starts up, or if activated via a +monitor command, however, this is not a mandatory requirement. + +Prior to the 2.10.0 release there was no official policy on how +long features would be deprecated prior to their removal, nor +any documented list of which features were deprecated. Thus +any features deprecated prior to 2.10.0 will be treated as if +they were first deprecated in the 2.10.0 release. + +What follows is a list of all features currently marked as +deprecated. + +System emulator command line arguments +-------------------------------------- + +``QEMU_AUDIO_`` environment variables and ``-audio-help`` (since 4.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``-audiodev`` argument is now the preferred way to specify audio +backend settings instead of environment variables. To ease migration to +the new format, the ``-audiodev-help`` option can be used to convert +the current values of the environment variables to ``-audiodev`` options. + +Creating sound card devices and vnc without ``audiodev=`` property (since 4.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +When not using the deprecated legacy audio config, each sound card +should specify an ``audiodev=`` property. Additionally, when using +vnc, you should specify an ``audiodev=`` property if you plan to +transmit audio through the VNC protocol. + +Creating sound card devices using ``-soundhw`` (since 5.1) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Sound card devices should be created using ``-device`` instead. The +names are the same for most devices. The exceptions are ``hda`` which +needs two devices (``-device intel-hda -device hda-duplex``) and +``pcspk`` which can be activated using ``-machine +pcspk-audiodev=``. + +``-chardev`` backend aliases ``tty`` and ``parport`` (since 6.0) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +``tty`` and ``parport`` are aliases that will be removed. Instead, the +actual backend names ``serial`` and ``parallel`` should be used. + +Short-form boolean options (since 6.0) +'''''''''''''''''''''''''''''''''''''' + +Boolean options such as ``share=on``/``share=off`` could be written +in short form as ``share`` and ``noshare``. 
This is now deprecated
+and will cause a warning.
+
+``delay`` option for socket character devices (since 6.0)
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+The replacement for the ``nodelay`` short-form boolean option is ``nodelay=on``
+rather than ``delay=off``.
+
+``--enable-fips`` (since 6.0)
+'''''''''''''''''''''''''''''
+
+This option restricts usage of certain cryptographic algorithms when
+the host is operating in FIPS mode.
+
+If FIPS compliance is required, QEMU should be built with the ``libgcrypt``
+library enabled as a cryptography provider.
+
+Neither the ``nettle`` library nor the built-in cryptography provider is
+supported on FIPS-enabled hosts.
+
+``-writeconfig`` (since 6.0)
+'''''''''''''''''''''''''''''
+
+The ``-writeconfig`` option is not able to serialize the entire contents
+of the QEMU command line. It is thus considered a failed experiment
+and deprecated, with no current replacement.
+
+Userspace local APIC with KVM (x86, since 6.0)
+''''''''''''''''''''''''''''''''''''''''''''''
+
+Using ``-M kernel-irqchip=off`` with x86 machine types that include a local
+APIC is deprecated. The ``split`` setting is supported, as is using
+``-M kernel-irqchip=off`` with the ISA PC machine type.
+
+hexadecimal sizes with scaling multipliers (since 6.0)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Input parameters that take a size value should only use a size suffix
+(such as 'k' or 'M') when the base is written in decimal, and not when
+the value is hexadecimal. That is, '0x20M' is deprecated, and should
+be written either as '32M' or as '0x2000000'.
+
+``-spice password=string`` (since 6.0)
+''''''''''''''''''''''''''''''''''''''
+
+This option is insecure because the SPICE password remains visible in
+the process listing. It is replaced by the new ``password-secret``
+option, which lets the password be securely provided on the command
+line using a ``secret`` object instance.
+
+``opened`` property of ``rng-*`` objects (since 6.0)
+''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+The only effect of specifying ``opened=on`` in the command line or QMP
+``object-add`` is that the device is opened immediately, possibly before all
+other options have been processed. This will either have no effect (if
+``opened`` was the last option) or cause errors. The property is therefore
+useless and should not be specified.
+
+``loaded`` property of ``secret`` and ``secret_keyring`` objects (since 6.0)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+The only effect of specifying ``loaded=on`` in the command line or QMP
+``object-add`` is that the secret is loaded immediately, possibly before all
+other options have been processed. This will either have no effect (if
+``loaded`` was the last option) or cause options to be effectively ignored as
+if they were not given. The property is therefore useless and should not be
+specified.
+
+``-display sdl,window_close=...`` (since 6.1)
+'''''''''''''''''''''''''''''''''''''''''''''
+
+Use ``-display sdl,window-close=...`` instead (i.e. with a minus instead of
+an underscore between "window" and "close").
+
+``-no-quit`` (since 6.1)
+''''''''''''''''''''''''
+
+The ``-no-quit`` option is a synonym for ``-display ...,window-close=off``,
+which should be used instead.
+
+``-alt-grab`` and ``-display sdl,alt_grab=on`` (since 6.2)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use ``-display sdl,grab-mod=lshift-lctrl-lalt`` instead.
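+
+For example (a sketch; the binary name is a placeholder), a command line that
+used the deprecated spellings::
+
+  qemu-system-x86_64 -display sdl,window_close=off,alt_grab=on
+
+can be written with the current option names as::
+
+  qemu-system-x86_64 -display sdl,window-close=off,grab-mod=lshift-lctrl-lalt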
+
+``-ctrl-grab`` and ``-display sdl,ctrl_grab=on`` (since 6.2)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use ``-display sdl,grab-mod=rctrl`` instead.
+
+``-sdl`` (since 6.2)
+''''''''''''''''''''
+
+Use ``-display sdl`` instead.
+
+``-curses`` (since 6.2)
+'''''''''''''''''''''''
+
+Use ``-display curses`` instead.
+
+``-watchdog`` (since 6.2)
+'''''''''''''''''''''''''
+
+Use ``-device`` instead.
+
+``-smp`` ("parameter=0" SMP configurations) (since 6.2)
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Specified CPU topology parameters must be greater than zero.
+
+In the SMP configuration, users should either provide a CPU topology
+parameter with a reasonable value (greater than zero) or just omit it
+and QEMU will compute the missing value.
+
+However, historically it was implicitly allowed for users to provide
+a parameter with zero value, which is meaningless and could also possibly
+cause unexpected results in the -smp parsing. So support for this kind of
+configuration (e.g. -smp 8,sockets=0) is deprecated since 6.2 and will
+be removed in the near future; users have to ensure that all the topology
+members described with -smp are greater than zero.
+
+Plugin argument passing through ``arg=`` (since 6.1)
+''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Passing TCG plugin arguments through ``arg=`` is redundant and makes the
+command-line less readable, especially when the argument itself consists of a
+name and a value, e.g. ``-plugin plugin_name,arg="arg_name=arg_value"``.
+Therefore, the usage of ``arg`` is redundant. Single-word arguments are treated
+as short-form boolean values, and passed to plugins as ``arg_name=on``.
+However, short-form booleans are deprecated and the full explicit
+``arg_name=on`` form is preferred.
+
+``-drive if=none`` for the sifive_u OTP device (since 6.2)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Using ``-drive if=none`` to configure the OTP device of the sifive_u
+RISC-V machine is deprecated. Use ``-drive if=pflash`` instead.
+
+
+QEMU Machine Protocol (QMP) commands
+------------------------------------
+
+``blockdev-open-tray``, ``blockdev-close-tray`` argument ``device`` (since 2.8)
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use argument ``id`` instead.
+
+``eject`` argument ``device`` (since 2.8)
+'''''''''''''''''''''''''''''''''''''''''
+
+Use argument ``id`` instead.
+
+``blockdev-change-medium`` argument ``device`` (since 2.8)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use argument ``id`` instead.
+
+``block_set_io_throttle`` argument ``device`` (since 2.8)
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use argument ``id`` instead.
+
+``blockdev-add`` empty string argument ``backing`` (since 2.10)
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use argument value ``null`` instead.
+
+``block-commit`` arguments ``base`` and ``top`` (since 3.1)
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use arguments ``base-node`` and ``top-node`` instead.
+
+``nbd-server-add`` and ``nbd-server-remove`` (since 5.2)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use the more generic commands ``block-export-add`` and ``block-export-del``
+instead. As part of this deprecation, where ``nbd-server-add`` used a
+single ``bitmap``, the new ``block-export-add`` uses a list of ``bitmaps``.
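+
+As a rough sketch of the mapping (node, export and bitmap names are
+placeholders), an export previously created with::
+
+  { "execute": "nbd-server-add",
+    "arguments": { "device": "node0", "bitmap": "bitmap0" } }
+
+would instead be created with something like::
+
+  { "execute": "block-export-add",
+    "arguments": { "type": "nbd", "id": "export0",
+                   "node-name": "node0", "bitmaps": ["bitmap0"] } }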
+ +``query-qmp-schema`` return value member ``values`` (since 6.2) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Member ``values`` in return value elements with meta-type ``enum`` is +deprecated. Use ``members`` instead. + +``drive-backup`` (since 6.2) +'''''''''''''''''''''''''''' + +Use ``blockdev-backup`` in combination with ``blockdev-add`` instead. +This change primarily separates the creation/opening process of the backup +target with explicit, separate steps. ``blockdev-backup`` uses mostly the +same arguments as ``drive-backup``, except the ``format`` and ``mode`` +options are removed in favor of using explicit ``blockdev-create`` and +``blockdev-add`` calls. See :doc:`/interop/live-block-operations` for +details. + +Incorrectly typed ``device_add`` arguments (since 6.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Due to shortcomings in the internal implementation of ``device_add``, QEMU +incorrectly accepts certain invalid arguments: Any object or list arguments are +silently ignored. Other argument types are not checked, but an implicit +conversion happens, so that e.g. string values can be assigned to integer +device properties or vice versa. + +This is a bug in QEMU that will be fixed in the future so that previously +accepted incorrect commands will return an error. Users should make sure that +all arguments passed to ``device_add`` are consistent with the documented +property types. + +System accelerators +------------------- + +MIPS ``Trap-and-Emul`` KVM support (since 6.0) +'''''''''''''''''''''''''''''''''''''''''''''' + +The MIPS ``Trap-and-Emul`` KVM host and guest support has been removed +from Linux upstream kernel, declare it deprecated. + +System emulator CPUS +-------------------- + +``Icelake-Client`` CPU Model (since 5.2) +'''''''''''''''''''''''''''''''''''''''' + +``Icelake-Client`` CPU Models are deprecated. Use ``Icelake-Server`` CPU +Models instead. + +MIPS ``I7200`` CPU Model (since 5.2) +'''''''''''''''''''''''''''''''''''' + +The ``I7200`` guest CPU relies on the nanoMIPS ISA, which is deprecated +(the ISA has never been upstreamed to a compiler toolchain). Therefore +this CPU is also deprecated. + + +QEMU API (QAPI) events +---------------------- + +``MEM_UNPLUG_ERROR`` (since 6.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use the more generic event ``DEVICE_UNPLUG_GUEST_ERROR`` instead. + + +System emulator machines +------------------------ + +Aspeed ``swift-bmc`` machine (since 6.1) +'''''''''''''''''''''''''''''''''''''''' + +This machine is deprecated because we have enough AST2500 based OpenPOWER +machines. It can be easily replaced by the ``witherspoon-bmc`` or the +``romulus-bmc`` machines. + +Backend options +--------------- + +Using non-persistent backing file with pmem=on (since 6.1) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +This option is used when ``memory-backend-file`` is consumed by emulated NVDIMM +device. However enabling ``memory-backend-file.pmem`` option, when backing file +is (a) not DAX capable or (b) not on a filesystem that support direct mapping +of persistent memory, is not safe and may lead to data loss or corruption in case +of host crash. +Options are: + + - modify VM configuration to set ``pmem=off`` to continue using fake NVDIMM + (without persistence guaranties) with backing file on non DAX storage + - move backing file to NVDIMM storage and keep ``pmem=on`` + (to have NVDIMM with persistence guaranties). 
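+
+As an illustrative sketch (object, device and path names are placeholders), the
+first of these options corresponds to a backend definition along the lines of::
+
+  -object memory-backend-file,id=mem1,share=on,mem-path=/path/to/backing,size=4G,pmem=off \
+  -device nvdimm,id=nvdimm1,memdev=mem1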
+ +Device options +-------------- + +Emulated device options +''''''''''''''''''''''' + +``-device virtio-blk,scsi=on|off`` (since 5.0) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The virtio-blk SCSI passthrough feature is a legacy VIRTIO feature. VIRTIO 1.0 +and later do not support it because the virtio-scsi device was introduced for +full SCSI support. Use virtio-scsi instead when SCSI passthrough is required. + +Note this also applies to ``-device virtio-blk-pci,scsi=on|off``, which is an +alias. + +``-device sga`` (since 6.2) +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``sga`` device loads an option ROM for x86 targets which enables +SeaBIOS to send messages to the serial console. SeaBIOS 1.11.0 onwards +contains native support for this feature and thus use of the option +ROM approach is obsolete. The native SeaBIOS support can be activated +by using ``-machine graphics=off``. + + +Block device options +'''''''''''''''''''' + +``"backing": ""`` (since 2.12) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In order to prevent QEMU from automatically opening an image's backing +chain, use ``"backing": null`` instead. + +``rbd`` keyvalue pair encoded filenames: ``""`` (since 3.1) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Options for ``rbd`` should be specified according to its runtime options, +like other block drivers. Legacy parsing of keyvalue pair encoded +filenames is useful to open images with the old format for backing files; +These image files should be updated to use the current format. + +Example of legacy encoding:: + + json:{"file.driver":"rbd", "file.filename":"rbd:rbd/name"} + +The above, converted to the current supported format:: + + json:{"file.driver":"rbd", "file.pool":"rbd", "file.image":"name"} + +linux-user mode CPUs +-------------------- + +``ppc64abi32`` CPUs (since 5.2) +''''''''''''''''''''''''''''''' + +The ``ppc64abi32`` architecture has a number of issues which regularly +trip up our CI testing and is suspected to be quite broken. For that +reason the maintainers strongly suspect no one actually uses it. + +MIPS ``I7200`` CPU (since 5.2) +'''''''''''''''''''''''''''''' + +The ``I7200`` guest CPU relies on the nanoMIPS ISA, which is deprecated +(the ISA has never been upstreamed to a compiler toolchain). Therefore +this CPU is also deprecated. + +Backwards compatibility +----------------------- + +Runnability guarantee of CPU models (since 4.1) +''''''''''''''''''''''''''''''''''''''''''''''' + +Previous versions of QEMU never changed existing CPU models in +ways that introduced additional host software or hardware +requirements to the VM. This allowed management software to +safely change the machine type of an existing VM without +introducing new requirements ("runnability guarantee"). This +prevented CPU models from being updated to include CPU +vulnerability mitigations, leaving guests vulnerable in the +default configuration. + +The CPU model runnability guarantee won't apply anymore to +existing CPU models. Management software that needs runnability +guarantees must resolve the CPU model aliases using the +``alias-of`` field returned by the ``query-cpu-definitions`` QMP +command. + +While those guarantees are kept, the return value of +``query-cpu-definitions`` will have existing CPU model aliases +point to a version that doesn't break runnability guarantees +(specifically, version 1 of those CPU models). 
In future QEMU +versions, aliases will point to newer CPU model versions +depending on the machine type, so management software must +resolve CPU model aliases before starting a virtual machine. + +Guest Emulator ISAs +------------------- + +nanoMIPS ISA +'''''''''''' + +The ``nanoMIPS`` ISA has never been upstreamed to any compiler toolchain. +As it is hard to generate binaries for it, declare it deprecated. diff --git a/docs/about/index.rst b/docs/about/index.rst new file mode 100644 index 00000000000..5bea653c07c --- /dev/null +++ b/docs/about/index.rst @@ -0,0 +1,28 @@ +---------- +About QEMU +---------- + +QEMU is a generic and open source machine emulator and virtualizer. + +QEMU can be used in several different ways. The most common is for +"system emulation", where it provides a virtual model of an +entire machine (CPU, memory and emulated devices) to run a guest OS. +In this mode the CPU may be fully emulated, or it may work with +a hypervisor such as KVM, Xen, Hax or Hypervisor.Framework to +allow the guest to run directly on the host CPU. + +The second supported way to use QEMU is "user mode emulation", +where QEMU can launch processes compiled for one CPU on another CPU. +In this mode the CPU is always emulated. + +QEMU also provides a number of standalone commandline utilities, +such as the ``qemu-img`` disk image utility that allows you to create, +convert and modify disk images. + +.. toctree:: + :maxdepth: 2 + + build-platforms + deprecated + removed-features + license diff --git a/docs/system/license.rst b/docs/about/license.rst similarity index 100% rename from docs/system/license.rst rename to docs/about/license.rst diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst new file mode 100644 index 00000000000..d42c3341dee --- /dev/null +++ b/docs/about/removed-features.rst @@ -0,0 +1,705 @@ + +Removed features +================ + +What follows is a record of recently removed, formerly deprecated +features that serves as a record for users who have encountered +trouble after a recent upgrade. + +System emulator command line arguments +-------------------------------------- + +``-hdachs`` (removed in 2.12) +''''''''''''''''''''''''''''' + +The geometry defined by ``-hdachs c,h,s,t`` should now be specified via +``-device ide-hd,drive=dr,cyls=c,heads=h,secs=s,bios-chs-trans=t`` +(together with ``-drive if=none,id=dr,...``). + +``-net channel`` (removed in 2.12) +'''''''''''''''''''''''''''''''''' + +This option has been replaced by ``-net user,guestfwd=...``. + +``-net dump`` (removed in 2.12) +''''''''''''''''''''''''''''''' + +``-net dump[,vlan=n][,file=filename][,len=maxlen]`` has been replaced by +``-object filter-dump,id=id,netdev=dev[,file=filename][,maxlen=maxlen]``. +Note that the new syntax works with netdev IDs instead of the old "vlan" hubs. + +``-no-kvm-pit`` (removed in 2.12) +''''''''''''''''''''''''''''''''' + +This was just a dummy option that has been ignored, since the in-kernel PIT +cannot be disabled separately from the irqchip anymore. A similar effect +(which also disables the KVM IOAPIC) can be obtained with +``-M kernel_irqchip=split``. + +``-tdf`` (removed in 2.12) +'''''''''''''''''''''''''' + +There is no replacement, the ``-tdf`` option has just been ignored since the +behaviour that could be changed by this option in qemu-kvm is now the default +when using the KVM PIT. It still can be requested explicitly using +``-global kvm-pit.lost_tick_policy=delay``. 
+ +``-drive secs=s``, ``-drive heads=h`` & ``-drive cyls=c`` (removed in 3.0) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The drive geometry should now be specified via +``-device ...,drive=dr,cyls=c,heads=h,secs=s`` (together with +``-drive if=none,id=dr,...``). + +``-drive serial=``, ``-drive trans=`` & ``-drive addr=`` (removed in 3.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use ``-device ...,drive=dr,serial=r,bios-chs-trans=t,addr=a`` instead +(together with ``-drive if=none,id=dr,...``). + +``-net ...,vlan=x`` (removed in 3.0) +'''''''''''''''''''''''''''''''''''' + +The term "vlan" was very confusing for most users in this context (it's about +specifying a hub ID, not about IEEE 802.1Q or something similar), so this +has been removed. To connect one NIC frontend with a network backend, either +use ``-nic ...`` (e.g. for on-board NICs) or use ``-netdev ...,id=n`` together +with ``-device ...,netdev=n`` (for full control over pluggable NICs). To +connect multiple NICs or network backends via a hub device (which is what +vlan did), use ``-nic hubport,hubid=x,...`` or +``-netdev hubport,id=n,hubid=x,...`` (with ``-device ...,netdev=n``) instead. + +``-no-kvm-irqchip`` (removed in 3.0) +'''''''''''''''''''''''''''''''''''' + +Use ``-machine kernel_irqchip=off`` instead. + +``-no-kvm-pit-reinjection`` (removed in 3.0) +'''''''''''''''''''''''''''''''''''''''''''' + +Use ``-global kvm-pit.lost_tick_policy=discard`` instead. + +``-balloon`` (removed in 3.1) +''''''''''''''''''''''''''''' + +The ``-balloon virtio`` option has been replaced by ``-device virtio-balloon``. +The ``-balloon none`` option was a no-op and has no replacement. + +``-bootp`` (removed in 3.1) +''''''''''''''''''''''''''' + +The ``-bootp /some/file`` argument is replaced by either +``-netdev user,id=x,bootp=/some/file`` (for pluggable NICs, accompanied with +``-device ...,netdev=x``), or ``-nic user,bootp=/some/file`` (for on-board NICs). +The new syntax allows different settings to be provided per NIC. + +``-redir`` (removed in 3.1) +''''''''''''''''''''''''''' + +The ``-redir [tcp|udp]:hostport:[guestaddr]:guestport`` option is replaced +by either ``-netdev +user,id=x,hostfwd=[tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport`` +(for pluggable NICs, accompanied with ``-device ...,netdev=x``) or by the option +``-nic user,hostfwd=[tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport`` +(for on-board NICs). The new syntax allows different settings to be provided +per NIC. + +``-smb`` (removed in 3.1) +''''''''''''''''''''''''' + +The ``-smb /some/dir`` argument is replaced by either +``-netdev user,id=x,smb=/some/dir`` (for pluggable NICs, accompanied with +``-device ...,netdev=x``), or ``-nic user,smb=/some/dir`` (for on-board NICs). +The new syntax allows different settings to be provided per NIC. + +``-tftp`` (removed in 3.1) +'''''''''''''''''''''''''' + +The ``-tftp /some/dir`` argument is replaced by either +``-netdev user,id=x,tftp=/some/dir`` (for pluggable NICs, accompanied with +``-device ...,netdev=x``), or ``-nic user,tftp=/some/dir`` (for embedded NICs). +The new syntax allows different settings to be provided per NIC. + +``-localtime`` (removed in 3.1) +''''''''''''''''''''''''''''''' + +Replaced by ``-rtc base=localtime``. + +``-nodefconfig`` (removed in 3.1) +''''''''''''''''''''''''''''''''' + +Use ``-no-user-config`` instead. 
+
+``-rtc-td-hack`` (removed in 3.1)
+'''''''''''''''''''''''''''''''''
+
+Use ``-rtc driftfix=slew`` instead.
+
+``-startdate`` (removed in 3.1)
+'''''''''''''''''''''''''''''''
+
+Replaced by ``-rtc base=date``.
+
+``-vnc ...,tls=...``, ``-vnc ...,x509=...`` & ``-vnc ...,x509verify=...`` (removed in 3.1)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+The "tls-creds" option should be used instead to point to a "tls-creds-x509"
+object created using "-object".
+
+``-mem-path`` fallback to RAM (removed in 5.0)
+''''''''''''''''''''''''''''''''''''''''''''''
+
+If guest RAM allocation from the file pointed to by ``mem-path`` failed,
+QEMU used to fall back to allocating from RAM, which might have resulted
+in unpredictable behavior since the backing file specified by the user
+was ignored. Currently, users are responsible for making sure the backing storage
+specified with ``-mem-path`` can actually provide the guest RAM configured with
+``-m``, and QEMU fails to start up if RAM allocation is unsuccessful.
+
+``-net ...,name=...`` (removed in 5.1)
+''''''''''''''''''''''''''''''''''''''
+
+The ``name`` parameter of the ``-net`` option was a synonym
+for the ``id`` parameter, which should now be used instead.
+
+``-numa node,mem=...`` (removed in 5.1)
+'''''''''''''''''''''''''''''''''''''''
+
+The parameter ``mem`` of ``-numa node`` was used to assign a part of guest RAM
+to a NUMA node. But when using it, it's impossible to manage the specified RAM
+chunk on the host side (e.g. bind it to a host node, set a bind policy, ...),
+so the guest ends up with a fake NUMA configuration with suboptimal
+performance.
+However, since 2014 there is an alternative way to assign RAM to a NUMA node
+using the parameter ``memdev``, which does the same as ``mem`` and adds
+the means to actually manage node RAM on the host side. Use parameter ``memdev``
+with a *memory-backend-ram* backend as a replacement for parameter ``mem``
+to achieve the same fake NUMA effect, or a properly configured
+*memory-backend-file* backend to actually benefit from the NUMA configuration.
+New machine versions (since 5.1) will not accept the option but it will still
+work with old machine types. Users can check the QAPI schema to see if the legacy
+option is supported by looking at the MachineInfo::numa-mem-supported property.
+
+``-numa`` node (without memory specified) (removed in 5.2)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Splitting RAM by default between NUMA nodes had the same issues as the ``mem``
+parameter, with the difference that the user's role is played by QEMU using an
+implicit generic or board-specific splitting rule.
+Use ``memdev`` with a *memory-backend-ram* backend or ``mem`` (if
+it's supported by the machine type used) to define the mapping explicitly instead.
+Users of existing VMs, wishing to preserve the same RAM distribution, should
+configure it explicitly using ``-numa node,memdev`` options. The current RAM
+distribution can be retrieved using the HMP command ``info numa``, and if separate
+memory devices (pc|nv-dimm) are present use ``info memory-device`` and subtract
+device memory from the output of ``info numa``.
+
+``-smp`` (invalid topologies) (removed in 5.2)
+''''''''''''''''''''''''''''''''''''''''''''''
+
+CPU topology properties should describe the whole machine topology including
+possible CPUs.
+ +However, historically it was possible to start QEMU with an incorrect topology +where *n* <= *sockets* * *cores* * *threads* < *maxcpus*, +which could lead to an incorrect topology enumeration by the guest. +Support for invalid topologies is removed, the user must ensure +topologies described with -smp include all possible cpus, i.e. +*sockets* * *cores* * *threads* = *maxcpus*. + +``-machine enforce-config-section=on|off`` (removed in 5.2) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``enforce-config-section`` property was replaced by the +``-global migration.send-configuration={on|off}`` option. + +``-no-kvm`` (removed in 5.2) +'''''''''''''''''''''''''''' + +The ``-no-kvm`` argument was a synonym for setting ``-machine accel=tcg``. + +``-realtime`` (removed in 6.0) +'''''''''''''''''''''''''''''' + +The ``-realtime mlock=on|off`` argument has been replaced by the +``-overcommit mem-lock=on|off`` argument. + +``-show-cursor`` option (removed in 6.0) +'''''''''''''''''''''''''''''''''''''''' + +Use ``-display sdl,show-cursor=on``, ``-display gtk,show-cursor=on`` +or ``-display default,show-cursor=on`` instead. + +``-tb-size`` option (removed in 6.0) +'''''''''''''''''''''''''''''''''''' + +QEMU 5.0 introduced an alternative syntax to specify the size of the translation +block cache, ``-accel tcg,tb-size=``. + +``-usbdevice audio`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''' + +This option lacked the possibility to specify an audio backend device. +Use ``-device usb-audio`` now instead (and specify a corresponding USB +host controller or ``-usb`` if necessary). + +``-vnc acl`` (removed in 6.0) +''''''''''''''''''''''''''''' + +The ``acl`` option to the ``-vnc`` argument has been replaced +by the ``tls-authz`` and ``sasl-authz`` options. + +``-mon ...,control=readline,pretty=on|off`` (removed in 6.0) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``pretty=on|off`` switch has no effect for HMP monitors and +its use is rejected. + +``-drive file=json:{...{'driver':'file'}}`` (removed in 6.0) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The 'file' driver for drives is no longer appropriate for character or host +devices and will only accept regular files (S_IFREG). The correct driver +for these file types is 'host_cdrom' or 'host_device' as appropriate. + +Floppy controllers' drive properties (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use ``-device floppy,...`` instead. When configuring onboard floppy +controllers +:: + + -global isa-fdc.driveA=... + -global sysbus-fdc.driveA=... + -global SUNW,fdtwo.drive=... + +become +:: + + -device floppy,unit=0,drive=... + +and +:: + + -global isa-fdc.driveB=... + -global sysbus-fdc.driveB=... + +become +:: + + -device floppy,unit=1,drive=... + +When plugging in a floppy controller +:: + + -device isa-fdc,...,driveA=... + +becomes +:: + + -device isa-fdc,... + -device floppy,unit=0,drive=... + +and +:: + + -device isa-fdc,...,driveB=... + +becomes +:: + + -device isa-fdc,... + -device floppy,unit=1,drive=... + +``-drive`` with bogus interface type (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Drives with interface types other than ``if=none`` are for onboard +devices. Drives the board doesn't pick up can no longer be used with +-device. Use ``if=none`` instead. + +``-usbdevice ccid`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''' + +This option was undocumented and not used in the field. 
+Use ``-device usb-ccid`` instead. + +RISC-V firmware not booted by default (removed in 5.1) +'''''''''''''''''''''''''''''''''''''''''''''''''''''' + +QEMU 5.1 changes the default behaviour from ``-bios none`` to ``-bios default`` +for the RISC-V ``virt`` machine and ``sifive_u`` machine. + +QEMU Machine Protocol (QMP) commands +------------------------------------ + +``block-dirty-bitmap-add`` "autoload" parameter (removed in 4.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The "autoload" parameter has been ignored since 2.12.0. All bitmaps +are automatically loaded from qcow2 images. + +``cpu-add`` (removed in 5.2) +'''''''''''''''''''''''''''' + +Use ``device_add`` for hotplugging vCPUs instead of ``cpu-add``. See +documentation of ``query-hotpluggable-cpus`` for additional details. + +``change`` (removed in 6.0) +''''''''''''''''''''''''''' + +Use ``blockdev-change-medium`` or ``change-vnc-password`` instead. + +``query-events`` (removed in 6.0) +''''''''''''''''''''''''''''''''' + +The ``query-events`` command has been superseded by the more powerful +and accurate ``query-qmp-schema`` command. + +``migrate_set_cache_size`` and ``query-migrate-cache-size`` (removed in 6.0) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use ``migrate_set_parameter`` and ``info migrate_parameters`` instead. + +``migrate_set_downtime`` and ``migrate_set_speed`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use ``migrate_set_parameter`` instead. + +``query-cpus`` (removed in 6.0) +''''''''''''''''''''''''''''''' + +The ``query-cpus`` command is replaced by the ``query-cpus-fast`` command. + +``query-cpus-fast`` ``arch`` output member (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``arch`` output member of the ``query-cpus-fast`` command is +replaced by the ``target`` output member. + +chardev client socket with ``wait`` option (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Character devices creating sockets in client mode should not specify +the 'wait' field, which is only applicable to sockets in server mode + +``query-named-block-nodes`` result ``encryption_key_missing`` (removed in 6.0) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Removed with no replacement. + +``query-block`` result ``inserted.encryption_key_missing`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Removed with no replacement. + +``query-named-block-nodes`` and ``query-block`` result dirty-bitmaps[i].status (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``status`` field of the ``BlockDirtyInfo`` structure, returned by +these commands is removed. Two new boolean fields, ``recording`` and +``busy`` effectively replace it. + +``query-block`` result field ``dirty-bitmaps`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``dirty-bitmaps`` field of the ``BlockInfo`` structure, returned by +the query-block command is itself now removed. The ``dirty-bitmaps`` +field of the ``BlockDeviceInfo`` struct should be used instead, which is the +type of the ``inserted`` field in query-block replies, as well as the +type of array items in query-named-block-nodes. 
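+
+In other words, a client that previously read the bitmap list from the top
+level of each ``query-block`` result element should now read it from the
+``inserted`` member. A rough sketch of one reply element (most fields elided)::
+
+  { "device": "drive0",
+    "inserted": { "node-name": "...",
+                  "dirty-bitmaps": [ ... ],
+                  ... } }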
+ +``object-add`` option ``props`` (removed in 6.0) +'''''''''''''''''''''''''''''''''''''''''''''''' + +Specify the properties for the object as top-level arguments instead. + +Human Monitor Protocol (HMP) commands +------------------------------------- + +``usb_add`` and ``usb_remove`` (removed in 2.12) +'''''''''''''''''''''''''''''''''''''''''''''''' + +Replaced by ``device_add`` and ``device_del`` (use ``device_add help`` for a +list of available devices). + +``host_net_add`` and ``host_net_remove`` (removed in 2.12) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Replaced by ``netdev_add`` and ``netdev_del``. + +The ``hub_id`` parameter of ``hostfwd_add`` / ``hostfwd_remove`` (removed in 5.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``[hub_id name]`` parameter tuple of the 'hostfwd_add' and +'hostfwd_remove' HMP commands has been replaced by ``netdev_id``. + +``cpu-add`` (removed in 5.2) +'''''''''''''''''''''''''''' + +Use ``device_add`` for hotplugging vCPUs instead of ``cpu-add``. See +documentation of ``query-hotpluggable-cpus`` for additional details. + +``change vnc TARGET`` (removed in 6.0) +'''''''''''''''''''''''''''''''''''''' + +No replacement. The ``change vnc password`` and ``change DEVICE MEDIUM`` +commands are not affected. + +``acl_show``, ``acl_reset``, ``acl_policy``, ``acl_add``, ``acl_remove`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``acl_show``, ``acl_reset``, ``acl_policy``, ``acl_add``, and +``acl_remove`` commands were removed with no replacement. Authorization +for VNC should be performed using the pluggable QAuthZ objects. + +``migrate-set-cache-size`` and ``info migrate-cache-size`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use ``migrate-set-parameters`` and ``info migrate-parameters`` instead. + +``migrate_set_downtime`` and ``migrate_set_speed`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use ``migrate-set-parameters`` instead. + +``info cpustats`` (removed in 6.1) +'''''''''''''''''''''''''''''''''' + +This command didn't produce any output already. Removed with no replacement. + +Guest Emulator ISAs +------------------- + +RISC-V ISA privilege specification version 1.09.1 (removed in 5.1) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The RISC-V ISA privilege specification version 1.09.1 has been removed. +QEMU supports both the newer version 1.10.0 and the ratified version 1.11.0, these +should be used instead of the 1.09.1 version. + +System emulator CPUS +-------------------- + +KVM guest support on 32-bit Arm hosts (removed in 5.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The Linux kernel has dropped support for allowing 32-bit Arm systems +to host KVM guests as of the 5.7 kernel. Accordingly, QEMU is deprecating +its support for this configuration and will remove it in a future version. +Running 32-bit guests on a 64-bit Arm host remains supported. + +RISC-V ISA Specific CPUs (removed in 5.1) +''''''''''''''''''''''''''''''''''''''''' + +The RISC-V cpus with the ISA version in the CPU name have been removed. The +four CPUs are: ``rv32gcsu-v1.9.1``, ``rv32gcsu-v1.10.0``, ``rv64gcsu-v1.9.1`` and +``rv64gcsu-v1.10.0``. Instead the version can be specified via the CPU ``priv_spec`` +option when using the ``rv32`` or ``rv64`` CPUs. 
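+
+For example (mirroring the Spike machine note later in this document), instead
+of ``-cpu rv64gcsu-v1.10.0`` the privilege specification version can now be
+selected with::
+
+  -cpu rv64gcsu,priv_spec=v1.10.0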
+ +RISC-V no MMU CPUs (removed in 5.1) +''''''''''''''''''''''''''''''''''' + +The RISC-V no MMU cpus have been removed. The two CPUs: ``rv32imacu-nommu`` and +``rv64imacu-nommu`` can no longer be used. Instead the MMU status can be specified +via the CPU ``mmu`` option when using the ``rv32`` or ``rv64`` CPUs. + +``compat`` property of server class POWER CPUs (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``max-cpu-compat`` property of the ``pseries`` machine type should be used +instead. + +``moxie`` CPU (removed in 6.1) +'''''''''''''''''''''''''''''' + +Nobody was using this CPU emulation in QEMU, and there were no test images +available to make sure that the code is still working, so it has been removed +without replacement. + +``lm32`` CPUs (removed in 6.1) +'''''''''''''''''''''''''''''' + +The only public user of this architecture was the milkymist project, +which has been dead for years; there was never an upstream Linux +port. Removed without replacement. + +``unicore32`` CPUs (removed in 6.1) +''''''''''''''''''''''''''''''''''' + +Support for this CPU was removed from the upstream Linux kernel, and +there is no available upstream toolchain to build binaries for it. +Removed without replacement. + +System emulator machines +------------------------ + +``s390-virtio`` (removed in 2.6) +'''''''''''''''''''''''''''''''' + +Use the ``s390-ccw-virtio`` machine instead. + +The m68k ``dummy`` machine (removed in 2.9) +''''''''''''''''''''''''''''''''''''''''''' + +Use the ``none`` machine with the ``loader`` device instead. + +``xlnx-ep108`` (removed in 3.0) +''''''''''''''''''''''''''''''' + +The EP108 was an early access development board that is no longer used. +Use the ``xlnx-zcu102`` machine instead. + +``spike_v1.9.1`` and ``spike_v1.10`` (removed in 5.1) +''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The version specific Spike machines have been removed in favour of the +generic ``spike`` machine. If you need to specify an older version of the RISC-V +spec you can use the ``-cpu rv64gcsu,priv_spec=v1.10.0`` command line argument. + +mips ``r4k`` platform (removed in 5.2) +'''''''''''''''''''''''''''''''''''''' + +This machine type was very old and unmaintained. Users should use the ``malta`` +machine type instead. + +mips ``fulong2e`` machine alias (removed in 6.0) +'''''''''''''''''''''''''''''''''''''''''''''''' + +This machine has been renamed ``fuloong2e``. + +``pc-0.10`` up to ``pc-1.3`` (removed in 4.0 up to 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +These machine types were very old and likely could not be used for live +migration from old QEMU versions anymore. Use a newer machine type instead. + +Raspberry Pi ``raspi2`` and ``raspi3`` machines (removed in 6.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The Raspberry Pi machines come in various models (A, A+, B, B+). To be able +to distinguish which model QEMU is implementing, the ``raspi2`` and ``raspi3`` +machines have been renamed ``raspi2b`` and ``raspi3b``. + + +linux-user mode CPUs +-------------------- + +``tilegx`` CPUs (removed in 6.0) +'''''''''''''''''''''''''''''''' + +The ``tilegx`` guest CPU support has been removed without replacement. It was +only implemented in linux-user mode, but support for this CPU was removed from +the upstream Linux kernel in 2018, and it has also been dropped from glibc, so +there is no new Linux development taking place with this architecture. 
For +running the old binaries, you can use older versions of QEMU. + +System emulator devices +----------------------- + +``spapr-pci-vfio-host-bridge`` (removed in 2.12) +''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``spapr-pci-vfio-host-bridge`` device type has been replaced by the +``spapr-pci-host-bridge`` device type. + +``ivshmem`` (removed in 4.0) +'''''''''''''''''''''''''''' + +Replaced by either the ``ivshmem-plain`` or ``ivshmem-doorbell``. + +``ide-drive`` (removed in 6.0) +'''''''''''''''''''''''''''''' + +The 'ide-drive' device has been removed. Users should use 'ide-hd' or +'ide-cd' as appropriate to get an IDE hard disk or CD-ROM as needed. + +``scsi-disk`` (removed in 6.0) +'''''''''''''''''''''''''''''' + +The 'scsi-disk' device has been removed. Users should use 'scsi-hd' or +'scsi-cd' as appropriate to get a SCSI hard disk or CD-ROM as needed. + +Related binaries +---------------- + +``qemu-nbd --partition`` (removed in 5.0) +''''''''''''''''''''''''''''''''''''''''' + +The ``qemu-nbd --partition $digit`` code (also spelled ``-P``) +could only handle MBR partitions, and never correctly handled logical +partitions beyond partition 5. Exporting a partition can still be +done by utilizing the ``--image-opts`` option with a raw blockdev +using the ``offset`` and ``size`` parameters layered on top of +any other existing blockdev. For example, if partition 1 is 100MiB +long starting at 1MiB, the old command:: + + qemu-nbd -t -P 1 -f qcow2 file.qcow2 + +can be rewritten as:: + + qemu-nbd -t --image-opts driver=raw,offset=1M,size=100M,file.driver=qcow2,file.file.driver=file,file.file.filename=file.qcow2 + +``qemu-img convert -n -o`` (removed in 5.1) +''''''''''''''''''''''''''''''''''''''''''' + +All options specified in ``-o`` are image creation options, so +they are now rejected when used with ``-n`` to skip image creation. + + +``qemu-img create -b bad file $size`` (removed in 5.1) +'''''''''''''''''''''''''''''''''''''''''''''''''''''' + +When creating an image with a backing file that could not be opened, +``qemu-img create`` used to issue a warning about the failure but +proceed with the image creation if an explicit size was provided. +However, as the ``-u`` option exists for this purpose, it is safer to +enforce that any failure to open the backing image (including if the +backing file is missing or an incorrect format was specified) is an +error when ``-u`` is not used. + +``qemu-img amend`` to adjust backing file (removed in 6.1) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The use of ``qemu-img amend`` to modify the name or format of a qcow2 +backing image was never fully documented or tested, and interferes +with other amend operations that need access to the original backing +image (such as deciding whether a v3 zero cluster may be left +unallocated when converting to a v2 image). Any changes to the +backing chain should be performed with ``qemu-img rebase -u`` either +before or after the remaining changes being performed by amend, as +appropriate. + +``qemu-img`` backing file without format (removed in 6.1) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The use of ``qemu-img create``, ``qemu-img rebase``, or ``qemu-img +convert`` to create or modify an image that depends on a backing file +now requires that an explicit backing format be provided. 
This is +for safety: if QEMU probes a different format than what you thought, +the data presented to the guest will be corrupt; similarly, presenting +a raw image to a guest allows a potential security exploit if a future +probe sees a non-raw image based on guest writes. + +To avoid creating unsafe backing chains, you must pass ``-o +backing_fmt=`` (or the shorthand ``-F`` during create) to specify the +intended backing format. You may use ``qemu-img rebase -u`` to +retroactively add a backing format to an existing image. However, be +aware that there are already potential security risks to blindly using +``qemu-img info`` to probe the format of an untrusted backing image, +when deciding what format to add into an existing image. + +Block devices +------------- + +VXHS backend (removed in 5.1) +''''''''''''''''''''''''''''' + +The VXHS code did not compile since v2.12.0. It was removed in 5.1. + +``sheepdog`` driver (removed in 6.0) +'''''''''''''''''''''''''''''''''''' + +The corresponding upstream server project is no longer maintained. +Users are recommended to switch to an alternative distributed block +device driver such as RBD. diff --git a/docs/amd-memory-encryption.txt b/docs/amd-memory-encryption.txt index 145896aec78..ffca382b5f5 100644 --- a/docs/amd-memory-encryption.txt +++ b/docs/amd-memory-encryption.txt @@ -1,38 +1,48 @@ Secure Encrypted Virtualization (SEV) is a feature found on AMD processors. SEV is an extension to the AMD-V architecture which supports running encrypted -virtual machine (VMs) under the control of KVM. Encrypted VMs have their pages +virtual machines (VMs) under the control of KVM. Encrypted VMs have their pages (code and data) secured such that only the guest itself has access to the unencrypted version. Each encrypted VM is associated with a unique encryption -key; if its data is accessed to a different entity using a different key the +key; if its data is accessed by a different entity using a different key the encrypted guests data will be incorrectly decrypted, leading to unintelligible data. -The key management of this feature is handled by separate processor known as -AMD secure processor (AMD-SP) which is present in AMD SOCs. Firmware running -inside the AMD-SP provide commands to support common VM lifecycle. This +Key management for this feature is handled by a separate processor known as the +AMD secure processor (AMD-SP), which is present in AMD SOCs. Firmware running +inside the AMD-SP provides commands to support a common VM lifecycle. This includes commands for launching, snapshotting, migrating and debugging the -encrypted guest. Those SEV command can be issued via KVM_MEMORY_ENCRYPT_OP +encrypted guest. These SEV commands can be issued via KVM_MEMORY_ENCRYPT_OP ioctls. +Secure Encrypted Virtualization - Encrypted State (SEV-ES) builds on the SEV +support to additionally protect the guest register state. In order to allow a +hypervisor to perform functions on behalf of a guest, there is architectural +support for notifying a guest's operating system when certain types of VMEXITs +are about to occur. This allows the guest to selectively share information with +the hypervisor to satisfy the requested function. + Launching --------- -Boot images (such as bios) must be encrypted before guest can be booted. -MEMORY_ENCRYPT_OP ioctl provides commands to encrypt the images :LAUNCH_START, +Boot images (such as bios) must be encrypted before a guest can be booted. 
The +MEMORY_ENCRYPT_OP ioctl provides commands to encrypt the images: LAUNCH_START, LAUNCH_UPDATE_DATA, LAUNCH_MEASURE and LAUNCH_FINISH. These four commands together generate a fresh memory encryption key for the VM, encrypt the boot -images and provide a measurement than can be used as an attestation of the +images and provide a measurement than can be used as an attestation of a successful launch. +For a SEV-ES guest, the LAUNCH_UPDATE_VMSA command is also used to encrypt the +guest register state, or VM save area (VMSA), for all of the guest vCPUs. + LAUNCH_START is called first to create a cryptographic launch context within -the firmware. To create this context, guest owner must provides guest policy, +the firmware. To create this context, guest owner must provide a guest policy, its public Diffie-Hellman key (PDH) and session parameters. These inputs -should be treated as binary blob and must be passed as-is to the SEV firmware. +should be treated as a binary blob and must be passed as-is to the SEV firmware. -The guest policy is passed as plaintext and hypervisor may able to read it +The guest policy is passed as plaintext. A hypervisor may choose to read it, but should not modify it (any modification of the policy bits will result in bad measurement). The guest policy is a 4-byte data structure containing -several flags that restricts what can be done on running SEV guest. +several flags that restricts what can be done on a running SEV guest. See KM Spec section 3 and 6.2 for more details. The guest policy can be provided via the 'policy' property (see below) @@ -40,31 +50,42 @@ The guest policy can be provided via the 'policy' property (see below) # ${QEMU} \ sev-guest,id=sev0,policy=0x1...\ -Guest owners provided DH certificate and session parameters will be used to +Setting the "SEV-ES required" policy bit (bit 2) will launch the guest as a +SEV-ES guest (see below) + +# ${QEMU} \ + sev-guest,id=sev0,policy=0x5...\ + +The guest owner provided DH certificate and session parameters will be used to establish a cryptographic session with the guest owner to negotiate keys used for the attestation. -The DH certificate and session blob can be provided via 'dh-cert-file' and -'session-file' property (see below +The DH certificate and session blob can be provided via the 'dh-cert-file' and +'session-file' properties (see below) # ${QEMU} \ sev-guest,id=sev0,dh-cert-file=,session-file= LAUNCH_UPDATE_DATA encrypts the memory region using the cryptographic context -created via LAUNCH_START command. If required, this command can be called +created via the LAUNCH_START command. If required, this command can be called multiple times to encrypt different memory regions. The command also calculates the measurement of the memory contents as it encrypts. -LAUNCH_MEASURE command can be used to retrieve the measurement of encrypted -memory. This measurement is a signature of the memory contents that can be -sent to the guest owner as an attestation that the memory was encrypted +LAUNCH_UPDATE_VMSA encrypts all the vCPU VMSAs for a SEV-ES guest using the +cryptographic context created via the LAUNCH_START command. The command also +calculates the measurement of the VMSAs as it encrypts them. + +LAUNCH_MEASURE can be used to retrieve the measurement of encrypted memory and, +for a SEV-ES guest, encrypted VMSAs. 
This measurement is a signature of the +memory contents and, for a SEV-ES guest, the VMSA contents, that can be sent +to the guest owner as an attestation that the memory and VMSAs were encrypted correctly by the firmware. The guest owner may wait to provide the guest confidential information until it can verify the attestation measurement. Since the guest owner knows the initial contents of the guest at boot, the attestation measurement can be verified by comparing it to what the guest owner expects. -LAUNCH_FINISH command finalizes the guest launch and destroy's the cryptographic +LAUNCH_FINISH finalizes the guest launch and destroys the cryptographic context. See SEV KM API Spec [1] 'Launching a guest' usage flow (Appendix A) for the @@ -76,12 +97,28 @@ To launch a SEV guest -machine ...,confidential-guest-support=sev0 \ -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1 +To launch a SEV-ES guest + +# ${QEMU} \ + -machine ...,confidential-guest-support=sev0 \ + -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1,policy=0x5 + +An SEV-ES guest has some restrictions as compared to a SEV guest. Because the +guest register state is encrypted and cannot be updated by the VMM/hypervisor, +a SEV-ES guest: + - Does not support SMM - SMM support requires updating the guest register + state. + - Does not support reboot - a system reset requires updating the guest register + state. + - Requires in-kernel irqchip - the burden is placed on the hypervisor to + manage booting APs. + Debugging ----------- -Since memory contents of SEV guest is encrypted hence hypervisor access to the -guest memory will get a cipher text. If guest policy allows debugging, then -hypervisor can use DEBUG_DECRYPT and DEBUG_ENCRYPT commands access the guest -memory region for debug purposes. This is not supported in QEMU yet. +Since the memory contents of a SEV guest are encrypted, hypervisor access to +the guest memory will return cipher text. If the guest policy allows debugging, +then a hypervisor can use the DEBUG_DECRYPT and DEBUG_ENCRYPT commands to access +the guest memory region for debug purposes. This is not supported in QEMU yet. Snapshot/Restore ----------------- @@ -102,8 +139,10 @@ Secure Encrypted Virtualization Key Management: KVM Forum slides: http://www.linux-kvm.org/images/7/74/02x08A-Thomas_Lendacky-AMDs_Virtualizatoin_Memory_Encryption_Technology.pdf +https://www.linux-kvm.org/images/9/94/Extending-Secure-Encrypted-Virtualization-with-SEV-ES-Thomas-Lendacky-AMD.pdf AMD64 Architecture Programmer's Manual: http://support.amd.com/TechDocs/24593.pdf SME is section 7.10 SEV is section 15.34 + SEV-ES is section 15.35 diff --git a/docs/barrier.txt b/docs/barrier.txt deleted file mode 100644 index b21d15015d9..00000000000 --- a/docs/barrier.txt +++ /dev/null @@ -1,370 +0,0 @@ - QEMU Barrier Client - - -* About - - Barrier is a KVM (Keyboard-Video-Mouse) software forked from Symless's - synergy 1.9 codebase. - - See https://github.com/debauchee/barrier - -* QEMU usage - - Generally, mouse and keyboard are grabbed through the QEMU video - interface emulation. - - But when we want to use a video graphic adapter via a PCI passthrough - there is no way to provide the keyboard and mouse inputs to the VM - except by plugging a second set of mouse and keyboard to the host - or by installing a KVM software in the guest OS. - - The QEMU Barrier client avoids this by implementing directly the Barrier - protocol into QEMU. - - This protocol is enabled by adding an input-barrier object to QEMU. 
- - Syntax: input-barrier,id=,name= - [,server=][,port=] - [,x-origin=][,y-origin=] - [,width=][,height=] - - The object can be added on the QEMU command line, for instance with: - - ... -object input-barrier,id=barrier0,name=VM-1 ... - - where VM-1 is the name the display configured int the Barrier server - on the host providing the mouse and the keyboard events. - - by default is "localhost", port is 24800, - and are set to 0, and to - 1920 and 1080. - - If Barrier server is stopped QEMU needs to be reconnected manually, - by removing and re-adding the input-barrier object, for instance - with the help of the HMP monitor: - - (qemu) object_del barrier0 - (qemu) object_add input-barrier,id=barrier0,name=VM-1 - -* Message format - - Message format between the server and client is in two parts: - - 1- the payload length is a 32bit integer in network endianness, - 2- the payload - - The payload starts with a 4byte string (without NUL) which is the - command. The first command between the server and the client - is the only command not encoded on 4 bytes ("Barrier"). - The remaining part of the payload is decoded according to the command. - -* Protocol Description (from barrier/src/lib/barrier/protocol_types.h) - - - barrierCmdHello "Barrier" - - Direction: server -> client - Parameters: { int16_t minor, int16_t major } - Description: - - Say hello to client - minor = protocol major version number supported by server - major = protocol minor version number supported by server - - - barrierCmdHelloBack "Barrier" - - Direction: client ->server - Parameters: { int16_t minor, int16_t major, char *name} - Description: - - Respond to hello from server - minor = protocol major version number supported by client - major = protocol minor version number supported by client - name = client name - - - barrierCmdDInfo "DINF" - - Direction: client ->server - Parameters: { int16_t x_origin, int16_t y_origin, int16_t width, int16_t height, int16_t x, int16_t y} - Description: - - The client screen must send this message in response to the - barrierCmdQInfo message. It must also send this message when the - screen's resolution changes. In this case, the client screen should - ignore any barrierCmdDMouseMove messages until it receives a - barrierCmdCInfoAck in order to prevent attempts to move the mouse off - the new screen area. - - - barrierCmdCNoop "CNOP" - - Direction: client -> server - Parameters: None - Description: - - No operation - - - barrierCmdCClose "CBYE" - - Direction: server -> client - Parameters: None - Description: - - Close connection - - - barrierCmdCEnter "CINN" - - Direction: server -> client - Parameters: { int16_t x, int16_t y, int32_t seq, int16_t modifier } - Description: - - Enter screen. - x,y = entering screen absolute coordinates - seq = sequence number, which is used to order messages between - screens. the secondary screen must return this number - with some messages - modifier = modifier key mask. this will have bits set for each - toggle modifier key that is activated on entry to the - screen. the secondary screen should adjust its toggle - modifiers to reflect that state. - - - barrierCmdCLeave "COUT" - - Direction: server -> client - Parameters: None - Description: - - Leaving screen. the secondary screen should send clipboard data in - response to this message for those clipboards that it has grabbed - (i.e. 
has sent a barrierCmdCClipboard for and has not received a - barrierCmdCClipboard for with a greater sequence number) and that - were grabbed or have changed since the last leave. - - - barrierCmdCClipboard "CCLP" - - Direction: server -> client - Parameters: { int8_t id, int32_t seq } - Description: - - Grab clipboard. Sent by screen when some other app on that screen - grabs a clipboard. - id = the clipboard identifier - seq = sequence number. Client must use the sequence number passed in - the most recent barrierCmdCEnter. the server always sends 0. - - - barrierCmdCScreenSaver "CSEC" - - Direction: server -> client - Parameters: { int8_t started } - Description: - - Screensaver change. - started = Screensaver on primary has started (1) or closed (0) - - - barrierCmdCResetOptions "CROP" - - Direction: server -> client - Parameters: None - Description: - - Reset options. Client should reset all of its options to their - defaults. - - - barrierCmdCInfoAck "CIAK" - - Direction: server -> client - Parameters: None - Description: - - Resolution change acknowledgment. Sent by server in response to a - client screen's barrierCmdDInfo. This is sent for every - barrierCmdDInfo, whether or not the server had sent a barrierCmdQInfo. - - - barrierCmdCKeepAlive "CALV" - - Direction: server -> client - Parameters: None - Description: - - Keep connection alive. Sent by the server periodically to verify - that connections are still up and running. clients must reply in - kind on receipt. if the server gets an error sending the message or - does not receive a reply within a reasonable time then the server - disconnects the client. if the client doesn't receive these (or any - message) periodically then it should disconnect from the server. the - appropriate interval is defined by an option. - - - barrierCmdDKeyDown "DKDN" - - Direction: server -> client - Parameters: { int16_t keyid, int16_t modifier [,int16_t button] } - Description: - - Key pressed. - keyid = X11 key id - modified = modified mask - button = X11 Xkb keycode (optional) - - - barrierCmdDKeyRepeat "DKRP" - - Direction: server -> client - Parameters: { int16_t keyid, int16_t modifier, int16_t repeat [,int16_t button] } - Description: - - Key auto-repeat. - keyid = X11 key id - modified = modified mask - repeat = number of repeats - button = X11 Xkb keycode (optional) - - - barrierCmdDKeyUp "DKUP" - - Direction: server -> client - Parameters: { int16_t keyid, int16_t modifier [,int16_t button] } - Description: - - Key released. - keyid = X11 key id - modified = modified mask - button = X11 Xkb keycode (optional) - - - barrierCmdDMouseDown "DMDN" - - Direction: server -> client - Parameters: { int8_t button } - Description: - - Mouse button pressed. - button = button id - - - barrierCmdDMouseUp "DMUP" - - Direction: server -> client - Parameters: { int8_t button } - Description: - - Mouse button release. - button = button id - - - barrierCmdDMouseMove "DMMV" - - Direction: server -> client - Parameters: { int16_t x, int16_t y } - Description: - - Absolute mouse moved. - x,y = absolute screen coordinates - - - barrierCmdDMouseRelMove "DMRM" - - Direction: server -> client - Parameters: { int16_t x, int16_t y } - Description: - - Relative mouse moved. - x,y = r relative screen coordinates - - - barrierCmdDMouseWheel "DMWM" - - Direction: server -> client - Parameters: { int16_t x , int16_t y } or { int16_t y } - Description: - - Mouse scroll. 
The delta should be +120 for one tick forward (away - from the user) or right and -120 for one tick backward (toward the - user) or left. - x = x delta - y = y delta - - - barrierCmdDClipboard "DCLP" - - Direction: server -> client - Parameters: { int8_t id, int32_t seq, int8_t mark, char *data } - Description: - - Clipboard data. - id = clipboard id - seq = sequence number. The sequence number is 0 when sent by the - server. Client screens should use the/ sequence number from - the most recent barrierCmdCEnter. - - - barrierCmdDSetOptions "DSOP" - - Direction: server -> client - Parameters: { int32 t nb, { int32_t id, int32_t val }[] } - Description: - - Set options. Client should set the given option/value pairs. - nb = numbers of { id, val } entries - id = option id - val = option new value - - - barrierCmdDFileTransfer "DFTR" - - Direction: server -> client - Parameters: { int8_t mark, char *content } - Description: - - Transfer file data. - mark = 0 means the content followed is the file size - 1 means the content followed is the chunk data - 2 means the file transfer is finished - - - barrierCmdDDragInfo "DDRG" int16_t char * - - Direction: server -> client - Parameters: { int16_t nb, char *content } - Description: - - Drag information. - nb = number of dragging objects - content = object's directory - - - barrierCmdQInfo "QINF" - - Direction: server -> client - Parameters: None - Description: - - Query screen info - Client should reply with a barrierCmdDInfo - - - barrierCmdEIncompatible "EICV" - - Direction: server -> client - Parameters: { int16_t nb, major *minor } - Description: - - Incompatible version. - major = major version - minor = minor version - - - barrierCmdEBusy "EBSY" - - Direction: server -> client - Parameters: None - Description: - - Name provided when connecting is already in use. - - - barrierCmdEUnknown "EUNK" - - Direction: server -> client - Parameters: None - Description: - - Unknown client. Name provided when connecting is not in primary's - screen configuration map. - - - barrierCmdEBad "EBAD" - - Direction: server -> client - Parameters: None - Description: - - Protocol violation. Server should disconnect after sending this - message. - -* TO DO - - - Enable SSL - - Manage SetOptions/ResetOptions commands - diff --git a/docs/block-replication.txt b/docs/block-replication.txt index 108e9166a8b..b0f23761c6e 100644 --- a/docs/block-replication.txt +++ b/docs/block-replication.txt @@ -79,7 +79,7 @@ Primary | || Secondary disk <--------- hidden-disk 5 <--------- || | | || | | || '-------------------------' - || drive-backup sync=none 6 + || blockdev-backup sync=none 6 1) The disk on the primary is represented by a block device with two children, providing replication between a primary disk and the host that @@ -101,7 +101,7 @@ should support bdrv_make_empty() and backing file. that is modified by the primary VM. It should also start as an empty disk, and the driver supports bdrv_make_empty() and backing file. -6) The drive-backup job (sync=none) is run to allow hidden-disk to buffer +6) The blockdev-backup job (sync=none) is run to allow hidden-disk to buffer any state that would otherwise be lost by the speculative write-through of the NBD server into the secondary disk. So before block replication, the primary disk and secondary disk should contain the same data. 
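For reference, a backup job of this shape corresponds to a QMP invocation roughly like the following (the node names are placeholders, and in the COLO setup the job is started internally by the replication driver rather than issued by hand):

    { "execute": "blockdev-backup",
      "arguments": {
          "job-id": "hidden-disk-buffer",
          "device": "secondary-disk",
          "target": "hidden-disk",
          "sync": "none"
      }
    }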
@@ -156,15 +156,15 @@ Primary: children.0.driver=raw Run qmp command in primary qemu: - { 'execute': 'human-monitor-command', - 'arguments': { - 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=xxxx,file.port=xxxx,file.export=colo1,node-name=nbd_client1' + { "execute": "human-monitor-command", + "arguments": { + "command-line": "drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=xxxx,file.port=xxxx,file.export=colo1,node-name=nbd_client1" } } - { 'execute': 'x-blockdev-change', - 'arguments': { - 'parent': 'colo1', - 'node': 'nbd_client1' + { "execute": "x-blockdev-change", + "arguments": { + "parent": "colo1", + "node": "nbd_client1" } } Note: @@ -189,21 +189,21 @@ Secondary: vote-threshold=1,children.0=childs1 Then run qmp command in secondary qemu: - { 'execute': 'nbd-server-start', - 'arguments': { - 'addr': { - 'type': 'inet', - 'data': { - 'host': 'xxx', - 'port': 'xxx' + { "execute": "nbd-server-start", + "arguments": { + "addr": { + "type": "inet", + "data": { + "host": "xxx", + "port": "xxx" } } } } - { 'execute': 'nbd-server-add', - 'arguments': { - 'device': 'colo1', - 'writable': true + { "execute": "nbd-server-add", + "arguments": { + "device": "colo1", + "writable": true } } @@ -223,22 +223,22 @@ After Failover: Primary: The secondary host is down, so we should run the following qmp command to remove the nbd child from the quorum: - { 'execute': 'x-blockdev-change', - 'arguments': { - 'parent': 'colo1', - 'child': 'children.1' + { "execute": "x-blockdev-change", + "arguments": { + "parent": "colo1", + "child": "children.1" } } - { 'execute': 'human-monitor-command', - 'arguments': { - 'command-line': 'drive_del xxxx' + { "execute": "human-monitor-command", + "arguments": { + "command-line": "drive_del xxxx" } } Note: there is no qmp command to remove the blockdev now Secondary: The primary host is down, so we should do the following thing: - { 'execute': 'nbd-server-stop' } + { "execute": "nbd-server-stop" } Promote Secondary to Primary: see COLO-FT.txt diff --git a/docs/bootindex.txt b/docs/bootindex.txt deleted file mode 100644 index 6937862ba0d..00000000000 --- a/docs/bootindex.txt +++ /dev/null @@ -1,52 +0,0 @@ -= Bootindex property = - -Block and net devices have bootindex property. This property is used to -determine the order in which firmware will consider devices for booting -the guest OS. If the bootindex property is not set for a device, it gets -lowest boot priority. There is no particular order in which devices with -unset bootindex property will be considered for booting, but they will -still be bootable. - -== Example == - -Let's assume we have a QEMU machine with two NICs (virtio, e1000) and two -disks (IDE, virtio): - -qemu -drive file=disk1.img,if=none,id=disk1 - -device ide-hd,drive=disk1,bootindex=4 - -drive file=disk2.img,if=none,id=disk2 - -device virtio-blk-pci,drive=disk2,bootindex=3 - -netdev type=user,id=net0 -device virtio-net-pci,netdev=net0,bootindex=2 - -netdev type=user,id=net1 -device e1000,netdev=net1,bootindex=1 - -Given the command above, firmware should try to boot from the e1000 NIC -first. If this fails, it should try the virtio NIC next; if this fails -too, it should try the virtio disk, and then the IDE disk. - -== Limitations == - -1. Some firmware has limitations on which devices can be considered for -booting. For instance, the PC BIOS boot specification allows only one -disk to be bootable. 
If boot from disk fails for some reason, the BIOS -won't retry booting from other disk. It can still try to boot from -floppy or net, though. - -2. Sometimes, firmware cannot map the device path QEMU wants firmware to -boot from to a boot method. It doesn't happen for devices the firmware -can natively boot from, but if firmware relies on an option ROM for -booting, and the same option ROM is used for booting from more then one -device, the firmware may not be able to ask the option ROM to boot from -a particular device reliably. For instance with the PC BIOS, if a SCSI HBA -has three bootable devices target1, target3, target5 connected to it, -the option ROM will have a boot method for each of them, but it is not -possible to map from boot method back to a specific target. This is a -shortcoming of the PC BIOS boot specification. - -== Mixing bootindex and boot order parameters == - -Note that it does not make sense to use the bootindex property together -with the "-boot order=..." (or "-boot once=...") parameter. The guest -firmware implementations normally either support the one or the other, -but not both parameters at the same time. Mixing them will result in -undefined behavior, and thus the guest firmware will likely not boot -from the expected devices.
diff --git a/docs/bypass-iommu.txt b/docs/bypass-iommu.txt new file mode 100644 index 00000000000..e6677bddd32 --- /dev/null +++ b/docs/bypass-iommu.txt @@ -0,0 +1,89 @@ +BYPASS IOMMU PROPERTY +===================== + +Description +=========== +Traditionally, there is a single global switch to enable/disable the vIOMMU: all +devices in the system either go through the vIOMMU or none do, which +is not flexible. The bypass_iommu property lets devices that go through the +vIOMMU coexist with devices that bypass it. This is useful, for instance, to +pass through devices in no-iommu mode while other devices in the same virtual +machine go through the vIOMMU. + +PCI host bridges have a bypass_iommu property. This property is used to +determine whether the devices attached to the PCI host bridge will bypass the +virtual iommu. The bypass_iommu property is valid only when there is a +virtual iommu in the system; it is implemented to allow some devices to +bypass the vIOMMU. When the bypass_iommu property is not set for a host bridge, +the attached devices will go through the vIOMMU by default. + +Usage +===== +The bypass iommu feature supports the PXB host bridge and the default main host +bridge: a bypass_iommu property is provided for PXB, and a default_bus_bypass_iommu +property for the machine. Note that default_bus_bypass_iommu is available only with +the 'q35' machine type on x86 architecture and the 'virt' machine type +on AArch64. Other machine types do not support bypass iommu for the default +root bus. + +1. The following are the bypass iommu options: + (1) PCI expander bridge + qemu -device pxb-pcie,bus_nr=0x10,addr=0x1,bypass_iommu=true + (2) Arm default host bridge + qemu -machine virt,iommu=smmuv3,default_bus_bypass_iommu=true + (3) X86 default root bus bypass iommu: + qemu -machine q35,default_bus_bypass_iommu=true + +2. Here is the detailed qemu command line for the 'virt' machine with PXB on +AArch64: + +qemu-system-aarch64 \ + -machine virt,kernel_irqchip=on,iommu=smmuv3,default_bus_bypass_iommu=true \ + -device pxb-pcie,bus_nr=0x10,id=pci.10,bus=pcie.0,addr=0x3.0x1 \ + -device pxb-pcie,bus_nr=0x20,id=pci.20,bus=pcie.0,addr=0x3.0x2,bypass_iommu=true \ + +This gives us: + - a default host bridge which bypasses the SMMUv3 + - a pxb host bridge which goes through the SMMUv3 + - a pxb host bridge which bypasses the SMMUv3 + +3.
Here is the detailed qemu command line for the 'q35' machine with PXB on +x86 architecture: + +qemu-system-x86_64 \ + -machine q35,accel=kvm,default_bus_bypass_iommu=true \ + -device pxb-pcie,bus_nr=0x10,id=pci.10,bus=pcie.0,addr=0x3 \ + -device pxb-pcie,bus_nr=0x20,id=pci.20,bus=pcie.0,addr=0x4,bypass_iommu=true \ + -device intel-iommu \ + +This gives us: + - a default host bridge which bypasses the iommu + - a pxb host bridge which goes through the iommu + - a pxb host bridge which bypasses the iommu + +Limitations +=========== +There is a potential security risk when devices bypass the iommu, because such +devices might send malicious dma requests to the virtual machine without +iommu isolation. It is therefore advisable to bypass the iommu only for trusted +devices. + +Implementation +============== +The bypass iommu feature includes: + - Address space + Add a bypass_iommu property check to the PCI host and do not obtain an iommu + address space for devices that bypass the iommu. + - Arm SMMUv3 support + We traverse all PCI root buses and get their bus number ranges, then build an + explicit RID mapping for the devices which do not bypass the iommu. + - X86 IOMMU support + To support the Intel iommu, we traverse all PCI host bridges and collect + information about the devices which do not bypass the iommu, then fill the DMAR + drhd struct with explicit device scope info. To support the AMD iommu, a bypass + check is added when traversing the PCI host bridge. + - Machine and PXB options + We add bypass iommu options to the machine options for the default root bus, and + to PXB as well. Note that the default value of bypass_iommu is false, + so devices will by default go through the iommu if one exists. +
diff --git a/docs/ccid.txt b/docs/ccid.txt index c97fbd2de0a..2b85b1bd42c 100644 --- a/docs/ccid.txt +++ b/docs/ccid.txt @@ -34,15 +34,14 @@ reader and smart card (i.e. not backed by a physical device) using this device. 2. Building -The cryptographic functions and access to the physical card is done via NSS. - -Installing NSS: +The cryptographic functions and access to the physical card are done via the +libcacard library, whose development package must be installed prior to +building QEMU: In redhat/fedora: - yum install nss-devel -In ubuntu/debian: - apt-get install libnss3-dev - (not tested on ubuntu) + yum install libcacard-devel +In ubuntu: + apt-get install libcacard-dev Configuring and building: ./configure --enable-smartcard && make @@ -51,7 +50,7 @@ Configuring and building: 3. Using ccid-card-emulated with hardware Assuming you have a working smartcard on the host with the current -user, using NSS, qemu acts as another NSS client using ccid-card-emulated: +user, using libcacard, QEMU acts as another client using ccid-card-emulated: qemu -usb -device usb-ccid -device ccid-card-emulated
diff --git a/docs/conf.py b/docs/conf.py index 2ee61118725..763e7d24344 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -29,6 +29,7 @@ import os import sys import sphinx +from distutils.version import LooseVersion from sphinx.errors import ConfigError # Make Sphinx fail cleanly if using an old Python, rather than obscurely @@ -73,7 +74,7 @@ extensions = ['kerneldoc', 'qmp_lexer', 'hxtool', 'depfile', 'qapidoc'] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = [os.path.join(qemu_docdir, '_templates')] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: @@ -84,9 +85,14 @@ # The master toctree document.
master_doc = 'index' +# Interpret `single-backticks` to be a cross-reference to any kind of +# referenceable object. Unresolvable or ambiguous references will emit a +# warning at build time. +default_role = 'any' + # General information about the project. project = u'QEMU' -copyright = u'2020, The QEMU Project Developers' +copyright = u'2021, The QEMU Project Developers' author = u'The QEMU Project Developers' # The version info for the project you're documenting, acts as replacement for @@ -150,38 +156,52 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'alabaster' +try: + import sphinx_rtd_theme +except ImportError: + raise ConfigError( + 'The Sphinx \'sphinx_rtd_theme\' HTML theme was not found.\n' + ) + +html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -# We initialize this to empty here, so the per-manual conf.py can just -# add individual key/value entries. -html_theme_options = { -} +if LooseVersion(sphinx_rtd_theme.__version__) >= LooseVersion("0.4.3"): + html_theme_options = { + "style_nav_header_background": "#802400", + "navigation_with_keys": True, + } + +html_logo = os.path.join(qemu_docdir, "../ui/icons/qemu_128x128.png") + +html_favicon = os.path.join(qemu_docdir, "../ui/icons/qemu_32x32.png") # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -# QEMU doesn't yet have any static files, so comment this out so we don't -# get a warning about a missing directory. -# If we do ever add this then it would probably be better to call the -# subdirectory sphinx_static, as the Linux kernel does. -# html_static_path = ['_static'] +html_static_path = [os.path.join(qemu_docdir, "sphinx-static")] + +html_css_files = [ + 'theme_overrides.css', +] + +html_js_files = [ + 'custom.js', +] + +html_context = { + "display_gitlab": True, + "gitlab_user": "qemu-project", + "gitlab_repo": "qemu", + "gitlab_version": "master", + "conf_py_path": "/docs/", # Path in the checkout to the docs root +} # Custom sidebar templates, must be a dictionary that maps document names # to template names. -# -# This is required for the alabaster theme -# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars -html_sidebars = { - '**': [ - 'about.html', - 'editpage.html', - 'navigation.html', - 'searchbox.html', - ] -} +#html_sidebars = {} # Don't copy the rST source files to the HTML output directory, # and don't put links to the sources into the output HTML. @@ -269,6 +289,7 @@ ['Stefan Hajnoczi ', 'Masayoshi Mizuma '], 1), ] +man_make_section_directory = False # -- Options for Texinfo output ------------------------------------------- diff --git a/docs/devel/_templates/editpage.html b/docs/devel/_templates/editpage.html deleted file mode 100644 index a86d22bca86..00000000000 --- a/docs/devel/_templates/editpage.html +++ /dev/null @@ -1,5 +0,0 @@ -
- -
diff --git a/docs/devel/build-system.rst b/docs/devel/build-system.rst index 7ef36f42d0f..431caba7aa0 100644 --- a/docs/devel/build-system.rst +++ b/docs/devel/build-system.rst @@ -42,73 +42,21 @@ perform a build: ../configure make -For now, checks on the compilation environment are found in configure -rather than meson.build, though this is expected to change. The command -line is parsed in the configure script and, whenever needed, converted -into the appropriate options to Meson. - -New checks should be added to Meson, which usually comprises the -following tasks: - - - Add a Meson build option to meson_options.txt. - - - Add support to the command line arg parser to handle any new - `--enable-XXX`/`--disable-XXX` flags required by the feature. - - - Add information to the help output message to report on the new - feature flag. - - - Add code to perform the actual feature check. - - - Add code to include the feature status in `config-host.h` - - - Add code to print out the feature status in the configure summary - upon completion. - - -Taking the probe for SDL2_Image as an example, we have the following pieces -in configure:: - - # Initial variable state - sdl_image=auto - - ..snip.. - - # Configure flag processing - --disable-sdl-image) sdl_image=disabled - ;; - --enable-sdl-image) sdl_image=enabled - ;; - - ..snip.. - - # Help output feature message - sdl-image SDL Image support for icons - - ..snip.. - - # Meson invocation - -Dsdl_image=$sdl_image - -In meson_options.txt:: - - option('sdl', type : 'feature', value : 'auto', - description: 'SDL Image support for icons') - -In meson.build:: - - # Detect dependency - sdl_image = dependency('SDL2_image', required: get_option('sdl_image'), - method: 'pkg-config', - kwargs: static_kwargs) - - # Create config-host.h (if applicable) - config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found()) - - # Summary - summary_info += {'SDL image support': sdl_image.found()} - - +The configure script automatically recognizes +command line options for which a same-named Meson option exists; +dashes in the command line are replaced with underscores. + +Many checks on the compilation environment are still found in configure +rather than ``meson.build``, but new checks should be added directly to +``meson.build``. + +Patches are also welcome to move existing checks from the configure +phase to ``meson.build``. When doing so, ensure that ``meson.build`` does +not use anymore the keys that you have removed from ``config-host.mak``. +Typically these will be replaced in ``meson.build`` by boolean variables, +``get_option('optname')`` invocations, or ``dep.found()`` expressions. +In general, the remaining checks have little or no interdependencies, +so they can be moved one by one. Helper functions ---------------- @@ -116,51 +64,51 @@ Helper functions The configure script provides a variety of helper functions to assist developers in checking for system features: -`do_cc $ARGS...` +``do_cc $ARGS...`` Attempt to run the system C compiler passing it $ARGS... -`do_cxx $ARGS...` +``do_cxx $ARGS...`` Attempt to run the system C++ compiler passing it $ARGS... -`compile_object $CFLAGS` +``compile_object $CFLAGS`` Attempt to compile a test program with the system C compiler using $CFLAGS. The test program must have been previously written to a file - called $TMPC. The replacement in Meson is the compiler object `cc`, - which has methods such as `cc.compiles()`, - `cc.check_header()`, `cc.has_function()`. + called $TMPC. 
The replacement in Meson is the compiler object ``cc``, + which has methods such as ``cc.compiles()``, + ``cc.check_header()``, ``cc.has_function()``. -`compile_prog $CFLAGS $LDFLAGS` +``compile_prog $CFLAGS $LDFLAGS`` Attempt to compile a test program with the system C compiler using $CFLAGS and link it with the system linker using $LDFLAGS. The test program must have been previously written to a file called $TMPC. - The replacement in Meson is `cc.find_library()` and `cc.links()`. + The replacement in Meson is ``cc.find_library()`` and ``cc.links()``. -`has $COMMAND` +``has $COMMAND`` Determine if $COMMAND exists in the current environment, either as a shell builtin, or executable binary, returning 0 on success. The - replacement in Meson is `find_program()`. + replacement in Meson is ``find_program()``. -`check_define $NAME` +``check_define $NAME`` Determine if the macro $NAME is defined by the system C compiler -`check_include $NAME` +``check_include $NAME`` Determine if the include $NAME file is available to the system C - compiler. The replacement in Meson is `cc.has_header()`. + compiler. The replacement in Meson is ``cc.has_header()``. -`write_c_skeleton` +``write_c_skeleton`` Write a minimal C program main() function to the temporary file indicated by $TMPC -`feature_not_found $NAME $REMEDY` +``feature_not_found $NAME $REMEDY`` Print a message to stderr that the feature $NAME was not available on the system, suggesting the user try $REMEDY to address the problem. -`error_exit $MESSAGE $MORE...` +``error_exit $MESSAGE $MORE...`` Print $MESSAGE to stderr, followed by $MORE... and then exit from the configure script with non-zero status -`query_pkg_config $ARGS...` +``query_pkg_config $ARGS...`` Run pkg-config passing it $ARGS. If QEMU is doing a static build, then --static will be automatically added to $ARGS @@ -173,11 +121,11 @@ process for: 1) executables, which include: - - Tools - qemu-img, qemu-nbd, qga (guest agent), etc + - Tools - ``qemu-img``, ``qemu-nbd``, ``qga`` (guest agent), etc - - System emulators - qemu-system-$ARCH + - System emulators - ``qemu-system-$ARCH`` - - Userspace emulators - qemu-$ARCH + - Userspace emulators - ``qemu-$ARCH`` - Unit tests @@ -187,7 +135,7 @@ process for: 4) other data files, such as icons or desktop files -All executables are built by default, except for some `contrib/` +All executables are built by default, except for some ``contrib/`` binaries that are known to fail to build on some platforms (for example 32-bit or big-endian platforms). Tests are also built by default, though that might change in the future. @@ -195,14 +143,14 @@ though that might change in the future. The source code is highly modularized, split across many files to facilitate building of all of these components with as little duplicated compilation as possible. Using the Meson "sourceset" functionality, -`meson.build` files group the source files in rules that are +``meson.build`` files group the source files in rules that are enabled according to the available system libraries and to various configuration symbols. Sourcesets belong to one of four groups: Subsystem sourcesets: Various subsystems that are common to both tools and emulators have - their own sourceset, for example `block_ss` for the block device subsystem, - `chardev_ss` for the character device subsystem, etc. These sourcesets + their own sourceset, for example ``block_ss`` for the block device subsystem, + ``chardev_ss`` for the character device subsystem, etc. 
These sourcesets are then turned into static libraries as follows:: libchardev = static_library('chardev', chardev_ss.sources(), @@ -211,8 +159,8 @@ Subsystem sourcesets: chardev = declare_dependency(link_whole: libchardev) - As of Meson 0.55.1, the special `.fa` suffix should be used for everything - that is used with `link_whole`, to ensure that the link flags are placed + As of Meson 0.55.1, the special ``.fa`` suffix should be used for everything + that is used with ``link_whole``, to ensure that the link flags are placed correctly in the command line. Target-independent emulator sourcesets: @@ -221,38 +169,38 @@ Target-independent emulator sourcesets: This includes error handling infrastructure, standard data structures, platform portability wrapper functions, etc. - Target-independent code lives in the `common_ss`, `softmmu_ss` and - `user_ss` sourcesets. `common_ss` is linked into all emulators, - `softmmu_ss` only in system emulators, `user_ss` only in user-mode + Target-independent code lives in the ``common_ss``, ``softmmu_ss`` and + ``user_ss`` sourcesets. ``common_ss`` is linked into all emulators, + ``softmmu_ss`` only in system emulators, ``user_ss`` only in user-mode emulators. Target-independent sourcesets must exercise particular care when using - `if_false` rules. The `if_false` rule will be used correctly when linking + ``if_false`` rules. The ``if_false`` rule will be used correctly when linking emulator binaries; however, when *compiling* target-independent files - into .o files, Meson may need to pick *both* the `if_true` and - `if_false` sides to cater for targets that want either side. To + into .o files, Meson may need to pick *both* the ``if_true`` and + ``if_false`` sides to cater for targets that want either side. To achieve that, you can add a special rule using the ``CONFIG_ALL`` symbol:: # Some targets have CONFIG_ACPI, some don't, so this is not enough - softmmu_ss.add(when: 'CONFIG_ACPI`, if_true: files('acpi.c'), + softmmu_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi.c'), if_false: files('acpi-stub.c')) # This is required as well: - softmmu_ss.add(when: 'CONFIG_ALL`, if_true: files('acpi-stub.c')) + softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('acpi-stub.c')) Target-dependent emulator sourcesets: In the target-dependent set lives CPU emulation, some device emulation and much glue code. This sometimes also has to be compiled multiple times, once for each target being built. Target-dependent files are included - in the `specific_ss` sourceset. + in the ``specific_ss`` sourceset. - Each emulator also includes sources for files in the `hw/` and `target/` + Each emulator also includes sources for files in the ``hw/`` and ``target/`` subdirectories. The subdirectory used for each emulator comes from the target's definition of ``TARGET_BASE_ARCH`` or (if missing) - ``TARGET_ARCH``, as found in `default-configs/targets/*.mak`. + ``TARGET_ARCH``, as found in ``default-configs/targets/*.mak``. - Each subdirectory in `hw/` adds one sourceset to the `hw_arch` dictionary, + Each subdirectory in ``hw/`` adds one sourceset to the ``hw_arch`` dictionary, for example:: arm_ss = ss.source_set() @@ -262,8 +210,8 @@ Target-dependent emulator sourcesets: The sourceset is only used for system emulators. 
- Each subdirectory in `target/` instead should add one sourceset to each - of the `target_arch` and `target_softmmu_arch`, which are used respectively + Each subdirectory in ``target/`` instead should add one sourceset to each + of the ``target_arch`` and ``target_softmmu_arch``, which are used respectively for all emulators and for system emulators only. For example:: arm_ss = ss.source_set() @@ -272,16 +220,33 @@ Target-dependent emulator sourcesets: target_arch += {'arm': arm_ss} target_softmmu_arch += {'arm': arm_softmmu_ss} +Module sourcesets: + There are two dictionaries for modules: ``modules`` is used for + target-independent modules and ``target_modules`` is used for + target-dependent modules. When modules are disabled the ``module`` + source sets are added to ``softmmu_ss`` and the ``target_modules`` + source sets are added to ``specific_ss``. + + Both dictionaries are nested. One dictionary is created per + subdirectory, and these per-subdirectory dictionaries are added to + the toplevel dictionaries. For example:: + + hw_display_modules = {} + qxl_ss = ss.source_set() + ... + hw_display_modules += { 'qxl': qxl_ss } + modules += { 'hw-display': hw_display_modules } + Utility sourcesets: - All binaries link with a static library `libqemuutil.a`. This library + All binaries link with a static library ``libqemuutil.a``. This library is built from several sourcesets; most of them however host generated - code, and the only two of general interest are `util_ss` and `stub_ss`. + code, and the only two of general interest are ``util_ss`` and ``stub_ss``. The separation between these two is purely for documentation purposes. - `util_ss` contains generic utility files. Even though this code is only + ``util_ss`` contains generic utility files. Even though this code is only linked in some binaries, sometimes it requires hooks only in some of these and depend on other functions that are not fully implemented by - all QEMU binaries. `stub_ss` links dummy stubs that will only be linked + all QEMU binaries. ``stub_ss`` links dummy stubs that will only be linked into the binary if the real implementation is not present. In a way, the stubs can be thought of as a portable implementation of the weak symbols concept. @@ -290,8 +255,8 @@ Utility sourcesets: The following files concur in the definition of which files are linked into each emulator: -`default-configs/devices/*.mak` - The files under `default-configs/devices/` control the boards and devices +``default-configs/devices/*.mak`` + The files under ``default-configs/devices/`` control the boards and devices that are built into each QEMU system emulation targets. They merely contain a list of config variable definitions such as:: @@ -299,18 +264,18 @@ into each emulator: CONFIG_XLNX_ZYNQMP_ARM=y CONFIG_XLNX_VERSAL=y -`*/Kconfig` - These files are processed together with `default-configs/devices/*.mak` and +``*/Kconfig`` + These files are processed together with ``default-configs/devices/*.mak`` and describe the dependencies between various features, subsystems and device models. They are described in :ref:`kconfig` -`default-configs/targets/*.mak` - These files mostly define symbols that appear in the `*-config-target.h` +``default-configs/targets/*.mak`` + These files mostly define symbols that appear in the ``*-config-target.h`` file for each emulator [#cfgtarget]_. However, the ``TARGET_ARCH`` - and ``TARGET_BASE_ARCH`` will also be used to select the `hw/` and - `target/` subdirectories that are compiled into each target. 
+ and ``TARGET_BASE_ARCH`` will also be used to select the ``hw/`` and + ``target/`` subdirectories that are compiled into each target. -.. [#cfgtarget] This header is included by `qemu/osdep.h` when +.. [#cfgtarget] This header is included by ``qemu/osdep.h`` when compiling files from the target-specific sourcesets. These files rarely need changing unless you are adding a completely @@ -318,23 +283,77 @@ new target, or enabling new devices or hardware for a particular system/userspace emulation target +Adding checks +------------- + +New checks should be added to Meson. Compiler checks can be as simple as +the following:: + + config_host_data.set('HAVE_BTRFS_H', cc.has_header('linux/btrfs.h')) + +A more complex task such as adding a new dependency usually +comprises the following tasks: + + - Add a Meson build option to meson_options.txt. + + - Add code to perform the actual feature check. + + - Add code to include the feature status in ``config-host.h`` + + - Add code to print out the feature status in the configure summary + upon completion. + +Taking the probe for SDL2_Image as an example, we have the following +in ``meson_options.txt``:: + + option('sdl_image', type : 'feature', value : 'auto', + description: 'SDL Image support for icons') + +Unless the option was given a non-``auto`` value (on the configure +command line), the detection code must be performed only if the +dependency will be used:: + + sdl_image = not_found + if not get_option('sdl_image').auto() or have_system + sdl_image = dependency('SDL2_image', required: get_option('sdl_image'), + method: 'pkg-config', + static: enable_static) + endif + +This avoids warnings on static builds of user-mode emulators, for example. +Most of the libraries used by system-mode emulators are not available for +static linking. + +The other supporting code is generally simple:: + + # Create config-host.h (if applicable) + config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found()) + + # Summary + summary_info += {'SDL image support': sdl_image.found()} + +For the configure script to parse the new option, the +``scripts/meson-buildoptions.sh`` file must be up-to-date; ``make +update-buildoptions`` (or just ``make``) will take care of updating it. + + Support scripts --------------- Meson has a special convention for invoking Python scripts: if their -first line is `#! /usr/bin/env python3` and the file is *not* executable, +first line is ``#! /usr/bin/env python3`` and the file is *not* executable, find_program() arranges to invoke the script under the same Python interpreter that was used to invoke Meson. This is the most common and preferred way to invoke support scripts from Meson build files, because it automatically uses the value of configure's --python= option. -In case the script is not written in Python, use a `#! /usr/bin/env ...` +In case the script is not written in Python, use a ``#! /usr/bin/env ...`` line and make the script executable. Scripts written in Python, where it is desirable to make the script executable (for example for test scripts that developers may want to invoke from the command line, such as tests/qapi-schema/test-qapi.py), -should be invoked through the `python` variable in meson.build. For +should be invoked through the ``python`` variable in meson.build. For example:: test('QAPI schema regression tests', python, @@ -358,10 +377,20 @@ rules and wraps them so that e.g. submodules are built before QEMU. 
The resulting build system is largely non-recursive in nature, in contrast to common practices seen with automake. -Tests are also ran by the Makefile with the traditional `make check` -phony target, while benchmarks are run with `make bench`. Meson test -suites such as `unit` can be ran with `make check-unit` too. It is also -possible to run tests defined in meson.build with `meson test`. +Tests are also ran by the Makefile with the traditional ``make check`` +phony target, while benchmarks are run with ``make bench``. Meson test +suites such as ``unit`` can be ran with ``make check-unit`` too. It is also +possible to run tests defined in meson.build with ``meson test``. + +Useful make targets +------------------- + +``help`` + Print a help message for the most common build targets. + +``print-VAR`` + Print the value of the variable VAR. Useful for debugging the build + system. Important files for the build system ==================================== @@ -373,28 +402,28 @@ The following key files are statically defined in the source tree, with the rules needed to build QEMU. Their behaviour is influenced by a number of dynamically created files listed later. -`Makefile` +``Makefile`` The main entry point used when invoking make to build all the components of QEMU. The default 'all' target will naturally result in the build of every component. Makefile takes care of recursively building submodules directly via a non-recursive set of rules. -`*/meson.build` +``*/meson.build`` The meson.build file in the root directory is the main entry point for the Meson build system, and it coordinates the configuration and build of all executables. Build rules for various subdirectories are included in other meson.build files spread throughout the QEMU source tree. -`tests/Makefile.include` +``tests/Makefile.include`` Rules for external test harnesses. These include the TCG tests, - `qemu-iotests` and the Avocado-based acceptance tests. + ``qemu-iotests`` and the Avocado-based integration tests. -`tests/docker/Makefile.include` +``tests/docker/Makefile.include`` Rules for Docker tests. Like tests/Makefile, this file is included directly by the top level Makefile, anything defined in this file will influence the entire build system. -`tests/vm/Makefile.include` +``tests/vm/Makefile.include`` Rules for VM-based tests. Like tests/Makefile, this file is included directly by the top level Makefile, anything defined in this file will influence the entire build system. @@ -410,11 +439,11 @@ Makefile. Built by configure: -`config-host.mak` +``config-host.mak`` When configure has determined the characteristics of the build host it will write a long list of variables to config-host.mak file. This provides the various install directories, compiler / linker flags and a - variety of `CONFIG_*` variables related to optionally enabled features. + variety of ``CONFIG_*`` variables related to optionally enabled features. This is imported by the top level Makefile and meson.build in order to tailor the build output. @@ -429,41 +458,29 @@ Built by configure: Built by Meson: -`${TARGET-NAME}-config-devices.mak` +``${TARGET-NAME}-config-devices.mak`` TARGET-NAME is again the name of a system or userspace emulator. The config-devices.mak file is automatically generated by make using the scripts/make_device_config.sh program, feeding it the default-configs/$TARGET-NAME file as input. 
-`config-host.h`, `$TARGET-NAME/config-target.h`, `$TARGET-NAME/config-devices.h` - These files are used by source code to determine what features - are enabled. They are generated from the contents of the corresponding - `*.h` files using the scripts/create_config program. This extracts - relevant variables and formats them as C preprocessor macros. +``config-host.h``, ``$TARGET_NAME-config-target.h``, ``$TARGET_NAME-config-devices.h`` + These files are used by source code to determine what features are + enabled. They are generated from the contents of the corresponding + ``*.mak`` files using Meson's ``configure_file()`` function. -`build.ninja` +``build.ninja`` The build rules. Built by Makefile: -`Makefile.ninja` +``Makefile.ninja`` A Makefile include that bridges to ninja for the actual build. The Makefile is mostly a list of targets that Meson included in build.ninja. -`Makefile.mtest` +``Makefile.mtest`` The Makefile definitions that let "make check" run tests defined in meson.build. The rules are produced from Meson's JSON description of tests (obtained with "meson introspect --tests") through the script scripts/mtest2make.py. - - -Useful make targets -------------------- - -`help` - Print a help message for the most common build targets. - -`print-VAR` - Print the value of the variable VAR. Useful for debugging the build - system. diff --git a/docs/devel/ci-definitions.rst.inc b/docs/devel/ci-definitions.rst.inc new file mode 100644 index 00000000000..6d5c6fd9f20 --- /dev/null +++ b/docs/devel/ci-definitions.rst.inc @@ -0,0 +1,121 @@ +Definition of terms +=================== + +This section defines the terms used in this document and correlates them with +what is currently used on QEMU. + +Automated tests +--------------- + +An automated test is written on a test framework using its generic test +functions/classes. The test framework can run the tests and report their +success or failure [1]_. + +An automated test has essentially three parts: + +1. The test initialization of the parameters, where the expected parameters, + like inputs and expected results, are set up; +2. The call to the code that should be tested; +3. An assertion, comparing the result from the previous call with the expected + result set during the initialization of the parameters. If the result + matches the expected result, the test has been successful; otherwise, it has + failed. + +Unit testing +------------ + +A unit test is responsible for exercising individual software components as a +unit, like interfaces, data structures, and functionality, uncovering errors +within the boundaries of a component. The verification effort is in the +smallest software unit and focuses on the internal processing logic and data +structures. A test case of unit tests should be designed to uncover errors due +to erroneous computations, incorrect comparisons, or improper control flow [2]_. + +On QEMU, unit testing is represented by the 'check-unit' target from 'make'. + +Functional testing +------------------ + +A functional test focuses on the functional requirement of the software. +Deriving sets of input conditions, the functional tests should fully exercise +all the functional requirements for a program. Functional testing is +complementary to other testing techniques, attempting to find errors like +incorrect or missing functions, interface errors, behavior errors, and +initialization and termination errors [3]_. + +On QEMU, functional testing is represented by the 'check-qtest' target from +'make'. 
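+As a quick illustration (the exact per-target qtest suite names depend on which
+emulation targets were configured), these two levels are typically invoked as::
+
+  make check-unit
+  make check-qtest-x86_64
+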
+ +System testing +-------------- + +System tests ensure all application elements mesh properly while the overall +functionality and performance are achieved [4]_. Some or all system components +are integrated to create a complete system to be tested as a whole. System +testing ensures that components are compatible, interact correctly, and +transfer the right data at the right time across their interfaces. As system +testing focuses on interactions, use case-based testing is a practical approach +to system testing [5]_. Note that, in some cases, system testing may require +interaction with third-party software, like operating system images, databases, +networks, and so on. + +On QEMU, system testing is represented by the 'check-avocado' target from +'make'. + +Flaky tests +----------- + +A flaky test is defined as a test that exhibits both a passing and a failing +result with the same code on different runs. Some usual reasons for an +intermittent/flaky test are async wait, concurrency, and test order dependency +[6]_. + +Gating +------ + +A gate restricts the move of code from one stage to another on a +test/deployment pipeline. The step move is granted with approval. The approval +can be a manual intervention or a set of tests succeeding [7]_. + +On QEMU, the gating process happens during the pull request. The approval is +done by the project leader running its own set of tests. The pull request gets +merged when the tests succeed. + +Continuous Integration (CI) +--------------------------- + +Continuous integration (CI) requires the builds of the entire application and +the execution of a comprehensive set of automated tests every time there is a +need to commit any set of changes [8]_. The automated tests can be composed of +the unit, functional, system, and other tests. + +Keynotes about continuous integration (CI) [9]_: + +1. System tests may depend on external software (operating system images, + firmware, database, network). +2. It may take a long time to build and test. It may be impractical to build + the system being developed several times per day. +3. If the development platform is different from the target platform, it may + not be possible to run system tests in the developer’s private workspace. + There may be differences in hardware, operating system, or installed + software. Therefore, more time is required for testing the system. + +References +---------- + +.. [1] Sommerville, Ian (2016). Software Engineering. p. 233. +.. [2] Pressman, Roger S. & Maxim, Bruce R. (2020). Software Engineering, + A Practitioner’s Approach. p. 48, 376, 378, 381. +.. [3] Pressman, Roger S. & Maxim, Bruce R. (2020). Software Engineering, + A Practitioner’s Approach. p. 388. +.. [4] Pressman, Roger S. & Maxim, Bruce R. (2020). Software Engineering, + A Practitioner’s Approach. Software Engineering, p. 377. +.. [5] Sommerville, Ian (2016). Software Engineering. p. 59, 232, 240. +.. [6] Luo, Qingzhou, et al. An empirical analysis of flaky tests. + Proceedings of the 22nd ACM SIGSOFT International Symposium on + Foundations of Software Engineering. 2014. +.. [7] Humble, Jez & Farley, David (2010). Continuous Delivery: + Reliable Software Releases Through Build, Test, and Deployment, p. 122. +.. [8] Humble, Jez & Farley, David (2010). Continuous Delivery: + Reliable Software Releases Through Build, Test, and Deployment, p. 55. +.. [9] Sommerville, Ian (2016). Software Engineering. p. 743. 
diff --git a/docs/devel/ci-jobs.rst.inc b/docs/devel/ci-jobs.rst.inc new file mode 100644 index 00000000000..db3f571d5f3 --- /dev/null +++ b/docs/devel/ci-jobs.rst.inc @@ -0,0 +1,58 @@ +Custom CI/CD variables +====================== + +QEMU CI pipelines can be tuned by setting some CI environment variables. + +Set variable globally in the user's CI namespace +------------------------------------------------ + +Variables can be set globally in the user's CI namespace setting. + +For further information about how to set these variables, please refer to:: + + https://docs.gitlab.com/ee/ci/variables/#add-a-cicd-variable-to-a-project + +Set variable manually when pushing a branch or tag to the user's repository +--------------------------------------------------------------------------- + +Variables can be set manually when pushing a branch or tag, using +git-push command line arguments. + +Example setting the QEMU_CI_EXAMPLE_VAR variable: + +.. code:: + + git push -o ci.variable="QEMU_CI_EXAMPLE_VAR=value" myrepo mybranch + +For further information about how to set these variables, please refer to:: + + https://docs.gitlab.com/ee/user/project/push_options.html#push-options-for-gitlab-cicd + +Here is a list of the most used variables: + +QEMU_CI_AVOCADO_TESTING +~~~~~~~~~~~~~~~~~~~~~~~ +By default, tests using the Avocado framework are not run automatically in +the pipelines (because multiple artifacts have to be downloaded, and if +these artifacts are not already cached, downloading them make the jobs +reach the timeout limit). Set this variable to have the tests using the +Avocado framework run automatically. + +AARCH64_RUNNER_AVAILABLE +~~~~~~~~~~~~~~~~~~~~~~~~ +If you've got access to an aarch64 host that can be used as a gitlab-CI +runner, you can set this variable to enable the tests that require this +kind of host. The runner should be tagged with "aarch64". + +S390X_RUNNER_AVAILABLE +~~~~~~~~~~~~~~~~~~~~~~ +If you've got access to an IBM Z host that can be used as a gitlab-CI +runner, you can set this variable to enable the tests that require this +kind of host. The runner should be tagged with "s390x". + +CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If you've got access to a CentOS Stream 8 x86_64 host that can be +used as a gitlab-CI runner, you can set this variable to enable the +tests that require this kind of host. The runner should be tagged with +both "centos_stream_8" and "x86_64". diff --git a/docs/devel/ci-runners.rst.inc b/docs/devel/ci-runners.rst.inc new file mode 100644 index 00000000000..7817001fb28 --- /dev/null +++ b/docs/devel/ci-runners.rst.inc @@ -0,0 +1,117 @@ +Jobs on Custom Runners +====================== + +Besides the jobs run under the various CI systems listed before, there +are a number additional jobs that will run before an actual merge. +These use the same GitLab CI's service/framework already used for all +other GitLab based CI jobs, but rely on additional systems, not the +ones provided by GitLab as "shared runners". + +The architecture of GitLab's CI service allows different machines to +be set up with GitLab's "agent", called gitlab-runner, which will take +care of running jobs created by events such as a push to a branch. +Here, the combination of a machine, properly configured with GitLab's +gitlab-runner, is called a "custom runner". + +The GitLab CI jobs definition for the custom runners are located under:: + + .gitlab-ci.d/custom-runners.yml + +Custom runners entail custom machines. 
To see a list of the machines +currently deployed in the QEMU GitLab CI and their maintainers, please +refer to the QEMU `wiki `__. + +Machine Setup Howto +------------------- + +For all Linux based systems, the setup can be mostly automated by the +execution of two Ansible playbooks. Create an ``inventory`` file +under ``scripts/ci/setup``, such as this:: + + fully.qualified.domain + other.machine.hostname + +You may need to set some variables in the inventory file itself. One +very common need is to tell Ansible to use a Python 3 interpreter on +those hosts. This would look like:: + + fully.qualified.domain ansible_python_interpreter=/usr/bin/python3 + other.machine.hostname ansible_python_interpreter=/usr/bin/python3 + +Build environment +~~~~~~~~~~~~~~~~~ + +The ``scripts/ci/setup/build-environment.yml`` Ansible playbook will +set up machines with the environment needed to perform builds and run +QEMU tests. This playbook consists on the installation of various +required packages (and a general package update while at it). It +currently covers a number of different Linux distributions, but it can +be expanded to cover other systems. + +The minimum required version of Ansible successfully tested in this +playbook is 2.8.0 (a version check is embedded within the playbook +itself). To run the playbook, execute:: + + cd scripts/ci/setup + ansible-playbook -i inventory build-environment.yml + +Please note that most of the tasks in the playbook require superuser +privileges, such as those from the ``root`` account or those obtained +by ``sudo``. If necessary, please refer to ``ansible-playbook`` +options such as ``--become``, ``--become-method``, ``--become-user`` +and ``--ask-become-pass``. + +gitlab-runner setup and registration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The gitlab-runner agent needs to be installed on each machine that +will run jobs. The association between a machine and a GitLab project +happens with a registration token. To find the registration token for +your repository/project, navigate on GitLab's web UI to: + + * Settings (the gears-like icon at the bottom of the left hand side + vertical toolbar), then + * CI/CD, then + * Runners, and click on the "Expand" button, then + * Under "Set up a specific Runner manually", look for the value under + "And this registration token:" + +Copy the ``scripts/ci/setup/vars.yml.template`` file to +``scripts/ci/setup/vars.yml``. Then, set the +``gitlab_runner_registration_token`` variable to the value obtained +earlier. + +To run the playbook, execute:: + + cd scripts/ci/setup + ansible-playbook -i inventory gitlab-runner.yml + +Following the registration, it's necessary to configure the runner tags, +and optionally other configurations on the GitLab UI. Navigate to: + + * Settings (the gears like icon), then + * CI/CD, then + * Runners, and click on the "Expand" button, then + * "Runners activated for this project", then + * Click on the "Edit" icon (next to the "Lock" Icon) + +Tags are very important as they are used to route specific jobs to +specific types of runners, so it's a good idea to double check that +the automatically created tags are consistent with the OS and +architecture. 
For instance, an Ubuntu 20.04 aarch64 system should +have tags set as:: + + ubuntu_20.04,aarch64 + +Because the job definition at ``.gitlab-ci.d/custom-runners.yml`` +would contain:: + + ubuntu-20.04-aarch64-all: + tags: + - ubuntu_20.04 + - aarch64 + +It's also recommended to: + + * increase the "Maximum job timeout" to something like ``2h`` + * give it a better Description diff --git a/docs/devel/ci.rst b/docs/devel/ci.rst new file mode 100644 index 00000000000..d106610096e --- /dev/null +++ b/docs/devel/ci.rst @@ -0,0 +1,13 @@ +== +CI +== + +QEMU has configurations enabled for a number of different CI services. +The most up to date information about them and their status can be +found at:: + + https://wiki.qemu.org/Testing/CI + +.. include:: ci-definitions.rst.inc +.. include:: ci-jobs.rst.inc +.. include:: ci-runners.rst.inc diff --git a/docs/devel/clocks.rst b/docs/devel/clocks.rst index 956bd147ea0..675fbeb6abe 100644 --- a/docs/devel/clocks.rst +++ b/docs/devel/clocks.rst @@ -260,6 +260,29 @@ clocks get the new clock period value: *Clock 2*, *Clock 3* and *Clock 4*. It is not possible to disconnect a clock or to change the clock connection after it is connected. +Clock multiplier and divider settings +------------------------------------- + +By default, when clocks are connected together, the child +clocks run with the same period as their source (parent) clock. +The Clock API supports a built-in period multiplier/divider +mechanism so you can configure a clock to make its children +run at a different period from its own. If you call the +``clock_set_mul_div()`` function you can specify the clock's +multiplier and divider values. The children of that clock +will all run with a period of ``parent_period * multiplier / divider``. +For instance, if the clock has a frequency of 8MHz and you set its +multiplier to 2 and its divider to 3, the child clocks will run +at 12MHz. + +You can change the multiplier and divider of a clock at runtime, +so you can use this to model clock controller devices which +have guest-programmable frequency multipliers or dividers. + +Note that ``clock_set_mul_div()`` does not automatically call +``clock_propagate()``. If you make a runtime change to the +multiplier or divider you must call clock_propagate() yourself. + Unconnected input clocks ------------------------ diff --git a/docs/devel/code-of-conduct.rst b/docs/devel/code-of-conduct.rst index 277b5250d13..195444d1b48 100644 --- a/docs/devel/code-of-conduct.rst +++ b/docs/devel/code-of-conduct.rst @@ -55,6 +55,6 @@ Sources ------- This document is based on the `Fedora Code of Conduct -`__ and the -`Contributor Covenant version 1.3.0 +`__ +(as of April 2021) and the `Contributor Covenant version 1.3.0 `__. diff --git a/docs/devel/control-flow-integrity.rst b/docs/devel/control-flow-integrity.rst index d89d70733d8..e6b73a4fe1a 100644 --- a/docs/devel/control-flow-integrity.rst +++ b/docs/devel/control-flow-integrity.rst @@ -39,7 +39,7 @@ later). Given the use of LTO, a version of AR that supports LLVM IR is required. The easies way of doing this is by selecting the AR provided by LLVM:: - AR=llvm-ar-9 CC=clang-9 CXX=lang++-9 /path/to/configure --enable-cfi + AR=llvm-ar-9 CC=clang-9 CXX=clang++-9 /path/to/configure --enable-cfi CFI is enabled on every binary produced. @@ -131,7 +131,7 @@ lld with version 11+. 
In other words, to compile with fuzzing and CFI, clang 11+ is required, and lld needs to be used as a linker:: - AR=llvm-ar-11 CC=clang-11 CXX=lang++-11 /path/to/configure --enable-cfi \ + AR=llvm-ar-11 CC=clang-11 CXX=clang++-11 /path/to/configure --enable-cfi \ -enable-fuzzing --extra-ldflags="-fuse-ld=lld" and then, compile the fuzzers as usual. diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst index 33a576058ca..68f6065d642 100644 --- a/docs/devel/decodetree.rst +++ b/docs/devel/decodetree.rst @@ -40,9 +40,6 @@ and returns an integral value extracted from there. A field with no ``unnamed_fields`` and no ``!function`` is in error. -FIXME: the fields of the structure into which this result will be stored -is restricted to ``int``. Which means that we cannot expand 64-bit items. - Field examples: +---------------------------+---------------------------------------------+ @@ -66,9 +63,14 @@ Argument Sets Syntax:: args_def := '&' identifier ( args_elt )+ ( !extern )? - args_elt := identifier + args_elt := identifier (':' identifier)? Each *args_elt* defines an argument within the argument set. +If the form of the *args_elt* contains a colon, the first +identifier is the argument name and the second identifier is +the argument type. If the colon is missing, the argument +type will be ``int``. + Each argument set will be rendered as a C structure "arg_$name" with each of the fields being one of the member arguments. @@ -86,6 +88,7 @@ Argument set examples:: &reg3 ra rb rc &loadstore reg base offset + &longldst reg base offset:int64_t Formats diff --git a/docs/devel/ebpf_rss.rst b/docs/devel/ebpf_rss.rst new file mode 100644 index 00000000000..4a68682b31a --- /dev/null +++ b/docs/devel/ebpf_rss.rst @@ -0,0 +1,125 @@ +=========================== +eBPF RSS virtio-net support +=========================== + +RSS (Receive Side Scaling) is used to distribute network packets to guest virtqueues +by calculating a packet hash. Usually every queue is then processed by a specific guest CPU core. + +For now there are two RSS implementations in QEMU: +- 'in-qemu' RSS (works only if QEMU receives the network packets, i.e. vhost=off) +- eBPF RSS (can also function with vhost=on) + +eBPF support (CONFIG_EBPF) is enabled by the 'configure' script. +To enable eBPF RSS support use './configure --enable-bpf'. + +If no steering BPF is set for the kernel's TUN module, TUN selects the rx virtqueue automatically, +based on a lookup table built from the calculated symmetric hash +of transmitted packets. +If a steering BPF is set for TUN, the BPF code calculates the hash of the packet header and +returns the number of the virtqueue to place the packet in. + +Simplified decision formula: + +.. code:: C + + queue_index = indirection_table[hash(packet_data) % indirection_table_size] + + +The hash cannot (or should not) be calculated for every packet. + +Note: currently, eBPF RSS does not support hash reporting. + +eBPF RSS is turned on by different combinations of vhost-net, virtio-net and tap configurations: + +- eBPF is used: + + tap,vhost=off & virtio-net-pci,rss=on,hash=off + +- eBPF is used: + + tap,vhost=on & virtio-net-pci,rss=on,hash=off + +- 'in-qemu' RSS is used: + + tap,vhost=off & virtio-net-pci,rss=on,hash=on + +- eBPF is used, hash population feature is not reported to the guest: + + tap,vhost=on & virtio-net-pci,rss=on,hash=on + +If CONFIG_EBPF is not set then only 'in-qemu' RSS is supported. +'in-qemu' RSS is also used as a fallback if the eBPF program failed to load or could not be set on TUN.
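To make the decision formula above concrete, here is a small illustrative sketch (the function and parameter names are made up; the real selection logic lives in the eBPF program under tools/ebpf/):

.. code:: C

    /* Illustrative only: pick an rx virtqueue from the packet hash
     * using the RSS indirection table, as in the formula above. */
    static uint16_t rss_select_queue(uint32_t packet_hash,
                                     const uint16_t *indirection_table,
                                     unsigned int table_len)
    {
        return indirection_table[packet_hash % table_len];
    }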
+ +RSS eBPF program +---------------- + +The RSS program is located in ebpf/rss.bpf.skeleton.h, which is generated by bpftool. +So the program is part of the QEMU binary. +Initially, the eBPF program was compiled by clang, and its source code is located at tools/ebpf/rss.bpf.c. +Prerequisites to recompile the eBPF program (regenerate ebpf/rss.bpf.skeleton.h): + + llvm, clang, kernel source tree, bpftool + Adjust Makefile.ebpf to reflect the location of the kernel source tree + + $ cd tools/ebpf + $ make -f Makefile.ebpf + +The current eBPF RSS implementation uses 'bounded loops' with 'backward jump instructions', which are only present in recent kernels. +Overall, eBPF RSS works on kernels 5.8+. + +eBPF RSS implementation +----------------------- + +The eBPF RSS loading functionality is located in ebpf/ebpf_rss.c and ebpf/ebpf_rss.h. + +The ``struct EBPFRSSContext`` structure holds the libbpf context and 4 file descriptors: + +- ctx - pointer to the libbpf context. +- program_fd - file descriptor of the eBPF RSS program. +- map_configuration - file descriptor of the 'configuration' map. This map contains one element of 'struct EBPFRSSConfig'. This configuration determines eBPF program behavior. +- map_toeplitz_key - file descriptor of the 'Toeplitz key' map. It contains one element, the 40-byte key prepared for the hashing algorithm. +- map_indirections_table - 128 elements of queue indexes. + +``struct EBPFRSSConfig`` fields: + +- redirect - "boolean" value; whether the hash should be calculated. If false, ``default_queue`` is used as the final decision. +- populate_hash - not used for now. eBPF RSS doesn't support hash reporting. +- hash_types - binary mask of different hash types. See the ``VIRTIO_NET_RSS_HASH_TYPE_*`` defines. If the hash should not be calculated for a packet, ``default_queue`` is used. +- indirections_len - length of the indirections table, maximum 128. +- default_queue - the queue index used for packets that shouldn't be hashed. For some packets the hash can't be calculated (e.g. ARP). + +Functions: + +- ``ebpf_rss_init()`` - sets ctx to NULL, which indicates that EBPFRSSContext is not loaded. +- ``ebpf_rss_load()`` - creates 3 maps and loads the eBPF program from rss.bpf.skeleton.h. Returns 'true' on success. After that, program_fd can be used to set steering for TAP. +- ``ebpf_rss_set_all()`` - sets values for the eBPF maps. The ``indirections_table`` length is in EBPFRSSConfig. ``toeplitz_key`` is a VIRTIO_NET_RSS_MAX_KEY_SIZE (40 bytes) array. +- ``ebpf_rss_unload()`` - closes all file descriptors and sets ctx to NULL. + +Simplified eBPF RSS workflow: + +.. code:: C + + struct EBPFRSSConfig config; + config.redirect = 1; + config.hash_types = VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4; + config.indirections_len = VIRTIO_NET_RSS_MAX_TABLE_LEN; + config.default_queue = 0; + + uint16_t table[VIRTIO_NET_RSS_MAX_TABLE_LEN] = {...}; + uint8_t key[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {...}; + + struct EBPFRSSContext ctx; + ebpf_rss_init(&ctx); + ebpf_rss_load(&ctx); + ebpf_rss_set_all(&ctx, &config, table, key); + if (net_client->info->set_steering_ebpf != NULL) { + net_client->info->set_steering_ebpf(net_client, ctx.program_fd); + } + ... + ebpf_rss_unload(&ctx); + + +NetClientState SetSteeringEBPF() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For now, the ``set_steering_ebpf()`` method is supported by the Linux TAP NetClientState. The method requires an eBPF program file descriptor as an argument.
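As a rough sketch of what such a method boils down to for a TAP backend, assuming the Linux ``TUNSETSTEERINGEBPF`` ioctl (error handling and QEMU specifics omitted; the helper name is made up):

.. code:: C

    #include <stdbool.h>
    #include <sys/ioctl.h>
    #include <linux/if_tun.h>

    /* Attach (or, with prog_fd == -1, detach) a steering eBPF program
     * to an already-open TUN/TAP file descriptor. */
    static bool tap_like_set_steering_ebpf(int tun_fd, int prog_fd)
    {
        return ioctl(tun_fd, TUNSETSTEERINGEBPF, &prog_fd) == 0;
    }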
diff --git a/docs/devel/fuzzing.rst b/docs/devel/fuzzing.rst index 2749bb9bed3..784ecb99e66 100644 --- a/docs/devel/fuzzing.rst +++ b/docs/devel/fuzzing.rst @@ -182,10 +182,11 @@ The output should contain a complete list of matched MemoryRegions. OSS-Fuzz -------- -QEMU is continuously fuzzed on `OSS-Fuzz` __(https://github.com/google/oss-fuzz). -By default, the OSS-Fuzz build will try to fuzz every fuzz-target. Since the -generic-fuzz target requires additional information provided in environment -variables, we pre-define some generic-fuzz configs in +QEMU is continuously fuzzed on `OSS-Fuzz +`_. By default, the OSS-Fuzz build +will try to fuzz every fuzz-target. Since the generic-fuzz target +requires additional information provided in environment variables, we +pre-define some generic-fuzz configs in ``tests/qtest/fuzz/generic_fuzz_configs.h``. Each config must specify: - ``.name``: To identify the fuzzer config diff --git a/docs/devel/index.rst b/docs/devel/index.rst index 6cf7e2d2330..afd937535e9 100644 --- a/docs/devel/index.rst +++ b/docs/devel/index.rst @@ -1,15 +1,11 @@ -.. This is the top level page for the 'devel' manual. - - +--------------------- Developer Information -===================== +--------------------- -This manual documents various parts of the internals of QEMU. +This section of the manual documents various parts of the internals of QEMU. You only need to read it if you are interested in reading or modifying QEMU's source code. -Contents: - .. toctree:: :maxdepth: 2 :includehidden: @@ -27,6 +23,7 @@ Contents: migration atomics stable-process + ci qtest decodetree secure-coding-practices @@ -36,9 +33,18 @@ Contents: multi-thread-tcg tcg-plugins bitops + ui reset s390-dasd-ipl clocks qom + modules block-coroutine-wrapper multi-process + ebpf_rss + vfio-migration + qapi-code-gen + writing-monitor-commands + trivial-patches + submitting-a-patch + submitting-a-pull-request diff --git a/docs/devel/kconfig.rst b/docs/devel/kconfig.rst index cb2d7ffac0f..a1cdbec7512 100644 --- a/docs/devel/kconfig.rst +++ b/docs/devel/kconfig.rst @@ -303,5 +303,5 @@ variable:: host_kconfig = \ ('CONFIG_TPM' in config_host ? ['CONFIG_TPM=y'] : []) + \ ('CONFIG_SPICE' in config_host ? ['CONFIG_SPICE=y'] : []) + \ - ('CONFIG_IVSHMEM' in config_host ? ['CONFIG_IVSHMEM=y'] : []) + \ + (have_ivshmem ? ['CONFIG_IVSHMEM=y'] : []) + \ ... diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst index 568274baec0..8f0035c821b 100644 --- a/docs/devel/loads-stores.rst +++ b/docs/devel/loads-stores.rst @@ -68,15 +68,19 @@ Regexes for git grep - ``\`` - ``\`` -``cpu_{ld,st}*_mmuidx_ra`` -~~~~~~~~~~~~~~~~~~~~~~~~~~ +``cpu_{ld,st}*_mmu`` +~~~~~~~~~~~~~~~~~~~~ -These functions operate on a guest virtual address plus a context, -known as a "mmu index" or ``mmuidx``, which controls how that virtual -address is translated. The meaning of the indexes are target specific, -but specifying a particular index might be necessary if, for instance, -the helper requires an "always as non-privileged" access rather that -the default access for the current state of the guest CPU. +These functions operate on a guest virtual address, plus a context +known as a "mmu index" which controls how that virtual address is +translated, plus a ``MemOp`` which contains alignment requirements +among other things. The ``MemOp`` and mmu index are combined into +a single argument of type ``MemOpIdx``. 
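For example, a minimal sketch, assuming a TCG helper context where ``GETPC()`` yields the return address into the generated code, of an aligned little-endian 32-bit load through this interface:

.. code:: c

    /* Sketch: combine a MemOp (size, endianness, alignment) and an mmu
     * index into a MemOpIdx, then perform the load. */
    MemOpIdx oi = make_memop_idx(MO_LE | MO_32 | MO_ALIGN, mmu_idx);
    uint32_t val = cpu_ldl_le_mmu(env, addr, oi, GETPC());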
+ +The meaning of the indexes are target specific, but specifying a +particular index might be necessary if, for instance, the helper +requires a "always as non-privileged" access rather than the +default access for the current state of the guest CPU. These functions may cause a guest CPU exception to be taken (e.g. for an alignment fault or MMU fault) which will result in @@ -99,6 +103,35 @@ function, which is a return address into the generated code [#gpc]_. Function names follow the pattern: +load: ``cpu_ld{size}{end}_mmu(env, ptr, oi, retaddr)`` + +store: ``cpu_st{size}{end}_mmu(env, ptr, val, oi, retaddr)`` + +``size`` + - ``b`` : 8 bits + - ``w`` : 16 bits + - ``l`` : 32 bits + - ``q`` : 64 bits + +``end`` + - (empty) : for target endian, or 8 bit sizes + - ``_be`` : big endian + - ``_le`` : little endian + +Regexes for git grep: + - ``\`` + - ``\`` + + +``cpu_{ld,st}*_mmuidx_ra`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These functions work like the ``cpu_{ld,st}_mmu`` functions except +that the ``mmuidx`` parameter is not combined with a ``MemOp``, +and therefore there is no required alignment supplied or enforced. + +Function names follow the pattern: + load: ``cpu_ld{sign}{size}{end}_mmuidx_ra(env, ptr, mmuidx, retaddr)`` store: ``cpu_st{size}{end}_mmuidx_ra(env, ptr, val, mmuidx, retaddr)`` @@ -132,7 +165,8 @@ of the guest CPU, as determined by ``cpu_mmu_index(env, false)``. These are generally the preferred way to do accesses by guest virtual address from helper functions, unless the access should -be performed with a context other than the default. +be performed with a context other than the default, or alignment +should be enforced for the access. Function names follow the pattern: diff --git a/docs/devel/lockcnt.txt b/docs/devel/lockcnt.txt index 2d85462fe37..a3fb3bc5d8d 100644 --- a/docs/devel/lockcnt.txt +++ b/docs/devel/lockcnt.txt @@ -145,7 +145,7 @@ can also be more efficient in two ways: - on some platforms, one can implement QemuLockCnt to hold the lock and the mutex in a single word, making the fast path no more expensive than simply managing a counter using atomic operations (see - docs/devel/atomics.txt). This can be very helpful if concurrent access to + docs/devel/atomics.rst). This can be very helpful if concurrent access to the data structure is expected to be rare. diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst index 19c3d4f3eac..24012534827 100644 --- a/docs/devel/migration.rst +++ b/docs/devel/migration.rst @@ -53,7 +53,7 @@ savevm/loadvm functionality. Debugging ========= -The migration stream can be analyzed thanks to `scripts/analyze-migration.py`. +The migration stream can be analyzed thanks to ``scripts/analyze-migration.py``. Example usage: @@ -75,8 +75,8 @@ Common infrastructure ===================== The files, sockets or fd's that carry the migration stream are abstracted by -the ``QEMUFile`` type (see `migration/qemu-file.h`). In most cases this -is connected to a subtype of ``QIOChannel`` (see `io/`). +the ``QEMUFile`` type (see ``migration/qemu-file.h``). In most cases this +is connected to a subtype of ``QIOChannel`` (see ``io/``). Saving the state of one device @@ -166,14 +166,14 @@ An example (from hw/input/pckbd.c) }; We are declaring the state with name "pckbd". -The `version_id` is 3, and the fields are 4 uint8_t in a KBDState structure. +The ``version_id`` is 3, and the fields are 4 uint8_t in a KBDState structure. We registered this with: .. 
code:: c vmstate_register(NULL, 0, &vmstate_kbd, s); -For devices that are `qdev` based, we can register the device in the class +For devices that are ``qdev`` based, we can register the device in the class init function: .. code:: c @@ -210,9 +210,9 @@ another to load the state back. SaveVMHandlers *ops, void *opaque); -Two functions in the ``ops`` structure are the `save_state` -and `load_state` functions. Notice that `load_state` receives a version_id -parameter to know what state format is receiving. `save_state` doesn't +Two functions in the ``ops`` structure are the ``save_state`` +and ``load_state`` functions. Notice that ``load_state`` receives a version_id +parameter to know what state format is receiving. ``save_state`` doesn't have a version_id parameter because it always uses the latest version. Note that because the VMState macros still save the data in a raw @@ -385,18 +385,18 @@ migration of a device, and using them breaks backward-migration compatibility; in general most changes can be made by adding Subsections (see above) or _TEST macros (see above) which won't break compatibility. -Each version is associated with a series of fields saved. The `save_state` always saves -the state as the newer version. But `load_state` sometimes is able to +Each version is associated with a series of fields saved. The ``save_state`` always saves +the state as the newer version. But ``load_state`` sometimes is able to load state from an older version. You can see that there are several version fields: -- `version_id`: the maximum version_id supported by VMState for that device. -- `minimum_version_id`: the minimum version_id that VMState is able to understand +- ``version_id``: the maximum version_id supported by VMState for that device. +- ``minimum_version_id``: the minimum version_id that VMState is able to understand for that device. -- `minimum_version_id_old`: For devices that were not able to port to vmstate, we can +- ``minimum_version_id_old``: For devices that were not able to port to vmstate, we can assign a function that knows how to read this old state. This field is - ignored if there is no `load_state_old` handler. + ignored if there is no ``load_state_old`` handler. VMState is able to read versions from minimum_version_id to version_id. And the function ``load_state_old()`` (if present) is able to @@ -454,7 +454,7 @@ data and then transferred to the main structure. If you use memory API functions that update memory layout outside initialization (i.e., in response to a guest action), this is a strong -indication that you need to call these functions in a `post_load` callback. +indication that you need to call these functions in a ``post_load`` callback. Examples of such memory API functions are: - memory_region_add_subregion() @@ -823,12 +823,12 @@ Postcopy migration with shared memory needs explicit support from the other processes that share memory and from QEMU. There are restrictions on the type of memory that userfault can support shared. -The Linux kernel userfault support works on `/dev/shm` memory and on `hugetlbfs` -(although the kernel doesn't provide an equivalent to `madvise(MADV_DONTNEED)` +The Linux kernel userfault support works on ``/dev/shm`` memory and on ``hugetlbfs`` +(although the kernel doesn't provide an equivalent to ``madvise(MADV_DONTNEED)`` for hugetlbfs which may be a problem in some configurations). 
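For background on what the client side involves, here is a minimal sketch (not QEMU code; error handling omitted) of opening a userfaultfd and registering a shared region for missing-page notifications:

.. code:: c

    #include <fcntl.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/userfaultfd.h>

    static int register_uffd(void *addr, size_t len)
    {
        int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

        struct uffdio_api api = { .api = UFFD_API };
        ioctl(uffd, UFFDIO_API, &api);

        struct uffdio_register reg = {
            .range = { .start = (uintptr_t)addr, .len = len },
            .mode = UFFDIO_REGISTER_MODE_MISSING,
        };
        ioctl(uffd, UFFDIO_REGISTER, &reg);

        return uffd; /* read page-fault events from this fd */
    }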
The vhost-user code in QEMU supports clients that have Postcopy support, -and the `vhost-user-bridge` (in `tests/`) and the DPDK package have changes +and the ``vhost-user-bridge`` (in ``tests/``) and the DPDK package have changes to support postcopy. The client needs to open a userfaultfd and register the areas diff --git a/docs/devel/modules.rst b/docs/devel/modules.rst new file mode 100644 index 00000000000..8e999c4fa48 --- /dev/null +++ b/docs/devel/modules.rst @@ -0,0 +1,5 @@ +============ +QEMU modules +============ + +.. kernel-doc:: include/qemu/module.h diff --git a/docs/devel/multi-process.rst b/docs/devel/multi-process.rst index 69699329d62..e4801751f23 100644 --- a/docs/devel/multi-process.rst +++ b/docs/devel/multi-process.rst @@ -1,15 +1,17 @@ -This is the design document for multi-process QEMU. It does not -necessarily reflect the status of the current implementation, which -may lack features or be considerably different from what is described -in this document. This document is still useful as a description of -the goals and general direction of this feature. - -Please refer to the following wiki for latest details: -https://wiki.qemu.org/Features/MultiProcessQEMU - Multi-process QEMU =================== +.. note:: + + This is the design document for multi-process QEMU. It does not + necessarily reflect the status of the current implementation, which + may lack features or be considerably different from what is described + in this document. This document is still useful as a description of + the goals and general direction of this feature. + + Please refer to the following wiki for latest details: + https://wiki.qemu.org/Features/MultiProcessQEMU + QEMU is often used as the hypervisor for virtual machines running in the Oracle cloud. Since one of the advantages of cloud computing is the ability to run many VMs from different tenants in the same cloud @@ -185,9 +187,9 @@ desired, in which the emulation application should only be allowed to access the files or devices the VM it's running on behalf of can access. #### qemu-io model -Qemu-io is a test harness used to test changes to the QEMU block backend -object code. (e.g., the code that implements disk images for disk driver -emulation) Qemu-io is not a device emulation application per se, but it +``qemu-io`` is a test harness used to test changes to the QEMU block backend +object code (e.g., the code that implements disk images for disk driver +emulation). ``qemu-io`` is not a device emulation application per se, but it does compile the QEMU block objects into a separate binary from the main QEMU one. This could be useful for disk device emulation, since its emulation applications will need to include the QEMU block objects. @@ -639,7 +641,7 @@ the CPU that issued the MMIO. +==========+========================+ | rid | range MMIO is within | +----------+------------------------+ -| offset | offset withing *rid* | +| offset | offset within *rid* | +----------+------------------------+ | type | e.g., load or store | +----------+------------------------+ diff --git a/docs/devel/multi-thread-tcg.rst b/docs/devel/multi-thread-tcg.rst index 92a9eba13c9..c9541a7b20a 100644 --- a/docs/devel/multi-thread-tcg.rst +++ b/docs/devel/multi-thread-tcg.rst @@ -4,8 +4,9 @@ This work is licensed under the terms of the GNU GPL, version 2 or later. See the COPYING file in the top-level directory. 
-Introduction -============ +================== +Multi-threaded TCG +================== This document outlines the design for multi-threaded TCG (a.k.a MTTCG) system-mode emulation. user-mode emulation has always mirrored the @@ -227,7 +228,7 @@ Emulated hardware state Currently thanks to KVM work any access to IO memory is automatically protected by the global iothread mutex, also known as the BQL (Big -Qemu Lock). Any IO region that doesn't use global mutex is expected to +QEMU Lock). Any IO region that doesn't use global mutex is expected to do its own locking. However IO memory isn't the only way emulated hardware state can be diff --git a/docs/devel/qapi-code-gen.rst b/docs/devel/qapi-code-gen.rst new file mode 100644 index 00000000000..a3b54730894 --- /dev/null +++ b/docs/devel/qapi-code-gen.rst @@ -0,0 +1,1932 @@ +================================== +How to use the QAPI code generator +================================== + +.. + Copyright IBM Corp. 2011 + Copyright (C) 2012-2016 Red Hat, Inc. + + This work is licensed under the terms of the GNU GPL, version 2 or + later. See the COPYING file in the top-level directory. + + +Introduction +============ + +QAPI is a native C API within QEMU which provides management-level +functionality to internal and external users. For external +users/processes, this interface is made available by a JSON-based wire +format for the QEMU Monitor Protocol (QMP) for controlling qemu, as +well as the QEMU Guest Agent (QGA) for communicating with the guest. +The remainder of this document uses "Client JSON Protocol" when +referring to the wire contents of a QMP or QGA connection. + +To map between Client JSON Protocol interfaces and the native C API, +we generate C code from a QAPI schema. This document describes the +QAPI schema language, and how it gets mapped to the Client JSON +Protocol and to C. It additionally provides guidance on maintaining +Client JSON Protocol compatibility. + + +The QAPI schema language +======================== + +The QAPI schema defines the Client JSON Protocol's commands and +events, as well as types used by them. Forward references are +allowed. + +It is permissible for the schema to contain additional types not used +by any commands or events, for the side effect of generated C code +used internally. + +There are several kinds of types: simple types (a number of built-in +types, such as ``int`` and ``str``; as well as enumerations), arrays, +complex types (structs and two flavors of unions), and alternate types +(a choice between other types). + + +Schema syntax +------------- + +Syntax is loosely based on `JSON `_. +Differences: + +* Comments: start with a hash character (``#``) that is not part of a + string, and extend to the end of the line. + +* Strings are enclosed in ``'single quotes'``, not ``"double quotes"``. + +* Strings are restricted to printable ASCII, and escape sequences to + just ``\\``. + +* Numbers and ``null`` are not supported. + +A second layer of syntax defines the sequences of JSON texts that are +a correctly structured QAPI schema. 
We provide a grammar for this +syntax in an EBNF-like notation: + +* Production rules look like ``non-terminal = expression`` +* Concatenation: expression ``A B`` matches expression ``A``, then ``B`` +* Alternation: expression ``A | B`` matches expression ``A`` or ``B`` +* Repetition: expression ``A...`` matches zero or more occurrences of + expression ``A`` +* Repetition: expression ``A, ...`` matches zero or more occurrences of + expression ``A`` separated by ``,`` +* Grouping: expression ``( A )`` matches expression ``A`` +* JSON's structural characters are terminals: ``{ } [ ] : ,`` +* JSON's literal names are terminals: ``false true`` +* String literals enclosed in ``'single quotes'`` are terminal, and match + this JSON string, with a leading ``*`` stripped off +* When JSON object member's name starts with ``*``, the member is + optional. +* The symbol ``STRING`` is a terminal, and matches any JSON string +* The symbol ``BOOL`` is a terminal, and matches JSON ``false`` or ``true`` +* ALL-CAPS words other than ``STRING`` are non-terminals + +The order of members within JSON objects does not matter unless +explicitly noted. + +A QAPI schema consists of a series of top-level expressions:: + + SCHEMA = TOP-LEVEL-EXPR... + +The top-level expressions are all JSON objects. Code and +documentation is generated in schema definition order. Code order +should not matter. + +A top-level expressions is either a directive or a definition:: + + TOP-LEVEL-EXPR = DIRECTIVE | DEFINITION + +There are two kinds of directives and six kinds of definitions:: + + DIRECTIVE = INCLUDE | PRAGMA + DEFINITION = ENUM | STRUCT | UNION | ALTERNATE | COMMAND | EVENT + +These are discussed in detail below. + + +Built-in Types +-------------- + +The following types are predefined, and map to C as follows: + + ============= ============== ============================================ + Schema C JSON + ============= ============== ============================================ + ``str`` ``char *`` any JSON string, UTF-8 + ``number`` ``double`` any JSON number + ``int`` ``int64_t`` a JSON number without fractional part + that fits into the C integer type + ``int8`` ``int8_t`` likewise + ``int16`` ``int16_t`` likewise + ``int32`` ``int32_t`` likewise + ``int64`` ``int64_t`` likewise + ``uint8`` ``uint8_t`` likewise + ``uint16`` ``uint16_t`` likewise + ``uint32`` ``uint32_t`` likewise + ``uint64`` ``uint64_t`` likewise + ``size`` ``uint64_t`` like ``uint64_t``, except + ``StringInputVisitor`` accepts size suffixes + ``bool`` ``bool`` JSON ``true`` or ``false`` + ``null`` ``QNull *`` JSON ``null`` + ``any`` ``QObject *`` any JSON value + ``QType`` ``QType`` JSON string matching enum ``QType`` values + ============= ============== ============================================ + + +Include directives +------------------ + +Syntax:: + + INCLUDE = { 'include': STRING } + +The QAPI schema definitions can be modularized using the 'include' directive:: + + { 'include': 'path/to/file.json' } + +The directive is evaluated recursively, and include paths are relative +to the file using the directive. Multiple includes of the same file +are idempotent. + +As a matter of style, it is a good idea to have all files be +self-contained, but at the moment, nothing prevents an included file +from making a forward reference to a type that is only introduced by +an outer file. The parser may be made stricter in the future to +prevent incomplete include files. + +.. 
_pragma: + +Pragma directives +----------------- + +Syntax:: + + PRAGMA = { 'pragma': { + '*doc-required': BOOL, + '*command-name-exceptions': [ STRING, ... ], + '*command-returns-exceptions': [ STRING, ... ], + '*member-name-exceptions': [ STRING, ... ] } } + +The pragma directive lets you control optional generator behavior. + +Pragma's scope is currently the complete schema. Setting the same +pragma to different values in parts of the schema doesn't work. + +Pragma 'doc-required' takes a boolean value. If true, documentation +is required. Default is false. + +Pragma 'command-name-exceptions' takes a list of commands whose names +may contain ``"_"`` instead of ``"-"``. Default is none. + +Pragma 'command-returns-exceptions' takes a list of commands that may +violate the rules on permitted return types. Default is none. + +Pragma 'member-name-exceptions' takes a list of types whose member +names may contain uppercase letters, and ``"_"`` instead of ``"-"``. +Default is none. + +.. _ENUM-VALUE: + +Enumeration types +----------------- + +Syntax:: + + ENUM = { 'enum': STRING, + 'data': [ ENUM-VALUE, ... ], + '*prefix': STRING, + '*if': COND, + '*features': FEATURES } + ENUM-VALUE = STRING + | { 'name': STRING, + '*if': COND, + '*features': FEATURES } + +Member 'enum' names the enum type. + +Each member of the 'data' array defines a value of the enumeration +type. The form STRING is shorthand for :code:`{ 'name': STRING }`. The +'name' values must be be distinct. + +Example:: + + { 'enum': 'MyEnum', 'data': [ 'value1', 'value2', 'value3' ] } + +Nothing prevents an empty enumeration, although it is probably not +useful. + +On the wire, an enumeration type's value is represented by its +(string) name. In C, it's represented by an enumeration constant. +These are of the form PREFIX_NAME, where PREFIX is derived from the +enumeration type's name, and NAME from the value's name. For the +example above, the generator maps 'MyEnum' to MY_ENUM and 'value1' to +VALUE1, resulting in the enumeration constant MY_ENUM_VALUE1. The +optional 'prefix' member overrides PREFIX. + +The generated C enumeration constants have values 0, 1, ..., N-1 (in +QAPI schema order), where N is the number of values. There is an +additional enumeration constant PREFIX__MAX with value N. + +Do not use string or an integer type when an enumeration type can do +the job satisfactorily. + +The optional 'if' member specifies a conditional. See `Configuring the +schema`_ below for more on this. + +The optional 'features' member specifies features. See Features_ +below for more on this. + + +.. _TYPE-REF: + +Type references and array types +------------------------------- + +Syntax:: + + TYPE-REF = STRING | ARRAY-TYPE + ARRAY-TYPE = [ STRING ] + +A string denotes the type named by the string. + +A one-element array containing a string denotes an array of the type +named by the string. Example: ``['int']`` denotes an array of ``int``. + + +Struct types +------------ + +Syntax:: + + STRUCT = { 'struct': STRING, + 'data': MEMBERS, + '*base': STRING, + '*if': COND, + '*features': FEATURES } + MEMBERS = { MEMBER, ... } + MEMBER = STRING : TYPE-REF + | STRING : { 'type': TYPE-REF, + '*if': COND, + '*features': FEATURES } + +Member 'struct' names the struct type. + +Each MEMBER of the 'data' object defines a member of the struct type. + +.. _MEMBERS: + +The MEMBER's STRING name consists of an optional ``*`` prefix and the +struct member name. If ``*`` is present, the member is optional. 
+ +The MEMBER's value defines its properties, in particular its type. +The form TYPE-REF_ is shorthand for :code:`{ 'type': TYPE-REF }`. + +Example:: + + { 'struct': 'MyType', + 'data': { 'member1': 'str', 'member2': ['int'], '*member3': 'str' } } + +A struct type corresponds to a struct in C, and an object in JSON. +The C struct's members are generated in QAPI schema order. + +The optional 'base' member names a struct type whose members are to be +included in this type. They go first in the C struct. + +Example:: + + { 'struct': 'BlockdevOptionsGenericFormat', + 'data': { 'file': 'str' } } + { 'struct': 'BlockdevOptionsGenericCOWFormat', + 'base': 'BlockdevOptionsGenericFormat', + 'data': { '*backing': 'str' } } + +An example BlockdevOptionsGenericCOWFormat object on the wire could use +both members like this:: + + { "file": "/some/place/my-image", + "backing": "/some/place/my-backing-file" } + +The optional 'if' member specifies a conditional. See `Configuring +the schema`_ below for more on this. + +The optional 'features' member specifies features. See Features_ +below for more on this. + + +Union types +----------- + +Syntax:: + + UNION = { 'union': STRING, + 'base': ( MEMBERS | STRING ), + 'discriminator': STRING, + 'data': BRANCHES, + '*if': COND, + '*features': FEATURES } + BRANCHES = { BRANCH, ... } + BRANCH = STRING : TYPE-REF + | STRING : { 'type': TYPE-REF, '*if': COND } + +Member 'union' names the union type. + +The 'base' member defines the common members. If it is a MEMBERS_ +object, it defines common members just like a struct type's 'data' +member defines struct type members. If it is a STRING, it names a +struct type whose members are the common members. + +Member 'discriminator' must name a non-optional enum-typed member of +the base struct. That member's value selects a branch by its name. +If no such branch exists, an empty branch is assumed. + +Each BRANCH of the 'data' object defines a branch of the union. A +union must have at least one branch. + +The BRANCH's STRING name is the branch name. It must be a value of +the discriminator enum type. + +The BRANCH's value defines the branch's properties, in particular its +type. The type must a struct type. The form TYPE-REF_ is shorthand +for :code:`{ 'type': TYPE-REF }`. + +In the Client JSON Protocol, a union is represented by an object with +the common members (from the base type) and the selected branch's +members. The two sets of member names must be disjoint. + +Example:: + + { 'enum': 'BlockdevDriver', 'data': [ 'file', 'qcow2' ] } + { 'union': 'BlockdevOptions', + 'base': { 'driver': 'BlockdevDriver', '*read-only': 'bool' }, + 'discriminator': 'driver', + 'data': { 'file': 'BlockdevOptionsFile', + 'qcow2': 'BlockdevOptionsQcow2' } } + +Resulting in these JSON objects:: + + { "driver": "file", "read-only": true, + "filename": "/some/place/my-image" } + { "driver": "qcow2", "read-only": false, + "backing": "/some/place/my-image", "lazy-refcounts": true } + +The order of branches need not match the order of the enum values. +The branches need not cover all possible enum values. In the +resulting generated C data types, a union is represented as a struct +with the base members in QAPI schema order, and then a union of +structures for each branch of the struct. + +The optional 'if' member specifies a conditional. See `Configuring +the schema`_ below for more on this. + +The optional 'features' member specifies features. See Features_ +below for more on this. 
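As a rough illustration of the C representation described above, the generated type for the BlockdevOptions example would look roughly like this (simplified; the real generated header also contains list types and other supporting declarations):

.. code:: c

    struct BlockdevOptions {
        /* Members of the base type, in QAPI schema order */
        BlockdevDriver driver;
        bool has_read_only;
        bool read_only;
        /* Variant members, selected by the value of 'driver' */
        union {
            BlockdevOptionsFile file;
            BlockdevOptionsQcow2 qcow2;
        } u;
    };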
+ + +Alternate types +--------------- + +Syntax:: + + ALTERNATE = { 'alternate': STRING, + 'data': ALTERNATIVES, + '*if': COND, + '*features': FEATURES } + ALTERNATIVES = { ALTERNATIVE, ... } + ALTERNATIVE = STRING : STRING + | STRING : { 'type': STRING, '*if': COND } + +Member 'alternate' names the alternate type. + +Each ALTERNATIVE of the 'data' object defines a branch of the +alternate. An alternate must have at least one branch. + +The ALTERNATIVE's STRING name is the branch name. + +The ALTERNATIVE's value defines the branch's properties, in particular +its type. The form STRING is shorthand for :code:`{ 'type': STRING }`. + +Example:: + + { 'alternate': 'BlockdevRef', + 'data': { 'definition': 'BlockdevOptions', + 'reference': 'str' } } + +An alternate type is like a union type, except there is no +discriminator on the wire. Instead, the branch to use is inferred +from the value. An alternate can only express a choice between types +represented differently on the wire. + +If a branch is typed as the 'bool' built-in, the alternate accepts +true and false; if it is typed as any of the various numeric +built-ins, it accepts a JSON number; if it is typed as a 'str' +built-in or named enum type, it accepts a JSON string; if it is typed +as the 'null' built-in, it accepts JSON null; and if it is typed as a +complex type (struct or union), it accepts a JSON object. + +The example alternate declaration above allows using both of the +following example objects:: + + { "file": "my_existing_block_device_id" } + { "file": { "driver": "file", + "read-only": false, + "filename": "/tmp/mydisk.qcow2" } } + +The optional 'if' member specifies a conditional. See `Configuring +the schema`_ below for more on this. + +The optional 'features' member specifies features. See Features_ +below for more on this. + + +Commands +-------- + +Syntax:: + + COMMAND = { 'command': STRING, + ( + '*data': ( MEMBERS | STRING ), + | + 'data': STRING, + 'boxed': true, + ) + '*returns': TYPE-REF, + '*success-response': false, + '*gen': false, + '*allow-oob': true, + '*allow-preconfig': true, + '*coroutine': true, + '*if': COND, + '*features': FEATURES } + +Member 'command' names the command. + +Member 'data' defines the arguments. It defaults to an empty MEMBERS_ +object. + +If 'data' is a MEMBERS_ object, then MEMBERS defines arguments just +like a struct type's 'data' defines struct type members. + +If 'data' is a STRING, then STRING names a complex type whose members +are the arguments. A union type requires ``'boxed': true``. + +Member 'returns' defines the command's return type. It defaults to an +empty struct type. It must normally be a complex type or an array of +a complex type. To return anything else, the command must be listed +in pragma 'commands-returns-exceptions'. If you do this, extending +the command to return additional information will be harder. Use of +the pragma for new commands is strongly discouraged. + +A command's error responses are not specified in the QAPI schema. +Error conditions should be documented in comments. + +In the Client JSON Protocol, the value of the "execute" or "exec-oob" +member is the command name. The value of the "arguments" member then +has to conform to the arguments, and the value of the success +response's "return" member will conform to the return type. 
+ +Some example commands:: + + { 'command': 'my-first-command', + 'data': { 'arg1': 'str', '*arg2': 'str' } } + { 'struct': 'MyType', 'data': { '*value': 'str' } } + { 'command': 'my-second-command', + 'returns': [ 'MyType' ] } + +which would validate this Client JSON Protocol transaction:: + + => { "execute": "my-first-command", + "arguments": { "arg1": "hello" } } + <= { "return": { } } + => { "execute": "my-second-command" } + <= { "return": [ { "value": "one" }, { } ] } + +The generator emits a prototype for the C function implementing the +command. The function itself needs to be written by hand. See +section `Code generated for commands`_ for examples. + +The function returns the return type. When member 'boxed' is absent, +it takes the command arguments as arguments one by one, in QAPI schema +order. Else it takes them wrapped in the C struct generated for the +complex argument type. It takes an additional ``Error **`` argument in +either case. + +The generator also emits a marshalling function that extracts +arguments for the user's function out of an input QDict, calls the +user's function, and if it succeeded, builds an output QObject from +its return value. This is for use by the QMP monitor core. + +In rare cases, QAPI cannot express a type-safe representation of a +corresponding Client JSON Protocol command. You then have to suppress +generation of a marshalling function by including a member 'gen' with +boolean value false, and instead write your own function. For +example:: + + { 'command': 'netdev_add', + 'data': {'type': 'str', 'id': 'str'}, + 'gen': false } + +Please try to avoid adding new commands that rely on this, and instead +use type-safe unions. + +Normally, the QAPI schema is used to describe synchronous exchanges, +where a response is expected. But in some cases, the action of a +command is expected to change state in a way that a successful +response is not possible (although the command will still return an +error object on failure). When a successful reply is not possible, +the command definition includes the optional member 'success-response' +with boolean value false. So far, only QGA makes use of this member. + +Member 'allow-oob' declares whether the command supports out-of-band +(OOB) execution. It defaults to false. For example:: + + { 'command': 'migrate_recover', + 'data': { 'uri': 'str' }, 'allow-oob': true } + +See qmp-spec.txt for out-of-band execution syntax and semantics. + +Commands supporting out-of-band execution can still be executed +in-band. + +When a command is executed in-band, its handler runs in the main +thread with the BQL held. + +When a command is executed out-of-band, its handler runs in a +dedicated monitor I/O thread with the BQL *not* held. + +An OOB-capable command handler must satisfy the following conditions: + +- It terminates quickly. +- It does not invoke system calls that may block. +- It does not access guest RAM that may block when userfaultfd is + enabled for postcopy live migration. +- It takes only "fast" locks, i.e. all critical sections protected by + any lock it takes also satisfy the conditions for OOB command + handler code. + +The restrictions on locking limit access to shared state. Such access +requires synchronization, but OOB commands can't take the BQL or any +other "slow" lock. + +When in doubt, do not implement OOB execution support. + +Member 'allow-preconfig' declares whether the command is available +before the machine is built. It defaults to false. 
For example:: + + { 'enum': 'QMPCapability', + 'data': [ 'oob' ] } + { 'command': 'qmp_capabilities', + 'data': { '*enable': [ 'QMPCapability' ] }, + 'allow-preconfig': true } + +QMP is available before the machine is built only when QEMU was +started with --preconfig. + +Member 'coroutine' tells the QMP dispatcher whether the command handler +is safe to be run in a coroutine. It defaults to false. If it is true, +the command handler is called from coroutine context and may yield while +waiting for an external event (such as I/O completion) in order to avoid +blocking the guest and other background operations. + +Coroutine safety can be hard to prove, similar to thread safety. Common +pitfalls are: + +- The global mutex isn't held across ``qemu_coroutine_yield()``, so + operations that used to assume that they execute atomically may have + to be more careful to protect against changes in the global state. + +- Nested event loops (``AIO_WAIT_WHILE()`` etc.) are problematic in + coroutine context and can easily lead to deadlocks. They should be + replaced by yielding and reentering the coroutine when the condition + becomes false. + +Since the command handler may assume coroutine context, any callers +other than the QMP dispatcher must also call it in coroutine context. +In particular, HMP commands calling such a QMP command handler must be +marked ``.coroutine = true`` in hmp-commands.hx. + +It is an error to specify both ``'coroutine': true`` and ``'allow-oob': true`` +for a command. We don't currently have a use case for both together and +without a use case, it's not entirely clear what the semantics should +be. + +The optional 'if' member specifies a conditional. See `Configuring +the schema`_ below for more on this. + +The optional 'features' member specifies features. See Features_ +below for more on this. + + +Events +------ + +Syntax:: + + EVENT = { 'event': STRING, + ( + '*data': ( MEMBERS | STRING ), + | + 'data': STRING, + 'boxed': true, + ) + '*if': COND, + '*features': FEATURES } + +Member 'event' names the event. This is the event name used in the +Client JSON Protocol. + +Member 'data' defines the event-specific data. It defaults to an +empty MEMBERS object. + +If 'data' is a MEMBERS object, then MEMBERS defines event-specific +data just like a struct type's 'data' defines struct type members. + +If 'data' is a STRING, then STRING names a complex type whose members +are the event-specific data. A union type requires ``'boxed': true``. + +An example event is:: + + { 'event': 'EVENT_C', + 'data': { '*a': 'int', 'b': 'str' } } + +Resulting in this JSON object:: + + { "event": "EVENT_C", + "data": { "b": "test string" }, + "timestamp": { "seconds": 1267020223, "microseconds": 435656 } } + +The generator emits a function to send the event. When member 'boxed' +is absent, it takes event-specific data one by one, in QAPI schema +order. Else it takes them wrapped in the C struct generated for the +complex type. See section `Code generated for events`_ for examples. + +The optional 'if' member specifies a conditional. See `Configuring +the schema`_ below for more on this. + +The optional 'features' member specifies features. See Features_ +below for more on this. + + +.. _FEATURE: + +Features +-------- + +Syntax:: + + FEATURES = [ FEATURE, ... ] + FEATURE = STRING + | { 'name': STRING, '*if': COND } + +Sometimes, the behaviour of QEMU changes compatibly, but without a +change in the QMP syntax (usually by allowing values or operations +that previously resulted in an error). 
QMP clients may still need to +know whether the extension is available. + +For this purpose, a list of features can be specified for a command or +struct type. Each list member can either be ``{ 'name': STRING, '*if': +COND }``, or STRING, which is shorthand for ``{ 'name': STRING }``. + +The optional 'if' member specifies a conditional. See `Configuring +the schema`_ below for more on this. + +Example:: + + { 'struct': 'TestType', + 'data': { 'number': 'int' }, + 'features': [ 'allow-negative-numbers' ] } + +The feature strings are exposed to clients in introspection, as +explained in section `Client JSON Protocol introspection`_. + +Intended use is to have each feature string signal that this build of +QEMU shows a certain behaviour. + + +Special features +~~~~~~~~~~~~~~~~ + +Feature "deprecated" marks a command, event, enum value, or struct +member as deprecated. It is not supported elsewhere so far. +Interfaces so marked may be withdrawn in future releases in accordance +with QEMU's deprecation policy. + +Feature "unstable" marks a command, event, enum value, or struct +member as unstable. It is not supported elsewhere so far. Interfaces +so marked may be withdrawn or changed incompatibly in future releases. + + +Naming rules and reserved names +------------------------------- + +All names must begin with a letter, and contain only ASCII letters, +digits, hyphen, and underscore. There are two exceptions: enum values +may start with a digit, and names that are downstream extensions (see +section `Downstream extensions`_) start with underscore. + +Names beginning with ``q_`` are reserved for the generator, which uses +them for munging QMP names that resemble C keywords or other +problematic strings. For example, a member named ``default`` in qapi +becomes ``q_default`` in the generated C code. + +Types, commands, and events share a common namespace. Therefore, +generally speaking, type definitions should always use CamelCase for +user-defined type names, while built-in types are lowercase. + +Type names ending with ``Kind`` or ``List`` are reserved for the +generator, which uses them for implicit union enums and array types, +respectively. + +Command names, and member names within a type, should be all lower +case with words separated by a hyphen. However, some existing older +commands and complex types use underscore; when extending them, +consistency is preferred over blindly avoiding underscore. + +Event names should be ALL_CAPS with words separated by underscore. + +Member name ``u`` and names starting with ``has-`` or ``has_`` are reserved +for the generator, which uses them for unions and for tracking +optional members. + +Names beginning with ``x-`` used to signify "experimental". This +convention has been replaced by special feature "unstable". + +Pragmas ``command-name-exceptions`` and ``member-name-exceptions`` let +you violate naming rules. Use for new code is strongly discouraged. See +`Pragma directives`_ for details. + + +Downstream extensions +--------------------- + +QAPI schema names that are externally visible, say in the Client JSON +Protocol, need to be managed with care. Names starting with a +downstream prefix of the form __RFQDN_ are reserved for the downstream +who controls the valid, reverse fully qualified domain name RFQDN. +RFQDN may only contain ASCII letters, digits, hyphen and period. + +Example: Red Hat, Inc. controls redhat.com, and may therefore add a +downstream command ``__com.redhat_drive-mirror``. 
+ + +Configuring the schema +---------------------- + +Syntax:: + + COND = STRING + | { 'all': [ COND, ... ] } + | { 'any': [ COND, ... ] } + | { 'not': COND } + +All definitions take an optional 'if' member. Its value must be a +string, or an object with a single member 'all', 'any' or 'not'. + +The C code generated for the definition will then be guarded by an #if +preprocessing directive with an operand generated from that condition: + + * STRING will generate defined(STRING) + * { 'all': [COND, ...] } will generate (COND && ...) + * { 'any': [COND, ...] } will generate (COND || ...) + * { 'not': COND } will generate !COND + +Example: a conditional struct :: + + { 'struct': 'IfStruct', 'data': { 'foo': 'int' }, + 'if': { 'all': [ 'CONFIG_FOO', 'HAVE_BAR' ] } } + +gets its generated code guarded like this:: + + #if defined(CONFIG_FOO) && defined(HAVE_BAR) + ... generated code ... + #endif /* defined(HAVE_BAR) && defined(CONFIG_FOO) */ + +Individual members of complex types, command arguments, and +event-specific data can also be made conditional. This requires the +longhand form of MEMBER. + +Example: a struct type with unconditional member 'foo' and conditional +member 'bar' :: + + { 'struct': 'IfStruct', + 'data': { 'foo': 'int', + 'bar': { 'type': 'int', 'if': 'IFCOND'} } } + +A union's discriminator may not be conditional. + +Likewise, individual enumeration values can be conditional. This requires +the longhand form of ENUM-VALUE_. + +Example: an enum type with unconditional value 'foo' and conditional +value 'bar' :: + + { 'enum': 'IfEnum', + 'data': [ 'foo', + { 'name' : 'bar', 'if': 'IFCOND' } ] } + +Likewise, features can be conditional. This requires the longhand +form of FEATURE_. + +Example: a struct with conditional feature 'allow-negative-numbers' :: + + { 'struct': 'TestType', + 'data': { 'number': 'int' }, + 'features': [ { 'name': 'allow-negative-numbers', + 'if': 'IFCOND' } ] } + +Please note that you are responsible for ensuring that the C code will +compile with an arbitrary combination of conditions, since the +generator is unable to check it at this point. + +The conditions apply to introspection as well, i.e. introspection +shows a conditional entity only when the condition is satisfied in +this particular build. + + +Documentation comments +---------------------- + +A multi-line comment that starts and ends with a ``##`` line is a +documentation comment. + +If the documentation comment starts like :: + + ## + # @SYMBOL: + +it documents the definition of SYMBOL, else it's free-form +documentation. + +See below for more on `Definition documentation`_. + +Free-form documentation may be used to provide additional text and +structuring content. + + +Headings and subheadings +~~~~~~~~~~~~~~~~~~~~~~~~ + +A free-form documentation comment containing a line which starts with +some ``=`` symbols and then a space defines a section heading:: + + ## + # = This is a top level heading + # + # This is a free-form comment which will go under the + # top level heading. + ## + + ## + # == This is a second level heading + ## + +A heading line must be the first line of the documentation +comment block. + +Section headings must always be correctly nested, so you can only +define a third-level heading inside a second-level heading, and so on. + + +Documentation markup +~~~~~~~~~~~~~~~~~~~~ + +Documentation comments can use most rST markup.
In particular, +a ``::`` literal block can be used for examples:: + + # :: + # + # Text of the example, may span + # multiple lines + +``*`` starts an itemized list:: + + # * First item, may span + # multiple lines + # * Second item + +You can also use ``-`` instead of ``*``. + +A decimal number followed by ``.`` starts a numbered list:: + + # 1. First item, may span + # multiple lines + # 2. Second item + +The actual number doesn't matter. + +Lists of either kind must be preceded and followed by a blank line. +If a list item's text spans multiple lines, then the second and +subsequent lines must be correctly indented to line up with the +first character of the first line. + +The usual ****strong****, *\*emphasized\** and ````literal```` markup +should be used. If you need a single literal ``*``, you will need to +backslash-escape it. As an extension beyond the usual rST syntax, you +can also use ``@foo`` to reference a name in the schema; this is rendered +the same way as ````foo````. + +Example:: + + ## + # Some text foo with **bold** and *emphasis* + # 1. with a list + # 2. like that + # + # And some code: + # + # :: + # + # $ echo foo + # -> do this + # <- get that + ## + + +Definition documentation +~~~~~~~~~~~~~~~~~~~~~~~~ + +Definition documentation, if present, must immediately precede the +definition it documents. + +When documentation is required (see pragma_ 'doc-required'), every +definition must have documentation. + +Definition documentation starts with a line naming the definition, +followed by an optional overview, a description of each argument (for +commands and events), member (for structs and unions), branch (for +alternates), or value (for enums), a description of each feature (if +any), and finally optional tagged sections. + +The description of an argument or feature 'name' starts with +'\@name:'. The description text can start on the line following the +'\@name:', in which case it must not be indented at all. It can also +start on the same line as the '\@name:'. In this case if it spans +multiple lines then second and subsequent lines must be indented to +line up with the first character of the first line of the +description:: + + # @argone: + # This is a two line description + # in the first style. + # + # @argtwo: This is a two line description + # in the second style. + +The number of spaces between the ':' and the text is not significant. + +.. admonition:: FIXME + + The parser accepts these things in almost any order. + +.. admonition:: FIXME + + union branches should be described, too. + +Extensions added after the definition was first released carry a +'(since x.y.z)' comment. + +The feature descriptions must be preceded by a line "Features:", like +this:: + + # Features: + # @feature: Description text + +A tagged section starts with one of the following words: +"Note:"/"Notes:", "Since:", "Example"/"Examples", "Returns:", "TODO:". +The section ends with the start of a new section. + +The text of a section can start on a new line, in +which case it must not be indented at all. It can also start +on the same line as the 'Note:', 'Returns:', etc tag. In this +case if it spans multiple lines then second and subsequent +lines must be indented to match the first, in the same way as +multiline argument descriptions. + +A 'Since: x.y.z' tagged section lists the release that introduced the +definition. + +An 'Example' or 'Examples' section is automatically rendered +entirely as literal fixed-width text. 
In other sections, +the text is formatted, and rST markup can be used. + +For example:: + + ## + # @BlockStats: + # + # Statistics of a virtual block device or a block backing device. + # + # @device: If the stats are for a virtual block device, the name + # corresponding to the virtual block device. + # + # @node-name: The node name of the device. (since 2.3) + # + # ... more members ... + # + # Since: 0.14.0 + ## + { 'struct': 'BlockStats', + 'data': {'*device': 'str', '*node-name': 'str', + ... more members ... } } + + ## + # @query-blockstats: + # + # Query the @BlockStats for all virtual block devices. + # + # @query-nodes: If true, the command will query all the + # block nodes ... explain, explain ... (since 2.3) + # + # Returns: A list of @BlockStats for each virtual block devices. + # + # Since: 0.14.0 + # + # Example: + # + # -> { "execute": "query-blockstats" } + # <- { + # ... lots of output ... + # } + # + ## + { 'command': 'query-blockstats', + 'data': { '*query-nodes': 'bool' }, + 'returns': ['BlockStats'] } + + +Client JSON Protocol introspection +================================== + +Clients of a Client JSON Protocol commonly need to figure out what +exactly the server (QEMU) supports. + +For this purpose, QMP provides introspection via command +query-qmp-schema. QGA currently doesn't support introspection. + +While Client JSON Protocol wire compatibility should be maintained +between qemu versions, we cannot make the same guarantees for +introspection stability. For example, one version of qemu may provide +a non-variant optional member of a struct, and a later version rework +the member to instead be non-optional and associated with a variant. +Likewise, one version of qemu may list a member with open-ended type +'str', and a later version could convert it to a finite set of strings +via an enum type; or a member may be converted from a specific type to +an alternate that represents a choice between the original type and +something else. + +query-qmp-schema returns a JSON array of SchemaInfo objects. These +objects together describe the wire ABI, as defined in the QAPI schema. +There is no specified order to the SchemaInfo objects returned; a +client must search for a particular name throughout the entire array +to learn more about that name, but is at least guaranteed that there +will be no collisions between type, command, and event names. + +However, the SchemaInfo can't reflect all the rules and restrictions +that apply to QMP. It's interface introspection (figuring out what's +there), not interface specification. The specification is in the QAPI +schema. To understand how QMP is to be used, you need to study the +QAPI schema. + +Like any other command, query-qmp-schema is itself defined in the QAPI +schema, along with the SchemaInfo type. This text attempts to give an +overview how things work. For details you need to consult the QAPI +schema. + +SchemaInfo objects have common members "name", "meta-type", +"features", and additional variant members depending on the value of +meta-type. + +Each SchemaInfo object describes a wire ABI entity of a certain +meta-type: a command, event or one of several kinds of type. + +SchemaInfo for commands and events have the same name as in the QAPI +schema. + +Command and event names are part of the wire ABI, but type names are +not. Therefore, the SchemaInfo for types have auto-generated +meaningless names. For readability, the examples in this section use +meaningful type names instead. 
+ +Optional member "features" exposes the entity's feature strings as a +JSON array of strings. + +To examine a type, start with a command or event using it, then follow +references by name. + +QAPI schema definitions not reachable that way are omitted. + +The SchemaInfo for a command has meta-type "command", and variant +members "arg-type", "ret-type" and "allow-oob". On the wire, the +"arguments" member of a client's "execute" command must conform to the +object type named by "arg-type". The "return" member that the server +passes in a success response conforms to the type named by "ret-type". +When "allow-oob" is true, it means the command supports out-of-band +execution. It defaults to false. + +If the command takes no arguments, "arg-type" names an object type +without members. Likewise, if the command returns nothing, "ret-type" +names an object type without members. + +Example: the SchemaInfo for command query-qmp-schema :: + + { "name": "query-qmp-schema", "meta-type": "command", + "arg-type": "q_empty", "ret-type": "SchemaInfoList" } + + Type "q_empty" is an automatic object type without members, and type + "SchemaInfoList" is the array of SchemaInfo type. + +The SchemaInfo for an event has meta-type "event", and variant member +"arg-type". On the wire, a "data" member that the server passes in an +event conforms to the object type named by "arg-type". + +If the event carries no additional information, "arg-type" names an +object type without members. The event may not have a data member on +the wire then. + +Each command or event defined with 'data' as MEMBERS object in the +QAPI schema implicitly defines an object type. + +Example: the SchemaInfo for EVENT_C from section Events_ :: + + { "name": "EVENT_C", "meta-type": "event", + "arg-type": "q_obj-EVENT_C-arg" } + + Type "q_obj-EVENT_C-arg" is an implicitly defined object type with + the two members from the event's definition. + +The SchemaInfo for struct and union types has meta-type "object". + +The SchemaInfo for a struct type has variant member "members". + +The SchemaInfo for a union type additionally has variant members "tag" +and "variants". + +"members" is a JSON array describing the object's common members, if +any. Each element is a JSON object with members "name" (the member's +name), "type" (the name of its type), "features" (a JSON array of +feature strings), and "default". The latter two are optional. The +member is optional if "default" is present. Currently, "default" can +only have value null. Other values are reserved for future +extensions. The "members" array is in no particular order; clients +must search the entire object when learning whether a particular +member is supported. + +Example: the SchemaInfo for MyType from section `Struct types`_ :: + + { "name": "MyType", "meta-type": "object", + "members": [ + { "name": "member1", "type": "str" }, + { "name": "member2", "type": "int" }, + { "name": "member3", "type": "str", "default": null } ] } + +"features" exposes the command's feature strings as a JSON array of +strings. + +Example: the SchemaInfo for TestType from section Features_:: + + { "name": "TestType", "meta-type": "object", + "members": [ + { "name": "number", "type": "int" } ], + "features": ["allow-negative-numbers"] } + +"tag" is the name of the common member serving as type tag. +"variants" is a JSON array describing the object's variant members. 
+Each element is a JSON object with members "case" (the value of type +tag this element applies to) and "type" (the name of an object type +that provides the variant members for this type tag value). The +"variants" array is in no particular order, and is not guaranteed to +list cases in the same order as the corresponding "tag" enum type. + +Example: the SchemaInfo for union BlockdevOptions from section +`Union types`_ :: + + { "name": "BlockdevOptions", "meta-type": "object", + "members": [ + { "name": "driver", "type": "BlockdevDriver" }, + { "name": "read-only", "type": "bool", "default": null } ], + "tag": "driver", + "variants": [ + { "case": "file", "type": "BlockdevOptionsFile" }, + { "case": "qcow2", "type": "BlockdevOptionsQcow2" } ] } + +Note that base types are "flattened": its members are included in the +"members" array. + +The SchemaInfo for an alternate type has meta-type "alternate", and +variant member "members". "members" is a JSON array. Each element is +a JSON object with member "type", which names a type. Values of the +alternate type conform to exactly one of its member types. There is +no guarantee on the order in which "members" will be listed. + +Example: the SchemaInfo for BlockdevRef from section `Alternate types`_ :: + + { "name": "BlockdevRef", "meta-type": "alternate", + "members": [ + { "type": "BlockdevOptions" }, + { "type": "str" } ] } + +The SchemaInfo for an array type has meta-type "array", and variant +member "element-type", which names the array's element type. Array +types are implicitly defined. For convenience, the array's name may +resemble the element type; however, clients should examine member +"element-type" instead of making assumptions based on parsing member +"name". + +Example: the SchemaInfo for ['str'] :: + + { "name": "[str]", "meta-type": "array", + "element-type": "str" } + +The SchemaInfo for an enumeration type has meta-type "enum" and +variant member "members". + +"members" is a JSON array describing the enumeration values. Each +element is a JSON object with member "name" (the member's name), and +optionally "features" (a JSON array of feature strings). The +"members" array is in no particular order; clients must search the +entire array when learning whether a particular value is supported. + +Example: the SchemaInfo for MyEnum from section `Enumeration types`_ :: + + { "name": "MyEnum", "meta-type": "enum", + "members": [ + { "name": "value1" }, + { "name": "value2" }, + { "name": "value3" } + ] } + +The SchemaInfo for a built-in type has the same name as the type in +the QAPI schema (see section `Built-in Types`_), with one exception +detailed below. It has variant member "json-type" that shows how +values of this type are encoded on the wire. + +Example: the SchemaInfo for str :: + + { "name": "str", "meta-type": "builtin", "json-type": "string" } + +The QAPI schema supports a number of integer types that only differ in +how they map to C. They are identical as far as SchemaInfo is +concerned. Therefore, they get all mapped to a single type "int" in +SchemaInfo. + +As explained above, type names are not part of the wire ABI. Not even +the names of built-in types. Clients should examine member +"json-type" instead of hard-coding names of built-in types. + + +Compatibility considerations +============================ + +Maintaining backward compatibility at the Client JSON Protocol level +while evolving the schema requires some care. 
This section is about +syntactic compatibility, which is necessary, but not sufficient, for +actual compatibility. + +Clients send commands with argument data, and receive command +responses with return data and events with event data. + +Adding opt-in functionality to the send direction is backwards +compatible: adding commands, optional arguments, enumeration values, +union and alternate branches; turning an argument type into an +alternate of that type; making mandatory arguments optional. Clients +oblivious of the new functionality continue to work. + +Incompatible changes include removing commands, command arguments, +enumeration values, union and alternate branches, adding mandatory +command arguments, and making optional arguments mandatory. + +The specified behavior of an absent optional argument should remain +the same. With proper documentation, this policy still allows some +flexibility; for example, when an optional 'buffer-size' argument is +specified to default to a sensible buffer size, the actual default +value can still be changed. The specified default behavior is not the +exact size of the buffer, only that the default size is sensible. + +Adding functionality to the receive direction is generally backwards +compatible: adding events, adding return and event data members. +Clients are expected to ignore the ones they don't know. + +Removing "unreachable" stuff like events that can't be triggered +anymore, optional return or event data members that can't be sent +anymore, and return or event data member (enumeration) values that +can't be sent anymore makes no difference to clients, except for +introspection. The latter can conceivably confuse clients, so tread +carefully. + +Incompatible changes include removing return and event data members. + +Any change to a command definition's 'data' or one of the types used +there (recursively) needs to consider send direction compatibility. + +Any change to a command definition's 'return', an event definition's +'data', or one of the types used there (recursively) needs to consider +receive direction compatibility. + +Any change to types used in both contexts need to consider both. + +Enumeration type values and complex and alternate type members may be +reordered freely. For enumerations and alternate types, this doesn't +affect the wire encoding. For complex types, this might make the +implementation emit JSON object members in a different order, which +the Client JSON Protocol permits. + +Since type names are not visible in the Client JSON Protocol, types +may be freely renamed. Even certain refactorings are invisible, such +as splitting members from one type into a common base type. + + +Code generation +=============== + +The QAPI code generator qapi-gen.py generates code and documentation +from the schema. Together with the core QAPI libraries, this code +provides everything required to take JSON commands read in by a Client +JSON Protocol server, unmarshal the arguments into the underlying C +types, call into the corresponding C function, map the response back +to a Client JSON Protocol response to be returned to the user, and +introspect the commands. + +As an example, we'll use the following schema, which describes a +single complex user-defined type, along with command which takes a +list of that type as a parameter, and returns a single element of that +type. The user is responsible for writing the implementation of +qmp_my_command(); everything else is produced by the generator. 
:: + + $ cat example-schema.json + { 'struct': 'UserDefOne', + 'data': { 'integer': 'int', '*string': 'str' } } + + { 'command': 'my-command', + 'data': { 'arg1': ['UserDefOne'] }, + 'returns': 'UserDefOne' } + + { 'event': 'MY_EVENT' } + +We run qapi-gen.py like this:: + + $ python scripts/qapi-gen.py --output-dir="qapi-generated" \ + --prefix="example-" example-schema.json + +For a more thorough look at generated code, the testsuite includes +tests/qapi-schema/qapi-schema-tests.json that covers more examples of +what the generator will accept, and compiles the resulting C code as +part of 'make check-unit'. + + +Code generated for QAPI types +----------------------------- + +The following files are created: + + ``$(prefix)qapi-types.h`` + C types corresponding to types defined in the schema + + ``$(prefix)qapi-types.c`` + Cleanup functions for the above C types + +The $(prefix) is an optional parameter used as a namespace to keep the +generated code from one schema/code-generation separated from others so code +can be generated/used from multiple schemas without clobbering previously +created code. + +Example:: + + $ cat qapi-generated/example-qapi-types.h + [Uninteresting stuff omitted...] + + #ifndef EXAMPLE_QAPI_TYPES_H + #define EXAMPLE_QAPI_TYPES_H + + #include "qapi/qapi-builtin-types.h" + + typedef struct UserDefOne UserDefOne; + + typedef struct UserDefOneList UserDefOneList; + + typedef struct q_obj_my_command_arg q_obj_my_command_arg; + + struct UserDefOne { + int64_t integer; + bool has_string; + char *string; + }; + + void qapi_free_UserDefOne(UserDefOne *obj); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(UserDefOne, qapi_free_UserDefOne) + + struct UserDefOneList { + UserDefOneList *next; + UserDefOne *value; + }; + + void qapi_free_UserDefOneList(UserDefOneList *obj); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(UserDefOneList, qapi_free_UserDefOneList) + + struct q_obj_my_command_arg { + UserDefOneList *arg1; + }; + + #endif /* EXAMPLE_QAPI_TYPES_H */ + $ cat qapi-generated/example-qapi-types.c + [Uninteresting stuff omitted...] + + void qapi_free_UserDefOne(UserDefOne *obj) + { + Visitor *v; + + if (!obj) { + return; + } + + v = qapi_dealloc_visitor_new(); + visit_type_UserDefOne(v, NULL, &obj, NULL); + visit_free(v); + } + + void qapi_free_UserDefOneList(UserDefOneList *obj) + { + Visitor *v; + + if (!obj) { + return; + } + + v = qapi_dealloc_visitor_new(); + visit_type_UserDefOneList(v, NULL, &obj, NULL); + visit_free(v); + } + + [Uninteresting stuff omitted...] + +For a modular QAPI schema (see section `Include directives`_), code for +each sub-module SUBDIR/SUBMODULE.json is actually generated into :: + + SUBDIR/$(prefix)qapi-types-SUBMODULE.h + SUBDIR/$(prefix)qapi-types-SUBMODULE.c + +If qapi-gen.py is run with option --builtins, additional files are +created: + + ``qapi-builtin-types.h`` + C types corresponding to built-in types + + ``qapi-builtin-types.c`` + Cleanup functions for the above C types + + +Code generated for visiting QAPI types +-------------------------------------- + +These are the visitor functions used to walk through and convert +between a native QAPI C data structure and some other format (such as +QObject); the generated functions are named visit_type_FOO() and +visit_type_FOO_members(). 
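+
+As a hedged illustration (hand-written code, not part of the generated
+output), the visitors generated for the example schema above can be
+combined with the core input visitor to build a UserDefOne from a
+QDict.  The variable ``args`` is an assumed QDict obtained elsewhere;
+qobject_input_visitor_new() comes from the core QAPI visitor API, not
+from the generated files described below::
+
+    /* assumes "qapi/qobject-input-visitor.h" and the generated
+     * "example-qapi-visit.h" have been included */
+    Visitor *v;
+    UserDefOne *obj = NULL;
+    Error *err = NULL;
+
+    /* walk the QDict and fill in a newly allocated UserDefOne */
+    v = qobject_input_visitor_new(QOBJECT(args));
+    visit_type_UserDefOne(v, NULL, &obj, &err);
+    visit_free(v);
+
+    if (err) {
+        /* conversion failed; report or propagate the error */
+    } else {
+        /* use obj, then release it with the generated cleanup helper */
+        qapi_free_UserDefOne(obj);
+    }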
+ +The following files are generated: + + ``$(prefix)qapi-visit.c`` + Visitor function for a particular C type, used to automagically + convert QObjects into the corresponding C type and vice-versa, as + well as for deallocating memory for an existing C type + + ``$(prefix)qapi-visit.h`` + Declarations for previously mentioned visitor functions + +Example:: + + $ cat qapi-generated/example-qapi-visit.h + [Uninteresting stuff omitted...] + + #ifndef EXAMPLE_QAPI_VISIT_H + #define EXAMPLE_QAPI_VISIT_H + + #include "qapi/qapi-builtin-visit.h" + #include "example-qapi-types.h" + + + bool visit_type_UserDefOne_members(Visitor *v, UserDefOne *obj, Error **errp); + + bool visit_type_UserDefOne(Visitor *v, const char *name, + UserDefOne **obj, Error **errp); + + bool visit_type_UserDefOneList(Visitor *v, const char *name, + UserDefOneList **obj, Error **errp); + + bool visit_type_q_obj_my_command_arg_members(Visitor *v, q_obj_my_command_arg *obj, Error **errp); + + #endif /* EXAMPLE_QAPI_VISIT_H */ + $ cat qapi-generated/example-qapi-visit.c + [Uninteresting stuff omitted...] + + bool visit_type_UserDefOne_members(Visitor *v, UserDefOne *obj, Error **errp) + { + if (!visit_type_int(v, "integer", &obj->integer, errp)) { + return false; + } + if (visit_optional(v, "string", &obj->has_string)) { + if (!visit_type_str(v, "string", &obj->string, errp)) { + return false; + } + } + return true; + } + + bool visit_type_UserDefOne(Visitor *v, const char *name, + UserDefOne **obj, Error **errp) + { + bool ok = false; + + if (!visit_start_struct(v, name, (void **)obj, sizeof(UserDefOne), errp)) { + return false; + } + if (!*obj) { + /* incomplete */ + assert(visit_is_dealloc(v)); + ok = true; + goto out_obj; + } + if (!visit_type_UserDefOne_members(v, *obj, errp)) { + goto out_obj; + } + ok = visit_check_struct(v, errp); + out_obj: + visit_end_struct(v, (void **)obj); + if (!ok && visit_is_input(v)) { + qapi_free_UserDefOne(*obj); + *obj = NULL; + } + return ok; + } + + bool visit_type_UserDefOneList(Visitor *v, const char *name, + UserDefOneList **obj, Error **errp) + { + bool ok = false; + UserDefOneList *tail; + size_t size = sizeof(**obj); + + if (!visit_start_list(v, name, (GenericList **)obj, size, errp)) { + return false; + } + + for (tail = *obj; tail; + tail = (UserDefOneList *)visit_next_list(v, (GenericList *)tail, size)) { + if (!visit_type_UserDefOne(v, NULL, &tail->value, errp)) { + goto out_obj; + } + } + + ok = visit_check_list(v, errp); + out_obj: + visit_end_list(v, (void **)obj); + if (!ok && visit_is_input(v)) { + qapi_free_UserDefOneList(*obj); + *obj = NULL; + } + return ok; + } + + bool visit_type_q_obj_my_command_arg_members(Visitor *v, q_obj_my_command_arg *obj, Error **errp) + { + if (!visit_type_UserDefOneList(v, "arg1", &obj->arg1, errp)) { + return false; + } + return true; + } + + [Uninteresting stuff omitted...] + +For a modular QAPI schema (see section `Include directives`_), code for +each sub-module SUBDIR/SUBMODULE.json is actually generated into :: + + SUBDIR/$(prefix)qapi-visit-SUBMODULE.h + SUBDIR/$(prefix)qapi-visit-SUBMODULE.c + +If qapi-gen.py is run with option --builtins, additional files are +created: + + ``qapi-builtin-visit.h`` + Visitor functions for built-in types + + ``qapi-builtin-visit.c`` + Declarations for these visitor functions + + +Code generated for commands +--------------------------- + +These are the marshaling/dispatch functions for the commands defined +in the schema. 
The generated code provides qmp_marshal_COMMAND(), and +declares qmp_COMMAND() that the user must implement. + +The following files are generated: + + ``$(prefix)qapi-commands.c`` + Command marshal/dispatch functions for each QMP command defined in + the schema + + ``$(prefix)qapi-commands.h`` + Function prototypes for the QMP commands specified in the schema + + ``$(prefix)qapi-init-commands.h`` + Command initialization prototype + + ``$(prefix)qapi-init-commands.c`` + Command initialization code + +Example:: + + $ cat qapi-generated/example-qapi-commands.h + [Uninteresting stuff omitted...] + + #ifndef EXAMPLE_QAPI_COMMANDS_H + #define EXAMPLE_QAPI_COMMANDS_H + + #include "example-qapi-types.h" + + UserDefOne *qmp_my_command(UserDefOneList *arg1, Error **errp); + void qmp_marshal_my_command(QDict *args, QObject **ret, Error **errp); + + #endif /* EXAMPLE_QAPI_COMMANDS_H */ + $ cat qapi-generated/example-qapi-commands.c + [Uninteresting stuff omitted...] + + + static void qmp_marshal_output_UserDefOne(UserDefOne *ret_in, + QObject **ret_out, Error **errp) + { + Visitor *v; + + v = qobject_output_visitor_new_qmp(ret_out); + if (visit_type_UserDefOne(v, "unused", &ret_in, errp)) { + visit_complete(v, ret_out); + } + visit_free(v); + v = qapi_dealloc_visitor_new(); + visit_type_UserDefOne(v, "unused", &ret_in, NULL); + visit_free(v); + } + + void qmp_marshal_my_command(QDict *args, QObject **ret, Error **errp) + { + Error *err = NULL; + bool ok = false; + Visitor *v; + UserDefOne *retval; + q_obj_my_command_arg arg = {0}; + + v = qobject_input_visitor_new_qmp(QOBJECT(args)); + if (!visit_start_struct(v, NULL, NULL, 0, errp)) { + goto out; + } + if (visit_type_q_obj_my_command_arg_members(v, &arg, errp)) { + ok = visit_check_struct(v, errp); + } + visit_end_struct(v, NULL); + if (!ok) { + goto out; + } + + retval = qmp_my_command(arg.arg1, &err); + error_propagate(errp, err); + if (err) { + goto out; + } + + qmp_marshal_output_UserDefOne(retval, ret, errp); + + out: + visit_free(v); + v = qapi_dealloc_visitor_new(); + visit_start_struct(v, NULL, NULL, 0, NULL); + visit_type_q_obj_my_command_arg_members(v, &arg, NULL); + visit_end_struct(v, NULL); + visit_free(v); + } + + [Uninteresting stuff omitted...] + $ cat qapi-generated/example-qapi-init-commands.h + [Uninteresting stuff omitted...] + #ifndef EXAMPLE_QAPI_INIT_COMMANDS_H + #define EXAMPLE_QAPI_INIT_COMMANDS_H + + #include "qapi/qmp/dispatch.h" + + void example_qmp_init_marshal(QmpCommandList *cmds); + + #endif /* EXAMPLE_QAPI_INIT_COMMANDS_H */ + $ cat qapi-generated/example-qapi-init-commands.c + [Uninteresting stuff omitted...] + void example_qmp_init_marshal(QmpCommandList *cmds) + { + QTAILQ_INIT(cmds); + + qmp_register_command(cmds, "my-command", + qmp_marshal_my_command, QCO_NO_OPTIONS); + } + [Uninteresting stuff omitted...] + +For a modular QAPI schema (see section `Include directives`_), code for +each sub-module SUBDIR/SUBMODULE.json is actually generated into:: + + SUBDIR/$(prefix)qapi-commands-SUBMODULE.h + SUBDIR/$(prefix)qapi-commands-SUBMODULE.c + + +Code generated for events +------------------------- + +This is the code related to events defined in the schema, providing +qapi_event_send_EVENT(). 
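+
+A minimal, hedged sketch of hand-written code emitting the example
+event; events whose 'data' defines members instead take those members
+as arguments, in QAPI schema order (the generated sender is shown in
+the example header below)::
+
+    /* e.g. from a device model or other hand-written QEMU code */
+    qapi_event_send_my_event();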
+ +The following files are created: + + ``$(prefix)qapi-events.h`` + Function prototypes for each event type + + ``$(prefix)qapi-events.c`` + Implementation of functions to send an event + + ``$(prefix)qapi-emit-events.h`` + Enumeration of all event names, and common event code declarations + + ``$(prefix)qapi-emit-events.c`` + Common event code definitions + +Example:: + + $ cat qapi-generated/example-qapi-events.h + [Uninteresting stuff omitted...] + + #ifndef EXAMPLE_QAPI_EVENTS_H + #define EXAMPLE_QAPI_EVENTS_H + + #include "qapi/util.h" + #include "example-qapi-types.h" + + void qapi_event_send_my_event(void); + + #endif /* EXAMPLE_QAPI_EVENTS_H */ + $ cat qapi-generated/example-qapi-events.c + [Uninteresting stuff omitted...] + + void qapi_event_send_my_event(void) + { + QDict *qmp; + + qmp = qmp_event_build_dict("MY_EVENT"); + + example_qapi_event_emit(EXAMPLE_QAPI_EVENT_MY_EVENT, qmp); + + qobject_unref(qmp); + } + + [Uninteresting stuff omitted...] + $ cat qapi-generated/example-qapi-emit-events.h + [Uninteresting stuff omitted...] + + #ifndef EXAMPLE_QAPI_EMIT_EVENTS_H + #define EXAMPLE_QAPI_EMIT_EVENTS_H + + #include "qapi/util.h" + + typedef enum example_QAPIEvent { + EXAMPLE_QAPI_EVENT_MY_EVENT, + EXAMPLE_QAPI_EVENT__MAX, + } example_QAPIEvent; + + #define example_QAPIEvent_str(val) \ + qapi_enum_lookup(&example_QAPIEvent_lookup, (val)) + + extern const QEnumLookup example_QAPIEvent_lookup; + + void example_qapi_event_emit(example_QAPIEvent event, QDict *qdict); + + #endif /* EXAMPLE_QAPI_EMIT_EVENTS_H */ + $ cat qapi-generated/example-qapi-emit-events.c + [Uninteresting stuff omitted...] + + const QEnumLookup example_QAPIEvent_lookup = { + .array = (const char *const[]) { + [EXAMPLE_QAPI_EVENT_MY_EVENT] = "MY_EVENT", + }, + .size = EXAMPLE_QAPI_EVENT__MAX + }; + + [Uninteresting stuff omitted...] + +For a modular QAPI schema (see section `Include directives`_), code for +each sub-module SUBDIR/SUBMODULE.json is actually generated into :: + + SUBDIR/$(prefix)qapi-events-SUBMODULE.h + SUBDIR/$(prefix)qapi-events-SUBMODULE.c + + +Code generated for introspection +-------------------------------- + +The following files are created: + + ``$(prefix)qapi-introspect.c`` + Defines a string holding a JSON description of the schema + + ``$(prefix)qapi-introspect.h`` + Declares the above string + +Example:: + + $ cat qapi-generated/example-qapi-introspect.h + [Uninteresting stuff omitted...] + + #ifndef EXAMPLE_QAPI_INTROSPECT_H + #define EXAMPLE_QAPI_INTROSPECT_H + + #include "qapi/qmp/qlit.h" + + extern const QLitObject example_qmp_schema_qlit; + + #endif /* EXAMPLE_QAPI_INTROSPECT_H */ + $ cat qapi-generated/example-qapi-introspect.c + [Uninteresting stuff omitted...] 
+ + const QLitObject example_qmp_schema_qlit = QLIT_QLIST(((QLitObject[]) { + QLIT_QDICT(((QLitDictEntry[]) { + { "arg-type", QLIT_QSTR("0"), }, + { "meta-type", QLIT_QSTR("command"), }, + { "name", QLIT_QSTR("my-command"), }, + { "ret-type", QLIT_QSTR("1"), }, + {} + })), + QLIT_QDICT(((QLitDictEntry[]) { + { "arg-type", QLIT_QSTR("2"), }, + { "meta-type", QLIT_QSTR("event"), }, + { "name", QLIT_QSTR("MY_EVENT"), }, + {} + })), + /* "0" = q_obj_my-command-arg */ + QLIT_QDICT(((QLitDictEntry[]) { + { "members", QLIT_QLIST(((QLitObject[]) { + QLIT_QDICT(((QLitDictEntry[]) { + { "name", QLIT_QSTR("arg1"), }, + { "type", QLIT_QSTR("[1]"), }, + {} + })), + {} + })), }, + { "meta-type", QLIT_QSTR("object"), }, + { "name", QLIT_QSTR("0"), }, + {} + })), + /* "1" = UserDefOne */ + QLIT_QDICT(((QLitDictEntry[]) { + { "members", QLIT_QLIST(((QLitObject[]) { + QLIT_QDICT(((QLitDictEntry[]) { + { "name", QLIT_QSTR("integer"), }, + { "type", QLIT_QSTR("int"), }, + {} + })), + QLIT_QDICT(((QLitDictEntry[]) { + { "default", QLIT_QNULL, }, + { "name", QLIT_QSTR("string"), }, + { "type", QLIT_QSTR("str"), }, + {} + })), + {} + })), }, + { "meta-type", QLIT_QSTR("object"), }, + { "name", QLIT_QSTR("1"), }, + {} + })), + /* "2" = q_empty */ + QLIT_QDICT(((QLitDictEntry[]) { + { "members", QLIT_QLIST(((QLitObject[]) { + {} + })), }, + { "meta-type", QLIT_QSTR("object"), }, + { "name", QLIT_QSTR("2"), }, + {} + })), + QLIT_QDICT(((QLitDictEntry[]) { + { "element-type", QLIT_QSTR("1"), }, + { "meta-type", QLIT_QSTR("array"), }, + { "name", QLIT_QSTR("[1]"), }, + {} + })), + QLIT_QDICT(((QLitDictEntry[]) { + { "json-type", QLIT_QSTR("int"), }, + { "meta-type", QLIT_QSTR("builtin"), }, + { "name", QLIT_QSTR("int"), }, + {} + })), + QLIT_QDICT(((QLitDictEntry[]) { + { "json-type", QLIT_QSTR("string"), }, + { "meta-type", QLIT_QSTR("builtin"), }, + { "name", QLIT_QSTR("str"), }, + {} + })), + {} + })); + + [Uninteresting stuff omitted...] diff --git a/docs/devel/qapi-code-gen.txt b/docs/devel/qapi-code-gen.txt deleted file mode 100644 index c1cb6f987de..00000000000 --- a/docs/devel/qapi-code-gen.txt +++ /dev/null @@ -1,1897 +0,0 @@ -= How to use the QAPI code generator = - -Copyright IBM Corp. 2011 -Copyright (C) 2012-2016 Red Hat, Inc. - -This work is licensed under the terms of the GNU GPL, version 2 or -later. See the COPYING file in the top-level directory. - -== Introduction == - -QAPI is a native C API within QEMU which provides management-level -functionality to internal and external users. For external -users/processes, this interface is made available by a JSON-based wire -format for the QEMU Monitor Protocol (QMP) for controlling qemu, as -well as the QEMU Guest Agent (QGA) for communicating with the guest. -The remainder of this document uses "Client JSON Protocol" when -referring to the wire contents of a QMP or QGA connection. - -To map between Client JSON Protocol interfaces and the native C API, -we generate C code from a QAPI schema. This document describes the -QAPI schema language, and how it gets mapped to the Client JSON -Protocol and to C. It additionally provides guidance on maintaining -Client JSON Protocol compatibility. - - -== The QAPI schema language == - -The QAPI schema defines the Client JSON Protocol's commands and -events, as well as types used by them. Forward references are -allowed. - -It is permissible for the schema to contain additional types not used -by any commands or events, for the side effect of generated C code -used internally. 
- -There are several kinds of types: simple types (a number of built-in -types, such as 'int' and 'str'; as well as enumerations), arrays, -complex types (structs and two flavors of unions), and alternate types -(a choice between other types). - - -=== Schema syntax === - -Syntax is loosely based on JSON (http://www.ietf.org/rfc/rfc8259.txt). -Differences: - -* Comments: start with a hash character (#) that is not part of a - string, and extend to the end of the line. - -* Strings are enclosed in 'single quotes', not "double quotes". - -* Strings are restricted to printable ASCII, and escape sequences to - just '\\'. - -* Numbers and null are not supported. - -A second layer of syntax defines the sequences of JSON texts that are -a correctly structured QAPI schema. We provide a grammar for this -syntax in an EBNF-like notation: - -* Production rules look like non-terminal = expression -* Concatenation: expression A B matches expression A, then B -* Alternation: expression A | B matches expression A or B -* Repetition: expression A... matches zero or more occurrences of - expression A -* Repetition: expression A, ... matches zero or more occurrences of - expression A separated by , -* Grouping: expression ( A ) matches expression A -* JSON's structural characters are terminals: { } [ ] : , -* JSON's literal names are terminals: false true -* String literals enclosed in 'single quotes' are terminal, and match - this JSON string, with a leading '*' stripped off -* When JSON object member's name starts with '*', the member is - optional. -* The symbol STRING is a terminal, and matches any JSON string -* The symbol BOOL is a terminal, and matches JSON false or true -* ALL-CAPS words other than STRING are non-terminals - -The order of members within JSON objects does not matter unless -explicitly noted. - -A QAPI schema consists of a series of top-level expressions: - - SCHEMA = TOP-LEVEL-EXPR... - -The top-level expressions are all JSON objects. Code and -documentation is generated in schema definition order. Code order -should not matter. - -A top-level expressions is either a directive or a definition: - - TOP-LEVEL-EXPR = DIRECTIVE | DEFINITION - -There are two kinds of directives and six kinds of definitions: - - DIRECTIVE = INCLUDE | PRAGMA - DEFINITION = ENUM | STRUCT | UNION | ALTERNATE | COMMAND | EVENT - -These are discussed in detail below. - - -=== Built-in Types === - -The following types are predefined, and map to C as follows: - - Schema C JSON - str char * any JSON string, UTF-8 - number double any JSON number - int int64_t a JSON number without fractional part - that fits into the C integer type - int8 int8_t likewise - int16 int16_t likewise - int32 int32_t likewise - int64 int64_t likewise - uint8 uint8_t likewise - uint16 uint16_t likewise - uint32 uint32_t likewise - uint64 uint64_t likewise - size uint64_t like uint64_t, except StringInputVisitor - accepts size suffixes - bool bool JSON true or false - null QNull * JSON null - any QObject * any JSON value - QType QType JSON string matching enum QType values - - -=== Include directives === - -Syntax: - INCLUDE = { 'include': STRING } - -The QAPI schema definitions can be modularized using the 'include' directive: - - { 'include': 'path/to/file.json' } - -The directive is evaluated recursively, and include paths are relative -to the file using the directive. Multiple includes of the same file -are idempotent. 
- -As a matter of style, it is a good idea to have all files be -self-contained, but at the moment, nothing prevents an included file -from making a forward reference to a type that is only introduced by -an outer file. The parser may be made stricter in the future to -prevent incomplete include files. - - -=== Pragma directives === - -Syntax: - PRAGMA = { 'pragma': { - '*doc-required': BOOL, - '*command-name-exceptions': [ STRING, ... ], - '*command-returns-exceptions': [ STRING, ... ], - '*member-name-exceptions': [ STRING, ... ] } } - -The pragma directive lets you control optional generator behavior. - -Pragma's scope is currently the complete schema. Setting the same -pragma to different values in parts of the schema doesn't work. - -Pragma 'doc-required' takes a boolean value. If true, documentation -is required. Default is false. - -Pragma 'command-name-exceptions' takes a list of commands whose names -may contain '_' instead of '-'. Default is none. - -Pragma 'command-returns-exceptions' takes a list of commands that may -violate the rules on permitted return types. Default is none. - -Pragma 'member-name-exceptions' takes a list of types whose member -names may contain uppercase letters, and '_' instead of '-'. Default -is none. - - -=== Enumeration types === - -Syntax: - ENUM = { 'enum': STRING, - 'data': [ ENUM-VALUE, ... ], - '*prefix': STRING, - '*if': COND, - '*features': FEATURES } - ENUM-VALUE = STRING - | { 'name': STRING, '*if': COND } - -Member 'enum' names the enum type. - -Each member of the 'data' array defines a value of the enumeration -type. The form STRING is shorthand for { 'name': STRING }. The -'name' values must be be distinct. - -Example: - - { 'enum': 'MyEnum', 'data': [ 'value1', 'value2', 'value3' ] } - -Nothing prevents an empty enumeration, although it is probably not -useful. - -On the wire, an enumeration type's value is represented by its -(string) name. In C, it's represented by an enumeration constant. -These are of the form PREFIX_NAME, where PREFIX is derived from the -enumeration type's name, and NAME from the value's name. For the -example above, the generator maps 'MyEnum' to MY_ENUM and 'value1' to -VALUE1, resulting in the enumeration constant MY_ENUM_VALUE1. The -optional 'prefix' member overrides PREFIX. - -The generated C enumeration constants have values 0, 1, ..., N-1 (in -QAPI schema order), where N is the number of values. There is an -additional enumeration constant PREFIX__MAX with value N. - -Do not use string or an integer type when an enumeration type can do -the job satisfactorily. - -The optional 'if' member specifies a conditional. See "Configuring -the schema" below for more on this. - -The optional 'features' member specifies features. See "Features" -below for more on this. - - -=== Type references and array types === - -Syntax: - TYPE-REF = STRING | ARRAY-TYPE - ARRAY-TYPE = [ STRING ] - -A string denotes the type named by the string. - -A one-element array containing a string denotes an array of the type -named by the string. Example: ['int'] denotes an array of 'int'. - - -=== Struct types === - -Syntax: - STRUCT = { 'struct': STRING, - 'data': MEMBERS, - '*base': STRING, - '*if': COND, - '*features': FEATURES } - MEMBERS = { MEMBER, ... } - MEMBER = STRING : TYPE-REF - | STRING : { 'type': TYPE-REF, - '*if': COND, - '*features': FEATURES } - -Member 'struct' names the struct type. - -Each MEMBER of the 'data' object defines a member of the struct type. 
- -The MEMBER's STRING name consists of an optional '*' prefix and the -struct member name. If '*' is present, the member is optional. - -The MEMBER's value defines its properties, in particular its type. -The form TYPE-REF is shorthand for { 'type': TYPE-REF }. - -Example: - - { 'struct': 'MyType', - 'data': { 'member1': 'str', 'member2': ['int'], '*member3': 'str' } } - -A struct type corresponds to a struct in C, and an object in JSON. -The C struct's members are generated in QAPI schema order. - -The optional 'base' member names a struct type whose members are to be -included in this type. They go first in the C struct. - -Example: - - { 'struct': 'BlockdevOptionsGenericFormat', - 'data': { 'file': 'str' } } - { 'struct': 'BlockdevOptionsGenericCOWFormat', - 'base': 'BlockdevOptionsGenericFormat', - 'data': { '*backing': 'str' } } - -An example BlockdevOptionsGenericCOWFormat object on the wire could use -both members like this: - - { "file": "/some/place/my-image", - "backing": "/some/place/my-backing-file" } - -The optional 'if' member specifies a conditional. See "Configuring -the schema" below for more on this. - -The optional 'features' member specifies features. See "Features" -below for more on this. - - -=== Union types === - -Syntax: - UNION = { 'union': STRING, - 'data': BRANCHES, - '*if': COND, - '*features': FEATURES } - | { 'union': STRING, - 'data': BRANCHES, - 'base': ( MEMBERS | STRING ), - 'discriminator': STRING, - '*if': COND, - '*features': FEATURES } - BRANCHES = { BRANCH, ... } - BRANCH = STRING : TYPE-REF - | STRING : { 'type': TYPE-REF, '*if': COND } - -Member 'union' names the union type. - -There are two flavors of union types: simple (no discriminator or -base), and flat (both discriminator and base). - -Each BRANCH of the 'data' object defines a branch of the union. A -union must have at least one branch. - -The BRANCH's STRING name is the branch name. - -The BRANCH's value defines the branch's properties, in particular its -type. The form TYPE-REF is shorthand for { 'type': TYPE-REF }. - -A simple union type defines a mapping from automatic discriminator -values to data types like in this example: - - { 'struct': 'BlockdevOptionsFile', 'data': { 'filename': 'str' } } - { 'struct': 'BlockdevOptionsQcow2', - 'data': { 'backing': 'str', '*lazy-refcounts': 'bool' } } - - { 'union': 'BlockdevOptionsSimple', - 'data': { 'file': 'BlockdevOptionsFile', - 'qcow2': 'BlockdevOptionsQcow2' } } - -In the Client JSON Protocol, a simple union is represented by an -object that contains the 'type' member as a discriminator, and a -'data' member that is of the specified data type corresponding to the -discriminator value, as in these examples: - - { "type": "file", "data": { "filename": "/some/place/my-image" } } - { "type": "qcow2", "data": { "backing": "/some/place/my-image", - "lazy-refcounts": true } } - -The generated C code uses a struct containing a union. Additionally, -an implicit C enum 'NameKind' is created, corresponding to the union -'Name', for accessing the various branches of the union. The value -for each branch can be of any type. - -Flat unions permit arbitrary common members that occur in all variants -of the union, not just a discriminator. Their discriminators need not -be named 'type'. They also avoid nesting on the wire. - -The 'base' member defines the common members. If it is a MEMBERS -object, it defines common members just like a struct type's 'data' -member defines struct type members. 
If it is a STRING, it names a -struct type whose members are the common members. - -All flat union branches must be of struct type. - -In the Client JSON Protocol, a flat union is represented by an object -with the common members (from the base type) and the selected branch's -members. The two sets of member names must be disjoint. Member -'discriminator' must name a non-optional enum-typed member of the base -struct. - -The following example enhances the above simple union example by -adding an optional common member 'read-only', renaming the -discriminator to something more applicable than the simple union's -default of 'type', and reducing the number of {} required on the wire: - - { 'enum': 'BlockdevDriver', 'data': [ 'file', 'qcow2' ] } - { 'union': 'BlockdevOptions', - 'base': { 'driver': 'BlockdevDriver', '*read-only': 'bool' }, - 'discriminator': 'driver', - 'data': { 'file': 'BlockdevOptionsFile', - 'qcow2': 'BlockdevOptionsQcow2' } } - -Resulting in these JSON objects: - - { "driver": "file", "read-only": true, - "filename": "/some/place/my-image" } - { "driver": "qcow2", "read-only": false, - "backing": "/some/place/my-image", "lazy-refcounts": true } - -Notice that in a flat union, the discriminator name is controlled by -the user, but because it must map to a base member with enum type, the -code generator ensures that branches match the existing values of the -enum. The order of branches need not match the order of the enum -values. The branches need not cover all possible enum values. -Omitted enum values are still valid branches that add no additional -members to the data type. In the resulting generated C data types, a -flat union is represented as a struct with the base members in QAPI -schema order, and then a union of structures for each branch of the -struct. - -A simple union can always be re-written as a flat union where the base -class has a single member named 'type', and where each branch of the -union has a struct with a single member named 'data'. That is, - - { 'union': 'Simple', 'data': { 'one': 'str', 'two': 'int' } } - -is identical on the wire to: - - { 'enum': 'Enum', 'data': ['one', 'two'] } - { 'struct': 'Branch1', 'data': { 'data': 'str' } } - { 'struct': 'Branch2', 'data': { 'data': 'int' } } - { 'union': 'Flat', 'base': { 'type': 'Enum' }, 'discriminator': 'type', - 'data': { 'one': 'Branch1', 'two': 'Branch2' } } - -The optional 'if' member specifies a conditional. See "Configuring -the schema" below for more on this. - -The optional 'features' member specifies features. See "Features" -below for more on this. - - -=== Alternate types === - -Syntax: - ALTERNATE = { 'alternate': STRING, - 'data': ALTERNATIVES, - '*if': COND, - '*features': FEATURES } - ALTERNATIVES = { ALTERNATIVE, ... } - ALTERNATIVE = STRING : STRING - | STRING : { 'type': STRING, '*if': COND } - -Member 'alternate' names the alternate type. - -Each ALTERNATIVE of the 'data' object defines a branch of the -alternate. An alternate must have at least one branch. - -The ALTERNATIVE's STRING name is the branch name. - -The ALTERNATIVE's value defines the branch's properties, in particular -its type. The form STRING is shorthand for { 'type': STRING }. - -Example: - - { 'alternate': 'BlockdevRef', - 'data': { 'definition': 'BlockdevOptions', - 'reference': 'str' } } - -An alternate type is like a union type, except there is no -discriminator on the wire. Instead, the branch to use is inferred -from the value. 
An alternate can only express a choice between types -represented differently on the wire. - -If a branch is typed as the 'bool' built-in, the alternate accepts -true and false; if it is typed as any of the various numeric -built-ins, it accepts a JSON number; if it is typed as a 'str' -built-in or named enum type, it accepts a JSON string; if it is typed -as the 'null' built-in, it accepts JSON null; and if it is typed as a -complex type (struct or union), it accepts a JSON object. - -The example alternate declaration above allows using both of the -following example objects: - - { "file": "my_existing_block_device_id" } - { "file": { "driver": "file", - "read-only": false, - "filename": "/tmp/mydisk.qcow2" } } - -The optional 'if' member specifies a conditional. See "Configuring -the schema" below for more on this. - -The optional 'features' member specifies features. See "Features" -below for more on this. - - -=== Commands === - -Syntax: - COMMAND = { 'command': STRING, - ( - '*data': ( MEMBERS | STRING ), - | - 'data': STRING, - 'boxed': true, - ) - '*returns': TYPE-REF, - '*success-response': false, - '*gen': false, - '*allow-oob': true, - '*allow-preconfig': true, - '*coroutine': true, - '*if': COND, - '*features': FEATURES } - -Member 'command' names the command. - -Member 'data' defines the arguments. It defaults to an empty MEMBERS -object. - -If 'data' is a MEMBERS object, then MEMBERS defines arguments just -like a struct type's 'data' defines struct type members. - -If 'data' is a STRING, then STRING names a complex type whose members -are the arguments. A union type requires 'boxed': true. - -Member 'returns' defines the command's return type. It defaults to an -empty struct type. It must normally be a complex type or an array of -a complex type. To return anything else, the command must be listed -in pragma 'commands-returns-exceptions'. If you do this, extending -the command to return additional information will be harder. Use of -the pragma for new commands is strongly discouraged. - -A command's error responses are not specified in the QAPI schema. -Error conditions should be documented in comments. - -In the Client JSON Protocol, the value of the "execute" or "exec-oob" -member is the command name. The value of the "arguments" member then -has to conform to the arguments, and the value of the success -response's "return" member will conform to the return type. - -Some example commands: - - { 'command': 'my-first-command', - 'data': { 'arg1': 'str', '*arg2': 'str' } } - { 'struct': 'MyType', 'data': { '*value': 'str' } } - { 'command': 'my-second-command', - 'returns': [ 'MyType' ] } - -which would validate this Client JSON Protocol transaction: - - => { "execute": "my-first-command", - "arguments": { "arg1": "hello" } } - <= { "return": { } } - => { "execute": "my-second-command" } - <= { "return": [ { "value": "one" }, { } ] } - -The generator emits a prototype for the C function implementing the -command. The function itself needs to be written by hand. See -section "Code generated for commands" for examples. - -The function returns the return type. When member 'boxed' is absent, -it takes the command arguments as arguments one by one, in QAPI schema -order. Else it takes them wrapped in the C struct generated for the -complex argument type. It takes an additional Error ** argument in -either case. 
- -The generator also emits a marshalling function that extracts -arguments for the user's function out of an input QDict, calls the -user's function, and if it succeeded, builds an output QObject from -its return value. This is for use by the QMP monitor core. - -In rare cases, QAPI cannot express a type-safe representation of a -corresponding Client JSON Protocol command. You then have to suppress -generation of a marshalling function by including a member 'gen' with -boolean value false, and instead write your own function. For -example: - - { 'command': 'netdev_add', - 'data': {'type': 'str', 'id': 'str'}, - 'gen': false } - -Please try to avoid adding new commands that rely on this, and instead -use type-safe unions. - -Normally, the QAPI schema is used to describe synchronous exchanges, -where a response is expected. But in some cases, the action of a -command is expected to change state in a way that a successful -response is not possible (although the command will still return an -error object on failure). When a successful reply is not possible, -the command definition includes the optional member 'success-response' -with boolean value false. So far, only QGA makes use of this member. - -Member 'allow-oob' declares whether the command supports out-of-band -(OOB) execution. It defaults to false. For example: - - { 'command': 'migrate_recover', - 'data': { 'uri': 'str' }, 'allow-oob': true } - -See qmp-spec.txt for out-of-band execution syntax and semantics. - -Commands supporting out-of-band execution can still be executed -in-band. - -When a command is executed in-band, its handler runs in the main -thread with the BQL held. - -When a command is executed out-of-band, its handler runs in a -dedicated monitor I/O thread with the BQL *not* held. - -An OOB-capable command handler must satisfy the following conditions: - -- It terminates quickly. -- It does not invoke system calls that may block. -- It does not access guest RAM that may block when userfaultfd is - enabled for postcopy live migration. -- It takes only "fast" locks, i.e. all critical sections protected by - any lock it takes also satisfy the conditions for OOB command - handler code. - -The restrictions on locking limit access to shared state. Such access -requires synchronization, but OOB commands can't take the BQL or any -other "slow" lock. - -When in doubt, do not implement OOB execution support. - -Member 'allow-preconfig' declares whether the command is available -before the machine is built. It defaults to false. For example: - - { 'enum': 'QMPCapability', - 'data': [ 'oob' ] } - { 'command': 'qmp_capabilities', - 'data': { '*enable': [ 'QMPCapability' ] }, - 'allow-preconfig': true } - -QMP is available before the machine is built only when QEMU was -started with --preconfig. - -Member 'coroutine' tells the QMP dispatcher whether the command handler -is safe to be run in a coroutine. It defaults to false. If it is true, -the command handler is called from coroutine context and may yield while -waiting for an external event (such as I/O completion) in order to avoid -blocking the guest and other background operations. - -Coroutine safety can be hard to prove, similar to thread safety. Common -pitfalls are: - -- The global mutex isn't held across qemu_coroutine_yield(), so - operations that used to assume that they execute atomically may have - to be more careful to protect against changes in the global state. - -- Nested event loops (AIO_WAIT_WHILE() etc.) 
are problematic in - coroutine context and can easily lead to deadlocks. They should be - replaced by yielding and reentering the coroutine when the condition - becomes false. - -Since the command handler may assume coroutine context, any callers -other than the QMP dispatcher must also call it in coroutine context. -In particular, HMP commands calling such a QMP command handler must be -marked .coroutine = true in hmp-commands.hx. - -It is an error to specify both 'coroutine': true and 'allow-oob': true -for a command. We don't currently have a use case for both together and -without a use case, it's not entirely clear what the semantics should -be. - -The optional 'if' member specifies a conditional. See "Configuring -the schema" below for more on this. - -The optional 'features' member specifies features. See "Features" -below for more on this. - - -=== Events === - -Syntax: - EVENT = { 'event': STRING, - ( - '*data': ( MEMBERS | STRING ), - | - 'data': STRING, - 'boxed': true, - ) - '*if': COND, - '*features': FEATURES } - -Member 'event' names the event. This is the event name used in the -Client JSON Protocol. - -Member 'data' defines the event-specific data. It defaults to an -empty MEMBERS object. - -If 'data' is a MEMBERS object, then MEMBERS defines event-specific -data just like a struct type's 'data' defines struct type members. - -If 'data' is a STRING, then STRING names a complex type whose members -are the event-specific data. A union type requires 'boxed': true. - -An example event is: - -{ 'event': 'EVENT_C', - 'data': { '*a': 'int', 'b': 'str' } } - -Resulting in this JSON object: - -{ "event": "EVENT_C", - "data": { "b": "test string" }, - "timestamp": { "seconds": 1267020223, "microseconds": 435656 } } - -The generator emits a function to send the event. When member 'boxed' -is absent, it takes event-specific data one by one, in QAPI schema -order. Else it takes them wrapped in the C struct generated for the -complex type. See section "Code generated for events" for examples. - -The optional 'if' member specifies a conditional. See "Configuring -the schema" below for more on this. - -The optional 'features' member specifies features. See "Features" -below for more on this. - - -=== Features === - -Syntax: - FEATURES = [ FEATURE, ... ] - FEATURE = STRING - | { 'name': STRING, '*if': COND } - -Sometimes, the behaviour of QEMU changes compatibly, but without a -change in the QMP syntax (usually by allowing values or operations -that previously resulted in an error). QMP clients may still need to -know whether the extension is available. - -For this purpose, a list of features can be specified for a command or -struct type. Each list member can either be { 'name': STRING, '*if': -COND }, or STRING, which is shorthand for { 'name': STRING }. - -The optional 'if' member specifies a conditional. See "Configuring -the schema" below for more on this. - -Example: - -{ 'struct': 'TestType', - 'data': { 'number': 'int' }, - 'features': [ 'allow-negative-numbers' ] } - -The feature strings are exposed to clients in introspection, as -explained in section "Client JSON Protocol introspection". - -Intended use is to have each feature string signal that this build of -QEMU shows a certain behaviour. - - -==== Special features ==== - -Feature "deprecated" marks a command, event, or struct member as -deprecated. It is not supported elsewhere so far. 
- - -=== Naming rules and reserved names === - -All names must begin with a letter, and contain only ASCII letters, -digits, hyphen, and underscore. There are two exceptions: enum values -may start with a digit, and names that are downstream extensions (see -section Downstream extensions) start with underscore. - -Names beginning with 'q_' are reserved for the generator, which uses -them for munging QMP names that resemble C keywords or other -problematic strings. For example, a member named "default" in qapi -becomes "q_default" in the generated C code. - -Types, commands, and events share a common namespace. Therefore, -generally speaking, type definitions should always use CamelCase for -user-defined type names, while built-in types are lowercase. - -Type names ending with 'Kind' or 'List' are reserved for the -generator, which uses them for implicit union enums and array types, -respectively. - -Command names, and member names within a type, should be all lower -case with words separated by a hyphen. However, some existing older -commands and complex types use underscore; when extending them, -consistency is preferred over blindly avoiding underscore. - -Event names should be ALL_CAPS with words separated by underscore. - -Member name 'u' and names starting with 'has-' or 'has_' are reserved -for the generator, which uses them for unions and for tracking -optional members. - -Any name (command, event, type, member, or enum value) beginning with -"x-" is marked experimental, and may be withdrawn or changed -incompatibly in a future release. - -Pragmas 'command-name-exceptions' and 'member-name-exceptions' let you -violate naming rules. Use for new code is strongly discouraged. - - -=== Downstream extensions === - -QAPI schema names that are externally visible, say in the Client JSON -Protocol, need to be managed with care. Names starting with a -downstream prefix of the form __RFQDN_ are reserved for the downstream -who controls the valid, reverse fully qualified domain name RFQDN. -RFQDN may only contain ASCII letters, digits, hyphen and period. - -Example: Red Hat, Inc. controls redhat.com, and may therefore add a -downstream command __com.redhat_drive-mirror. - - -=== Configuring the schema === - -Syntax: - COND = STRING - | [ STRING, ... ] - -All definitions take an optional 'if' member. Its value must be a -string or a list of strings. A string is shorthand for a list -containing just that string. The code generated for the definition -will then be guarded by #if STRING for each STRING in the COND list. - -Example: a conditional struct - - { 'struct': 'IfStruct', 'data': { 'foo': 'int' }, - 'if': ['defined(CONFIG_FOO)', 'defined(HAVE_BAR)'] } - -gets its generated code guarded like this: - - #if defined(CONFIG_FOO) - #if defined(HAVE_BAR) - ... generated code ... - #endif /* defined(HAVE_BAR) */ - #endif /* defined(CONFIG_FOO) */ - -Individual members of complex types, commands arguments, and -event-specific data can also be made conditional. This requires the -longhand form of MEMBER. - -Example: a struct type with unconditional member 'foo' and conditional -member 'bar' - -{ 'struct': 'IfStruct', 'data': - { 'foo': 'int', - 'bar': { 'type': 'int', 'if': 'defined(IFCOND)'} } } - -A union's discriminator may not be conditional. - -Likewise, individual enumeration values be conditional. This requires -the longhand form of ENUM-VALUE. 
- -Example: an enum type with unconditional value 'foo' and conditional -value 'bar' - -{ 'enum': 'IfEnum', 'data': - [ 'foo', - { 'name' : 'bar', 'if': 'defined(IFCOND)' } ] } - -Likewise, features can be conditional. This requires the longhand -form of FEATURE. - -Example: a struct with conditional feature 'allow-negative-numbers' - -{ 'struct': 'TestType', - 'data': { 'number': 'int' }, - 'features': [ { 'name': 'allow-negative-numbers', - 'if': 'defined(IFCOND)' } ] } - -Please note that you are responsible to ensure that the C code will -compile with an arbitrary combination of conditions, since the -generator is unable to check it at this point. - -The conditions apply to introspection as well, i.e. introspection -shows a conditional entity only when the condition is satisfied in -this particular build. - - -=== Documentation comments === - -A multi-line comment that starts and ends with a '##' line is a -documentation comment. - -If the documentation comment starts like - - ## - # @SYMBOL: - -it documents the definition if SYMBOL, else it's free-form -documentation. - -See below for more on definition documentation. - -Free-form documentation may be used to provide additional text and -structuring content. - -==== Headings and subheadings ==== - -A free-form documentation comment containing a line which starts with -some '=' symbols and then a space defines a section heading: - - ## - # = This is a top level heading - # - # This is a free-form comment which will go under the - # top level heading. - ## - - ## - # == This is a second level heading - ## - -A heading line must be the first line of the documentation -comment block. - -Section headings must always be correctly nested, so you can only -define a third-level heading inside a second-level heading, and so on. - -==== Documentation markup ==== - -Documentation comments can use most rST markup. In particular, -a '::' literal block can be used for examples: - - # :: - # - # Text of the example, may span - # multiple lines - -'*' starts an itemized list: - - # * First item, may span - # multiple lines - # * Second item - -You can also use '-' instead of '*'. - -A decimal number followed by '.' starts a numbered list: - - # 1. First item, may span - # multiple lines - # 2. Second item - -The actual number doesn't matter. - -Lists of either kind must be preceded and followed by a blank line. -If a list item's text spans multiple lines, then the second and -subsequent lines must be correctly indented to line up with the -first character of the first line. - -The usual '**strong**', '*emphasised*' and '``literal``' markup should -be used. If you need a single literal '*' you will need to -backslash-escape it. As an extension beyond the usual rST syntax, you -can also use '@foo' to reference a name in the schema; this is -rendered the same way as '``foo``'. - -Example: - -## -# Some text foo with **bold** and *emphasis* -# 1. with a list -# 2. like that -# -# And some code: -# -# :: -# -# $ echo foo -# -> do this -# <- get that -## - - -==== Definition documentation ==== - -Definition documentation, if present, must immediately precede the -definition it documents. - -When documentation is required (see pragma 'doc-required'), every -definition must have documentation. 
- -Definition documentation starts with a line naming the definition, -followed by an optional overview, a description of each argument (for -commands and events), member (for structs and unions), branch (for -alternates), or value (for enums), and finally optional tagged -sections. - -Descriptions of arguments can span multiple lines. The description -text can start on the line following the '@argname:', in which case it -must not be indented at all. It can also start on the same line as -the '@argname:'. In this case if it spans multiple lines then second -and subsequent lines must be indented to line up with the first -character of the first line of the description: - -# @argone: -# This is a two line description -# in the first style. -# -# @argtwo: This is a two line description -# in the second style. - -The number of spaces between the ':' and the text is not significant. - -FIXME: the parser accepts these things in almost any order. -FIXME: union branches should be described, too. - -Extensions added after the definition was first released carry a -'(since x.y.z)' comment. - -A tagged section starts with one of the following words: -"Note:"/"Notes:", "Since:", "Example"/"Examples", "Returns:", "TODO:". -The section ends with the start of a new section. - -The text of a section can start on a new line, in -which case it must not be indented at all. It can also start -on the same line as the 'Note:', 'Returns:', etc tag. In this -case if it spans multiple lines then second and subsequent -lines must be indented to match the first, in the same way as -multiline argument descriptions. - -A 'Since: x.y.z' tagged section lists the release that introduced the -definition. - -The text of a section can start on a new line, in -which case it must not be indented at all. It can also start -on the same line as the 'Note:', 'Returns:', etc tag. In this -case if it spans multiple lines then second and subsequent -lines must be indented to match the first. - -An 'Example' or 'Examples' section is automatically rendered -entirely as literal fixed-width text. In other sections, -the text is formatted, and rST markup can be used. - -For example: - -## -# @BlockStats: -# -# Statistics of a virtual block device or a block backing device. -# -# @device: If the stats are for a virtual block device, the name -# corresponding to the virtual block device. -# -# @node-name: The node name of the device. (since 2.3) -# -# ... more members ... -# -# Since: 0.14.0 -## -{ 'struct': 'BlockStats', - 'data': {'*device': 'str', '*node-name': 'str', - ... more members ... } } - -## -# @query-blockstats: -# -# Query the @BlockStats for all virtual block devices. -# -# @query-nodes: If true, the command will query all the -# block nodes ... explain, explain ... (since 2.3) -# -# Returns: A list of @BlockStats for each virtual block devices. -# -# Since: 0.14.0 -# -# Example: -# -# -> { "execute": "query-blockstats" } -# <- { -# ... lots of output ... -# } -# -## -{ 'command': 'query-blockstats', - 'data': { '*query-nodes': 'bool' }, - 'returns': ['BlockStats'] } - - -== Client JSON Protocol introspection == - -Clients of a Client JSON Protocol commonly need to figure out what -exactly the server (QEMU) supports. - -For this purpose, QMP provides introspection via command -query-qmp-schema. QGA currently doesn't support introspection. - -While Client JSON Protocol wire compatibility should be maintained -between qemu versions, we cannot make the same guarantees for -introspection stability. 
For example, one version of qemu may provide -a non-variant optional member of a struct, and a later version rework -the member to instead be non-optional and associated with a variant. -Likewise, one version of qemu may list a member with open-ended type -'str', and a later version could convert it to a finite set of strings -via an enum type; or a member may be converted from a specific type to -an alternate that represents a choice between the original type and -something else. - -query-qmp-schema returns a JSON array of SchemaInfo objects. These -objects together describe the wire ABI, as defined in the QAPI schema. -There is no specified order to the SchemaInfo objects returned; a -client must search for a particular name throughout the entire array -to learn more about that name, but is at least guaranteed that there -will be no collisions between type, command, and event names. - -However, the SchemaInfo can't reflect all the rules and restrictions -that apply to QMP. It's interface introspection (figuring out what's -there), not interface specification. The specification is in the QAPI -schema. To understand how QMP is to be used, you need to study the -QAPI schema. - -Like any other command, query-qmp-schema is itself defined in the QAPI -schema, along with the SchemaInfo type. This text attempts to give an -overview how things work. For details you need to consult the QAPI -schema. - -SchemaInfo objects have common members "name", "meta-type", -"features", and additional variant members depending on the value of -meta-type. - -Each SchemaInfo object describes a wire ABI entity of a certain -meta-type: a command, event or one of several kinds of type. - -SchemaInfo for commands and events have the same name as in the QAPI -schema. - -Command and event names are part of the wire ABI, but type names are -not. Therefore, the SchemaInfo for types have auto-generated -meaningless names. For readability, the examples in this section use -meaningful type names instead. - -Optional member "features" exposes the entity's feature strings as a -JSON array of strings. - -To examine a type, start with a command or event using it, then follow -references by name. - -QAPI schema definitions not reachable that way are omitted. - -The SchemaInfo for a command has meta-type "command", and variant -members "arg-type", "ret-type" and "allow-oob". On the wire, the -"arguments" member of a client's "execute" command must conform to the -object type named by "arg-type". The "return" member that the server -passes in a success response conforms to the type named by "ret-type". -When "allow-oob" is true, it means the command supports out-of-band -execution. It defaults to false. - -If the command takes no arguments, "arg-type" names an object type -without members. Likewise, if the command returns nothing, "ret-type" -names an object type without members. - -Example: the SchemaInfo for command query-qmp-schema - - { "name": "query-qmp-schema", "meta-type": "command", - "arg-type": "q_empty", "ret-type": "SchemaInfoList" } - - Type "q_empty" is an automatic object type without members, and type - "SchemaInfoList" is the array of SchemaInfo type. - -The SchemaInfo for an event has meta-type "event", and variant member -"arg-type". On the wire, a "data" member that the server passes in an -event conforms to the object type named by "arg-type". - -If the event carries no additional information, "arg-type" names an -object type without members. The event may not have a data member on -the wire then. 
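For illustration, the SchemaInfo for an event declared without 'data'
(using a meaningful name for the empty argument type, as elsewhere in
this section) could look like:

    { "name": "MY_EVENT", "meta-type": "event",
      "arg-type": "q_empty" }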
- -Each command or event defined with 'data' as MEMBERS object in the -QAPI schema implicitly defines an object type. - -Example: the SchemaInfo for EVENT_C from section Events - - { "name": "EVENT_C", "meta-type": "event", - "arg-type": "q_obj-EVENT_C-arg" } - - Type "q_obj-EVENT_C-arg" is an implicitly defined object type with - the two members from the event's definition. - -The SchemaInfo for struct and union types has meta-type "object". - -The SchemaInfo for a struct type has variant member "members". - -The SchemaInfo for a union type additionally has variant members "tag" -and "variants". - -"members" is a JSON array describing the object's common members, if -any. Each element is a JSON object with members "name" (the member's -name), "type" (the name of its type), and optionally "default". The -member is optional if "default" is present. Currently, "default" can -only have value null. Other values are reserved for future -extensions. The "members" array is in no particular order; clients -must search the entire object when learning whether a particular -member is supported. - -Example: the SchemaInfo for MyType from section Struct types - - { "name": "MyType", "meta-type": "object", - "members": [ - { "name": "member1", "type": "str" }, - { "name": "member2", "type": "int" }, - { "name": "member3", "type": "str", "default": null } ] } - -"features" exposes the command's feature strings as a JSON array of -strings. - -Example: the SchemaInfo for TestType from section Features: - - { "name": "TestType", "meta-type": "object", - "members": [ - { "name": "number", "type": "int" } ], - "features": ["allow-negative-numbers"] } - -"tag" is the name of the common member serving as type tag. -"variants" is a JSON array describing the object's variant members. -Each element is a JSON object with members "case" (the value of type -tag this element applies to) and "type" (the name of an object type -that provides the variant members for this type tag value). The -"variants" array is in no particular order, and is not guaranteed to -list cases in the same order as the corresponding "tag" enum type. - -Example: the SchemaInfo for flat union BlockdevOptions from section -Union types - - { "name": "BlockdevOptions", "meta-type": "object", - "members": [ - { "name": "driver", "type": "BlockdevDriver" }, - { "name": "read-only", "type": "bool", "default": null } ], - "tag": "driver", - "variants": [ - { "case": "file", "type": "BlockdevOptionsFile" }, - { "case": "qcow2", "type": "BlockdevOptionsQcow2" } ] } - -Note that base types are "flattened": its members are included in the -"members" array. - -A simple union implicitly defines an enumeration type for its implicit -discriminator (called "type" on the wire, see section Union types). - -A simple union implicitly defines an object type for each of its -variants. - -Example: the SchemaInfo for simple union BlockdevOptionsSimple from section -Union types - - { "name": "BlockdevOptionsSimple", "meta-type": "object", - "members": [ - { "name": "type", "type": "BlockdevOptionsSimpleKind" } ], - "tag": "type", - "variants": [ - { "case": "file", "type": "q_obj-BlockdevOptionsFile-wrapper" }, - { "case": "qcow2", "type": "q_obj-BlockdevOptionsQcow2-wrapper" } ] } - - Enumeration type "BlockdevOptionsSimpleKind" and the object types - "q_obj-BlockdevOptionsFile-wrapper", "q_obj-BlockdevOptionsQcow2-wrapper" - are implicitly defined. - -The SchemaInfo for an alternate type has meta-type "alternate", and -variant member "members". 
"members" is a JSON array. Each element is -a JSON object with member "type", which names a type. Values of the -alternate type conform to exactly one of its member types. There is -no guarantee on the order in which "members" will be listed. - -Example: the SchemaInfo for BlockdevRef from section Alternate types - - { "name": "BlockdevRef", "meta-type": "alternate", - "members": [ - { "type": "BlockdevOptions" }, - { "type": "str" } ] } - -The SchemaInfo for an array type has meta-type "array", and variant -member "element-type", which names the array's element type. Array -types are implicitly defined. For convenience, the array's name may -resemble the element type; however, clients should examine member -"element-type" instead of making assumptions based on parsing member -"name". - -Example: the SchemaInfo for ['str'] - - { "name": "[str]", "meta-type": "array", - "element-type": "str" } - -The SchemaInfo for an enumeration type has meta-type "enum" and -variant member "values". The values are listed in no particular -order; clients must search the entire enum when learning whether a -particular value is supported. - -Example: the SchemaInfo for MyEnum from section Enumeration types - - { "name": "MyEnum", "meta-type": "enum", - "values": [ "value1", "value2", "value3" ] } - -The SchemaInfo for a built-in type has the same name as the type in -the QAPI schema (see section Built-in Types), with one exception -detailed below. It has variant member "json-type" that shows how -values of this type are encoded on the wire. - -Example: the SchemaInfo for str - - { "name": "str", "meta-type": "builtin", "json-type": "string" } - -The QAPI schema supports a number of integer types that only differ in -how they map to C. They are identical as far as SchemaInfo is -concerned. Therefore, they get all mapped to a single type "int" in -SchemaInfo. - -As explained above, type names are not part of the wire ABI. Not even -the names of built-in types. Clients should examine member -"json-type" instead of hard-coding names of built-in types. - - -== Compatibility considerations == - -Maintaining backward compatibility at the Client JSON Protocol level -while evolving the schema requires some care. This section is about -syntactic compatibility, which is necessary, but not sufficient, for -actual compatibility. - -Clients send commands with argument data, and receive command -responses with return data and events with event data. - -Adding opt-in functionality to the send direction is backwards -compatible: adding commands, optional arguments, enumeration values, -union and alternate branches; turning an argument type into an -alternate of that type; making mandatory arguments optional. Clients -oblivious of the new functionality continue to work. - -Incompatible changes include removing commands, command arguments, -enumeration values, union and alternate branches, adding mandatory -command arguments, and making optional arguments mandatory. - -The specified behavior of an absent optional argument should remain -the same. With proper documentation, this policy still allows some -flexibility; for example, when an optional 'buffer-size' argument is -specified to default to a sensible buffer size, the actual default -value can still be changed. The specified default behavior is not the -exact size of the buffer, only that the default size is sensible. - -Adding functionality to the receive direction is generally backwards -compatible: adding events, adding return and event data members. 
-Clients are expected to ignore the ones they don't know. - -Removing "unreachable" stuff like events that can't be triggered -anymore, optional return or event data members that can't be sent -anymore, and return or event data member (enumeration) values that -can't be sent anymore makes no difference to clients, except for -introspection. The latter can conceivably confuse clients, so tread -carefully. - -Incompatible changes include removing return and event data members. - -Any change to a command definition's 'data' or one of the types used -there (recursively) needs to consider send direction compatibility. - -Any change to a command definition's 'return', an event definition's -'data', or one of the types used there (recursively) needs to consider -receive direction compatibility. - -Any change to types used in both contexts need to consider both. - -Enumeration type values and complex and alternate type members may be -reordered freely. For enumerations and alternate types, this doesn't -affect the wire encoding. For complex types, this might make the -implementation emit JSON object members in a different order, which -the Client JSON Protocol permits. - -Since type names are not visible in the Client JSON Protocol, types -may be freely renamed. Even certain refactorings are invisible, such -as splitting members from one type into a common base type. - - -== Code generation == - -The QAPI code generator qapi-gen.py generates code and documentation -from the schema. Together with the core QAPI libraries, this code -provides everything required to take JSON commands read in by a Client -JSON Protocol server, unmarshal the arguments into the underlying C -types, call into the corresponding C function, map the response back -to a Client JSON Protocol response to be returned to the user, and -introspect the commands. - -As an example, we'll use the following schema, which describes a -single complex user-defined type, along with command which takes a -list of that type as a parameter, and returns a single element of that -type. The user is responsible for writing the implementation of -qmp_my_command(); everything else is produced by the generator. - - $ cat example-schema.json - { 'struct': 'UserDefOne', - 'data': { 'integer': 'int', '*string': 'str' } } - - { 'command': 'my-command', - 'data': { 'arg1': ['UserDefOne'] }, - 'returns': 'UserDefOne' } - - { 'event': 'MY_EVENT' } - -We run qapi-gen.py like this: - - $ python scripts/qapi-gen.py --output-dir="qapi-generated" \ - --prefix="example-" example-schema.json - -For a more thorough look at generated code, the testsuite includes -tests/qapi-schema/qapi-schema-tests.json that covers more examples of -what the generator will accept, and compiles the resulting C code as -part of 'make check-unit'. - -=== Code generated for QAPI types === - -The following files are created: - -$(prefix)qapi-types.h - C types corresponding to types defined in - the schema - -$(prefix)qapi-types.c - Cleanup functions for the above C types - -The $(prefix) is an optional parameter used as a namespace to keep the -generated code from one schema/code-generation separated from others so code -can be generated/used from multiple schemas without clobbering previously -created code. - -Example: - - $ cat qapi-generated/example-qapi-types.h -[Uninteresting stuff omitted...] 
- - #ifndef EXAMPLE_QAPI_TYPES_H - #define EXAMPLE_QAPI_TYPES_H - - #include "qapi/qapi-builtin-types.h" - - typedef struct UserDefOne UserDefOne; - - typedef struct UserDefOneList UserDefOneList; - - typedef struct q_obj_my_command_arg q_obj_my_command_arg; - - struct UserDefOne { - int64_t integer; - bool has_string; - char *string; - }; - - void qapi_free_UserDefOne(UserDefOne *obj); - G_DEFINE_AUTOPTR_CLEANUP_FUNC(UserDefOne, qapi_free_UserDefOne) - - struct UserDefOneList { - UserDefOneList *next; - UserDefOne *value; - }; - - void qapi_free_UserDefOneList(UserDefOneList *obj); - G_DEFINE_AUTOPTR_CLEANUP_FUNC(UserDefOneList, qapi_free_UserDefOneList) - - struct q_obj_my_command_arg { - UserDefOneList *arg1; - }; - - #endif /* EXAMPLE_QAPI_TYPES_H */ - $ cat qapi-generated/example-qapi-types.c -[Uninteresting stuff omitted...] - - void qapi_free_UserDefOne(UserDefOne *obj) - { - Visitor *v; - - if (!obj) { - return; - } - - v = qapi_dealloc_visitor_new(); - visit_type_UserDefOne(v, NULL, &obj, NULL); - visit_free(v); - } - - void qapi_free_UserDefOneList(UserDefOneList *obj) - { - Visitor *v; - - if (!obj) { - return; - } - - v = qapi_dealloc_visitor_new(); - visit_type_UserDefOneList(v, NULL, &obj, NULL); - visit_free(v); - } - -[Uninteresting stuff omitted...] - -For a modular QAPI schema (see section Include directives), code for -each sub-module SUBDIR/SUBMODULE.json is actually generated into - -SUBDIR/$(prefix)qapi-types-SUBMODULE.h -SUBDIR/$(prefix)qapi-types-SUBMODULE.c - -If qapi-gen.py is run with option --builtins, additional files are -created: - -qapi-builtin-types.h - C types corresponding to built-in types - -qapi-builtin-types.c - Cleanup functions for the above C types - -=== Code generated for visiting QAPI types === - -These are the visitor functions used to walk through and convert -between a native QAPI C data structure and some other format (such as -QObject); the generated functions are named visit_type_FOO() and -visit_type_FOO_members(). - -The following files are generated: - -$(prefix)qapi-visit.c: Visitor function for a particular C type, used - to automagically convert QObjects into the - corresponding C type and vice-versa, as well - as for deallocating memory for an existing C - type - -$(prefix)qapi-visit.h: Declarations for previously mentioned visitor - functions - -Example: - - $ cat qapi-generated/example-qapi-visit.h -[Uninteresting stuff omitted...] - - #ifndef EXAMPLE_QAPI_VISIT_H - #define EXAMPLE_QAPI_VISIT_H - - #include "qapi/qapi-builtin-visit.h" - #include "example-qapi-types.h" - - - bool visit_type_UserDefOne_members(Visitor *v, UserDefOne *obj, Error **errp); - bool visit_type_UserDefOne(Visitor *v, const char *name, UserDefOne **obj, Error **errp); - bool visit_type_UserDefOneList(Visitor *v, const char *name, UserDefOneList **obj, Error **errp); - - bool visit_type_q_obj_my_command_arg_members(Visitor *v, q_obj_my_command_arg *obj, Error **errp); - - #endif /* EXAMPLE_QAPI_VISIT_H */ - $ cat qapi-generated/example-qapi-visit.c -[Uninteresting stuff omitted...] 
- - bool visit_type_UserDefOne_members(Visitor *v, UserDefOne *obj, Error **errp) - { - if (!visit_type_int(v, "integer", &obj->integer, errp)) { - return false; - } - if (visit_optional(v, "string", &obj->has_string)) { - if (!visit_type_str(v, "string", &obj->string, errp)) { - return false; - } - } - return true; - } - - bool visit_type_UserDefOne(Visitor *v, const char *name, UserDefOne **obj, Error **errp) - { - bool ok = false; - - if (!visit_start_struct(v, name, (void **)obj, sizeof(UserDefOne), errp)) { - return false; - } - if (!*obj) { - /* incomplete */ - assert(visit_is_dealloc(v)); - goto out_obj; - } - if (!visit_type_UserDefOne_members(v, *obj, errp)) { - goto out_obj; - } - ok = visit_check_struct(v, errp); - out_obj: - visit_end_struct(v, (void **)obj); - if (!ok && visit_is_input(v)) { - qapi_free_UserDefOne(*obj); - *obj = NULL; - } - return ok; - } - - bool visit_type_UserDefOneList(Visitor *v, const char *name, UserDefOneList **obj, Error **errp) - { - bool ok = false; - UserDefOneList *tail; - size_t size = sizeof(**obj); - - if (!visit_start_list(v, name, (GenericList **)obj, size, errp)) { - return false; - } - - for (tail = *obj; tail; - tail = (UserDefOneList *)visit_next_list(v, (GenericList *)tail, size)) { - if (!visit_type_UserDefOne(v, NULL, &tail->value, errp)) { - goto out_obj; - } - } - - ok = visit_check_list(v, errp); - out_obj: - visit_end_list(v, (void **)obj); - if (!ok && visit_is_input(v)) { - qapi_free_UserDefOneList(*obj); - *obj = NULL; - } - return ok; - } - - bool visit_type_q_obj_my_command_arg_members(Visitor *v, q_obj_my_command_arg *obj, Error **errp) - { - if (!visit_type_UserDefOneList(v, "arg1", &obj->arg1, errp)) { - return false; - } - return true; - } - -[Uninteresting stuff omitted...] - -For a modular QAPI schema (see section Include directives), code for -each sub-module SUBDIR/SUBMODULE.json is actually generated into - -SUBDIR/$(prefix)qapi-visit-SUBMODULE.h -SUBDIR/$(prefix)qapi-visit-SUBMODULE.c - -If qapi-gen.py is run with option --builtins, additional files are -created: - -qapi-builtin-visit.h - Visitor functions for built-in types - -qapi-builtin-visit.c - Declarations for these visitor functions - -=== Code generated for commands === - -These are the marshaling/dispatch functions for the commands defined -in the schema. The generated code provides qmp_marshal_COMMAND(), and -declares qmp_COMMAND() that the user must implement. - -The following files are generated: - -$(prefix)qapi-commands.c: Command marshal/dispatch functions for each - QMP command defined in the schema - -$(prefix)qapi-commands.h: Function prototypes for the QMP commands - specified in the schema - -$(prefix)qapi-init-commands.h - Command initialization prototype - -$(prefix)qapi-init-commands.c - Command initialization code - -Example: - - $ cat qapi-generated/example-qapi-commands.h -[Uninteresting stuff omitted...] - - #ifndef EXAMPLE_QAPI_COMMANDS_H - #define EXAMPLE_QAPI_COMMANDS_H - - #include "example-qapi-types.h" - - UserDefOne *qmp_my_command(UserDefOneList *arg1, Error **errp); - void qmp_marshal_my_command(QDict *args, QObject **ret, Error **errp); - - #endif /* EXAMPLE_QAPI_COMMANDS_H */ - $ cat qapi-generated/example-qapi-commands.c -[Uninteresting stuff omitted...] 
- - static void qmp_marshal_output_UserDefOne(UserDefOne *ret_in, QObject **ret_out, Error **errp) - { - Visitor *v; - - v = qobject_output_visitor_new(ret_out); - if (visit_type_UserDefOne(v, "unused", &ret_in, errp)) { - visit_complete(v, ret_out); - } - visit_free(v); - v = qapi_dealloc_visitor_new(); - visit_type_UserDefOne(v, "unused", &ret_in, NULL); - visit_free(v); - } - - void qmp_marshal_my_command(QDict *args, QObject **ret, Error **errp) - { - Error *err = NULL; - bool ok = false; - Visitor *v; - UserDefOne *retval; - q_obj_my_command_arg arg = {0}; - - v = qobject_input_visitor_new(QOBJECT(args)); - if (!visit_start_struct(v, NULL, NULL, 0, errp)) { - goto out; - } - if (visit_type_q_obj_my_command_arg_members(v, &arg, errp)) { - ok = visit_check_struct(v, errp); - } - visit_end_struct(v, NULL); - if (!ok) { - goto out; - } - - retval = qmp_my_command(arg.arg1, &err); - error_propagate(errp, err); - if (err) { - goto out; - } - - qmp_marshal_output_UserDefOne(retval, ret, errp); - - out: - visit_free(v); - v = qapi_dealloc_visitor_new(); - visit_start_struct(v, NULL, NULL, 0, NULL); - visit_type_q_obj_my_command_arg_members(v, &arg, NULL); - visit_end_struct(v, NULL); - visit_free(v); - } - -[Uninteresting stuff omitted...] - $ cat qapi-generated/example-qapi-init-commands.h -[Uninteresting stuff omitted...] - #ifndef EXAMPLE_QAPI_INIT_COMMANDS_H - #define EXAMPLE_QAPI_INIT_COMMANDS_H - - #include "qapi/qmp/dispatch.h" - - void example_qmp_init_marshal(QmpCommandList *cmds); - - #endif /* EXAMPLE_QAPI_INIT_COMMANDS_H */ - $ cat qapi-generated/example-qapi-init-commands.c -[Uninteresting stuff omitted...] - void example_qmp_init_marshal(QmpCommandList *cmds) - { - QTAILQ_INIT(cmds); - - qmp_register_command(cmds, "my-command", - qmp_marshal_my_command, QCO_NO_OPTIONS); - } -[Uninteresting stuff omitted...] - -For a modular QAPI schema (see section Include directives), code for -each sub-module SUBDIR/SUBMODULE.json is actually generated into - -SUBDIR/$(prefix)qapi-commands-SUBMODULE.h -SUBDIR/$(prefix)qapi-commands-SUBMODULE.c - -=== Code generated for events === - -This is the code related to events defined in the schema, providing -qapi_event_send_EVENT(). - -The following files are created: - -$(prefix)qapi-events.h - Function prototypes for each event type - -$(prefix)qapi-events.c - Implementation of functions to send an event - -$(prefix)qapi-emit-events.h - Enumeration of all event names, and - common event code declarations - -$(prefix)qapi-emit-events.c - Common event code definitions - -Example: - - $ cat qapi-generated/example-qapi-events.h -[Uninteresting stuff omitted...] - - #ifndef EXAMPLE_QAPI_EVENTS_H - #define EXAMPLE_QAPI_EVENTS_H - - #include "qapi/util.h" - #include "example-qapi-types.h" - - void qapi_event_send_my_event(void); - - #endif /* EXAMPLE_QAPI_EVENTS_H */ - $ cat qapi-generated/example-qapi-events.c -[Uninteresting stuff omitted...] - - void qapi_event_send_my_event(void) - { - QDict *qmp; - - qmp = qmp_event_build_dict("MY_EVENT"); - - example_qapi_event_emit(EXAMPLE_QAPI_EVENT_MY_EVENT, qmp); - - qobject_unref(qmp); - } - -[Uninteresting stuff omitted...] - $ cat qapi-generated/example-qapi-emit-events.h -[Uninteresting stuff omitted...] 
- - #ifndef EXAMPLE_QAPI_EMIT_EVENTS_H - #define EXAMPLE_QAPI_EMIT_EVENTS_H - - #include "qapi/util.h" - - typedef enum example_QAPIEvent { - EXAMPLE_QAPI_EVENT_MY_EVENT, - EXAMPLE_QAPI_EVENT__MAX, - } example_QAPIEvent; - - #define example_QAPIEvent_str(val) \ - qapi_enum_lookup(&example_QAPIEvent_lookup, (val)) - - extern const QEnumLookup example_QAPIEvent_lookup; - - void example_qapi_event_emit(example_QAPIEvent event, QDict *qdict); - - #endif /* EXAMPLE_QAPI_EMIT_EVENTS_H */ - $ cat qapi-generated/example-qapi-emit-events.c -[Uninteresting stuff omitted...] - - const QEnumLookup example_QAPIEvent_lookup = { - .array = (const char *const[]) { - [EXAMPLE_QAPI_EVENT_MY_EVENT] = "MY_EVENT", - }, - .size = EXAMPLE_QAPI_EVENT__MAX - }; - -[Uninteresting stuff omitted...] - -For a modular QAPI schema (see section Include directives), code for -each sub-module SUBDIR/SUBMODULE.json is actually generated into - -SUBDIR/$(prefix)qapi-events-SUBMODULE.h -SUBDIR/$(prefix)qapi-events-SUBMODULE.c - -=== Code generated for introspection === - -The following files are created: - -$(prefix)qapi-introspect.c - Defines a string holding a JSON - description of the schema - -$(prefix)qapi-introspect.h - Declares the above string - -Example: - - $ cat qapi-generated/example-qapi-introspect.h -[Uninteresting stuff omitted...] - - #ifndef EXAMPLE_QAPI_INTROSPECT_H - #define EXAMPLE_QAPI_INTROSPECT_H - - #include "qapi/qmp/qlit.h" - - extern const QLitObject example_qmp_schema_qlit; - - #endif /* EXAMPLE_QAPI_INTROSPECT_H */ - $ cat qapi-generated/example-qapi-introspect.c -[Uninteresting stuff omitted...] - - const QLitObject example_qmp_schema_qlit = QLIT_QLIST(((QLitObject[]) { - QLIT_QDICT(((QLitDictEntry[]) { - { "arg-type", QLIT_QSTR("0"), }, - { "meta-type", QLIT_QSTR("command"), }, - { "name", QLIT_QSTR("my-command"), }, - { "ret-type", QLIT_QSTR("1"), }, - {} - })), - QLIT_QDICT(((QLitDictEntry[]) { - { "arg-type", QLIT_QSTR("2"), }, - { "meta-type", QLIT_QSTR("event"), }, - { "name", QLIT_QSTR("MY_EVENT"), }, - {} - })), - /* "0" = q_obj_my-command-arg */ - QLIT_QDICT(((QLitDictEntry[]) { - { "members", QLIT_QLIST(((QLitObject[]) { - QLIT_QDICT(((QLitDictEntry[]) { - { "name", QLIT_QSTR("arg1"), }, - { "type", QLIT_QSTR("[1]"), }, - {} - })), - {} - })), }, - { "meta-type", QLIT_QSTR("object"), }, - { "name", QLIT_QSTR("0"), }, - {} - })), - /* "1" = UserDefOne */ - QLIT_QDICT(((QLitDictEntry[]) { - { "members", QLIT_QLIST(((QLitObject[]) { - QLIT_QDICT(((QLitDictEntry[]) { - { "name", QLIT_QSTR("integer"), }, - { "type", QLIT_QSTR("int"), }, - {} - })), - QLIT_QDICT(((QLitDictEntry[]) { - { "default", QLIT_QNULL, }, - { "name", QLIT_QSTR("string"), }, - { "type", QLIT_QSTR("str"), }, - {} - })), - {} - })), }, - { "meta-type", QLIT_QSTR("object"), }, - { "name", QLIT_QSTR("1"), }, - {} - })), - /* "2" = q_empty */ - QLIT_QDICT(((QLitDictEntry[]) { - { "members", QLIT_QLIST(((QLitObject[]) { - {} - })), }, - { "meta-type", QLIT_QSTR("object"), }, - { "name", QLIT_QSTR("2"), }, - {} - })), - QLIT_QDICT(((QLitDictEntry[]) { - { "element-type", QLIT_QSTR("1"), }, - { "meta-type", QLIT_QSTR("array"), }, - { "name", QLIT_QSTR("[1]"), }, - {} - })), - QLIT_QDICT(((QLitDictEntry[]) { - { "json-type", QLIT_QSTR("int"), }, - { "meta-type", QLIT_QSTR("builtin"), }, - { "name", QLIT_QSTR("int"), }, - {} - })), - QLIT_QDICT(((QLitDictEntry[]) { - { "json-type", QLIT_QSTR("string"), }, - { "meta-type", QLIT_QSTR("builtin"), }, - { "name", QLIT_QSTR("str"), }, - {} - })), - {} - })); - -[Uninteresting stuff 
omitted...] diff --git a/docs/devel/qgraph.rst b/docs/devel/qgraph.rst index a9aff167ada..43342d9d650 100644 --- a/docs/devel/qgraph.rst +++ b/docs/devel/qgraph.rst @@ -1,8 +1,7 @@ .. _qgraph: -======================================== Qtest Driver Framework -======================================== +====================== In order to test a specific driver, plain libqos tests need to take care of booting QEMU with the right machine and devices. @@ -15,7 +14,7 @@ support that device. Using only libqos APIs, the test has to manually take care of covering all the setups, and build the correct command line. -This also introduces backward compability issues: if a device/driver command +This also introduces backward compatibility issues: if a device/driver command line name is changed, all tests that use that will not work properly anymore and need to be adjusted. @@ -31,17 +30,19 @@ so the sdhci-test should only care of linking its qgraph node with that interface. In this way, if the command line of a sdhci driver is changed, only the respective qgraph driver node has to be adjusted. +QGraph concepts +--------------- + The graph is composed by nodes that represent machines, drivers, tests and edges that define the relationships between them (``CONSUMES``, ``PRODUCES``, and ``CONTAINS``). - Nodes -^^^^^^ +~~~~~ A node can be of four types: -- **QNODE_MACHINE**: for example ``arm/raspi2`` +- **QNODE_MACHINE**: for example ``arm/raspi2b`` - **QNODE_DRIVER**: for example ``generic-sdhci`` - **QNODE_INTERFACE**: for example ``sdhci`` (interface for all ``-sdhci`` drivers). @@ -64,16 +65,16 @@ Notes for the nodes: drivers name, otherwise they won't be discovered Edges -^^^^^^ +~~~~~ -An edge relation between two nodes (drivers or machines) `X` and `Y` can be: +An edge relation between two nodes (drivers or machines) ``X`` and ``Y`` can be: -- ``X CONSUMES Y``: `Y` can be plugged into `X` -- ``X PRODUCES Y``: `X` provides the interface `Y` -- ``X CONTAINS Y``: `Y` is part of `X` component +- ``X CONSUMES Y``: ``Y`` can be plugged into ``X`` +- ``X PRODUCES Y``: ``X`` provides the interface ``Y`` +- ``X CONTAINS Y``: ``Y`` is part of ``X`` component Execution steps -^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~ The basic framework steps are the following: @@ -92,15 +93,129 @@ The basic framework steps are the following: Depending on the QEMU binary used, only some drivers/machines will be available and only test that are reached by them will be executed. +Command line +~~~~~~~~~~~~ + +Command line is built by using node names and optional arguments +passed by the user when building the edges. + +There are three types of command line arguments: + +- ``in node`` : created from the node name. For example, machines will + have ``-M `` to its command line, while devices + ``-device ``. It is automatically done by the framework. +- ``after node`` : added as additional argument to the node name. + This argument is added optionally when creating edges, + by setting the parameter ``after_cmd_line`` and + ``extra_edge_opts`` in ``QOSGraphEdgeOptions``. + The framework automatically adds + a comma before ``extra_edge_opts``, + because it is going to add attributes + after the destination node pointed by + the edge containing these options, and automatically + adds a space before ``after_cmd_line``, because it + adds an additional device, not an attribute. +- ``before node`` : added as additional argument to the node name. 
+ This argument is added optionally when creating edges, + by setting the parameter ``before_cmd_line`` in + ``QOSGraphEdgeOptions``. This attribute + is going to add attributes before the destination node + pointed by the edge containing these options. It is + helpful to commands that are not node-representable, + such as ``-fdsev`` or ``-netdev``. + +While adding command line in edges is always used, not all nodes names are +used in every path walk: this is because the contained or produced ones +are already added by QEMU, so only nodes that "consumes" will be used to +build the command line. Also, nodes that will have ``{ "abstract" : true }`` +as QMP attribute will loose their command line, since they are not proper +devices to be added in QEMU. + +Example:: + + QOSGraphEdgeOptions opts = { + .before_cmd_line = "-drive id=drv0,if=none,file=null-co://," + "file.read-zeroes=on,format=raw", + .after_cmd_line = "-device scsi-hd,bus=vs0.0,drive=drv0", + + opts.extra_device_opts = "id=vs0"; + }; + + qos_node_create_driver("virtio-scsi-device", + virtio_scsi_device_create); + qos_node_consumes("virtio-scsi-device", "virtio-bus", &opts); + +Will produce the following command line: +``-drive id=drv0,if=none,file=null-co://, -device virtio-scsi-device,id=vs0 -device scsi-hd,bus=vs0.0,drive=drv0`` + +Troubleshooting unavailable tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If there is no path from an available machine to a test then that test will be +unavailable and won't execute. This can happen if a test or driver did not set +up its qgraph node correctly. It can also happen if the necessary machine type +or device is missing from the QEMU binary because it was compiled out or +otherwise. + +It is possible to troubleshoot unavailable tests by running:: + + $ QTEST_QEMU_BINARY=build/qemu-system-x86_64 build/tests/qtest/qos-test --verbose + # ALL QGRAPH EDGES: { + # src='virtio-net' + # |-> dest='virtio-net-tests/vhost-user/multiqueue' type=2 (node=0x559142109e30) + # |-> dest='virtio-net-tests/vhost-user/migrate' type=2 (node=0x559142109d00) + # src='virtio-net-pci' + # |-> dest='virtio-net' type=1 (node=0x55914210d740) + # src='pci-bus' + # |-> dest='virtio-net-pci' type=2 (node=0x55914210d880) + # src='pci-bus-pc' + # |-> dest='pci-bus' type=1 (node=0x559142103f40) + # src='i440FX-pcihost' + # |-> dest='pci-bus-pc' type=0 (node=0x55914210ac70) + # src='x86_64/pc' + # |-> dest='i440FX-pcihost' type=0 (node=0x5591421117f0) + # src='' + # |-> dest='x86_64/pc' type=0 (node=0x559142111600) + # |-> dest='arm/raspi2b' type=0 (node=0x559142110740) + ... + # } + # ALL QGRAPH NODES: { + # name='virtio-net-tests/announce-self' type=3 cmd_line='(null)' [available] + # name='arm/raspi2b' type=0 cmd_line='-M raspi2b ' [UNAVAILABLE] + ... + # } + +The ``virtio-net-tests/announce-self`` test is listed as "available" in the +"ALL QGRAPH NODES" output. This means the test will execute. We can follow the +qgraph path in the "ALL QGRAPH EDGES" output as follows: '' -> 'x86_64/pc' -> +'i440FX-pcihost' -> 'pci-bus-pc' -> 'pci-bus' -> 'virtio-net-pci' -> +'virtio-net'. The root of the qgraph is '' and the depth first search begins +there. + +The ``arm/raspi2b`` machine node is listed as "UNAVAILABLE". Although it is +reachable from the root via '' -> 'arm/raspi2b' the node is unavailable because +the QEMU binary did not list it when queried by the framework. This is expected +because we used the ``qemu-system-x86_64`` binary which does not support ARM +machine types. 
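Putting these two outputs together, the full test path that ``qos-test``
walks for this example should look something like this (the exact path
depends on the QEMU binary used)::

    /x86_64/pc/i440FX-pcihost/pci-bus-pc/pci-bus/virtio-net-pci/virtio-net/virtio-net-tests/announce-self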
+ +If a test is unexpectedly listed as "UNAVAILABLE", first check that the "ALL +QGRAPH EDGES" output reports edge connectivity from the root ('') to the test. +If there is no connectivity then the qgraph nodes were not set up correctly and +the driver or test code is incorrect. If there is connectivity, check the +availability of each node in the path in the "ALL QGRAPH NODES" output. The +first unavailable node in the path is the reason why the test is unavailable. +Typically this is because the QEMU binary lacks support for the necessary +machine type or device. + Creating a new driver and its interface -""""""""""""""""""""""""""""""""""""""""" +--------------------------------------- Here we continue the ``sdhci`` use case, with the following scenario: - ``sdhci-test`` aims to test the ``read[q,w], writeq`` functions offered by the ``sdhci`` drivers. - The current ``sdhci`` device is supported by both ``x86_64/pc`` and ``ARM`` - (in this example we focus on the ``arm-raspi2``) machines. + (in this example we focus on the ``arm-raspi2b``) machines. - QEMU offers 2 types of drivers: ``QSDHCI_MemoryMapped`` for ``ARM`` and ``QSDHCI_PCI`` for ``x86_64/pc``. Both implement the ``read[q,w], writeq`` functions. @@ -122,11 +237,11 @@ In order to implement such scenario in qgraph, the test developer needs to: all the pci drivers available) ``sdhci-pci --consumes--> pci-bus`` -- Create an ``arm/raspi2`` machine node. This machine ``contains`` +- Create an ``arm/raspi2b`` machine node. This machine ``contains`` a ``generic-sdhci`` memory mapped ``sdhci`` driver node, representing ``QSDHCI_MemoryMapped``. - ``arm/raspi2 --contains--> generic-sdhci`` + ``arm/raspi2b --contains--> generic-sdhci`` - Create the ``sdhci`` interface node. This interface offers the functions that are shared by all ``sdhci`` devices. 
The interface is produced by ``sdhci-pci`` and ``generic-sdhci``, @@ -141,7 +256,7 @@ In order to implement such scenario in qgraph, the test developer needs to: ``sdhci-test --consumes--> sdhci`` -``arm-raspi2`` machine, simplified from +``arm-raspi2b`` machine, simplified from ``tests/qtest/libqos/arm-raspi2-machine.c``:: #include "qgraph.h" @@ -159,7 +274,7 @@ In order to implement such scenario in qgraph, the test developer needs to: return &machine->alloc; } - fprintf(stderr, "%s not present in arm/raspi2\n", interface); + fprintf(stderr, "%s not present in arm/raspi2b\n", interface); g_assert_not_reached(); } @@ -171,7 +286,7 @@ In order to implement such scenario in qgraph, the test developer needs to: return &machine->sdhci.obj; } - fprintf(stderr, "%s not present in arm/raspi2\n", device); + fprintf(stderr, "%s not present in arm/raspi2b\n", device); g_assert_not_reached(); } @@ -195,10 +310,10 @@ In order to implement such scenario in qgraph, the test developer needs to: static void raspi2_register_nodes(void) { - /* arm/raspi2 --contains--> generic-sdhci */ - qos_node_create_machine("arm/raspi2", + /* arm/raspi2b --contains--> generic-sdhci */ + qos_node_create_machine("arm/raspi2b", qos_create_machine_arm_raspi2); - qos_node_contains("arm/raspi2", "generic-sdhci", NULL); + qos_node_contains("arm/raspi2b", "generic-sdhci", NULL); } libqos_init(raspi2_register_nodes); @@ -412,7 +527,7 @@ In the above example, all possible types of relations are created:: | +--produces-- + | - arm/raspi2 --contains--> generic-sdhci + arm/raspi2b --contains--> generic-sdhci or inverting the consumes edge in consumed_by:: @@ -428,10 +543,10 @@ or inverting the consumes edge in consumed_by:: | +--produces-- + | - arm/raspi2 --contains--> generic-sdhci + arm/raspi2b --contains--> generic-sdhci Adding a new test -""""""""""""""""" +----------------- Given the above setup, adding a new test is very simple. ``sdhci-test``, taken from ``tests/qtest/sdhci-test.c``:: @@ -478,7 +593,7 @@ Final graph will be like this:: | +--produces-- + | - arm/raspi2 --contains--> generic-sdhci + arm/raspi2b --contains--> generic-sdhci or inverting the consumes edge in consumed_by:: @@ -494,7 +609,7 @@ or inverting the consumes edge in consumed_by:: | +--produces-- + | - arm/raspi2 --contains--> generic-sdhci + arm/raspi2b --contains--> generic-sdhci Assuming there the binary is ``QTEST_QEMU_BINARY=./qemu-system-x86_64`` @@ -503,66 +618,11 @@ a valid test path will be: and for the binary ``QTEST_QEMU_BINARY=./qemu-system-arm``: -``/arm/raspi2/generic-sdhci/sdhci/sdhci-test`` +``/arm/raspi2b/generic-sdhci/sdhci/sdhci-test`` Additional examples are also in ``test-qgraph.c`` -Command line: -"""""""""""""" - -Command line is built by using node names and optional arguments -passed by the user when building the edges. - -There are three types of command line arguments: - -- ``in node`` : created from the node name. For example, machines will - have ``-M `` to its command line, while devices - ``-device ``. It is automatically done by the framework. -- ``after node`` : added as additional argument to the node name. - This argument is added optionally when creating edges, - by setting the parameter ``after_cmd_line`` and - ``extra_edge_opts`` in ``QOSGraphEdgeOptions``. 
- The framework automatically adds - a comma before ``extra_edge_opts``, - because it is going to add attributes - after the destination node pointed by - the edge containing these options, and automatically - adds a space before ``after_cmd_line``, because it - adds an additional device, not an attribute. -- ``before node`` : added as additional argument to the node name. - This argument is added optionally when creating edges, - by setting the parameter ``before_cmd_line`` in - ``QOSGraphEdgeOptions``. This attribute - is going to add attributes before the destination node - pointed by the edge containing these options. It is - helpful to commands that are not node-representable, - such as ``-fdsev`` or ``-netdev``. - -While adding command line in edges is always used, not all nodes names are -used in every path walk: this is because the contained or produced ones -are already added by QEMU, so only nodes that "consumes" will be used to -build the command line. Also, nodes that will have ``{ "abstract" : true }`` -as QMP attribute will loose their command line, since they are not proper -devices to be added in QEMU. - -Example:: - - QOSGraphEdgeOptions opts = { - .before_cmd_line = "-drive id=drv0,if=none,file=null-co://," - "file.read-zeroes=on,format=raw", - .after_cmd_line = "-device scsi-hd,bus=vs0.0,drive=drv0", - - opts.extra_device_opts = "id=vs0"; - }; - - qos_node_create_driver("virtio-scsi-device", - virtio_scsi_device_create); - qos_node_consumes("virtio-scsi-device", "virtio-bus", &opts); - -Will produce the following command line: -``-drive id=drv0,if=none,file=null-co://, -device virtio-scsi-device,id=vs0 -device scsi-hd,bus=vs0.0,drive=drv0`` - Qgraph API reference -^^^^^^^^^^^^^^^^^^^^ +-------------------- .. kernel-doc:: tests/qtest/libqos/qgraph.h diff --git a/docs/devel/qom.rst b/docs/devel/qom.rst index 42d0dc4f4da..e5fe3597cd8 100644 --- a/docs/devel/qom.rst +++ b/docs/devel/qom.rst @@ -87,6 +87,14 @@ specific type: #define MY_DEVICE(obj) \ OBJECT_CHECK(MyDevice, obj, TYPE_MY_DEVICE) +In case the ObjectClass implementation can be built as module a +module_obj() line must be added to make sure qemu loads the module +when the object is needed. + +.. code-block:: c + + module_obj(TYPE_MY_DEVICE); + Class Initialization ==================== diff --git a/docs/devel/secure-coding-practices.rst b/docs/devel/secure-coding-practices.rst index cbfc8af67e6..0454cc527e1 100644 --- a/docs/devel/secure-coding-practices.rst +++ b/docs/devel/secure-coding-practices.rst @@ -104,3 +104,12 @@ structures and only process the local copy. This prevents time-of-check-to-time-of-use (TOCTOU) race conditions that could cause QEMU to crash when a vCPU thread modifies guest RAM while device emulation is processing it. + +Use of null-co block drivers +---------------------------- + +The ``null-co`` block driver is designed for performance: its read accesses are +not initialized by default. In case this driver has to be used for security +research, it must be used with the ``read-zeroes=on`` option which fills read +buffers with zeroes. Security issues reported with the default +(``read-zeroes=off``) will be discarded. diff --git a/docs/devel/stable-process.rst b/docs/devel/stable-process.rst index e541b983fac..c21fb86645a 100644 --- a/docs/devel/stable-process.rst +++ b/docs/devel/stable-process.rst @@ -1,3 +1,5 @@ +.. 
_stable-process: + QEMU and the stable process =========================== diff --git a/docs/devel/style.rst b/docs/devel/style.rst index 260e3263fa0..9c5c0fffd98 100644 --- a/docs/devel/style.rst +++ b/docs/devel/style.rst @@ -1,3 +1,5 @@ +.. _coding-style: + ================= QEMU Coding Style ================= @@ -686,7 +688,7 @@ Rationale: hex numbers are hard to read in logs when there is no 0x prefix, especially when (occasionally) the representation doesn't contain any letters and especially in one line with other decimal numbers. Number groups are allowed to not use '0x' because for some things notations like %x.%x.%x are used not -only in Qemu. Also dumping raw data bytes with '0x' is less readable. +only in QEMU. Also dumping raw data bytes with '0x' is less readable. '#' printf flag --------------- diff --git a/docs/devel/submitting-a-patch.rst b/docs/devel/submitting-a-patch.rst new file mode 100644 index 00000000000..e51259eb9ca --- /dev/null +++ b/docs/devel/submitting-a-patch.rst @@ -0,0 +1,562 @@ +.. _submitting-a-patch: + +Submitting a Patch +================== + +QEMU welcomes contributions of code (either fixing bugs or adding new +functionality). However, we get a lot of patches, and so we have some +guidelines about submitting patches. If you follow these, you'll help +make our task of code review easier and your patch is likely to be +committed faster. + +This page seems very long, so if you are only trying to post a quick +one-shot fix, the bare minimum we ask is that: + +- You **must** provide a Signed-off-by: line (this is a hard + requirement because it's how you say "I'm legally okay to contribute + this and happy for it to go into QEMU", modeled after the `Linux kernel + `__ + policy.) ``git commit -s`` or ``git format-patch -s`` will add one. +- All contributions to QEMU must be **sent as patches** to the + qemu-devel `mailing list `__. Patch contributions + should not be posted on the bug tracker, posted on forums, or + externally hosted and linked to. (We have other mailing lists too, + but all patches must go to qemu-devel, possibly with a Cc: to another + list.) ``git send-email`` (`step-by-step setup + guide `__ and `hints and + tips `__) + works best for delivering the patch without mangling it, but + attachments can be used as a last resort on a first-time submission. +- You must read replies to your message, and be willing to act on them. + Note, however, that maintainers are often willing to manually fix up + first-time contributions, since there is a learning curve involved in + making an ideal patch submission. + +You do not have to subscribe to post (list policy is to reply-to-all to +preserve CCs and keep non-subscribers in the loop on the threads they +start), although you may find it easier as a subscriber to pick up good +ideas from other posts. If you do subscribe, be prepared for a high +volume of email, often over one thousand messages in a week. The list is +moderated; first-time posts from an email address (whether or not you +subscribed) may be subject to some delay while waiting for a moderator +to whitelist your address. + +The larger your contribution is, or if you plan on becoming a long-term +contributor, then the more important the rest of this page becomes. +Reading the table of contents below should already give you an idea of +the basic requirements. Use the table of contents as a reference, and +read the parts that you have doubts about. + +.. contents:: Table of Contents + +.. 
_writing_your_patches: + +Writing your Patches +-------------------- + +.. _use_the_qemu_coding_style: + +Use the QEMU coding style +~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can run run *scripts/checkpatch.pl * before submitting to +check that you are in compliance with our coding standards. Be aware +that ``checkpatch.pl`` is not infallible, though, especially where C +preprocessor macros are involved; use some common sense too. See also: + +- :ref:`coding-style` +- `Automate a checkpatch run on + commit `__ + +.. _base_patches_against_current_git_master: + +Base patches against current git master +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There's no point submitting a patch which is based on a released version +of QEMU because development will have moved on from then and it probably +won't even apply to master. We only apply selected bugfixes to release +branches and then only as backports once the code has gone into master. + +It is also okay to base patches on top of other on-going work that is +not yet part of the git master branch. To aid continuous integration +tools, such as `patchew `__, you should `add a +tag `__ +line ``Based-on: $MESSAGE_ID`` to your cover letter to make the series +dependency obvious. + +.. _split_up_long_patches: + +Split up long patches +~~~~~~~~~~~~~~~~~~~~~ + +Split up longer patches into a patch series of logical code changes. +Each change should compile and execute successfully. For instance, don't +add a file to the makefile in patch one and then add the file itself in +patch two. (This rule is here so that people can later use tools like +`git bisect `__ without hitting +points in the commit history where QEMU doesn't work for reasons +unrelated to the bug they're chasing.) Put documentation first, not +last, so that someone reading the series can do a clean-room evaluation +of the documentation, then validate that the code matched the +documentation. A commit message that mentions "Also, ..." is often a +good candidate for splitting into multiple patches. For more thoughts on +properly splitting patches and writing good commit messages, see `this +advice from +OpenStack `__. + +.. _make_code_motion_patches_easy_to_review: + +Make code motion patches easy to review +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If a series requires large blocks of code motion, there are tricks for +making the refactoring easier to review. Split up the series so that +semantic changes (or even function renames) are done in a separate patch +from the raw code motion. Use a one-time setup of ``git config +diff.renames true;`` ``git config diff.algorithm patience`` (refer to +`git-config `__). The 'diff.renames' +property ensures file rename patches will be given in a more compact +representation that focuses only on the differences across the file +rename, instead of showing the entire old file as a deletion and the new +file as an insertion. Meanwhile, the 'diff.algorithm' property ensures +that extracting a non-contiguous subset of one file into a new file, but +where all extracted parts occur in the same order both before and after +the patch, will reduce churn in trying to treat unrelated ``}`` lines in +the original file as separating hunks of changes. + +Ideally, a code motion patch can be reviewed by doing:: + + git format-patch --stdout -1 > patch; + diff -u <(sed -n 's/^-//p' patch) <(sed -n 's/^\+//p' patch) + +to focus on the few changes that weren't wholesale code motion. + +.. 
_dont_include_irrelevant_changes: + +Don't include irrelevant changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In particular, don't include formatting, coding style or whitespace +changes to bits of code that would otherwise not be touched by the +patch. (It's OK to fix coding style issues in the immediate area (few +lines) of the lines you're changing.) If you think a section of code +really does need a reindent or other large-scale style fix, submit this +as a separate patch which makes no semantic changes; don't put it in the +same patch as your bug fix. + +For smaller patches in less frequently changed areas of QEMU, consider +using the :ref:`trivial-patches` process. + +.. _write_a_meaningful_commit_message: + +Write a meaningful commit message +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Commit messages should be meaningful and should stand on their own as a +historical record of why the changes you applied were necessary or +useful. + +QEMU follows the usual standard for git commit messages: the first line +(which becomes the email subject line) is "subsystem: single line +summary of change". Whether the "single line summary of change" starts +with a capital is a matter of taste, but we prefer that the summary does +not end in a dot. Look at ``git shortlog -30`` for an idea of sample +subject lines. Then there is a blank line and a more detailed +description of the patch, another blank and your Signed-off-by: line. +Please do not use lines that are longer than 76 characters in your +commit message (so that the text still shows up nicely with "git show" +in a 80-columns terminal window). + +The body of the commit message is a good place to document why your +change is important. Don't include comments like "This is a suggestion +for fixing this bug" (they can go below the ``---`` line in the email so +they don't go into the final commit message). Make sure the body of the +commit message can be read in isolation even if the reader's mailer +displays the subject line some distance apart (that is, a body that +starts with "... so that" as a continuation of the subject line is +harder to follow). + +If your patch fixes a commit that is already in the repository, please +add an additional line with "Fixes: +("Fixed commit subject")" below the patch description / before your +"Signed-off-by:" line in the commit message. + +If your patch fixes a bug in the gitlab bug tracker, please add a line +with "Resolves: " to the commit message, too. Gitlab can +close bugs automatically once commits with the "Resolved:" keyword get +merged into the master branch of the project. And if your patch addresses +a bug in another public bug tracker, you can also use a line with +"Buglink: " for reference here, too. + +Example:: + + Fixes: 14055ce53c2d ("s390x/tcg: avoid overflows in time2tod/tod2time") + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/42 + Buglink: https://bugs.launchpad.net/qemu/+bug/1804323`` + +Some other tags that are used in commit messages include "Message-Id:" +"Tested-by:", "Acked-by:", "Reported-by:", "Suggested-by:". See ``git +log`` for these keywords for example usage. + +.. _test_your_patches: + +Test your patches +~~~~~~~~~~~~~~~~~ + +Although QEMU has `continuous integration +services `__ that attempt to test +patches submitted to the list, it still saves everyone time if you have +already tested that your patch compiles and works. 
Because QEMU is such +a large project, it's okay to use configure arguments to limit what is +built for faster turnaround during your development time; but it is +still wise to also check that your patches work with a full build before +submitting a series, especially if your changes might have an unintended +effect on other areas of the code you don't normally experiment with. +See `Testing `__ for more details on what tests are available. +Also, it is a wise idea to include a testsuite addition as part of your +patches - either to ensure that future changes won't regress your new +feature, or to add a test which exposes the bug that the rest of your +series fixes. Keeping separate commits for the test and the fix allows +reviewers to rebase the test to occur first to prove it catches the +problem, then again to place it last in the series so that bisection +doesn't land on a known-broken state. + +.. _submitting_your_patches: + +Submitting your Patches +----------------------- + +.. _if_you_cannot_send_patch_emails: + +If you cannot send patch emails +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In rare cases it may not be possible to send properly formatted patch +emails. You can use `sourcehut `__ to send your +patches to the QEMU mailing list by following these steps: + +#. Register or sign in to your account +#. Add your SSH public key in `meta \| + keys `__. +#. Publish your git branch using **git push git@git.sr.ht:~USERNAME/qemu + HEAD** +#. Send your patches to the QEMU mailing list using the web-based + ``git-send-email`` UI at https://git.sr.ht/~USERNAME/qemu/send-email + +`This video +`__ +shows the web-based ``git-send-email`` workflow. Documentation is +available `here +`__. + +.. _cc_the_relevant_maintainer: + +CC the relevant maintainer +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Send patches both to the mailing list and CC the maintainer(s) of the +files you are modifying. look in the MAINTAINERS file to find out who +that is. Also try using scripts/get_maintainer.pl from the repository +for learning the most common committers for the files you touched. + +Example:: + + ~/src/qemu/scripts/get_maintainer.pl -f hw/ide/core.c + +In fact, you can automate this, via a one-time setup of ``git config +sendemail.cccmd 'scripts/get_maintainer.pl --nogit-fallback'`` (Refer to +`git-config `__.) + +.. _do_not_send_as_an_attachment: + +Do not send as an attachment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Send patches inline so they are easy to reply to with review comments. +Do not put patches in attachments. + +.. _use_git_format_patch: + +Use ``git format-patch`` +~~~~~~~~~~~~~~~~~~~~~~~~ + +Use the right diff format. +`git format-patch `__ will +produce patch emails in the right format (check the documentation to +find out how to drive it). You can then edit the cover letter before +using ``git send-email`` to mail the files to the mailing list. (We +recommend `git send-email `__ +because mail clients often mangle patches by wrapping long lines or +messing up whitespace. Some distributions do not include send-email in a +default install of git; you may need to download additional packages, +such as 'git-email' on Fedora-based systems.) Patch series need a cover +letter, with shallow threading (all patches in the series are +in-reply-to the cover letter, but not to each other); single unrelated +patches do not need a cover letter (but if you do send a cover letter, +use ``--numbered`` so the cover and the patch have distinct subject lines). 
Patches are easier to find if they start a new top-level thread, rather
than being buried in-reply-to another existing thread.

.. _avoid_posting_large_binary_blob:

Avoid posting large binary blob
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

If you added binaries to the repository, consider producing the patch
emails using ``git format-patch --no-binary`` and include a link to a
git repository to fetch the original commit.

.. _patch_emails_must_include_a_signed_off_by_line:

Patch emails must include a ``Signed-off-by:`` line
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

For more information see `SubmittingPatches 1.12
`__.
This is vital or we will not be able to apply your patch! Please use
your real name to sign a patch (not an alias or acronym).

If you wrote the patch, make sure your "From:" and "Signed-off-by:"
lines use the same spelling. It's okay if you subscribe or contribute to
the list via more than one address, but using multiple addresses in one
commit just confuses things. If someone else wrote the patch, git will
include a "From:" line in the body of the email (different from your
envelope From:) that will give credit to the correct author; but again,
that author's Signed-off-by: line is mandatory, with the same spelling.

.. _include_a_meaningful_cover_letter:

Include a meaningful cover letter
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This is a requirement for any series with multiple patches (as it aids
continuous integration), but optional for an isolated patch. The cover
letter explains the overall goal of such a series, and also provides a
convenient 0/N email for others to reply to the series as a whole. A
one-time setup of ``git config format.coverletter auto`` (refer to
`git-config `__) will generate the
cover letter as needed.

When reviewers don't know your goal at the start of their review, they
may object to early changes that don't make sense until the end of the
series, because they do not have enough context yet at that point of
their review. A series where the goal is unclear also risks a higher
number of review-fix cycles because the reviewers haven't bought into
the idea yet. If the cover letter can explain these points to the
reviewer, the process will be smoother and patches will get merged faster.
Make sure your cover letter includes a diffstat of changes made over the
entire series; potential reviewers know what files they are interested
in, and they need an easy way to determine if your series touches them.

.. _use_the_rfc_tag_if_needed:

Use the RFC tag if needed
~~~~~~~~~~~~~~~~~~~~~~~~~

For example, "[PATCH RFC v2]". ``git format-patch --subject-prefix=RFC``
can help.

"RFC" means "Request For Comments" and is a statement that you don't
intend for your patchset to be applied to master, but would like some
review on it anyway.
Reasons for doing this include: + +- the patch depends on some pending kernel changes which haven't yet + been accepted, so the QEMU patch series is blocked until that + dependency has been dealt with, but is worth reviewing anyway +- the patch set is not finished yet (perhaps it doesn't cover all use + cases or work with all targets) but you want early review of a major + API change or design structure before continuing + +In general, since it's asking other people to do review work on a +patchset that the submitter themselves is saying shouldn't be applied, +it's best to: + +- use it sparingly +- in the cover letter, be clear about why a patch is an RFC, what areas + of the patchset you're looking for review on, and why reviewers + should care + +.. _consider_whether_your_patch_is_applicable_for_stable: + +Consider whether your patch is applicable for stable +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your patch fixes a severe issue or a regression, it may be applicable +for stable. In that case, consider adding ``Cc: qemu-stable@nongnu.org`` +to your patch to notify the stable maintainers. + +For more details on how QEMU's stable process works, refer to the +:ref:`stable-process` page. + +.. _participating_in_code_review: + +Participating in Code Review +---------------------------- + +All patches submitted to the QEMU project go through a code review +process before they are accepted. Some areas of code that are well +maintained may review patches quickly, lesser-loved areas of code may +have a longer delay. + +.. _stay_around_to_fix_problems_raised_in_code_review: + +Stay around to fix problems raised in code review +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Not many patches get into QEMU straight away -- it is quite common that +developers will identify bugs, or suggest a cleaner approach, or even +just point out code style issues or commit message typos. You'll need to +respond to these, and then send a second version of your patches with +the issues fixed. This takes a little time and effort on your part, but +if you don't do it then your changes will never get into QEMU. It's also +just polite -- it is quite disheartening for a developer to spend time +reviewing your code and suggesting improvements, only to find that +you're not going to do anything further and it was all wasted effort. + +When replying to comments on your patches **reply to all and not just +the sender** -- keeping discussion on the mailing list means everybody +can follow it. + +.. _pay_attention_to_review_comments: + +Pay attention to review comments +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Someone took their time to review your work, and it pays to respect that +effort; repeatedly submitting a series without addressing all comments +from the previous round tends to alienate reviewers and stall your +patch. Reviewers aren't always perfect, so it is okay if you want to +argue that your code was correct in the first place instead of blindly +doing everything the reviewer asked. On the other hand, if someone +pointed out a potential issue during review, then even if your code +turns out to be correct, it's probably a sign that you should improve +your commit message and/or comments in the code explaining why the code +is correct. + +If you fix issues that are raised during review **resend the entire +patch series** not just the one patch that was changed. This allows +maintainers to easily apply the fixed series without having to manually +identify which patches are relevant. 
Send the new version as a complete +fresh email or series of emails -- don't try to make it a followup to +version 1. (This helps automatic patch email handling tools distinguish +between v1 and v2 emails.) + +.. _when_resending_patches_add_a_version_tag: + +When resending patches add a version tag +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +All patches beyond the first version should include a version tag -- for +example, "[PATCH v2]". This means people can easily identify whether +they're looking at the most recent version. (The first version of a +patch need not say "v1", just [PATCH] is sufficient.) For patch series, +the version applies to the whole series -- even if you only change one +patch, you resend the entire series and mark it as "v2". Don't try to +track versions of different patches in the series separately. `git +format-patch `__ and `git +send-email `__ both understand +the ``-v2`` option to make this easier. Send each new revision as a new +top-level thread, rather than burying it in-reply-to an earlier +revision, as many reviewers are not looking inside deep threads for new +patches. + +.. _include_version_history_in_patchset_revisions: + +Include version history in patchset revisions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For later versions of patches, include a summary of changes from +previous versions, but not in the commit message itself. In an email +formatted as a git patch, the commit message is the part above the ``---`` +line, and this will go into the git changelog when the patch is +committed. This part should be a self-contained description of what this +version of the patch does, written to make sense to anybody who comes +back to look at this commit in git in six months' time. The part below +the ``---`` line and above the patch proper (git format-patch puts the +diffstat here) is a good place to put remarks for people reading the +patch email, and this is where the "changes since previous version" +summary belongs. The `git-publish +`__ script can help with +tracking a good summary across versions. Also, the `git-backport-diff +`__ script can help focus +reviewers on what changed between revisions. + +.. _tips_and_tricks: + +Tips and Tricks +--------------- + +.. _proper_use_of_reviewed_by_tags_can_aid_review: + +Proper use of Reviewed-by: tags can aid review +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When reviewing a large series, a reviewer can reply to some of the +patches with a Reviewed-by tag, stating that they are happy with that +patch in isolation (sometimes conditional on minor cleanup, like fixing +whitespace, that doesn't affect code content). You should then update +those commit messages by hand to include the Reviewed-by tag, so that in +the next revision, reviewers can spot which patches were already clean +from the previous round. Conversely, if you significantly modify a patch +that was previously reviewed, remove the reviewed-by tag out of the +commit message, as well as listing the changes from the previous +version, to make it easier to focus a reviewer's attention to your +changes. + +.. _if_your_patch_seems_to_have_been_ignored: + +If your patch seems to have been ignored +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your patchset has received no replies you should "ping" it after a +week or two, by sending an email as a reply-to-all to the patch mail, +including the word "ping" and ideally also a link to the page for the +patch on `patchew `__ or +`lore.kernel.org `__. 
It's worth +double-checking for reasons why your patch might have been ignored +(forgot to CC the maintainer? annoyed people by failing to respond to +review comments on an earlier version?), but often for less-maintained +areas of QEMU patches do just slip through the cracks. If your ping is +also ignored, ping again after another week or so. As the submitter, you +are the person with the most motivation to get your patch applied, so +you have to be persistent. + +.. _is_my_patch_in: + +Is my patch in? +~~~~~~~~~~~~~~~ + +QEMU has some Continuous Integration machines that try to catch patch +submission problems as soon as possible. `patchew +`__ includes a web interface for tracking the +status of various threads that have been posted to the list, and may +send you an automated mail if it detected a problem with your patch. + +Once your patch has had enough review on list, the maintainer for that +area of code will send notification to the list that they are including +your patch in a particular staging branch. Periodically, the maintainer +then takes care of :ref:`submitting-a-pull-request` +for aggregating topic branches into mainline QEMU. Generally, you do not +need to send a pull request unless you have contributed enough patches +to become a maintainer over a particular section of code. Maintainers +may further modify your commit, by resolving simple merge conflicts or +fixing minor typos pointed out during review, but will always add a +Signed-off-by line in addition to yours, indicating that it went through +their tree. Occasionally, the maintainer's pull request may hit more +difficult merge conflicts, where you may be requested to help rebase and +resolve the problems. It may take a couple of weeks between when your +patch first had a positive review to when it finally lands in qemu.git; +release cycle freezes may extend that time even longer. + +.. _return_the_favor: + +Return the favor +~~~~~~~~~~~~~~~~ + +Peer review only works if everyone chips in a bit of review time. If +everyone submitted more patches than they reviewed, we would have a +patch backlog. A good goal is to try to review at least as many patches +from others as what you submit. Don't worry if you don't know the code +base as well as a maintainer; it's perfectly fine to admit when your +review is weak because you are unfamiliar with the code. diff --git a/docs/devel/submitting-a-pull-request.rst b/docs/devel/submitting-a-pull-request.rst new file mode 100644 index 00000000000..c9d1e8afd91 --- /dev/null +++ b/docs/devel/submitting-a-pull-request.rst @@ -0,0 +1,77 @@ +.. _submitting-a-pull-request: + +Submitting a Pull Request +========================= + +QEMU welcomes contributions of code, but we generally expect these to be +sent as simple patch emails to the mailing list (see our page on +:ref:`submitting-a-patch` +for more details). Generally only existing submaintainers of a tree +will need to submit pull requests, although occasionally for a large +patch series we might ask a submitter to send a pull request. This page +documents our recommendations on pull requests for those people. + +A good rule of thumb is not to send a pull request unless somebody asks +you to. + +**Resend the patches with the pull request** as emails which are +threaded as follow-ups to the pull request itself. The simplest way to +do this is to use ``git format-patch --cover-letter`` to create the +emails, and then edit the cover letter to include the pull request +details that ``git request-pull`` outputs. 
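As a sketch of that flow (the tag name, remote and URL below are purely
illustrative, not a required convention), the steps could be::

    git tag -s pull-myfeature-20210501 -m "My feature changes for QEMU"
    git push my-public-remote pull-myfeature-20210501
    git request-pull origin/master https://example.org/me/qemu.git pull-myfeature-20210501
    git format-patch --cover-letter -o pull/ origin/master..pull-myfeature-20210501
    # paste the request-pull output into pull/0000-cover-letter.patch before mailing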
**Use PULL as the subject line tag** in both the cover letter and the
retransmitted patch mails (for example, by using
``--subject-prefix=PULL`` in your ``git format-patch`` command). This
helps people to filter in or out the resulting emails (especially useful
if they are only CC'd on one email out of the set).

**Each patch must have your own Signed-off-by: line** as well as that of
the original author if the patch was not written by you. This is because
with a pull request you're now indicating that the patch has passed via
you rather than directly from the original author.

**Don't forget to add Reviewed-by: and Acked-by: lines**. When other
people have reviewed the patches you're putting in the pull request,
make sure you've copied their signoffs across. (If you use the `patches
tool `__ to add patches from email
directly to your git repo it will include the tags automatically; if
you're updating patches manually or in some other way you'll need to
edit the commit messages by hand.)

**Don't send pull requests for code that hasn't passed review**. A pull
request says these patches are ready to go into QEMU now, so they must
have passed the standard code review processes. In particular if you've
corrected issues in one round of code review, you need to send your
fixed patch series as normal to the list; you can't put it in a pull
request until it's gone through. (Extremely trivial fixes may be OK to
just fix in passing, but if in doubt err on the side of not.)

**Test before sending**. This is an obvious thing to say, but make sure
everything builds (including that it compiles at each step of the patch
series) and that "make check" passes before sending out the pull
request. As a submaintainer you're one of QEMU's lines of defense
against bad code, so double check the details.

**All pull requests must be signed**. If your key is not already signed
by members of the QEMU community, you should make arrangements to attend
a `KeySigningParty `__ (for
example at KVM Forum) or make alternative arrangements to have your key
signed by an attendee. Key signing requires meeting another community
member \*in person\* so please make appropriate arrangements. By
"signed" here we mean that the pullreq email should quote a tag which is
a GPG-signed tag (as created with 'git tag -s ...').

**Pull requests not for master should say "not for master" and have
"PULL SUBSYSTEM whatever" in the subject tag**. If your pull request is
targeting a stable branch or some submaintainer tree, please include the
string "not for master" in the cover letter email, and make sure the
subject tag is "PULL SUBSYSTEM s390/block/whatever" rather than just
"PULL". This allows it to be automatically filtered out of the set of
pull requests that should be applied to master.

You might be interested in the `make-pullreq
`__
script which automates some of this process for you and includes a few
sanity checks. Note that you must edit it to configure it suitably for
your local situation!
diff --git a/docs/devel/tcg-icount.rst b/docs/devel/tcg-icount.rst
index 8d67b6c076a..50c8e8dabc1 100644
--- a/docs/devel/tcg-icount.rst
+++ b/docs/devel/tcg-icount.rst
@@ -92,6 +92,3 @@ When the translator is handling an instruction of this kind:
     }
 
 * it must end the TB immediately after this instruction
-
-Note that some older front-ends call a "gen_io_end()" function:
-this is obsolete and should not be used.
diff --git a/docs/devel/tcg-plugins.rst b/docs/devel/tcg-plugins.rst index 18c6581d85c..f93ef4fe52a 100644 --- a/docs/devel/tcg-plugins.rst +++ b/docs/devel/tcg-plugins.rst @@ -3,7 +3,6 @@ Copyright (c) 2019, Linaro Limited Written by Emilio Cota and Alex Bennée -================ QEMU TCG Plugins ================ @@ -16,8 +15,30 @@ only monitor it passively. However they can do this down to an individual instruction granularity including potentially subscribing to all load and store operations. -API Stability -============= +Usage +----- + +Any QEMU binary with TCG support has plugins enabled by default. +Earlier releases needed to be explicitly enabled with:: + + configure --enable-plugins + +Once built a program can be run with multiple plugins loaded each with +their own arguments:: + + $QEMU $OTHER_QEMU_ARGS \ + -plugin tests/plugin/libhowvec.so,inline=on,count=hint \ + -plugin tests/plugin/libhotblocks.so + +Arguments are plugin specific and can be used to modify their +behaviour. In this case the howvec plugin is being asked to use inline +ops to count and break down the hint instructions by type. + +Writing plugins +--------------- + +API versioning +~~~~~~~~~~~~~~ This is a new feature for QEMU and it does allow people to develop out-of-tree plugins that can be dynamically linked into a running QEMU @@ -25,36 +46,23 @@ process. However the project reserves the right to change or break the API should it need to do so. The best way to avoid this is to submit your plugin upstream so they can be updated if/when the API changes. -API versioning --------------- - All plugins need to declare a symbol which exports the plugin API version they were built against. This can be done simply by:: QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; The core code will refuse to load a plugin that doesn't export a -`qemu_plugin_version` symbol or if plugin version is outside of QEMU's +``qemu_plugin_version`` symbol or if plugin version is outside of QEMU's supported range of API versions. -Additionally the `qemu_info_t` structure which is passed to the -`qemu_plugin_install` method of a plugin will detail the minimum and +Additionally the ``qemu_info_t`` structure which is passed to the +``qemu_plugin_install`` method of a plugin will detail the minimum and current API versions supported by QEMU. The API version will be incremented if new APIs are added. The minimum API version will be incremented if existing APIs are changed or removed. -Exposure of QEMU internals --------------------------- - -The plugin architecture actively avoids leaking implementation details -about how QEMU's translation works to the plugins. While there are -conceptions such as translation time and translation blocks the -details are opaque to plugins. The plugin is able to query select -details of instructions and system configuration only through the -exported *qemu_plugin* functions. - -Query Handle Lifetime ---------------------- +Lifetime of the query handle +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each callback provides an opaque anonymous information handle which can usually be further queried to find out information about a @@ -63,31 +71,8 @@ valid during the lifetime of the callback so it is important that any information that is needed is extracted during the callback and saved by the plugin. -API -=== - -.. 
kernel-doc:: include/qemu/qemu-plugin.h - -Usage -===== - -The QEMU binary needs to be compiled for plugin support:: - - configure --enable-plugins - -Once built a program can be run with multiple plugins loaded each with -their own arguments:: - - $QEMU $OTHER_QEMU_ARGS \ - -plugin tests/plugin/libhowvec.so,arg=inline,arg=hint \ - -plugin tests/plugin/libhotblocks.so - -Arguments are plugin specific and can be used to modify their -behaviour. In this case the howvec plugin is being asked to use inline -ops to count and break down the hint instructions by type. - -Plugin Life cycle -================= +Plugin life cycle +~~~~~~~~~~~~~~~~~ First the plugin is loaded and the public qemu_plugin_install function is called. The plugin will then register callbacks for various plugin @@ -110,11 +95,26 @@ callback which can then ensure atomicity itself. Finally when QEMU exits all the registered *atexit* callbacks are invoked. +Exposure of QEMU internals +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The plugin architecture actively avoids leaking implementation details +about how QEMU's translation works to the plugins. While there are +conceptions such as translation time and translation blocks the +details are opaque to plugins. The plugin is able to query select +details of instructions and system configuration only through the +exported *qemu_plugin* functions. + +API +~~~ + +.. kernel-doc:: include/qemu/qemu-plugin.h + Internals -========= +--------- Locking -------- +~~~~~~~ We have to ensure we cannot deadlock, particularly under MTTCG. For this we acquire a lock when called from plugin code. We also keep the @@ -141,16 +141,16 @@ requested. The plugin isn't completely uninstalled until the safe work has executed while all vCPUs are quiescent. Example Plugins -=============== +--------------- There are a number of plugins included with QEMU and you are encouraged to contribute your own plugins plugins upstream. There is a -`contrib/plugins` directory where they can go. +``contrib/plugins`` directory where they can go. - tests/plugins These are some basic plugins that are used to test and exercise the -API during the `make check-tcg` target. +API during the ``make check-tcg`` target. - contrib/plugins/hotblocks.c @@ -162,7 +162,7 @@ with linux-user execution as system emulation tends to generate re-translations as blocks from different programs get swapped in and out of system memory. -If your program is single-threaded you can use the `inline` option for +If your program is single-threaded you can use the ``inline`` option for slightly faster (but not thread safe) counters. Example:: @@ -192,17 +192,32 @@ Similar to hotblocks but this time tracks memory accesses:: 0x0000000048b000, 0x0001, 130594, 0x0001, 355 0x0000000048a000, 0x0001, 1826, 0x0001, 11 +The hotpages plugin can be configured using the following arguments: + + * sortby=reads|writes|address + + Log the data sorted by either the number of reads, the number of writes, or + memory address. (Default: entries are sorted by the sum of reads and writes) + + * io=on + + Track IO addresses. Only relevant to full system emulation. (Default: off) + + * pagesize=N + + The page size used. (Default: N = 4096) + - contrib/plugins/howvec.c This is an instruction classifier so can be used to count different types of instructions. It has a number of options to refine which get -counted. You can give an argument for a class of instructions to break -it down fully, so for example to see all the system registers -accesses:: +counted. 
You can give a value to the ``count`` argument for a class of +instructions to break it down fully, so for example to see all the system +registers accesses:: ./aarch64-softmmu/qemu-system-aarch64 $(QEMU_ARGS) \ -append "root=/dev/sda2 systemd.unit=benchmark.service" \ - -smp 4 -plugin ./contrib/plugins/libhowvec.so,arg=sreg -d plugin + -smp 4 -plugin ./contrib/plugins/libhowvec.so,count=sreg -d plugin which will lead to a sorted list after the class breakdown:: @@ -250,7 +265,7 @@ which will lead to a sorted list after the class breakdown:: ... To find the argument shorthand for the class you need to examine the -source code of the plugin at the moment, specifically the `*opt` +source code of the plugin at the moment, specifically the ``*opt`` argument in the InsnClassExecCount tables. - contrib/plugins/lockstep.c @@ -270,7 +285,7 @@ communicate over:: ./sparc-softmmu/qemu-system-sparc -monitor none -parallel none \ -net none -M SS-20 -m 256 -kernel day11/zImage.elf \ - -plugin ./contrib/plugins/liblockstep.so,arg=lockstep-sparc.sock \ + -plugin ./contrib/plugins/liblockstep.so,sockpath=lockstep-sparc.sock \ -d plugin,nochain which will eventually report:: @@ -285,27 +300,27 @@ which will eventually report:: previously @ 0x000000ffd08098/5 (809900593 insns) previously @ 0x000000ffd080c0/1 (809900588 insns) -- contrib/plugins/hwprofile +- contrib/plugins/hwprofile.c The hwprofile tool can only be used with system emulation and allows the user to see what hardware is accessed how often. It has a number of options: - * arg=read or arg=write + * track=read or track=write By default the plugin tracks both reads and writes. You can use one of these options to limit the tracking to just one class of accesses. - * arg=source + * source Will include a detailed break down of what the guest PC that made the - access was. Not compatible with arg=pattern. Example output:: + access was. Not compatible with the pattern option. Example output:: cirrus-low-memory @ 0xfffffd00000a0000 pc:fffffc0000005cdc, 1, 256 pc:fffffc0000005ce8, 1, 256 pc:fffffc0000005cec, 1, 256 - * arg=pattern + * pattern Instead break down the accesses based on the offset into the HW region. This can be useful for seeing the most used registers of a @@ -319,3 +334,105 @@ the user to see what hardware is accessed how often. It has a number of options: off:0000001c, 1, 2 off:00000020, 1, 2 ... + +- contrib/plugins/execlog.c + +The execlog tool traces executed instructions with memory access. It can be used +for debugging and security analysis purposes. +Please be aware that this will generate a lot of output. + +The plugin takes no argument:: + + qemu-system-arm $(QEMU_ARGS) \ + -plugin ./contrib/plugins/libexeclog.so -d plugin + +which will output an execution trace following this structure:: + + # vCPU, vAddr, opcode, disassembly[, load/store, memory addr, device]... 
+ 0, 0xa12, 0xf8012400, "movs r4, #0" + 0, 0xa14, 0xf87f42b4, "cmp r4, r6" + 0, 0xa16, 0xd206, "bhs #0xa26" + 0, 0xa18, 0xfff94803, "ldr r0, [pc, #0xc]", load, 0x00010a28, RAM + 0, 0xa1a, 0xf989f000, "bl #0xd30" + 0, 0xd30, 0xfff9b510, "push {r4, lr}", store, 0x20003ee0, RAM, store, 0x20003ee4, RAM + 0, 0xd32, 0xf9893014, "adds r0, #0x14" + 0, 0xd34, 0xf9c8f000, "bl #0x10c8" + 0, 0x10c8, 0xfff96c43, "ldr r3, [r0, #0x44]", load, 0x200000e4, RAM + +- contrib/plugins/cache.c + +Cache modelling plugin that measures the performance of a given L1 cache +configuration, and optionally a unified L2 per-core cache when a given working +set is run:: + + qemu-x86_64 -plugin ./contrib/plugins/libcache.so \ + -d plugin -D cache.log ./tests/tcg/x86_64-linux-user/float_convs + +will report the following:: + + core #, data accesses, data misses, dmiss rate, insn accesses, insn misses, imiss rate + 0 996695 508 0.0510% 2642799 18617 0.7044% + + address, data misses, instruction + 0x424f1e (_int_malloc), 109, movq %rax, 8(%rcx) + 0x41f395 (_IO_default_xsputn), 49, movb %dl, (%rdi, %rax) + 0x42584d (ptmalloc_init.part.0), 33, movaps %xmm0, (%rax) + 0x454d48 (__tunables_init), 20, cmpb $0, (%r8) + ... + + address, fetch misses, instruction + 0x4160a0 (__vfprintf_internal), 744, movl $1, %ebx + 0x41f0a0 (_IO_setb), 744, endbr64 + 0x415882 (__vfprintf_internal), 744, movq %r12, %rdi + 0x4268a0 (__malloc), 696, andq $0xfffffffffffffff0, %rax + ... + +The plugin has a number of arguments, all of them are optional: + + * limit=N + + Print top N icache and dcache thrashing instructions along with their + address, number of misses, and its disassembly. (default: 32) + + * icachesize=N + * iblksize=B + * iassoc=A + + Instruction cache configuration arguments. They specify the cache size, block + size, and associativity of the instruction cache, respectively. + (default: N = 16384, B = 64, A = 8) + + * dcachesize=N + * dblksize=B + * dassoc=A + + Data cache configuration arguments. They specify the cache size, block size, + and associativity of the data cache, respectively. + (default: N = 16384, B = 64, A = 8) + + * evict=POLICY + + Sets the eviction policy to POLICY. Available policies are: :code:`lru`, + :code:`fifo`, and :code:`rand`. The plugin will use the specified policy for + both instruction and data caches. (default: POLICY = :code:`lru`) + + * cores=N + + Sets the number of cores for which we maintain separate icache and dcache. + (default: for linux-user, N = 1, for full system emulation: N = cores + available to guest) + + * l2=on + + Simulates a unified L2 cache (stores blocks for both instructions and data) + using the default L2 configuration (cache size = 2MB, associativity = 16-way, + block size = 64B). + + * l2cachesize=N + * l2blksize=B + * l2assoc=A + + L2 cache configuration arguments. They specify the cache size, block size, and + associativity of the L2 cache, respectively. Setting any of the L2 + configuration arguments implies ``l2=on``. + (default: N = 2097152 (2MB), B = 64, A = 16) diff --git a/docs/devel/tcg.rst b/docs/devel/tcg.rst index 4ebde44b9d7..a65fb7b1c44 100644 --- a/docs/devel/tcg.rst +++ b/docs/devel/tcg.rst @@ -11,13 +11,14 @@ performances. QEMU's dynamic translation backend is called TCG, for "Tiny Code Generator". For more information, please take a look at ``tcg/README``. -Some notable features of QEMU's dynamic translator are: +The following sections outline some notable features and implementation +details of QEMU's dynamic translator. 
CPU state optimisations ----------------------- -The target CPUs have many internal states which change the way it -evaluates instructions. In order to achieve a good speed, the +The target CPUs have many internal states which change the way they +evaluate instructions. In order to achieve a good speed, the translation phase considers that some state information of the virtual CPU cannot change in it. The state is recorded in the Translation Block (TB). If the state changes (e.g. privilege level), a new TB will @@ -31,17 +32,95 @@ Direct block chaining --------------------- After each translated basic block is executed, QEMU uses the simulated -Program Counter (PC) and other cpu state information (such as the CS +Program Counter (PC) and other CPU state information (such as the CS segment base value) to find the next basic block. -In order to accelerate the most common cases where the new simulated PC -is known, QEMU can patch a basic block so that it jumps directly to the -next one. - -The most portable code uses an indirect jump. An indirect jump makes -it easier to make the jump target modification atomic. On some host -architectures (such as x86 or PowerPC), the ``JUMP`` opcode is -directly patched so that the block chaining has no overhead. +In its simplest, less optimized form, this is done by exiting from the +current TB, going through the TB epilogue, and then back to the +main loop. That’s where QEMU looks for the next TB to execute, +translating it from the guest architecture if it isn’t already available +in memory. Then QEMU proceeds to execute this next TB, starting at the +prologue and then moving on to the translated instructions. + +Exiting from the TB this way will cause the ``cpu_exec_interrupt()`` +callback to be re-evaluated before executing additional instructions. +It is mandatory to exit this way after any CPU state changes that may +unmask interrupts. + +In order to accelerate the cases where the TB for the new +simulated PC is already available, QEMU has mechanisms that allow +multiple TBs to be chained directly, without having to go back to the +main loop as described above. These mechanisms are: + +``lookup_and_goto_ptr`` +^^^^^^^^^^^^^^^^^^^^^^^ + +Calling ``tcg_gen_lookup_and_goto_ptr()`` will emit a call to +``helper_lookup_tb_ptr``. This helper will look for an existing TB that +matches the current CPU state. If the destination TB is available its +code address is returned, otherwise the address of the JIT epilogue is +returned. The call to the helper is always followed by the tcg ``goto_ptr`` +opcode, which branches to the returned address. In this way, we either +branch to the next TB or return to the main loop. + +``goto_tb + exit_tb`` +^^^^^^^^^^^^^^^^^^^^^ + +The translation code usually implements branching by performing the +following steps: + +1. Call ``tcg_gen_goto_tb()`` passing a jump slot index (either 0 or 1) + as a parameter. + +2. Emit TCG instructions to update the CPU state with any information + that has been assumed constant and is required by the main loop to + correctly locate and execute the next TB. For most guests, this is + just the PC of the branch destination, but others may store additional + data. The information updated in this step must be inferable from both + ``cpu_get_tb_cpu_state()`` and ``cpu_restore_state()``. + +3. Call ``tcg_gen_exit_tb()`` passing the address of the current TB and + the jump slot index again. 
+ +Step 1, ``tcg_gen_goto_tb()``, will emit a ``goto_tb`` TCG +instruction that later on gets translated to a jump to an address +associated with the specified jump slot. Initially, this is the address +of step 2's instructions, which update the CPU state information. Step 3, +``tcg_gen_exit_tb()``, exits from the current TB returning a tagged +pointer composed of the last executed TB’s address and the jump slot +index. + +The first time this whole sequence is executed, step 1 simply jumps +to step 2. Then the CPU state information gets updated and we exit from +the current TB. As a result, the behavior is very similar to the less +optimized form described earlier in this section. + +Next, the main loop looks for the next TB to execute using the +current CPU state information (creating the TB if it wasn’t already +available) and, before starting to execute the new TB’s instructions, +patches the previously executed TB by associating one of its jump +slots (the one specified in the call to ``tcg_gen_exit_tb()``) with the +address of the new TB. + +The next time this previous TB is executed and we get to that same +``goto_tb`` step, it will already be patched (assuming the destination TB +is still in memory) and will jump directly to the first instruction of +the destination TB, without going back to the main loop. + +For the ``goto_tb + exit_tb`` mechanism to be used, the following +conditions need to be satisfied: + +* The change in CPU state must be constant, e.g., a direct branch and + not an indirect branch. + +* The direct branch cannot cross a page boundary. Memory mappings + may change, causing the code at the destination address to change. + +Note that, on step 3 (``tcg_gen_exit_tb()``), in addition to the +jump slot index, the address of the TB just executed is also returned. +This address corresponds to the TB that will be patched; it may be +different than the one that was directly executed from the main loop +if the latter had already been chained to other TBs. Self-modifying code and translated code invalidation ---------------------------------------------------- diff --git a/docs/devel/testing.rst b/docs/devel/testing.rst index 1da4c4e4c4e..755343c7dd0 100644 --- a/docs/devel/testing.rst +++ b/docs/devel/testing.rst @@ -1,11 +1,10 @@ -=============== Testing in QEMU =============== This document describes the testing infrastructure in QEMU. Testing with "make check" -========================= +------------------------- The "make check" testing family includes most of the C based tests in QEMU. For a quick help, run ``make check-help`` from the source tree. @@ -24,7 +23,7 @@ expect the executables to exist and will fail with obscure messages if they cannot find them. Unit tests ----------- +~~~~~~~~~~ Unit tests, which can be invoked with ``make check-unit``, are simple C tests that typically link to individual QEMU object files and exercise them by @@ -67,7 +66,7 @@ and copy the actual command line which executes the unit test, then run it from the command line. QTest ------ +~~~~~ QTest is a device emulation testing framework. 
It can be very useful to test device models; it could also control certain aspects of QEMU (such as virtual @@ -81,7 +80,7 @@ QTest cases can be executed with make check-qtest QAPI schema tests ------------------ +~~~~~~~~~~~~~~~~~ The QAPI schema tests validate the QAPI parser used by QMP, by feeding predefined input to the parser and comparing the result with the reference @@ -108,33 +107,14 @@ parser (either fixing a bug or extending/modifying the syntax). To do this: ``qapi-schema += foo.json`` check-block ------------ +~~~~~~~~~~~ ``make check-block`` runs a subset of the block layer iotests (the tests that are in the "auto" group). See the "QEMU iotests" section below for more information. -GCC gcov support ----------------- - -``gcov`` is a GCC tool to analyze the testing coverage by -instrumenting the tested code. To use it, configure QEMU with -``--enable-gcov`` option and build. Then run ``make check`` as usual. - -If you want to gather coverage information on a single test the ``make -clean-gcda`` target can be used to delete any existing coverage -information before running a single test. - -You can generate a HTML coverage report by executing ``make -coverage-html`` which will create -``meson-logs/coveragereport/index.html``. - -Further analysis can be conducted by running the ``gcov`` command -directly on the various .gcda output files. Please read the ``gcov`` -documentation for more information. - QEMU iotests -============ +------------ QEMU iotests, under the directory ``tests/qemu-iotests``, is the testing framework widely used to test block layer related features. It is higher level @@ -171,7 +151,7 @@ More options are supported by the ``./check`` script, run ``./check -h`` for help. Writing a new test case ------------------------ +~~~~~~~~~~~~~~~~~~~~~~~ Consider writing a tests case when you are making any changes to the block layer. An iotest case is usually the choice for that. There are already many @@ -224,8 +204,38 @@ another application on the host may have locked the file, possibly leading to a test failure. If using such devices are explicitly desired, consider adding ``locking=off`` option to disable image locking. +Debugging a test case +~~~~~~~~~~~~~~~~~~~~~ + +The following options to the ``check`` script can be useful when debugging +a failing test: + +* ``-gdb`` wraps every QEMU invocation in a ``gdbserver``, which waits for a + connection from a gdb client. The options given to ``gdbserver`` (e.g. the + address on which to listen for connections) are taken from the ``$GDB_OPTIONS`` + environment variable. By default (if ``$GDB_OPTIONS`` is empty), it listens on + ``localhost:12345``. + It is possible to connect to it for example with + ``gdb -iex "target remote $addr"``, where ``$addr`` is the address + ``gdbserver`` listens on. + If the ``-gdb`` option is not used, ``$GDB_OPTIONS`` is ignored, + regardless of whether it is set or not. + +* ``-valgrind`` attaches a valgrind instance to QEMU. If it detects + warnings, it will print and save the log in + ``$TEST_DIR/.valgrind``. + The final command line will be ``valgrind --log-file=$TEST_DIR/ + .valgrind --error-exitcode=99 $QEMU ...`` + +* ``-d`` (debug) just increases the logging verbosity, showing + for example the QMP commands and answers. + +* ``-p`` (print) redirects QEMU’s stdout and stderr to the test output, + instead of saving it into a log file in + ``$TEST_DIR/qemu-machine-``. 
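For instance (the test number, image format and gdb address here are only
illustrative), a debugging session combining these options could be started
with::

    GDB_OPTIONS="localhost:1234" ./check -qcow2 -gdb 040
    ./check -qcow2 -valgrind -d 040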
+ Test case groups ----------------- +~~~~~~~~~~~~~~~~ "Tests may belong to one or more test groups, which are defined in the form of a comment in the test source file. By convention, test groups are listed @@ -275,10 +285,10 @@ Note that the following group names have a special meaning: .. _container-ref: Container based tests -===================== +--------------------- Introduction ------------- +~~~~~~~~~~~~ The container testing framework in QEMU utilizes public images to build and test QEMU in predefined and widely accessible Linux @@ -293,7 +303,7 @@ The container images are also used to augment the generation of tests for testing TCG. See :ref:`checktcg-ref` for more details. Docker Prerequisites --------------------- +~~~~~~~~~~~~~~~~~~~~ Install "docker" with the system package manager and start the Docker service on your development machine, then make sure you have the privilege to run @@ -324,7 +334,7 @@ exploit the whole host with Docker bind mounting or other privileged operations. So only do it on development machines. Podman Prerequisites --------------------- +~~~~~~~~~~~~~~~~~~~~ Install "podman" with the system package manager. @@ -336,7 +346,7 @@ Install "podman" with the system package manager. The last command should print an empty table, to verify the system is ready. Quickstart ----------- +~~~~~~~~~~ From source tree, type ``make docker-help`` to see the help. Testing can be started without configuring or building QEMU (``configure`` and @@ -352,7 +362,7 @@ is downloaded and initialized automatically), in which the ``test-build`` job is executed. Registry --------- +~~~~~~~~ The QEMU project has a container registry hosted by GitLab at ``registry.gitlab.com/qemu-project/qemu`` which will automatically be @@ -366,7 +376,7 @@ locally by using the ``NOCACHE`` build option: make docker-image-debian10 NOCACHE=1 Images ------- +~~~~~~ Along with many other images, the ``centos8`` image is defined in a Dockerfile in ``tests/docker/dockerfiles/``, called ``centos8.docker``. ``make docker-help`` @@ -381,7 +391,7 @@ mainly used to do necessary host side setup. One such setup is ``binfmt_misc``, for example, to make qemu-user powered cross build containers work. Tests ------ +~~~~~ Different tests are added to cover various configurations to build and test QEMU. Docker tests are the executables under ``tests/docker`` named @@ -392,7 +402,7 @@ source and build it. The full list of tests is printed in the ``make docker-help`` help. Debugging a Docker test failure -------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When CI tasks, maintainers or yourself report a Docker test failure, follow the below steps to debug it: @@ -409,7 +419,7 @@ below steps to debug it: the prompt for debug. Options -------- +~~~~~~~ Various options can be used to affect how Docker tests are done. The full list is in the ``make docker`` help text. The frequently used ones are: @@ -423,7 +433,7 @@ list is in the ``make docker`` help text. The frequently used ones are: failure" section. Thread Sanitizer -================ +---------------- Thread Sanitizer (TSan) is a tool which can detect data races. QEMU supports building and testing with this tool. @@ -433,7 +443,7 @@ For more information on TSan: https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual Thread Sanitizer in Docker ---------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~ TSan is currently supported in the ubuntu2004 docker. The test-tsan test will build using TSan and then run make check. 
@@ -448,7 +458,7 @@ We recommend using DEBUG=1 to allow launching the test from inside the docker, and to allow review of the warnings generated by TSan. Building and Testing with TSan ------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It is possible to build and test with TSan, with a few additional steps. These steps are normally done automatically in the docker. @@ -487,7 +497,7 @@ This allows for running the test and then checking the warnings afterwards. If you want TSan to stop and exit with error on warnings, use exitcode=66. TSan Suppressions ------------------ +~~~~~~~~~~~~~~~~~ Keep in mind that for any data race warning, although there might be a data race detected by TSan, there might be no actual bug here. TSan provides several different mechanisms for suppressing warnings. In general it is recommended @@ -513,7 +523,7 @@ More information on the file format can be found here under "Blacklist Format": https://github.com/google/sanitizers/wiki/ThreadSanitizerFlags TSan Annotations ----------------- +~~~~~~~~~~~~~~~~ include/qemu/tsan.h defines annotations. See this file for more descriptions of the annotations themselves. Annotations can be used to suppress TSan warnings or give TSan more information so that it can detect proper @@ -530,14 +540,14 @@ The full set of annotations can be found here: https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp VM testing -========== +---------- This test suite contains scripts that bootstrap various guest images that have necessary packages to build QEMU. The basic usage is documented in ``Makefile`` help which is displayed with ``make vm-help``. Quickstart ----------- +~~~~~~~~~~ Run ``make vm-help`` to list available make targets. Invoke a specific make command to run build test in an image. For example, ``make vm-build-freebsd`` @@ -552,29 +562,29 @@ concerned about attackers taking control of the guest and potentially exploiting a QEMU security bug to compromise the host. QEMU binaries -------------- +~~~~~~~~~~~~~ -By default, qemu-system-x86_64 is searched in $PATH to run the guest. If there -isn't one, or if it is older than 2.10, the test won't work. In this case, +By default, ``qemu-system-x86_64`` is searched in $PATH to run the guest. If +there isn't one, or if it is older than 2.10, the test won't work. In this case, provide the QEMU binary in env var: ``QEMU=/path/to/qemu-2.10+``. -Likewise the path to qemu-img can be set in QEMU_IMG environment variable. +Likewise the path to ``qemu-img`` can be set in QEMU_IMG environment variable. Make jobs ---------- +~~~~~~~~~ The ``-j$X`` option in the make command line is not propagated into the VM, specify ``J=$X`` to control the make jobs in the guest. Debugging ---------- +~~~~~~~~~ Add ``DEBUG=1`` and/or ``V=1`` to the make command to allow interactive debugging and verbose output. If this is not enough, see the next section. ``V=1`` will be propagated down into the make jobs in the guest. Manual invocation ------------------ +~~~~~~~~~~~~~~~~~ Each guest script is an executable script with the same command line options. For example to work with the netbsd guest, use ``$QEMU_SRC/tests/vm/netbsd``: @@ -598,7 +608,7 @@ For example to work with the netbsd guest, use ``$QEMU_SRC/tests/vm/netbsd``: $ ./netbsd --interactive --image /var/tmp/netbsd.img sh Adding new guests ------------------ +~~~~~~~~~~~~~~~~~ Please look at existing guest scripts for how to add new guests. @@ -631,7 +641,7 @@ the script's ``main()``. recommended. 
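Putting the options from the preceding sections together (the guest, the
paths and the job count are only an example), a verbose debug build test
could be started with::

    make vm-build-freebsd J=8 V=1 DEBUG=1 \
        QEMU=/path/to/qemu-system-x86_64 QEMU_IMG=/path/to/qemu-img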
Image fuzzer testing -==================== +-------------------- An image fuzzer was added to exercise format drivers. Currently only qcow2 is supported. To start the fuzzer, run @@ -640,20 +650,19 @@ supported. To start the fuzzer, run tests/image-fuzzer/runner.py -c '[["qemu-img", "info", "$test_img"]]' /tmp/test qcow2 -Alternatively, some command different from "qemu-img info" can be tested, by +Alternatively, some command different from ``qemu-img info`` can be tested, by changing the ``-c`` option. -Acceptance tests using the Avocado Framework -============================================ +Integration tests using the Avocado Framework +--------------------------------------------- -The ``tests/acceptance`` directory hosts functional tests, also known -as acceptance level tests. They're usually higher level tests, and -may interact with external resources and with various guest operating -systems. +The ``tests/avocado`` directory hosts integration tests. They're usually +higher level tests, and may interact with external resources and with +various guest operating systems. These tests are written using the Avocado Testing Framework (which must be installed separately) in conjunction with a the ``avocado_qemu.Test`` -class, implemented at ``tests/acceptance/avocado_qemu``. +class, implemented at ``tests/avocado/avocado_qemu``. Tests based on ``avocado_qemu.Test`` can easily: @@ -683,13 +692,13 @@ Tests based on ``avocado_qemu.Test`` can easily: - http://avocado-framework.readthedocs.io/en/latest/api/utils/avocado.utils.html Running tests -------------- +~~~~~~~~~~~~~ -You can run the acceptance tests simply by executing: +You can run the avocado tests simply by executing: .. code:: - make check-acceptance + make check-avocado This involves the automatic creation of Python virtual environment within the build tree (at ``tests/venv``) which will have all the @@ -703,16 +712,85 @@ available. On Debian and Ubuntu based systems, depending on the specific version, they may be on packages named ``python3-venv`` and ``python3-pip``. +It is also possible to run tests based on tags using the +``make check-avocado`` command and the ``AVOCADO_TAGS`` environment +variable: + +.. code:: + + make check-avocado AVOCADO_TAGS=quick + +Note that tags separated with commas have an AND behavior, while tags +separated by spaces have an OR behavior. For more information on Avocado +tags, see: + + https://avocado-framework.readthedocs.io/en/latest/guides/user/chapters/tags.html + +To run a single test file, a couple of them, or a test within a file +using the ``make check-avocado`` command, set the ``AVOCADO_TESTS`` +environment variable with the test files or test names. To run all +tests from a single file, use: + + .. code:: + + make check-avocado AVOCADO_TESTS=$FILEPATH + +The same is valid to run tests from multiple test files: + + .. code:: + + make check-avocado AVOCADO_TESTS='$FILEPATH1 $FILEPATH2' + +To run a single test within a file, use: + + .. code:: + + make check-avocado AVOCADO_TESTS=$FILEPATH:$TESTCLASS.$TESTNAME + +The same is valid to run single tests from multiple test files: + + .. code:: + + make check-avocado AVOCADO_TESTS='$FILEPATH1:$TESTCLASS1.$TESTNAME1 $FILEPATH2:$TESTCLASS2.$TESTNAME2' + The scripts installed inside the virtual environment may be used without an "activation". For instance, the Avocado test runner may be invoked by running: .. 
code:: - tests/venv/bin/avocado run $OPTION1 $OPTION2 tests/acceptance/ + tests/venv/bin/avocado run $OPTION1 $OPTION2 tests/avocado/ + +Note that if ``make check-avocado`` was not executed before, it is +possible to create the Python virtual environment with the dependencies +needed running: + + .. code:: + + make check-venv + +It is also possible to run tests from a single file or a single test within +a test file. To run tests from a single file within the build tree, use: + + .. code:: + + tests/venv/bin/avocado run tests/avocado/$TESTFILE + +To run a single test within a test file, use: + + .. code:: + + tests/venv/bin/avocado run tests/avocado/$TESTFILE:$TESTCLASS.$TESTNAME + +Valid test names are visible in the output from any previous execution +of Avocado or ``make check-avocado``, and can also be queried using: + + .. code:: + + tests/venv/bin/avocado list tests/avocado Manual Installation -------------------- +~~~~~~~~~~~~~~~~~~~ To manually install Avocado and its dependencies, run: @@ -725,18 +803,18 @@ Alternatively, follow the instructions on this link: https://avocado-framework.readthedocs.io/en/latest/guides/user/chapters/installing.html Overview --------- +~~~~~~~~ -The ``tests/acceptance/avocado_qemu`` directory provides the +The ``tests/avocado/avocado_qemu`` directory provides the ``avocado_qemu`` Python module, containing the ``avocado_qemu.Test`` class. Here's a simple usage example: .. code:: - from avocado_qemu import Test + from avocado_qemu import QemuSystemTest - class Version(Test): + class Version(QemuSystemTest): """ :avocado: tags=quick """ @@ -761,7 +839,7 @@ in the current directory, tagged as "quick", run: avocado run -t quick . The ``avocado_qemu.Test`` base test class ------------------------------------------ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``avocado_qemu.Test`` class has a number of characteristics that are worth being mentioned right away. @@ -775,16 +853,16 @@ The base test class has also support for tests with more than one QEMUMachine. The way to get machines is through the ``self.get_vm()`` method which will return a QEMUMachine instance. The ``self.get_vm()`` method accepts arguments that will be passed to the QEMUMachine creation -and also an optional `name` attribute so you can identify a specific +and also an optional ``name`` attribute so you can identify a specific machine and get it more than once through the tests methods. A simple and hypothetical example follows: .. code:: - from avocado_qemu import Test + from avocado_qemu import QemuSystemTest - class MultipleMachines(Test): + class MultipleMachines(QemuSystemTest): def test_multiple_machines(self): first_machine = self.get_vm() second_machine = self.get_vm() @@ -810,6 +888,32 @@ and hypothetical example follows: At test "tear down", ``avocado_qemu.Test`` handles all the QEMUMachines shutdown. +The ``avocado_qemu.LinuxTest`` base test class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``avocado_qemu.LinuxTest`` is further specialization of the +``avocado_qemu.Test`` class, so it contains all the characteristics of +the later plus some extra features. + +First of all, this base class is intended for tests that need to +interact with a fully booted and operational Linux guest. At this +time, it uses a Fedora 31 guest image. The most basic example looks +like this: + +.. 
code:: + + from avocado_qemu import LinuxTest + + + class SomeTest(LinuxTest): + + def test(self): + self.launch_and_wait() + self.ssh_command('some_command_to_be_run_in_the_guest') + +Please refer to tests that use ``avocado_qemu.LinuxTest`` under +``tests/avocado`` for more examples. + QEMUMachine ~~~~~~~~~~~ @@ -828,7 +932,7 @@ execution of a QEMU binary, giving its users: a more succinct and intuitive way QEMU binary selection -~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^ The QEMU binary used for the ``self.vm`` QEMUMachine instance will primarily depend on the value of the ``qemu_bin`` parameter. If it's @@ -849,20 +953,23 @@ The resulting ``qemu_bin`` value will be preserved in the ``avocado_qemu.Test`` as an attribute with the same name. Attribute reference -------------------- +~~~~~~~~~~~~~~~~~~~ + +Test +^^^^ Besides the attributes and methods that are part of the base ``avocado.Test`` class, the following attributes are available on any ``avocado_qemu.Test`` instance. vm -~~ +'' A QEMUMachine instance, initially configured according to the given ``qemu_bin`` parameter. arch -~~~~ +'''' The architecture can be used on different levels of the stack, e.g. by the framework or by the test itself. At the framework level, it will @@ -878,8 +985,19 @@ name. If one is not given explicitly, it will either be set to ``None``, or, if the test is tagged with one (and only one) ``:avocado: tags=arch:VALUE`` tag, it will be set to ``VALUE``. +cpu +''' + +The cpu model that will be set to all QEMUMachine instances created +by the test. + +The ``cpu`` attribute will be set to the test parameter of the same +name. If one is not given explicitly, it will either be set to +``None ``, or, if the test is tagged with one (and only one) +``:avocado: tags=cpu:VALUE`` tag, it will be set to ``VALUE``. + machine -~~~~~~~ +''''''' The machine type that will be set to all QEMUMachine instances created by the test. @@ -890,14 +1008,47 @@ name. If one is not given explicitly, it will either be set to ``:avocado: tags=machine:VALUE`` tag, it will be set to ``VALUE``. qemu_bin -~~~~~~~~ +'''''''' The preserved value of the ``qemu_bin`` parameter or the result of the dynamic probe for a QEMU binary in the current working directory or source tree. +LinuxTest +^^^^^^^^^ + +Besides the attributes present on the ``avocado_qemu.Test`` base +class, the ``avocado_qemu.LinuxTest`` adds the following attributes: + +distro +'''''' + +The name of the Linux distribution used as the guest image for the +test. The name should match the **Provider** column on the list +of images supported by the avocado.utils.vmimage library: + +https://avocado-framework.readthedocs.io/en/latest/guides/writer/libs/vmimage.html#supported-images + +distro_version +'''''''''''''' + +The version of the Linux distribution as the guest image for the +test. The name should match the **Version** column on the list +of images supported by the avocado.utils.vmimage library: + +https://avocado-framework.readthedocs.io/en/latest/guides/writer/libs/vmimage.html#supported-images + +distro_checksum +''''''''''''''' + +The sha256 hash of the guest image file used for the test. + +If this value is not set in the code or by a test parameter (with the +same name), no validation on the integrity of the image will be +performed. 
+ Parameter reference -------------------- +~~~~~~~~~~~~~~~~~~~ To understand how Avocado parameters are accessed by tests, and how they can be passed to tests, please refer to:: @@ -911,8 +1062,11 @@ like the following: PARAMS (key=qemu_bin, path=*, default=./qemu-system-x86_64) => './qemu-system-x86_64 +Test +^^^^ + arch -~~~~ +'''' The architecture that will influence the selection of a QEMU binary (when one is not explicitly given). @@ -924,20 +1078,58 @@ architecture of a kernel or disk image to boot a VM with. This parameter has a direct relation with the ``arch`` attribute. If not given, it will default to None. +cpu +''' + +The cpu model that will be set to all QEMUMachine instances created +by the test. + machine -~~~~~~~ +''''''' The machine type that will be set to all QEMUMachine instances created by the test. - qemu_bin -~~~~~~~~ +'''''''' The exact QEMU binary to be used on QEMUMachine. +LinuxTest +^^^^^^^^^ + +Besides the parameters present on the ``avocado_qemu.Test`` base +class, the ``avocado_qemu.LinuxTest`` adds the following parameters: + +distro +'''''' + +The name of the Linux distribution used as the guest image for the +test. The name should match the **Provider** column on the list +of images supported by the avocado.utils.vmimage library: + +https://avocado-framework.readthedocs.io/en/latest/guides/writer/libs/vmimage.html#supported-images + +distro_version +'''''''''''''' + +The version of the Linux distribution used as the guest image for the +test. The name should match the **Version** column on the list +of images supported by the avocado.utils.vmimage library: + +https://avocado-framework.readthedocs.io/en/latest/guides/writer/libs/vmimage.html#supported-images + +distro_checksum +''''''''''''''' + +The sha256 hash of the guest image file used for the test. + +If this value is not set in the code or by this parameter, no +validation on the integrity of the image will be performed. + Skipping tests --------------- +~~~~~~~~~~~~~~ + The Avocado framework provides Python decorators which allow for easily skipping tests under certain conditions. For example, on the lack of a binary on the test system or when the running environment is a CI system. For further @@ -952,27 +1144,27 @@ environment variables became a kind of standard way to enable/disable tests. Here is a list of the most used variables: AVOCADO_ALLOW_LARGE_STORAGE -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ Tests which are going to fetch or produce assets considered *large* are not -going to run unless that `AVOCADO_ALLOW_LARGE_STORAGE=1` is exported on +going to run unless ``AVOCADO_ALLOW_LARGE_STORAGE=1`` is exported on the environment. The definition of *large* is a bit arbitrary here, but it usually means an asset which occupies at least 1GB of size on disk when uncompressed. AVOCADO_ALLOW_UNTRUSTED_CODE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ There are tests which will boot a kernel image or firmware that can be considered not safe to run on the developer's workstation, thus they are skipped by default. The definition of *not safe* is also arbitrary but usually it means a blob whose source or build process isn't publicly available. -You should export `AVOCADO_ALLOW_UNTRUSTED_CODE=1` on the environment in +You should export ``AVOCADO_ALLOW_UNTRUSTED_CODE=1`` on the environment in order to allow tests which make use of those kinds of assets.
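To make the relationship between these variables and the skip decorators concrete, here is a minimal sketch of how a test might guard itself; the class and test names are made up for the example, and the stock ``avocado`` decorators are assumed:

 .. code::

    import os

    from avocado import skipUnless
    from avocado_qemu import QemuSystemTest


    class HypotheticalFirmwareTest(QemuSystemTest):

        @skipUnless(os.getenv('AVOCADO_ALLOW_UNTRUSTED_CODE'),
                    'untrusted code')
        def test_boot_binary_blob(self):
            # Only reached when AVOCADO_ALLOW_UNTRUSTED_CODE=1 is exported
            self.vm.launch()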
AVOCADO_TIMEOUT_EXPECTED -~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^ The Avocado framework has a timeout mechanism which interrupts tests to keep the test suite from getting stuck. The timeout value can be set via test parameter or property defined in the test class, for further details:: @@ -982,11 +1174,11 @@ property defined in the test class, for further details:: Even though the timeout can be set by the test developer, there are some tests that may not have a well-defined limit of time to finish under certain conditions. For example, tests that take longer to execute when QEMU is -compiled with debug flags. Therefore, the `AVOCADO_TIMEOUT_EXPECTED` variable +compiled with debug flags. Therefore, the ``AVOCADO_TIMEOUT_EXPECTED`` variable has been used to determine whether those tests should run or not. GITLAB_CI -~~~~~~~~~ +^^^^^^^^^ A number of tests are flagged to not run on the GitLab CI. Usually because they proved to be flaky or there are constraints on the CI environment which would make them fail. If you encounter a similar situation then use that @@ -999,7 +1191,7 @@ variable as shown on the code snippet below to skip the test: do_something() Uninstalling Avocado --------------------- +~~~~~~~~~~~~~~~~~~~~ If you've followed the manual installation instructions above, you can easily uninstall Avocado. Start by listing the packages you have @@ -1011,13 +1203,13 @@ And remove any package you want with:: pip uninstall -If you've used ``make check-acceptance``, the Python virtual environment where +If you've used ``make check-avocado``, the Python virtual environment where Avocado is installed will be cleaned up as part of ``make check-clean``. .. _checktcg-ref: Testing with "make check-tcg" -============================= +----------------------------- The check-tcg tests are intended for simple smoke tests of both linux-user and softmmu TCG functionality. However to build test @@ -1050,7 +1242,7 @@ itself. See :ref:`container-ref` for more details. Running subset of tests ------------------------ +~~~~~~~~~~~~~~~~~~~~~~~ You can build the tests for one architecture:: @@ -1064,7 +1256,7 @@ Adding ``V=1`` to the invocation will show the details of how to invoke QEMU for the test which is useful for debugging tests. TCG test dependencies ---------------------- +~~~~~~~~~~~~~~~~~~~~~ The TCG tests are deliberately very light on dependencies and are either totally bare with minimal gcc lib support (for softmmu tests) @@ -1096,3 +1288,22 @@ exercise as many corner cases as possible. It is a useful test suite to run to exercise QEMU's linux-user code:: https://linux-test-project.github.io/ + +GCC gcov support +---------------- + +``gcov`` is a GCC tool to analyze the testing coverage by +instrumenting the tested code. To use it, configure QEMU with the +``--enable-gcov`` option and build. Then run the tests as usual. + +If you want to gather coverage information on a single test, the ``make +clean-gcda`` target can be used to delete any existing coverage +information before running it. + +You can generate an HTML coverage report by executing ``make +coverage-html`` which will create +``meson-logs/coveragereport/index.html``. + +Further analysis can be conducted by running the ``gcov`` command +directly on the various .gcda output files. Please read the ``gcov`` +documentation for more information.
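Putting the gcov steps above together, a typical sequence from an out-of-tree build directory might look roughly like this (the test target shown is only an example; any target from this document works the same way)::

   ../configure --enable-gcov
   make
   make check-unit            # or any other test target
   make coverage-html
   # the report is written to meson-logs/coveragereport/index.html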
diff --git a/docs/devel/trivial-patches.rst b/docs/devel/trivial-patches.rst new file mode 100644 index 00000000000..9380c730f78 --- /dev/null +++ b/docs/devel/trivial-patches.rst @@ -0,0 +1,52 @@ +.. _trivial-patches: + +Trivial Patches +=============== + +Overview +-------- + +Trivial patches that change just a few lines of code sometimes languish +on the mailing list even though they require only a small amount of +review. This is often the case for patches that do not fall under an +actively maintained subsystem and therefore fall through the cracks. + +The trivial patches team take on the task of reviewing and building pull +requests for patches that: + +- Do not fall under an actively maintained subsystem. +- Are single patches or short series (max 2-4 patches). +- Only touch a few lines of code. + +**You should hint that your patch is a candidate by CCing +qemu-trivial@nongnu.org.** + +Repositories +------------ + +Since the trivial patch team rotates maintainership there is only one +active repository at a time: + +- git://github.com/vivier/qemu.git trivial-patches - `browse `__ + +Workflow +-------- + +The trivial patches team rotates the duty of collecting trivial patches +amongst its members. A team member's job is to: + +1. Identify trivial patches on the development mailing list. +2. Review trivial patches, merge them into a git tree, and reply to state + that the patch is queued. +3. Send pull requests to the development mailing list once a week. + +A single team member can be on duty as long as they like. The suggested +time is 1 week before handing off to the next member. + +Team +---- + +If you would like to join the trivial patches team, contact Laurent +Vivier. The current team includes: + +- `Laurent Vivier `__ diff --git a/docs/devel/ui.rst b/docs/devel/ui.rst new file mode 100644 index 00000000000..17fb667dec4 --- /dev/null +++ b/docs/devel/ui.rst @@ -0,0 +1,8 @@ +================= +QEMU UI subsystem +================= + +QEMU Clipboard +-------------- + +.. kernel-doc:: include/ui/clipboard.h diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst new file mode 100644 index 00000000000..9ff6163c881 --- /dev/null +++ b/docs/devel/vfio-migration.rst @@ -0,0 +1,150 @@ +===================== +VFIO device Migration +===================== + +Migration of virtual machine involves saving the state for each device that +the guest is running on source host and restoring this saved state on the +destination host. This document details how saving and restoring of VFIO +devices is done in QEMU. + +Migration of VFIO devices consists of two phases: the optional pre-copy phase, +and the stop-and-copy phase. The pre-copy phase is iterative and allows to +accommodate VFIO devices that have a large amount of data that needs to be +transferred. The iterative pre-copy phase of migration allows for the guest to +continue whilst the VFIO device state is transferred to the destination, this +helps to reduce the total downtime of the VM. VFIO devices can choose to skip +the pre-copy phase of migration by returning pending_bytes as zero during the +pre-copy phase. + +A detailed description of the UAPI for VFIO device migration can be found in +the comment for the ``vfio_device_migration_info`` structure in the header +file linux-headers/linux/vfio.h. + +VFIO implements the device hooks for the iterative approach as follows: + +* A ``save_setup`` function that sets up the migration region and sets _SAVING + flag in the VFIO device state. 
+ +* A ``load_setup`` function that sets up the migration region on the + destination and sets _RESUMING flag in the VFIO device state. + +* A ``save_live_pending`` function that reads pending_bytes from the vendor + driver, which indicates the amount of data that the vendor driver has yet to + save for the VFIO device. + +* A ``save_live_iterate`` function that reads the VFIO device's data from the + vendor driver through the migration region during iterative phase. + +* A ``save_state`` function to save the device config space if it is present. + +* A ``save_live_complete_precopy`` function that resets _RUNNING flag from the + VFIO device state and iteratively copies the remaining data for the VFIO + device until the vendor driver indicates that no data remains (pending bytes + is zero). + +* A ``load_state`` function that loads the config section and the data + sections that are generated by the save functions above + +* ``cleanup`` functions for both save and load that perform any migration + related cleanup, including unmapping the migration region + + +The VFIO migration code uses a VM state change handler to change the VFIO +device state when the VM state changes from running to not-running, and +vice versa. + +Similarly, a migration state change handler is used to trigger a transition of +the VFIO device state when certain changes of the migration state occur. For +example, the VFIO device state is transitioned back to _RUNNING in case a +migration failed or was canceled. + +System memory dirty pages tracking +---------------------------------- + +A ``log_global_start`` and ``log_global_stop`` memory listener callback informs +the VFIO IOMMU module to start and stop dirty page tracking. A ``log_sync`` +memory listener callback marks those system memory pages as dirty which are +used for DMA by the VFIO device. The dirty pages bitmap is queried per +container. All pages pinned by the vendor driver through external APIs have to +be marked as dirty during migration. When there are CPU writes, CPU dirty page +tracking can identify dirtied pages, but any page pinned by the vendor driver +can also be written by the device. There is currently no device or IOMMU +support for dirty page tracking in hardware. + +By default, dirty pages are tracked when the device is in pre-copy as well as +stop-and-copy phase. So, a page pinned by the vendor driver will be copied to +the destination in both phases. Copying dirty pages in pre-copy phase helps +QEMU to predict if it can achieve its downtime tolerances. If QEMU during +pre-copy phase keeps finding dirty pages continuously, then it understands +that even in stop-and-copy phase, it is likely to find dirty pages and can +predict the downtime accordingly. + +QEMU also provides a per device opt-out option ``pre-copy-dirty-page-tracking`` +which disables querying the dirty bitmap during pre-copy phase. If it is set to +off, all dirty pages will be copied to the destination in stop-and-copy phase +only. + +System memory dirty pages tracking when vIOMMU is enabled +--------------------------------------------------------- + +With vIOMMU, an IO virtual address range can get unmapped while in pre-copy +phase of migration. In that case, the unmap ioctl returns any dirty pages in +that range and QEMU reports corresponding guest physical pages dirty. During +stop-and-copy phase, an IOMMU notifier is used to get a callback for mapped +pages and then dirty pages bitmap is fetched from VFIO IOMMU modules for those +mapped ranges. 
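For reference, the ``pre-copy-dirty-page-tracking`` opt-out described above is a per device property; assuming a ``vfio-pci`` device, a command line disabling it might look like the following (the host address is just a placeholder)::

   -device vfio-pci,host=0000:03:00.0,pre-copy-dirty-page-tracking=off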
+ +Flow of state changes during Live migration +=========================================== + +Below is the flow of state change during live migration. +The values in the brackets represent the VM state, the migration state, and +the VFIO device state, respectively. + +Live migration save path +------------------------ + +:: + + QEMU normal running state + (RUNNING, _NONE, _RUNNING) + | + migrate_init spawns migration_thread + Migration thread then calls each device's .save_setup() + (RUNNING, _SETUP, _RUNNING|_SAVING) + | + (RUNNING, _ACTIVE, _RUNNING|_SAVING) + If device is active, get pending_bytes by .save_live_pending() + If total pending_bytes >= threshold_size, call .save_live_iterate() + Data of VFIO device for pre-copy phase is copied + Iterate till total pending bytes converge and are less than threshold + | + On migration completion, vCPU stops and calls .save_live_complete_precopy for + each active device. The VFIO device is then transitioned into _SAVING state + (FINISH_MIGRATE, _DEVICE, _SAVING) + | + For the VFIO device, iterate in .save_live_complete_precopy until + pending data is 0 + (FINISH_MIGRATE, _DEVICE, _STOPPED) + | + (FINISH_MIGRATE, _COMPLETED, _STOPPED) + Migraton thread schedules cleanup bottom half and exits + +Live migration resume path +-------------------------- + +:: + + Incoming migration calls .load_setup for each device + (RESTORE_VM, _ACTIVE, _STOPPED) + | + For each device, .load_state is called for that device section data + (RESTORE_VM, _ACTIVE, _RESUMING) + | + At the end, .load_cleanup is called for each device and vCPUs are started + (RUNNING, _NONE, _RUNNING) + +Postcopy +======== + +Postcopy migration is currently not supported for VFIO devices. diff --git a/docs/devel/writing-monitor-commands.rst b/docs/devel/writing-monitor-commands.rst new file mode 100644 index 00000000000..1693822f8f9 --- /dev/null +++ b/docs/devel/writing-monitor-commands.rst @@ -0,0 +1,751 @@ +How to write monitor commands +============================= + +This document is a step-by-step guide on how to write new QMP commands using +the QAPI framework and HMP commands. + +This document doesn't discuss QMP protocol level details, nor does it dive +into the QAPI framework implementation. + +For an in-depth introduction to the QAPI framework, please refer to +docs/devel/qapi-code-gen.txt. For documentation about the QMP protocol, +start with docs/interop/qmp-intro.txt. + +New commands may be implemented in QMP only. New HMP commands should be +implemented on top of QMP. The typical HMP command wraps around an +equivalent QMP command, but HMP convenience commands built from QMP +building blocks are also fine. The long term goal is to make all +existing HMP commands conform to this, to fully isolate HMP from the +internals of QEMU. Refer to the `Writing a debugging aid returning +unstructured text`_ section for further guidance on commands that +would have traditionally been HMP only. + +Overview +-------- + +Generally speaking, the following steps should be taken in order to write a +new QMP command. + +1. Define the command and any types it needs in the appropriate QAPI + schema module. + +2. Write the QMP command itself, which is a regular C function. Preferably, + the command should be exported by some QEMU subsystem. But it can also be + added to the monitor/qmp-cmds.c file + +3. At this point the command can be tested under the QMP protocol + +4. Write the HMP command equivalent. 
This is not required and should only be + done if it does make sense to have the functionality in HMP. The HMP command + is implemented in terms of the QMP command + +The following sections will demonstrate each of the steps above. We will start +very simple and get more complex as we progress. + + +Testing +------- + +For all the examples in the next sections, the test setup is the same and is +shown here. + +First, QEMU should be started like this:: + + # qemu-system-TARGET [...] \ + -chardev socket,id=qmp,port=4444,host=localhost,server=on \ + -mon chardev=qmp,mode=control,pretty=on + +Then, in a different terminal:: + + $ telnet localhost 4444 + Trying 127.0.0.1... + Connected to localhost. + Escape character is '^]'. + { + "QMP": { + "version": { + "qemu": { + "micro": 50, + "minor": 15, + "major": 0 + }, + "package": "" + }, + "capabilities": [ + ] + } + } + +The above output is the QMP server saying you're connected. The server is +actually in capabilities negotiation mode. To enter in command mode type:: + + { "execute": "qmp_capabilities" } + +Then the server should respond:: + + { + "return": { + } + } + +Which is QMP's way of saying "the latest command executed OK and didn't return +any data". Now you're ready to enter the QMP example commands as explained in +the following sections. + + +Writing a simple command: hello-world +------------------------------------- + +That's the most simple QMP command that can be written. Usually, this kind of +command carries some meaningful action in QEMU but here it will just print +"Hello, world" to the standard output. + +Our command will be called "hello-world". It takes no arguments, nor does it +return any data. + +The first step is defining the command in the appropriate QAPI schema +module. We pick module qapi/misc.json, and add the following line at +the bottom:: + + { 'command': 'hello-world' } + +The "command" keyword defines a new QMP command. It's an JSON object. All +schema entries are JSON objects. The line above will instruct the QAPI to +generate any prototypes and the necessary code to marshal and unmarshal +protocol data. + +The next step is to write the "hello-world" implementation. As explained +earlier, it's preferable for commands to live in QEMU subsystems. But +"hello-world" doesn't pertain to any, so we put its implementation in +monitor/qmp-cmds.c:: + + void qmp_hello_world(Error **errp) + { + printf("Hello, world!\n"); + } + +There are a few things to be noticed: + +1. QMP command implementation functions must be prefixed with "qmp\_" +2. qmp_hello_world() returns void, this is in accordance with the fact that the + command doesn't return any data +3. It takes an "Error \*\*" argument. This is required. Later we will see how to + return errors and take additional arguments. The Error argument should not + be touched if the command doesn't return errors +4. We won't add the function's prototype. That's automatically done by the QAPI +5. Printing to the terminal is discouraged for QMP commands, we do it here + because it's the easiest way to demonstrate a QMP command + +You're done. Now build qemu, run it as suggested in the "Testing" section, +and then type the following QMP command:: + + { "execute": "hello-world" } + +Then check the terminal running qemu and look for the "Hello, world" string. If +you don't see it then something went wrong. + + +Arguments +~~~~~~~~~ + +Let's add an argument called "message" to our "hello-world" command. The new +argument will contain the string to be printed to stdout. 
It's an optional +argument, if it's not present we print our default "Hello, World" string. + +The first change we have to do is to modify the command specification in the +schema file to the following:: + + { 'command': 'hello-world', 'data': { '*message': 'str' } } + +Notice the new 'data' member in the schema. It's an JSON object whose each +element is an argument to the command in question. Also notice the asterisk, +it's used to mark the argument optional (that means that you shouldn't use it +for mandatory arguments). Finally, 'str' is the argument's type, which +stands for "string". The QAPI also supports integers, booleans, enumerations +and user defined types. + +Now, let's update our C implementation in monitor/qmp-cmds.c:: + + void qmp_hello_world(bool has_message, const char *message, Error **errp) + { + if (has_message) { + printf("%s\n", message); + } else { + printf("Hello, world\n"); + } + } + +There are two important details to be noticed: + +1. All optional arguments are accompanied by a 'has\_' boolean, which is set + if the optional argument is present or false otherwise +2. The C implementation signature must follow the schema's argument ordering, + which is defined by the "data" member + +Time to test our new version of the "hello-world" command. Build qemu, run it as +described in the "Testing" section and then send two commands:: + + { "execute": "hello-world" } + { + "return": { + } + } + + { "execute": "hello-world", "arguments": { "message": "We love qemu" } } + { + "return": { + } + } + +You should see "Hello, world" and "We love qemu" in the terminal running qemu, +if you don't see these strings, then something went wrong. + + +Errors +~~~~~~ + +QMP commands should use the error interface exported by the error.h header +file. Basically, most errors are set by calling the error_setg() function. + +Let's say we don't accept the string "message" to contain the word "love". If +it does contain it, we want the "hello-world" command to return an error:: + + void qmp_hello_world(bool has_message, const char *message, Error **errp) + { + if (has_message) { + if (strstr(message, "love")) { + error_setg(errp, "the word 'love' is not allowed"); + return; + } + printf("%s\n", message); + } else { + printf("Hello, world\n"); + } + } + +The first argument to the error_setg() function is the Error pointer +to pointer, which is passed to all QMP functions. The next argument is a human +description of the error, this is a free-form printf-like string. + +Let's test the example above. Build qemu, run it as defined in the "Testing" +section, and then issue the following command:: + + { "execute": "hello-world", "arguments": { "message": "all you need is love" } } + +The QMP server's response should be:: + + { + "error": { + "class": "GenericError", + "desc": "the word 'love' is not allowed" + } + } + +Note that error_setg() produces a "GenericError" class. In general, +all QMP errors should have that error class. There are two exceptions +to this rule: + + 1. To support a management application's need to recognize a specific + error for special handling + + 2. Backward compatibility + +If the failure you want to report falls into one of the two cases above, +use error_set() with a second argument of an ErrorClass value. + + +Command Documentation +~~~~~~~~~~~~~~~~~~~~~ + +There's only one step missing to make "hello-world"'s implementation complete, +and that's its documentation in the schema file. 
+ +There are many examples of such documentation in the schema file already, but +here goes "hello-world"'s new entry for qapi/misc.json:: + + ## + # @hello-world: + # + # Print a client provided string to the standard output stream. + # + # @message: string to be printed + # + # Returns: Nothing on success. + # + # Notes: if @message is not provided, the "Hello, world" string will + # be printed instead + # + # Since: + ## + { 'command': 'hello-world', 'data': { '*message': 'str' } } + +Please, note that the "Returns" clause is optional if a command doesn't return +any data nor any errors. + + +Implementing the HMP command +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Now that the QMP command is in place, we can also make it available in the human +monitor (HMP). + +With the introduction of the QAPI, HMP commands make QMP calls. Most of the +time HMP commands are simple wrappers. All HMP command implementations exist in +the monitor/hmp-cmds.c file. + +Here's the implementation of the "hello-world" HMP command:: + + void hmp_hello_world(Monitor *mon, const QDict *qdict) + { + const char *message = qdict_get_try_str(qdict, "message"); + Error *err = NULL; + + qmp_hello_world(!!message, message, &err); + if (hmp_handle_error(mon, err)) { + return; + } + } + +Also, you have to add the function's prototype to the hmp.h file. + +There are three important points to be noticed: + +1. The "mon" and "qdict" arguments are mandatory for all HMP functions. The + former is the monitor object. The latter is how the monitor passes + arguments entered by the user to the command implementation +2. hmp_hello_world() performs error checking. In this example we just call + hmp_handle_error() which prints a message to the user, but we could do + more, like taking different actions depending on the error + qmp_hello_world() returns +3. The "err" variable must be initialized to NULL before performing the + QMP call + +There's one last step to actually make the command available to monitor users, +we should add it to the hmp-commands.hx file:: + + { + .name = "hello-world", + .args_type = "message:s?", + .params = "hello-world [message]", + .help = "Print message to the standard output", + .cmd = hmp_hello_world, + }, + +:: + + STEXI + @item hello_world @var{message} + @findex hello_world + Print message to the standard output + ETEXI + +To test this you have to open a user monitor and issue the "hello-world" +command. It might be instructive to check the command's documentation with +HMP's "help" command. + +Please, check the "-monitor" command-line option to know how to open a user +monitor. + + +Writing more complex commands +----------------------------- + +A QMP command is capable of returning any data the QAPI supports like integers, +strings, booleans, enumerations and user defined types. + +In this section we will focus on user defined types. Please, check the QAPI +documentation for information about the other types. + + +Modelling data in QAPI +~~~~~~~~~~~~~~~~~~~~~~ + +For a QMP command to be considered stable and supported long term, +there is a requirement that returned data should be explicitly modelled +using fine-grained QAPI types. As a general guide, a caller of the QMP +command should never need to parse individual returned data fields. If +a field appears to need parsing, then it should be split into separate +fields corresponding to each distinct data item. This should be the +common case for any new QMP command that is intended to be used by +machines, as opposed to exclusively human operators.
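As a purely hypothetical sketch of what "split into separate fields" means in practice, compare the two schema fragments below; neither type exists in QEMU, they only contrast a string that callers would have to parse with a fine-grained equivalent::

   # A caller would have to parse the string -- avoid this:
   { 'struct': 'ExampleVersionInfo',
     'data': { 'version': 'str' } }

   # Fine-grained modelling -- prefer this:
   { 'struct': 'ExampleVersionInfo',
     'data': { 'major': 'int', 'minor': 'int', 'micro': 'int' } }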
+ +Some QMP commands, however, are only intended as ad hoc debugging aids +for human operators. While they may return large amounts of formatted +data, it is not expected that machines will need to parse the result. +The overhead of defining a fine grained QAPI type for the data may not +be justified by the potential benefit. In such cases, it is permitted +to have a command return a simple string that contains formatted data, +however, it is mandatory for the command to use the 'x-' name prefix. +This indicates that the command is not guaranteed to be long term +stable / liable to change in future and is not following QAPI design +best practices. An example where this approach is taken is the QMP +command "x-query-registers". This returns a formatted dump of the +architecture specific CPU state. The way the data is formatted varies +across QEMU targets, is liable to change over time, and is only +intended to be consumed as an opaque string by machines. Refer to the +`Writing a debugging aid returning unstructured text`_ section for +an illustration. + +User Defined Types +~~~~~~~~~~~~~~~~~~ + +FIXME This example needs to be redone after commit 6d32717 + +For this example we will write the query-alarm-clock command, which returns +information about QEMU's timer alarm. For more information about it, please +check the "-clock" command-line option. + +We want to return two pieces of information. The first one is the alarm clock's +name. The second one is when the next alarm will fire. The former information is +returned as a string, the latter is an integer in nanoseconds (which is not +very useful in practice, as the timer has probably already fired when the +information reaches the client). + +The best way to return that data is to create a new QAPI type, as shown below:: + + ## + # @QemuAlarmClock + # + # QEMU alarm clock information. + # + # @clock-name: The alarm clock method's name. + # + # @next-deadline: The time (in nanoseconds) the next alarm will fire. + # + # Since: 1.0 + ## + { 'type': 'QemuAlarmClock', + 'data': { 'clock-name': 'str', '*next-deadline': 'int' } } + +The "type" keyword defines a new QAPI type. Its "data" member contains the +type's members. In this example our members are the "clock-name" and the +"next-deadline" one, which is optional. + +Now let's define the query-alarm-clock command:: + + ## + # @query-alarm-clock + # + # Return information about QEMU's alarm clock. + # + # Returns a @QemuAlarmClock instance describing the alarm clock method + # being currently used by QEMU (this is usually set by the '-clock' + # command-line option). + # + # Since: 1.0 + ## + { 'command': 'query-alarm-clock', 'returns': 'QemuAlarmClock' } + +Notice the "returns" keyword. As its name suggests, it's used to define the +data returned by a command. + +It's time to implement the qmp_query_alarm_clock() function, you can put it +in the qemu-timer.c file:: + + QemuAlarmClock *qmp_query_alarm_clock(Error **errp) + { + QemuAlarmClock *clock; + int64_t deadline; + + clock = g_malloc0(sizeof(*clock)); + + deadline = qemu_next_alarm_deadline(); + if (deadline > 0) { + clock->has_next_deadline = true; + clock->next_deadline = deadline; + } + clock->clock_name = g_strdup(alarm_timer->name); + + return clock; + } + +There are a number of things to be noticed: + +1. The QemuAlarmClock type is automatically generated by the QAPI framework, + its members correspond to the type's specification in the schema file +2. 
As specified in the schema file, the function returns a QemuAlarmClock + instance and takes no arguments (besides the "errp" one, which is mandatory + for all QMP functions) +3. The "clock" variable (which will point to our QAPI type instance) is + allocated by the regular g_malloc0() function. Note that we chose to + initialize the memory to zero. This is recommended for all QAPI types, as + it helps avoiding bad surprises (specially with booleans) +4. Remember that "next_deadline" is optional? All optional members have a + 'has_TYPE_NAME' member that should be properly set by the implementation, + as shown above +5. Even static strings, such as "alarm_timer->name", should be dynamically + allocated by the implementation. This is so because the QAPI also generates + a function to free its types and it cannot distinguish between dynamically + or statically allocated strings +6. You have to include "qapi/qapi-commands-misc.h" in qemu-timer.c + +Time to test the new command. Build qemu, run it as described in the "Testing" +section and try this:: + + { "execute": "query-alarm-clock" } + { + "return": { + "next-deadline": 2368219, + "clock-name": "dynticks" + } + } + + +The HMP command +~~~~~~~~~~~~~~~ + +Here's the HMP counterpart of the query-alarm-clock command:: + + void hmp_info_alarm_clock(Monitor *mon) + { + QemuAlarmClock *clock; + Error *err = NULL; + + clock = qmp_query_alarm_clock(&err); + if (hmp_handle_error(mon, err)) { + return; + } + + monitor_printf(mon, "Alarm clock method in use: '%s'\n", clock->clock_name); + if (clock->has_next_deadline) { + monitor_printf(mon, "Next alarm will fire in %" PRId64 " nanoseconds\n", + clock->next_deadline); + } + + qapi_free_QemuAlarmClock(clock); + } + +It's important to notice that hmp_info_alarm_clock() calls +qapi_free_QemuAlarmClock() to free the data returned by qmp_query_alarm_clock(). +For user defined types, the QAPI will generate a qapi_free_QAPI_TYPE_NAME() +function and that's what you have to use to free the types you define and +qapi_free_QAPI_TYPE_NAMEList() for list types (explained in the next section). +If the QMP call returns a string, then you should g_free() to free it. + +Also note that hmp_info_alarm_clock() performs error handling. That's not +strictly required if you're sure the QMP function doesn't return errors, but +it's good practice to always check for errors. + +Another important detail is that HMP's "info" commands don't go into the +hmp-commands.hx. Instead, they go into the info_cmds[] table, which is defined +in the monitor/misc.c file. The entry for the "info alarmclock" follows:: + + { + .name = "alarmclock", + .args_type = "", + .params = "", + .help = "show information about the alarm clock", + .cmd = hmp_info_alarm_clock, + }, + +To test this, run qemu and type "info alarmclock" in the user monitor. + + +Returning Lists +~~~~~~~~~~~~~~~ + +For this example, we're going to return all available methods for the timer +alarm, which is pretty much what the command-line option "-clock ?" does, +except that we're also going to inform which method is in use. + +This first step is to define a new type:: + + ## + # @TimerAlarmMethod + # + # Timer alarm method information. + # + # @method-name: The method's name. 
+ # + # @current: true if this alarm method is currently in use, false otherwise + # + # Since: 1.0 + ## + { 'type': 'TimerAlarmMethod', + 'data': { 'method-name': 'str', 'current': 'bool' } } + +The command will be called "query-alarm-methods", here is its schema +specification:: + + ## + # @query-alarm-methods + # + # Returns information about available alarm methods. + # + # Returns: a list of @TimerAlarmMethod for each method + # + # Since: 1.0 + ## + { 'command': 'query-alarm-methods', 'returns': ['TimerAlarmMethod'] } + +Notice the syntax for returning lists "'returns': ['TimerAlarmMethod']", this +should be read as "returns a list of TimerAlarmMethod instances". + +The C implementation follows:: + + TimerAlarmMethodList *qmp_query_alarm_methods(Error **errp) + { + TimerAlarmMethodList *method_list = NULL; + const struct qemu_alarm_timer *p; + bool current = true; + + for (p = alarm_timers; p->name; p++) { + TimerAlarmMethod *value = g_malloc0(sizeof(*value)); + value->method_name = g_strdup(p->name); + value->current = current; + QAPI_LIST_PREPEND(method_list, value); + current = false; + } + + return method_list; + } + +The most important difference from the previous examples is the +TimerAlarmMethodList type, which is automatically generated by the QAPI from +the TimerAlarmMethod type. + +Each list node is represented by a TimerAlarmMethodList instance; +QAPI_LIST_PREPEND() allocates it and links it into the list. We also have to +allocate the node's contents, which is +stored in its "value" member. In our example, the "value" member is a pointer +to a TimerAlarmMethod instance. + +Notice that the "current" variable is used as "true" only in the first +iteration of the loop. That's because the alarm timer method in use is the +first element of the alarm_timers array. Also notice that QAPI lists are handled +by hand and we return the head of the list. + +Now build qemu, run it as explained in the "Testing" section and try our new +command:: + + { "execute": "query-alarm-methods" } + { + "return": [ + { + "current": false, + "method-name": "unix" + }, + { + "current": true, + "method-name": "dynticks" + } + ] + } + +The HMP counterpart is a bit more complex than previous examples because it +has to traverse the list; it's shown below for reference:: + + void hmp_info_alarm_methods(Monitor *mon) + { + TimerAlarmMethodList *method_list, *method; + Error *err = NULL; + + method_list = qmp_query_alarm_methods(&err); + if (hmp_handle_error(mon, err)) { + return; + } + + for (method = method_list; method; method = method->next) { + monitor_printf(mon, "%c %s\n", method->value->current ? '*' : ' ', + method->value->method_name); + } + + qapi_free_TimerAlarmMethodList(method_list); + } + +Writing a debugging aid returning unstructured text +--------------------------------------------------- + +As discussed in section `Modelling data in QAPI`_, it is required that +commands expecting machine usage be using fine-grained QAPI data types. +The exception to this rule applies when the command is solely intended +as a debugging aid and allows for returning unstructured text. This is +commonly needed for query commands that report aspects of QEMU's +internal state that are useful to human operators. + +In this example we will consider a simplified variant of the HMP +command ``info roms``. Following the earlier rules, this command will +need to live under the ``x-`` name prefix, so its QMP implementation +will be called ``x-query-roms``.
It will have no parameters and will +return a single text string:: + + { 'struct': 'HumanReadableText', + 'data': { 'human-readable-text': 'str' } } + + { 'command': 'x-query-roms', + 'returns': 'HumanReadableText' } + +The ``HumanReadableText`` struct is intended to be used for all +commands under the ``x-`` name prefix that are returning unstructured +text targeted at humans. It should never be used for commands outside +the ``x-`` name prefix, as those should be using structured QAPI types. + +Implementing the QMP command +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The QMP implementation will typically involve creating a ``GString`` +object and printing formatted data into it:: + + HumanReadableText *qmp_x_query_roms(Error **errp) + { + g_autoptr(GString) buf = g_string_new(""); + Rom *rom; + + QTAILQ_FOREACH(rom, &roms, next) { + g_string_append_printf(buf, + "%s size=0x%06zx name=\"%s\"\n", + memory_region_name(rom->mr), + rom->romsize, + rom->name); + } + + return human_readable_text_from_str(buf); + } + + +Implementing the HMP command +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Now that the QMP command is in place, we can also make it available in +the human monitor (HMP) as shown in previous examples. The HMP +implementations will all look fairly similar, as all they need do is +invoke the QMP command and then print the resulting text or error +message. Here's the implementation of the "info roms" HMP command:: + + void hmp_info_roms(Monitor *mon, const QDict *qdict) + { + Error *err = NULL; + g_autoptr(HumanReadableText) info = qmp_x_query_roms(&err); + + if (hmp_handle_error(mon, err)) { + return; + } + monitor_printf(mon, "%s", info->human_readable_text); + } + +Also, you have to add the function's prototype to the hmp.h file. + +There's one last step to actually make the command available to +monitor users, we should add it to the hmp-commands-info.hx file:: + + { + .name = "roms", + .args_type = "", + .params = "", + .help = "show roms", + .cmd = hmp_info_roms, + }, + +The case of writing an HMP info handler that calls a no-parameter QMP query +command is quite common. To simplify the implementation there is a general +purpose HMP info handler for this scenario. All that is required to expose +a no-parameter QMP query command via HMP is to declare it using the +'.cmd_info_hrt' field to point to the QMP handler, and leave the '.cmd' +field NULL:: + + { + .name = "roms", + .args_type = "", + .params = "", + .help = "show roms", + .cmd_info_hrt = qmp_x_query_roms, + }, diff --git a/docs/devel/writing-qmp-commands.txt b/docs/devel/writing-qmp-commands.txt deleted file mode 100644 index b1e31d56c0f..00000000000 --- a/docs/devel/writing-qmp-commands.txt +++ /dev/null @@ -1,597 +0,0 @@ -= How to write QMP commands using the QAPI framework = - -This document is a step-by-step guide on how to write new QMP commands using -the QAPI framework. It also shows how to implement new style HMP commands. - -This document doesn't discuss QMP protocol level details, nor does it dive -into the QAPI framework implementation. - -For an in-depth introduction to the QAPI framework, please refer to -docs/devel/qapi-code-gen.txt. For documentation about the QMP protocol, -start with docs/interop/qmp-intro.txt. - -== Overview == - -Generally speaking, the following steps should be taken in order to write a -new QMP command. - -1. Define the command and any types it needs in the appropriate QAPI - schema module. - -2. Write the QMP command itself, which is a regular C function.
Preferably, - the command should be exported by some QEMU subsystem. But it can also be - added to the monitor/qmp-cmds.c file - -3. At this point the command can be tested under the QMP protocol - -4. Write the HMP command equivalent. This is not required and should only be - done if it does make sense to have the functionality in HMP. The HMP command - is implemented in terms of the QMP command - -The following sections will demonstrate each of the steps above. We will start -very simple and get more complex as we progress. - -=== Testing === - -For all the examples in the next sections, the test setup is the same and is -shown here. - -First, QEMU should be started like this: - -# qemu-system-TARGET [...] \ - -chardev socket,id=qmp,port=4444,host=localhost,server=on \ - -mon chardev=qmp,mode=control,pretty=on - -Then, in a different terminal: - -$ telnet localhost 4444 -Trying 127.0.0.1... -Connected to localhost. -Escape character is '^]'. -{ - "QMP": { - "version": { - "qemu": { - "micro": 50, - "minor": 15, - "major": 0 - }, - "package": "" - }, - "capabilities": [ - ] - } -} - -The above output is the QMP server saying you're connected. The server is -actually in capabilities negotiation mode. To enter in command mode type: - -{ "execute": "qmp_capabilities" } - -Then the server should respond: - -{ - "return": { - } -} - -Which is QMP's way of saying "the latest command executed OK and didn't return -any data". Now you're ready to enter the QMP example commands as explained in -the following sections. - -== Writing a command that doesn't return data == - -That's the most simple QMP command that can be written. Usually, this kind of -command carries some meaningful action in QEMU but here it will just print -"Hello, world" to the standard output. - -Our command will be called "hello-world". It takes no arguments, nor does it -return any data. - -The first step is defining the command in the appropriate QAPI schema -module. We pick module qapi/misc.json, and add the following line at -the bottom: - -{ 'command': 'hello-world' } - -The "command" keyword defines a new QMP command. It's an JSON object. All -schema entries are JSON objects. The line above will instruct the QAPI to -generate any prototypes and the necessary code to marshal and unmarshal -protocol data. - -The next step is to write the "hello-world" implementation. As explained -earlier, it's preferable for commands to live in QEMU subsystems. But -"hello-world" doesn't pertain to any, so we put its implementation in -monitor/qmp-cmds.c: - -void qmp_hello_world(Error **errp) -{ - printf("Hello, world!\n"); -} - -There are a few things to be noticed: - -1. QMP command implementation functions must be prefixed with "qmp_" -2. qmp_hello_world() returns void, this is in accordance with the fact that the - command doesn't return any data -3. It takes an "Error **" argument. This is required. Later we will see how to - return errors and take additional arguments. The Error argument should not - be touched if the command doesn't return errors -4. We won't add the function's prototype. That's automatically done by the QAPI -5. Printing to the terminal is discouraged for QMP commands, we do it here - because it's the easiest way to demonstrate a QMP command - -You're done. Now build qemu, run it as suggested in the "Testing" section, -and then type the following QMP command: - -{ "execute": "hello-world" } - -Then check the terminal running qemu and look for the "Hello, world" string. If -you don't see it then something went wrong. 
- -=== Arguments === - -Let's add an argument called "message" to our "hello-world" command. The new -argument will contain the string to be printed to stdout. It's an optional -argument, if it's not present we print our default "Hello, World" string. - -The first change we have to do is to modify the command specification in the -schema file to the following: - -{ 'command': 'hello-world', 'data': { '*message': 'str' } } - -Notice the new 'data' member in the schema. It's an JSON object whose each -element is an argument to the command in question. Also notice the asterisk, -it's used to mark the argument optional (that means that you shouldn't use it -for mandatory arguments). Finally, 'str' is the argument's type, which -stands for "string". The QAPI also supports integers, booleans, enumerations -and user defined types. - -Now, let's update our C implementation in monitor/qmp-cmds.c: - -void qmp_hello_world(bool has_message, const char *message, Error **errp) -{ - if (has_message) { - printf("%s\n", message); - } else { - printf("Hello, world\n"); - } -} - -There are two important details to be noticed: - -1. All optional arguments are accompanied by a 'has_' boolean, which is set - if the optional argument is present or false otherwise -2. The C implementation signature must follow the schema's argument ordering, - which is defined by the "data" member - -Time to test our new version of the "hello-world" command. Build qemu, run it as -described in the "Testing" section and then send two commands: - -{ "execute": "hello-world" } -{ - "return": { - } -} - -{ "execute": "hello-world", "arguments": { "message": "We love qemu" } } -{ - "return": { - } -} - -You should see "Hello, world" and "We love qemu" in the terminal running qemu, -if you don't see these strings, then something went wrong. - -=== Errors === - -QMP commands should use the error interface exported by the error.h header -file. Basically, most errors are set by calling the error_setg() function. - -Let's say we don't accept the string "message" to contain the word "love". If -it does contain it, we want the "hello-world" command to return an error: - -void qmp_hello_world(bool has_message, const char *message, Error **errp) -{ - if (has_message) { - if (strstr(message, "love")) { - error_setg(errp, "the word 'love' is not allowed"); - return; - } - printf("%s\n", message); - } else { - printf("Hello, world\n"); - } -} - -The first argument to the error_setg() function is the Error pointer -to pointer, which is passed to all QMP functions. The next argument is a human -description of the error, this is a free-form printf-like string. - -Let's test the example above. Build qemu, run it as defined in the "Testing" -section, and then issue the following command: - -{ "execute": "hello-world", "arguments": { "message": "all you need is love" } } - -The QMP server's response should be: - -{ - "error": { - "class": "GenericError", - "desc": "the word 'love' is not allowed" - } -} - -Note that error_setg() produces a "GenericError" class. In general, -all QMP errors should have that error class. There are two exceptions -to this rule: - - 1. To support a management application's need to recognize a specific - error for special handling - - 2. Backward compatibility - -If the failure you want to report falls into one of the two cases above, -use error_set() with a second argument of an ErrorClass value. 
- -=== Command Documentation === - -There's only one step missing to make "hello-world"'s implementation complete, -and that's its documentation in the schema file. - -There are many examples of such documentation in the schema file already, but -here goes "hello-world"'s new entry for qapi/misc.json: - -## -# @hello-world: -# -# Print a client provided string to the standard output stream. -# -# @message: string to be printed -# -# Returns: Nothing on success. -# -# Notes: if @message is not provided, the "Hello, world" string will -# be printed instead -# -# Since: -## -{ 'command': 'hello-world', 'data': { '*message': 'str' } } - -Please, note that the "Returns" clause is optional if a command doesn't return -any data nor any errors. - -=== Implementing the HMP command === - -Now that the QMP command is in place, we can also make it available in the human -monitor (HMP). - -With the introduction of the QAPI, HMP commands make QMP calls. Most of the -time HMP commands are simple wrappers. All HMP commands implementation exist in -the monitor/hmp-cmds.c file. - -Here's the implementation of the "hello-world" HMP command: - -void hmp_hello_world(Monitor *mon, const QDict *qdict) -{ - const char *message = qdict_get_try_str(qdict, "message"); - Error *err = NULL; - - qmp_hello_world(!!message, message, &err); - if (err) { - monitor_printf(mon, "%s\n", error_get_pretty(err)); - error_free(err); - return; - } -} - -Also, you have to add the function's prototype to the hmp.h file. - -There are three important points to be noticed: - -1. The "mon" and "qdict" arguments are mandatory for all HMP functions. The - former is the monitor object. The latter is how the monitor passes - arguments entered by the user to the command implementation -2. hmp_hello_world() performs error checking. In this example we just print - the error description to the user, but we could do more, like taking - different actions depending on the error qmp_hello_world() returns -3. The "err" variable must be initialized to NULL before performing the - QMP call - -There's one last step to actually make the command available to monitor users, -we should add it to the hmp-commands.hx file: - - { - .name = "hello-world", - .args_type = "message:s?", - .params = "hello-world [message]", - .help = "Print message to the standard output", - .cmd = hmp_hello_world, - }, - -STEXI -@item hello_world @var{message} -@findex hello_world -Print message to the standard output -ETEXI - -To test this you have to open a user monitor and issue the "hello-world" -command. It might be instructive to check the command's documentation with -HMP's "help" command. - -Please, check the "-monitor" command-line option to know how to open a user -monitor. - -== Writing a command that returns data == - -A QMP command is capable of returning any data the QAPI supports like integers, -strings, booleans, enumerations and user defined types. - -In this section we will focus on user defined types. Please, check the QAPI -documentation for information about the other types. - -=== User Defined Types === - -FIXME This example needs to be redone after commit 6d32717 - -For this example we will write the query-alarm-clock command, which returns -information about QEMU's timer alarm. For more information about it, please -check the "-clock" command-line option. - -We want to return two pieces of information. The first one is the alarm clock's -name. The second one is when the next alarm will fire. 
The former information is -returned as a string, the latter is an integer in nanoseconds (which is not -very useful in practice, as the timer has probably already fired when the -information reaches the client). - -The best way to return that data is to create a new QAPI type, as shown below: - -## -# @QemuAlarmClock -# -# QEMU alarm clock information. -# -# @clock-name: The alarm clock method's name. -# -# @next-deadline: The time (in nanoseconds) the next alarm will fire. -# -# Since: 1.0 -## -{ 'type': 'QemuAlarmClock', - 'data': { 'clock-name': 'str', '*next-deadline': 'int' } } - -The "type" keyword defines a new QAPI type. Its "data" member contains the -type's members. In this example our members are the "clock-name" and the -"next-deadline" one, which is optional. - -Now let's define the query-alarm-clock command: - -## -# @query-alarm-clock -# -# Return information about QEMU's alarm clock. -# -# Returns a @QemuAlarmClock instance describing the alarm clock method -# being currently used by QEMU (this is usually set by the '-clock' -# command-line option). -# -# Since: 1.0 -## -{ 'command': 'query-alarm-clock', 'returns': 'QemuAlarmClock' } - -Notice the "returns" keyword. As its name suggests, it's used to define the -data returned by a command. - -It's time to implement the qmp_query_alarm_clock() function, you can put it -in the qemu-timer.c file: - -QemuAlarmClock *qmp_query_alarm_clock(Error **errp) -{ - QemuAlarmClock *clock; - int64_t deadline; - - clock = g_malloc0(sizeof(*clock)); - - deadline = qemu_next_alarm_deadline(); - if (deadline > 0) { - clock->has_next_deadline = true; - clock->next_deadline = deadline; - } - clock->clock_name = g_strdup(alarm_timer->name); - - return clock; -} - -There are a number of things to be noticed: - -1. The QemuAlarmClock type is automatically generated by the QAPI framework, - its members correspond to the type's specification in the schema file -2. As specified in the schema file, the function returns a QemuAlarmClock - instance and takes no arguments (besides the "errp" one, which is mandatory - for all QMP functions) -3. The "clock" variable (which will point to our QAPI type instance) is - allocated by the regular g_malloc0() function. Note that we chose to - initialize the memory to zero. This is recommended for all QAPI types, as - it helps avoiding bad surprises (specially with booleans) -4. Remember that "next_deadline" is optional? All optional members have a - 'has_TYPE_NAME' member that should be properly set by the implementation, - as shown above -5. Even static strings, such as "alarm_timer->name", should be dynamically - allocated by the implementation. This is so because the QAPI also generates - a function to free its types and it cannot distinguish between dynamically - or statically allocated strings -6. You have to include "qapi/qapi-commands-misc.h" in qemu-timer.c - -Time to test the new command. 
Build qemu, run it as described in the "Testing" -section and try this: - -{ "execute": "query-alarm-clock" } -{ - "return": { - "next-deadline": 2368219, - "clock-name": "dynticks" - } -} - -==== The HMP command ==== - -Here's the HMP counterpart of the query-alarm-clock command: - -void hmp_info_alarm_clock(Monitor *mon) -{ - QemuAlarmClock *clock; - Error *err = NULL; - - clock = qmp_query_alarm_clock(&err); - if (err) { - monitor_printf(mon, "Could not query alarm clock information\n"); - error_free(err); - return; - } - - monitor_printf(mon, "Alarm clock method in use: '%s'\n", clock->clock_name); - if (clock->has_next_deadline) { - monitor_printf(mon, "Next alarm will fire in %" PRId64 " nanoseconds\n", - clock->next_deadline); - } - - qapi_free_QemuAlarmClock(clock); -} - -It's important to notice that hmp_info_alarm_clock() calls -qapi_free_QemuAlarmClock() to free the data returned by qmp_query_alarm_clock(). -For user defined types, the QAPI will generate a qapi_free_QAPI_TYPE_NAME() -function and that's what you have to use to free the types you define and -qapi_free_QAPI_TYPE_NAMEList() for list types (explained in the next section). -If the QMP call returns a string, then you should g_free() to free it. - -Also note that hmp_info_alarm_clock() performs error handling. That's not -strictly required if you're sure the QMP function doesn't return errors, but -it's good practice to always check for errors. - -Another important detail is that HMP's "info" commands don't go into the -hmp-commands.hx. Instead, they go into the info_cmds[] table, which is defined -in the monitor/misc.c file. The entry for the "info alarmclock" follows: - - { - .name = "alarmclock", - .args_type = "", - .params = "", - .help = "show information about the alarm clock", - .cmd = hmp_info_alarm_clock, - }, - -To test this, run qemu and type "info alarmclock" in the user monitor. - -=== Returning Lists === - -For this example, we're going to return all available methods for the timer -alarm, which is pretty much what the command-line option "-clock ?" does, -except that we're also going to inform which method is in use. - -This first step is to define a new type: - -## -# @TimerAlarmMethod -# -# Timer alarm method information. -# -# @method-name: The method's name. -# -# @current: true if this alarm method is currently in use, false otherwise -# -# Since: 1.0 -## -{ 'type': 'TimerAlarmMethod', - 'data': { 'method-name': 'str', 'current': 'bool' } } - -The command will be called "query-alarm-methods", here is its schema -specification: - -## -# @query-alarm-methods -# -# Returns information about available alarm methods. -# -# Returns: a list of @TimerAlarmMethod for each method -# -# Since: 1.0 -## -{ 'command': 'query-alarm-methods', 'returns': ['TimerAlarmMethod'] } - -Notice the syntax for returning lists "'returns': ['TimerAlarmMethod']", this -should be read as "returns a list of TimerAlarmMethod instances". 
- -The C implementation follows: - -TimerAlarmMethodList *qmp_query_alarm_methods(Error **errp) -{ - TimerAlarmMethodList *method_list = NULL; - const struct qemu_alarm_timer *p; - bool current = true; - - for (p = alarm_timers; p->name; p++) { - TimerAlarmMethod *value = g_malloc0(*value); - value->method_name = g_strdup(p->name); - value->current = current; - QAPI_LIST_PREPEND(method_list, value); - current = false; - } - - return method_list; -} - -The most important difference from the previous examples is the -TimerAlarmMethodList type, which is automatically generated by the QAPI from -the TimerAlarmMethod type. - -Each list node is represented by a TimerAlarmMethodList instance. We have to -allocate it, and that's done inside the for loop: the "info" pointer points to -an allocated node. We also have to allocate the node's contents, which is -stored in its "value" member. In our example, the "value" member is a pointer -to an TimerAlarmMethod instance. - -Notice that the "current" variable is used as "true" only in the first -iteration of the loop. That's because the alarm timer method in use is the -first element of the alarm_timers array. Also notice that QAPI lists are handled -by hand and we return the head of the list. - -Now Build qemu, run it as explained in the "Testing" section and try our new -command: - -{ "execute": "query-alarm-methods" } -{ - "return": [ - { - "current": false, - "method-name": "unix" - }, - { - "current": true, - "method-name": "dynticks" - } - ] -} - -The HMP counterpart is a bit more complex than previous examples because it -has to traverse the list, it's shown below for reference: - -void hmp_info_alarm_methods(Monitor *mon) -{ - TimerAlarmMethodList *method_list, *method; - Error *err = NULL; - - method_list = qmp_query_alarm_methods(&err); - if (err) { - monitor_printf(mon, "Could not query alarm methods\n"); - error_free(err); - return; - } - - for (method = method_list; method; method = method->next) { - monitor_printf(mon, "%c %s\n", method->value->current ? '*' : ' ', - method->value->method_name); - } - - qapi_free_TimerAlarmMethodList(method_list); -} diff --git a/docs/hyperv.txt b/docs/hyperv.txt index e53c581f458..0417c183a3b 100644 --- a/docs/hyperv.txt +++ b/docs/hyperv.txt @@ -170,7 +170,7 @@ Recommended: hv-frequencies 3.16. hv-evmcs =============== The enlightenment is nested specific, it targets Hyper-V on KVM guests. When -enabled, it provides Enlightened VMCS feature to the guest. The feature +enabled, it provides Enlightened VMCS version 1 feature to the guest. The feature implements paravirtualized protocol between L0 (KVM) and L1 (Hyper-V) hypervisors making L2 exits to the hypervisor faster. The feature is Intel-only. Note: some virtualization features (e.g. Posted Interrupts) are disabled when @@ -189,7 +189,15 @@ enabled. Requires: hv-vpindex, hv-synic, hv-time, hv-stimer -3.17. hv-no-nonarch-coresharing=on/off/auto +3.18. hv-avic (hv-apicv) +======================= +The enlightenment allows to use Hyper-V SynIC with hardware APICv/AVIC enabled. +Normally, Hyper-V SynIC disables these hardware feature and suggests the guest +to use paravirtualized AutoEOI feature. +Note: enabling this feature on old hardware (without APICv/AVIC support) may +have negative effect on guest's performance. + +3.19. hv-no-nonarch-coresharing=on/off/auto =========================================== This enlightenment tells guest OS that virtual processors will never share a physical core unless they are reported as sibling SMT threads. 
This information @@ -203,17 +211,45 @@ When the option is set to 'on' QEMU will always enable the feature, regardless of host setup. To keep guests secure, this can only be used in conjunction with exposing correct vCPU topology and vCPU pinning. -4. Development features -======================== +3.20. hv-version-id-{build,major,minor,spack,sbranch,snumber} +============================================================= +This changes Hyper-V version identification in CPUID 0x40000002.EAX-EDX from the +default (WS2016). +- hv-version-id-build sets 'Build Number' (32 bits) +- hv-version-id-major sets 'Major Version' (16 bits) +- hv-version-id-minor sets 'Minor Version' (16 bits) +- hv-version-id-spack sets 'Service Pack' (32 bits) +- hv-version-id-sbranch sets 'Service Branch' (8 bits) +- hv-version-id-snumber sets 'Service Number' (24 bits) + +Note: hv-version-id-* are not enlightenments and thus don't enable Hyper-V +identification when specified without any other enlightenments. + +4. Supplementary features +========================= + +4.1. hv-passthrough +=================== In some cases (e.g. during development) it may make sense to use QEMU in 'pass-through' mode and give Windows guests all enlightenments currently supported by KVM. This pass-through mode is enabled by "hv-passthrough" CPU flag. -Note: enabling this flag effectively prevents migration as supported features -may differ between target and destination. - - -4. Useful links +Note: "hv-passthrough" flag only enables enlightenments which are known to QEMU +(have corresponding "hv-*" flag) and copies "hv-spinlocks="/"hv-vendor-id=" +values from KVM to QEMU. "hv-passthrough" overrides all other "hv-*" settings on +the command line. Also, enabling this flag effectively prevents migration as the +list of enabled enlightenments may differ between target and destination hosts. + +4.2. hv-enforce-cpuid +===================== +By default, KVM allows the guest to use all currently supported Hyper-V +enlightenments when Hyper-V CPUID interface was exposed, regardless of if +some features were not announced in guest visible CPUIDs. 'hv-enforce-cpuid' +feature alters this behavior and only allows the guest to use exposed Hyper-V +enlightenments. + + +5. Useful links ================ Hyper-V Top Level Functional specification and other information: https://github.com/MicrosoftDocs/Virtualization-Documentation diff --git a/docs/image-fuzzer.txt b/docs/image-fuzzer.txt index 3e23ebec331..279cc8c807f 100644 --- a/docs/image-fuzzer.txt +++ b/docs/image-fuzzer.txt @@ -51,10 +51,10 @@ assumes that core dumps will be generated in the current working directory. For comprehensive test results, please, set up your test environment properly. -Paths to binaries under test (SUTs) qemu-img and qemu-io are retrieved from -environment variables. If the environment check fails the runner will +Paths to binaries under test (SUTs) ``qemu-img`` and ``qemu-io`` are retrieved +from environment variables. If the environment check fails the runner will use SUTs installed in system paths. -qemu-img is required for creation of backing files, so it's mandatory to set +``qemu-img`` is required for creation of backing files, so it's mandatory to set the related environment variable if it's not installed in the system path. For details about environment variables see qemu-iotests/check. 
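Returning to the hv-version-id-* options described in the hyperv.txt change
above: the following is a hedged sketch of how those six values are packed
into CPUID leaf 0x40000002. The field positions are an assumption drawn from
the Hyper-V Top Level Functional Specification, not quoted from QEMU source:

    #include <stdint.h>

    /* Assumed packing of CPUID 0x40000002.EAX-EDX, per the Hyper-V TLFS. */
    struct hv_version_id {
        uint32_t build;     /* hv-version-id-build,   32 bits */
        uint16_t major;     /* hv-version-id-major,   16 bits */
        uint16_t minor;     /* hv-version-id-minor,   16 bits */
        uint32_t spack;     /* hv-version-id-spack,   32 bits */
        uint8_t  sbranch;   /* hv-version-id-sbranch,  8 bits */
        uint32_t snumber;   /* hv-version-id-snumber, 24 bits */
    };

    static void hv_version_id_to_cpuid(const struct hv_version_id *v,
                                       uint32_t *eax, uint32_t *ebx,
                                       uint32_t *ecx, uint32_t *edx)
    {
        *eax = v->build;                              /* Build Number        */
        *ebx = ((uint32_t)v->major << 16) | v->minor; /* Major.Minor Version */
        *ecx = v->spack;                              /* Service Pack        */
        *edx = ((uint32_t)v->sbranch << 24) |         /* Service Branch      */
               (v->snumber & 0x00ffffff);             /* Service Number      */
    }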
diff --git a/docs/index.rst b/docs/index.rst index 763e3d0426e..0b9ee9901d9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,6 +3,7 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. +================================ Welcome to QEMU's documentation! ================================ @@ -10,6 +11,7 @@ Welcome to QEMU's documentation! :maxdepth: 2 :caption: Contents: + about/index system/index user/index tools/index diff --git a/docs/interop/_templates/editpage.html b/docs/interop/_templates/editpage.html deleted file mode 100644 index 215e5626812..00000000000 --- a/docs/interop/_templates/editpage.html +++ /dev/null @@ -1,5 +0,0 @@ -
diff --git a/docs/interop/barrier.rst b/docs/interop/barrier.rst new file mode 100644 index 00000000000..055f2c1aef3 --- /dev/null +++ b/docs/interop/barrier.rst @@ -0,0 +1,426 @@ +Barrier client protocol +======================= + +QEMU's ``input-barrier`` device implements the client end of +the KVM (Keyboard-Video-Mouse) software +`Barrier `__. + +This document briefly describes the protocol as we implement it. + +Message format +-------------- + +Message format between the server and client is in two parts: + +#. the payload length, a 32bit integer in network endianness +#. the payload + +The payload starts with a 4byte string (without NUL) which is the +command. The first command between the server and the client +is the only command not encoded on 4 bytes ("Barrier"). +The remaining part of the payload is decoded according to the command. + +Protocol Description +-------------------- + +This comes from ``barrier/src/lib/barrier/protocol_types.h``. + +barrierCmdHello "Barrier" +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t minor, int16_t major }`` +Description: + Say hello to client + + ``minor`` = protocol major version number supported by server + + ``major`` = protocol minor version number supported by server + +barrierCmdHelloBack "Barrier" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + client ->server +Parameters: + ``{ int16_t minor, int16_t major, char *name}`` +Description: + Respond to hello from server + + ``minor`` = protocol major version number supported by client + + ``major`` = protocol minor version number supported by client + + ``name`` = client name + +barrierCmdDInfo "DINF" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + client ->server +Parameters: + ``{ int16_t x_origin, int16_t y_origin, int16_t width, int16_t height, int16_t x, int16_t y}`` +Description: + The client screen must send this message in response to the + barrierCmdQInfo message. It must also send this message when the + screen's resolution changes. In this case, the client screen should + ignore any barrierCmdDMouseMove messages until it receives a + barrierCmdCInfoAck in order to prevent attempts to move the mouse off + the new screen area. + +barrierCmdCNoop "CNOP" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + client -> server +Parameters: + None +Description: + No operation + +barrierCmdCClose "CBYE" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Close connection + +barrierCmdCEnter "CINN" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t x, int16_t y, int32_t seq, int16_t modifier }`` +Description: + Enter screen. + + ``x``, ``y`` = entering screen absolute coordinates + + ``seq`` = sequence number, which is used to order messages between + screens. the secondary screen must return this number + with some messages + + ``modifier`` = modifier key mask. this will have bits set for each + toggle modifier key that is activated on entry to the + screen. the secondary screen should adjust its toggle + modifiers to reflect that state. + +barrierCmdCLeave "COUT" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Leaving screen. the secondary screen should send clipboard data in + response to this message for those clipboards that it has grabbed + (i.e. has sent a barrierCmdCClipboard for and has not received a + barrierCmdCClipboard for with a greater sequence number) and that + were grabbed or have changed since the last leave. 
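As a side note on the message framing described at the top of this file (a
32-bit network-order length followed by the payload, which itself starts with
the command name), here is a hedged C sketch of reading one message;
read_exact() is a hypothetical helper that loops until the requested number
of bytes has arrived::

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include <arpa/inet.h>          /* ntohl() */

    /* Hypothetical helper: read exactly 'len' bytes, return 0 or -1. */
    extern int read_exact(int fd, void *buf, size_t len);

    static int read_barrier_message(int fd, char cmd[8],
                                    uint8_t **payload, uint32_t *len)
    {
        uint32_t netlen;
        size_t cmdlen;

        /* 1. payload length: a 32-bit integer in network byte order */
        if (read_exact(fd, &netlen, sizeof(netlen)) < 0) {
            return -1;
        }
        *len = ntohl(netlen);
        if (*len < 4) {
            return -1;              /* every message carries a command name */
        }

        /* 2. payload: starts with the command name; every command is
         *    4 bytes except the initial "Barrier" hello (7 bytes). */
        *payload = malloc(*len + 1);
        if (!*payload || read_exact(fd, *payload, *len) < 0) {
            free(*payload);
            return -1;
        }
        (*payload)[*len] = '\0';

        cmdlen = (*len >= 7 && memcmp(*payload, "Barrier", 7) == 0) ? 7 : 4;
        memcpy(cmd, *payload, cmdlen);
        cmd[cmdlen] = '\0';
        return 0;
    }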
+ +barrierCmdCClipboard "CCLP" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int8_t id, int32_t seq }`` +Description: + Grab clipboard. Sent by screen when some other app on that screen + grabs a clipboard. + + ``id`` = the clipboard identifier + + ``seq`` = sequence number. Client must use the sequence number passed in + the most recent barrierCmdCEnter. the server always sends 0. + +barrierCmdCScreenSaver "CSEC" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int8_t started }`` +Description: + Screensaver change. + + ``started`` = Screensaver on primary has started (1) or closed (0) + +barrierCmdCResetOptions "CROP" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Reset options. Client should reset all of its options to their + defaults. + +barrierCmdCInfoAck "CIAK" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Resolution change acknowledgment. Sent by server in response to a + client screen's barrierCmdDInfo. This is sent for every + barrierCmdDInfo, whether or not the server had sent a barrierCmdQInfo. + +barrierCmdCKeepAlive "CALV" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Keep connection alive. Sent by the server periodically to verify + that connections are still up and running. clients must reply in + kind on receipt. if the server gets an error sending the message or + does not receive a reply within a reasonable time then the server + disconnects the client. if the client doesn't receive these (or any + message) periodically then it should disconnect from the server. the + appropriate interval is defined by an option. + +barrierCmdDKeyDown "DKDN" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t keyid, int16_t modifier [,int16_t button] }`` +Description: + Key pressed. + + ``keyid`` = X11 key id + + ``modified`` = modified mask + + ``button`` = X11 Xkb keycode (optional) + +barrierCmdDKeyRepeat "DKRP" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t keyid, int16_t modifier, int16_t repeat [,int16_t button] }`` +Description: + Key auto-repeat. + + ``keyid`` = X11 key id + + ``modified`` = modified mask + + ``repeat`` = number of repeats + + ``button`` = X11 Xkb keycode (optional) + +barrierCmdDKeyUp "DKUP" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t keyid, int16_t modifier [,int16_t button] }`` +Description: + Key released. + + ``keyid`` = X11 key id + + ``modified`` = modified mask + + ``button`` = X11 Xkb keycode (optional) + +barrierCmdDMouseDown "DMDN" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int8_t button }`` +Description: + Mouse button pressed. + + ``button`` = button id + +barrierCmdDMouseUp "DMUP" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int8_t button }`` +Description: + Mouse button release. + + ``button`` = button id + +barrierCmdDMouseMove "DMMV" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t x, int16_t y }`` +Description: + Absolute mouse moved. + + ``x``, ``y`` = absolute screen coordinates + +barrierCmdDMouseRelMove "DMRM" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t x, int16_t y }`` +Description: + Relative mouse moved. 
+ + ``x``, ``y`` = r relative screen coordinates + +barrierCmdDMouseWheel "DMWM" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t x , int16_t y }`` or ``{ int16_t y }`` +Description: + Mouse scroll. The delta should be +120 for one tick forward (away + from the user) or right and -120 for one tick backward (toward the + user) or left. + + ``x`` = x delta + + ``y`` = y delta + +barrierCmdDClipboard "DCLP" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int8_t id, int32_t seq, int8_t mark, char *data }`` +Description: + Clipboard data. + + ``id`` = clipboard id + + ``seq`` = sequence number. The sequence number is 0 when sent by the + server. Client screens should use the/ sequence number from + the most recent barrierCmdCEnter. + +barrierCmdDSetOptions "DSOP" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int32 t nb, { int32_t id, int32_t val }[] }`` +Description: + Set options. Client should set the given option/value pairs. + + ``nb`` = numbers of ``{ id, val }`` entries + + ``id`` = option id + + ``val`` = option new value + +barrierCmdDFileTransfer "DFTR" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int8_t mark, char *content }`` +Description: + Transfer file data. + + * ``mark`` = 0 means the content followed is the file size + * 1 means the content followed is the chunk data + * 2 means the file transfer is finished + +barrierCmdDDragInfo "DDRG" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t nb, char *content }`` +Description: + Drag information. + + ``nb`` = number of dragging objects + + ``content`` = object's directory + +barrierCmdQInfo "QINF" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Query screen info + + Client should reply with a barrierCmdDInfo + +barrierCmdEIncompatible "EICV" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t nb, major *minor }`` +Description: + Incompatible version. + + ``major`` = major version + + ``minor`` = minor version + +barrierCmdEBusy "EBSY" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Name provided when connecting is already in use. + +barrierCmdEUnknown "EUNK" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Unknown client. Name provided when connecting is not in primary's + screen configuration map. + +barrierCmdEBad "EBAD" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Protocol violation. Server should disconnect after sending this + message. + diff --git a/docs/interop/bitmaps.rst b/docs/interop/bitmaps.rst index 059ad679294..1de46febdc5 100644 --- a/docs/interop/bitmaps.rst +++ b/docs/interop/bitmaps.rst @@ -539,12 +539,11 @@ other partial disk images on top of a base image to reconstruct a full backup from the point in time at which the incremental backup was issued. The "Push Model" here references the fact that QEMU is "pushing" the modified -blocks out to a destination. We will be using the `drive-backup -`_ and `blockdev-backup -`_ QMP commands to create both +blocks out to a destination. We will be using the `blockdev-backup +`_ QMP command to create both full and incremental backups. 
-Both of these commands are jobs, which have their own QMP API for querying and +The command is a background job, which has its own QMP API for querying and management documented in `Background jobs `_. @@ -557,6 +556,10 @@ create a new incremental backup chain attached to a drive. This example creates a new, full backup of "drive0" and accompanies it with a new, empty bitmap that records writes from this point in time forward. +The target can be created with the help of `blockdev-add +`_ or `blockdev-create +`_ command. + .. note:: Any new writes that happen after this command is issued, even while the backup job runs, will be written locally and not to the backup destination. These writes will be recorded in the bitmap @@ -576,12 +579,11 @@ new, empty bitmap that records writes from this point in time forward. } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive0", - "target": "/path/to/drive0.full.qcow2", - "sync": "full", - "format": "qcow2" + "target": "target0", + "sync": "full" } } ] @@ -664,12 +666,11 @@ use a transaction to reset the bitmap while making a new full backup: } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive0", - "target": "/path/to/drive0.new_full.qcow2", - "sync": "full", - "format": "qcow2" + "target": "target0", + "sync": "full" } } ] @@ -728,19 +729,35 @@ Example: First Incremental Backup $ qemu-img create -f qcow2 drive0.inc0.qcow2 \ -b drive0.full.qcow2 -F qcow2 +#. Add target block node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc0.qcow2" + } + } + } + + <- { "return": {} } + #. Issue an incremental backup command: .. code-block:: QMP -> { - "execute": "drive-backup", + "execute": "blockdev-backup", "arguments": { "device": "drive0", "bitmap": "bitmap0", - "target": "drive0.inc0.qcow2", - "format": "qcow2", - "sync": "incremental", - "mode": "existing" + "target": "target0", + "sync": "incremental" } } @@ -785,20 +802,36 @@ Example: Second Incremental Backup $ qemu-img create -f qcow2 drive0.inc1.qcow2 \ -b drive0.inc0.qcow2 -F qcow2 +#. Add target block node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc1.qcow2" + } + } + } + + <- { "return": {} } + #. Issue a new incremental backup command. The only difference here is that we have changed the target image below. .. code-block:: QMP -> { - "execute": "drive-backup", + "execute": "blockdev-backup", "arguments": { "device": "drive0", "bitmap": "bitmap0", - "target": "drive0.inc1.qcow2", - "format": "qcow2", - "sync": "incremental", - "mode": "existing" + "target": "target0", + "sync": "incremental" } } @@ -866,20 +899,36 @@ image: file for you, but you lose control over format options like compatibility and preallocation presets. +#. Add target block node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc2.qcow2" + } + } + } + + <- { "return": {} } + #. Issue a new incremental backup command. Apart from the new destination image, there is no difference from the last two examples. .. 
code-block:: QMP -> { - "execute": "drive-backup", + "execute": "blockdev-backup", "arguments": { "device": "drive0", "bitmap": "bitmap0", - "target": "drive0.inc2.qcow2", - "format": "qcow2", - "sync": "incremental", - "mode": "existing" + "target": "target0", + "sync": "incremental" } } @@ -930,6 +979,38 @@ point in time. $ qemu-img create -f qcow2 drive0.full.qcow2 64G $ qemu-img create -f qcow2 drive1.full.qcow2 64G +#. Add target block nodes: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.full.qcow2" + } + } + } + + <- { "return": {} } + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target1", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive1.full.qcow2" + } + } + } + + <- { "return": {} } + #. Create a full (anchor) backup for each drive, with accompanying bitmaps: .. code-block:: QMP @@ -953,21 +1034,19 @@ point in time. } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive0", - "target": "/path/to/drive0.full.qcow2", - "sync": "full", - "format": "qcow2" + "target": "target0", + "sync": "full" } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive1", - "target": "/path/to/drive1.full.qcow2", - "sync": "full", - "format": "qcow2" + "target": "target1", + "sync": "full" } } ] @@ -1016,6 +1095,38 @@ point in time. $ qemu-img create -f qcow2 drive1.inc0.qcow2 \ -b drive1.full.qcow2 -F qcow2 +#. Add target block nodes: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc0.qcow2" + } + } + } + + <- { "return": {} } + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target1", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive1.inc0.qcow2" + } + } + } + + <- { "return": {} } + #. Issue a multi-drive incremental push backup transaction: .. code-block:: QMP @@ -1025,25 +1136,21 @@ point in time. "arguments": { "actions": [ { - "type": "drive-backup", + "type": "blockev-backup", "data": { "device": "drive0", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive0.inc0.qcow2" + "target": "target0" } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive1", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive1.inc0.qcow2" + "target": "target1" } }, ] @@ -1119,19 +1226,35 @@ described above. This example demonstrates the single-job failure case: $ qemu-img create -f qcow2 drive0.inc0.qcow2 \ -b drive0.full.qcow2 -F qcow2 +#. Add target block node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc0.qcow2" + } + } + } + + <- { "return": {} } + #. Attempt to create an incremental backup via QMP: .. code-block:: QMP -> { - "execute": "drive-backup", + "execute": "blockdev-backup", "arguments": { "device": "drive0", "bitmap": "bitmap0", - "target": "drive0.inc0.qcow2", - "format": "qcow2", - "sync": "incremental", - "mode": "existing" + "target": "target0", + "sync": "incremental" } } @@ -1164,6 +1287,19 @@ described above. This example demonstrates the single-job failure case: "event": "BLOCK_JOB_COMPLETED" } +#. 
Remove target node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-del", + "arguments": { + "node-name": "target0", + } + } + + <- { "return": {} } + #. Delete the failed image, and re-create it. .. code:: bash @@ -1172,20 +1308,36 @@ described above. This example demonstrates the single-job failure case: $ qemu-img create -f qcow2 drive0.inc0.qcow2 \ -b drive0.full.qcow2 -F qcow2 +#. Add target block node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc0.qcow2" + } + } + } + + <- { "return": {} } + #. Retry the command after fixing the underlying problem, such as freeing up space on the backup volume: .. code-block:: QMP -> { - "execute": "drive-backup", + "execute": "blockdev-backup", "arguments": { "device": "drive0", "bitmap": "bitmap0", - "target": "drive0.inc0.qcow2", - "format": "qcow2", - "sync": "incremental", - "mode": "existing" + "target": "target0", + "sync": "incremental" } } @@ -1210,7 +1362,8 @@ described above. This example demonstrates the single-job failure case: Example: Partial Transactional Failures ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -QMP commands like `drive-backup `_ +QMP commands like `blockdev-backup +`_ conceptually only start a job, and so transactions containing these commands may succeed even if the job it created later fails. This might have surprising interactions with notions of how a "transaction" ought to behave. @@ -1240,25 +1393,21 @@ and one succeeds: "arguments": { "actions": [ { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive0", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive0.inc0.qcow2" + "target": "target0" } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive1", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive1.inc0.qcow2" + "target": "target1" } }] } @@ -1375,25 +1524,21 @@ applied: }, "actions": [ { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive0", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive0.inc0.qcow2" + "target": "target0" } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive1", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive1.inc0.qcow2" + "target": "target1" } }] } diff --git a/docs/interop/firmware.json b/docs/interop/firmware.json index 9d94ccafa9e..8d8b0be030e 100644 --- a/docs/interop/firmware.json +++ b/docs/interop/firmware.json @@ -115,6 +115,12 @@ # this feature are documented in # "docs/amd-memory-encryption.txt". # +# @amd-sev-es: The firmware supports running under AMD Secure Encrypted +# Virtualization - Encrypted State, as specified in the AMD64 +# Architecture Programmer's Manual. QEMU command line options +# related to this feature are documented in +# "docs/amd-memory-encryption.txt". 
+# # @enrolled-keys: The variable store (NVRAM) template associated with # the firmware binary has the UEFI Secure Boot # operational mode turned on, with certificates @@ -179,7 +185,7 @@ # Since: 3.0 ## { 'enum' : 'FirmwareFeature', - 'data' : [ 'acpi-s3', 'acpi-s4', 'amd-sev', 'enrolled-keys', + 'data' : [ 'acpi-s3', 'acpi-s4', 'amd-sev', 'amd-sev-es', 'enrolled-keys', 'requires-smm', 'secure-boot', 'verbose-dynamic', 'verbose-static' ] } @@ -504,6 +510,45 @@ # } # # { +# "description": "OVMF with SEV-ES support", +# "interface-types": [ +# "uefi" +# ], +# "mapping": { +# "device": "flash", +# "executable": { +# "filename": "/usr/share/OVMF/OVMF_CODE.fd", +# "format": "raw" +# }, +# "nvram-template": { +# "filename": "/usr/share/OVMF/OVMF_VARS.fd", +# "format": "raw" +# } +# }, +# "targets": [ +# { +# "architecture": "x86_64", +# "machines": [ +# "pc-q35-*" +# ] +# } +# ], +# "features": [ +# "acpi-s3", +# "amd-sev", +# "amd-sev-es", +# "verbose-dynamic" +# ], +# "tags": [ +# "-a X64", +# "-p OvmfPkg/OvmfPkgX64.dsc", +# "-t GCC48", +# "-b DEBUG", +# "-D FD_SIZE_4MB" +# ] +# } +# +# { # "description": "UEFI firmware for ARM64 virtual machines", # "interface-types": [ # "uefi" diff --git a/docs/interop/index.rst b/docs/interop/index.rst index 219a5e5fc50..47b9ed82bbc 100644 --- a/docs/interop/index.rst +++ b/docs/interop/index.rst @@ -1,17 +1,14 @@ -.. This is the top level page for the 'interop' manual. - - +------------------------------------------------ System Emulation Management and Interoperability -================================================ - -This manual contains documents and specifications that are useful -for making QEMU interoperate with other software. +------------------------------------------------ -Contents: +This section of the manual contains documents and specifications that +are useful for making QEMU interoperate with other software. .. toctree:: :maxdepth: 2 + barrier bitmaps dbus dbus-vmstate diff --git a/docs/interop/live-block-operations.rst b/docs/interop/live-block-operations.rst index 1073b930dce..39e62c99151 100644 --- a/docs/interop/live-block-operations.rst +++ b/docs/interop/live-block-operations.rst @@ -116,8 +116,8 @@ QEMU block layer supports. (3) ``drive-mirror`` (and ``blockdev-mirror``): Synchronize a running disk to another image. -(4) ``drive-backup`` (and ``blockdev-backup``): Point-in-time (live) copy - of a block device to a destination. +(4) ``blockdev-backup`` (and the deprecated ``drive-backup``): + Point-in-time (live) copy of a block device to a destination. .. _`Interacting with a QEMU instance`: @@ -127,13 +127,15 @@ Interacting with a QEMU instance To show some example invocations of command-line, we will use the following invocation of QEMU, with a QMP server running over UNIX -socket:: +socket: - $ ./qemu-system-x86_64 -display none -no-user-config \ - -M q35 -nodefaults -m 512 \ - -blockdev node-name=node-A,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./a.qcow2 \ - -device virtio-blk,drive=node-A,id=virtio0 \ - -monitor stdio -qmp unix:/tmp/qmp-sock,server=on,wait=off +.. parsed-literal:: + + $ |qemu_system| -display none -no-user-config -nodefaults \\ + -m 512 -blockdev \\ + node-name=node-A,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./a.qcow2 \\ + -device virtio-blk,drive=node-A,id=virtio0 \\ + -monitor stdio -qmp unix:/tmp/qmp-sock,server=on,wait=off The ``-blockdev`` command-line option, used above, is available from QEMU 2.9 onwards. 
In the above invocation, notice the ``node-name`` @@ -553,13 +555,14 @@ Currently, there are four different kinds: (3) ``none`` -- Synchronize only the new writes from this point on. - .. note:: In the case of ``drive-backup`` (or ``blockdev-backup``), - the behavior of ``none`` synchronization mode is different. - Normally, a ``backup`` job consists of two parts: Anything - that is overwritten by the guest is first copied out to - the backup, and in the background the whole image is - copied from start to end. With ``sync=none``, it's only - the first part. + .. note:: In the case of ``blockdev-backup`` (or deprecated + ``drive-backup``), the behavior of ``none`` + synchronization mode is different. Normally, a + ``backup`` job consists of two parts: Anything that is + overwritten by the guest is first copied out to the + backup, and in the background the whole image is copied + from start to end. With ``sync=none``, it's only the + first part. (4) ``incremental`` -- Synchronize content that is described by the dirty bitmap @@ -638,7 +641,7 @@ at this point: (QEMU) block-job-complete device=job0 In either of the above cases, if you once again run the -`query-block-jobs` command, there should not be any active block +``query-block-jobs`` command, there should not be any active block operation. Comparing 'commit' and 'mirror': In both then cases, the overlay images @@ -692,14 +695,16 @@ And start the destination QEMU (we already have the source QEMU running -- discussed in the section: `Interacting with a QEMU instance`_) instance, with the following invocation. (As noted earlier, for simplicity's sake, the destination QEMU is started on the same host, but -it could be located elsewhere):: +it could be located elsewhere): + +.. parsed-literal:: - $ ./qemu-system-x86_64 -display none -no-user-config \ - -M q35 -nodefaults -m 512 \ - -blockdev node-name=node-TargetDisk,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./target-disk.qcow2 \ - -device virtio-blk,drive=node-TargetDisk,id=virtio0 \ - -S -monitor stdio -qmp unix:./qmp-sock2,server=on,wait=off \ - -incoming tcp:localhost:6666 + $ |qemu_system| -display none -no-user-config -nodefaults \\ + -m 512 -blockdev \\ + node-name=node-TargetDisk,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./target-disk.qcow2 \\ + -device virtio-blk,drive=node-TargetDisk,id=virtio0 \\ + -S -monitor stdio -qmp unix:./qmp-sock2,server=on,wait=off \\ + -incoming tcp:localhost:6666 Given the disk image chain on source QEMU:: @@ -777,7 +782,7 @@ the content of image [D]. } (6) [On *destination* QEMU] Finally, resume the guest vCPUs by issuing the - QMP command `cont`:: + QMP command ``cont``:: (QEMU) cont { @@ -924,19 +929,22 @@ Shutdown the guest, by issuing the ``quit`` QMP command:: } -Live disk backup --- ``drive-backup`` and ``blockdev-backup`` -------------------------------------------------------------- +Live disk backup --- ``blockdev-backup`` and the deprecated``drive-backup`` +--------------------------------------------------------------------------- -The ``drive-backup`` (and its newer equivalent ``blockdev-backup``) allows +The ``blockdev-backup`` (and the deprecated ``drive-backup``) allows you to create a point-in-time snapshot. -In this case, the point-in-time is when you *start* the ``drive-backup`` -(or its newer equivalent ``blockdev-backup``) command. +In this case, the point-in-time is when you *start* the +``blockdev-backup`` (or deprecated ``drive-backup``) command. 
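Before walking through the QMP invocations below, here is a hedged sketch of
how an external program could drive such a point-in-time backup over the QMP
socket from the example invocation earlier in this document (/tmp/qmp-sock,
block node "node-A"). It is illustrative only: it assumes the target qcow2
file already exists, skips reply parsing and error handling, and real
management software would normally use a proper QMP client library instead::

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <sys/un.h>

    /* Send one QMP command and print whatever QEMU answers (sketch only). */
    static void qmp_send(int fd, const char *json)
    {
        char buf[4096];
        ssize_t n;

        write(fd, json, strlen(json));
        n = read(fd, buf, sizeof(buf) - 1);
        if (n > 0) {
            buf[n] = '\0';
            printf("%s\n", buf);
        }
    }

    int main(void)
    {
        struct sockaddr_un addr = { .sun_family = AF_UNIX };
        char greeting[4096];
        int fd = socket(AF_UNIX, SOCK_STREAM, 0);

        strncpy(addr.sun_path, "/tmp/qmp-sock", sizeof(addr.sun_path) - 1);
        if (fd < 0 || connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
            perror("qmp socket");
            return 1;
        }

        /* QEMU sends a greeting banner; capabilities must be negotiated
         * before any other command is accepted. */
        read(fd, greeting, sizeof(greeting) - 1);
        qmp_send(fd, "{ \"execute\": \"qmp_capabilities\" }");

        /* Add the (pre-created) backup target node, then start the
         * point-in-time copy of the guest device's block node. */
        qmp_send(fd, "{ \"execute\": \"blockdev-add\", \"arguments\": {"
                     " \"node-name\": \"target0\", \"driver\": \"qcow2\","
                     " \"file\": { \"driver\": \"file\","
                     " \"filename\": \"./target.qcow2\" } } }");
        qmp_send(fd, "{ \"execute\": \"blockdev-backup\", \"arguments\": {"
                     " \"device\": \"node-A\", \"target\": \"target0\","
                     " \"sync\": \"full\" } }");

        close(fd);
        return 0;
    }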
QMP invocation for ``drive-backup`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Note that ``drive-backup`` command is deprecated since QEMU 6.2 and +will be removed in future. + Yet again, starting afresh with our example disk image chain:: [A] <-- [B] <-- [C] <-- [D] @@ -961,11 +969,22 @@ will be issued, indicating the live block device job operation has completed, and no further action is required. +Moving from the deprecated ``drive-backup`` to newer ``blockdev-backup`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``blockdev-backup`` differs from ``drive-backup`` in how you specify +the backup target. With ``blockdev-backup`` you can't specify filename +as a target. Instead you use ``node-name`` of existing block node, +which you may add by ``blockdev-add`` or ``blockdev-create`` commands. +Correspondingly, ``blockdev-backup`` doesn't have ``mode`` and +``format`` arguments which don't apply to an existing block node. See +following sections for details and examples. + + Notes on ``blockdev-backup`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``blockdev-backup`` command is equivalent in functionality to -``drive-backup``, except that it operates at node-level in a Block Driver +The ``blockdev-backup`` command operates at node-level in a Block Driver State (BDS) graph. E.g. the sequence of actions to create a point-in-time backup diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt index 10ce098a29b..bdb0f2a41ac 100644 --- a/docs/interop/nbd.txt +++ b/docs/interop/nbd.txt @@ -1,4 +1,4 @@ -Qemu supports the NBD protocol, and has an internal NBD client (see +QEMU supports the NBD protocol, and has an internal NBD client (see block/nbd.c), an internal NBD server (see blockdev-nbd.c), and an external NBD server tool (see qemu-nbd.c). The common code is placed in nbd/*. @@ -7,11 +7,11 @@ The NBD protocol is specified here: https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md The following paragraphs describe some specific properties of NBD -protocol realization in Qemu. +protocol realization in QEMU. = Metadata namespaces = -Qemu supports the "base:allocation" metadata context as defined in the +QEMU supports the "base:allocation" metadata context as defined in the NBD protocol specification, and also defines an additional metadata namespace "qemu". diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt index 0463f761efb..f7dc304ff69 100644 --- a/docs/interop/qcow2.txt +++ b/docs/interop/qcow2.txt @@ -313,7 +313,7 @@ The fields of the bitmaps extension are: The number of bitmaps contained in the image. Must be greater than or equal to 1. - Note: Qemu currently only supports up to 65535 bitmaps per + Note: QEMU currently only supports up to 65535 bitmaps per image. 4 - 7: Reserved, must be zero. @@ -775,7 +775,7 @@ Structure of a bitmap directory entry: 2: extra_data_compatible This flags is meaningful when the extra data is unknown to the software (currently any extra data is - unknown to Qemu). + unknown to QEMU). If it is set, the bitmap may be used as expected, extra data must be left as is. If it is not set, the bitmap must not be used, but @@ -793,7 +793,7 @@ Structure of a bitmap directory entry: 17: granularity_bits Granularity bits. Valid values: 0 - 63. - Note: Qemu currently supports only values 9 - 31. + Note: QEMU currently supports only values 9 - 31. Granularity is calculated as granularity = 1 << granularity_bits @@ -804,7 +804,7 @@ Structure of a bitmap directory entry: 18 - 19: name_size Size of the bitmap name. Must be non-zero. 
- Note: Qemu currently doesn't support values greater than + Note: QEMU currently doesn't support values greater than 1023. 20 - 23: extra_data_size diff --git a/docs/interop/qemu-ga-ref.rst b/docs/interop/qemu-ga-ref.rst index 3f1c4f908fa..032d4924552 100644 --- a/docs/interop/qemu-ga-ref.rst +++ b/docs/interop/qemu-ga-ref.rst @@ -1,13 +1,7 @@ QEMU Guest Agent Protocol Reference =================================== -.. - TODO: the old Texinfo manual used to note that this manual - is GPL-v2-or-later. We should make that reader-visible - both here and in our Sphinx manuals more generally. - -.. - TODO: display the QEMU version, both here and in our Sphinx manuals - more generally. +.. contents:: + :depth: 3 .. qapi-doc:: qga/qapi-schema.json diff --git a/docs/interop/qemu-qmp-ref.rst b/docs/interop/qemu-qmp-ref.rst index c8abaaf8e3e..357effd64f3 100644 --- a/docs/interop/qemu-qmp-ref.rst +++ b/docs/interop/qemu-qmp-ref.rst @@ -1,13 +1,7 @@ QEMU QMP Reference Manual ========================= -.. - TODO: the old Texinfo manual used to note that this manual - is GPL-v2-or-later. We should make that reader-visible - both here and in our Sphinx manuals more generally. - -.. - TODO: display the QEMU version, both here and in our Sphinx manuals - more generally. +.. contents:: + :depth: 3 .. qapi-doc:: qapi/qapi-schema.json diff --git a/docs/interop/qemu-storage-daemon-qmp-ref.rst b/docs/interop/qemu-storage-daemon-qmp-ref.rst index caf9dad23a7..9fed68152f5 100644 --- a/docs/interop/qemu-storage-daemon-qmp-ref.rst +++ b/docs/interop/qemu-storage-daemon-qmp-ref.rst @@ -1,13 +1,7 @@ QEMU Storage Daemon QMP Reference Manual ======================================== -.. - TODO: the old Texinfo manual used to note that this manual - is GPL-v2-or-later. We should make that reader-visible - both here and in our Sphinx manuals more generally. - -.. - TODO: display the QEMU version, both here and in our Sphinx manuals - more generally. +.. contents:: + :depth: 3 .. qapi-doc:: storage-daemon/qapi/qapi-schema.json diff --git a/docs/interop/vhost-user-gpu.rst b/docs/interop/vhost-user-gpu.rst index 3268bf405ce..71a2c52b313 100644 --- a/docs/interop/vhost-user-gpu.rst +++ b/docs/interop/vhost-user-gpu.rst @@ -2,9 +2,10 @@ Vhost-user-gpu Protocol ======================= -:Licence: This work is licensed under the terms of the GNU GPL, - version 2 or later. See the COPYING file in the top-level - directory. +.. + Licence: This work is licensed under the terms of the GNU GPL, + version 2 or later. See the COPYING file in the top-level + directory. .. contents:: Table of Contents diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index d6085f70452..edc3ad84a35 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -1,11 +1,15 @@ +.. _vhost_user_proto: + =================== Vhost-user Protocol =================== -:Copyright: 2014 Virtual Open Systems Sarl. -:Copyright: 2019 Intel Corporation -:Licence: This work is licensed under the terms of the GNU GPL, - version 2 or later. See the COPYING file in the top-level - directory. + +.. + Copyright 2014 Virtual Open Systems Sarl. + Copyright 2019 Intel Corporation + Licence: This work is licensed under the terms of the GNU GPL, + version 2 or later. See the COPYING file in the top-level + directory. .. 
contents:: Table of Contents diff --git a/docs/meson.build b/docs/meson.build index f84306ba7e0..27c6e156fff 100644 --- a/docs/meson.build +++ b/docs/meson.build @@ -9,7 +9,7 @@ endif # Check if tools are available to build documentation. build_docs = false if sphinx_build.found() - SPHINX_ARGS = ['env', 'CONFDIR=' + qemu_confdir, sphinx_build] + SPHINX_ARGS = ['env', 'CONFDIR=' + qemu_confdir, sphinx_build, '-q'] # If we're making warnings fatal, apply this to Sphinx runs as well if get_option('werror') SPHINX_ARGS += [ '-W' ] @@ -27,10 +27,9 @@ if sphinx_build.found() build_docs = (sphinx_build_test_out.returncode() == 0) if not build_docs - warning('@0@ is either too old or uses too old a Python version' - .format(sphinx_build.full_path())) + warning('@0@: @1@'.format(sphinx_build.full_path(), sphinx_build_test_out.stderr())) if get_option('docs').enabled() - error('Install a Python 3 version of python-sphinx') + error('Install a Python 3 version of python-sphinx and the readthedoc theme') endif endif endif @@ -38,14 +37,6 @@ endif if build_docs SPHINX_ARGS += ['-Dversion=' + meson.project_version(), '-Drelease=' + config_host['PKGVERSION']] - sphinx_extn_depends = [ meson.source_root() / 'docs/sphinx/depfile.py', - meson.source_root() / 'docs/sphinx/hxtool.py', - meson.source_root() / 'docs/sphinx/kerneldoc.py', - meson.source_root() / 'docs/sphinx/kernellog.py', - meson.source_root() / 'docs/sphinx/qapidoc.py', - meson.source_root() / 'docs/sphinx/qmp_lexer.py', - qapi_gen_depends ] - have_ga = have_tools and config_host.has_key('CONFIG_GUEST_AGENT') man_pages = { @@ -57,7 +48,7 @@ if build_docs 'qemu-nbd.8': (have_tools ? 'man8' : ''), 'qemu-pr-helper.8': (have_tools ? 'man8' : ''), 'qemu-storage-daemon.1': (have_tools ? 'man1' : ''), - 'qemu-trace-stap.1': (config_host.has_key('CONFIG_TRACE_SYSTEMTAP') ? 'man1' : ''), + 'qemu-trace-stap.1': (stap.found() ? 'man1' : ''), 'virtfs-proxy-helper.1': (have_virtfs_proxy_helper ? 'man1' : ''), 'virtiofsd.1': (have_virtiofsd ? 'man1' : ''), 'qemu.1': 'man1', @@ -77,7 +68,6 @@ if build_docs output: 'docs.stamp', input: files('conf.py'), depfile: 'docs.d', - depend_files: sphinx_extn_depends, command: [SPHINX_ARGS, '-Ddepfile=@DEPFILE@', '-Ddepfile_stamp=@OUTPUT0@', '-b', 'html', '-d', private_dir, diff --git a/docs/multiseat.txt b/docs/multiseat.txt index 11850c96ff8..2b297e979d6 100644 --- a/docs/multiseat.txt +++ b/docs/multiseat.txt @@ -123,7 +123,7 @@ Background info is here: guest side with pci-bridge-seat ------------------------------- -Qemu version 2.4 and newer has a new pci-bridge-seat device which +QEMU version 2.4 and newer has a new pci-bridge-seat device which can be used instead of pci-bridge. Just swap the device name in the qemu command line above. The only difference between the two devices is the pci id. We can match the pci id instead of the device path diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt index 0aae682be3e..fd7773dc5ab 100644 --- a/docs/nvdimm.txt +++ b/docs/nvdimm.txt @@ -15,7 +15,7 @@ backend (i.e. memory-backend-file and memory-backend-ram). 
A simple way to create a vNVDIMM device at startup time is done via the following command line options: - -machine pc,nvdimm + -machine pc,nvdimm=on -m $RAM_SIZE,slots=$N,maxmem=$MAX_SIZE -object memory-backend-file,id=mem1,share=on,mem-path=$PATH,size=$NVDIMM_SIZE,readonly=off -device nvdimm,id=nvdimm1,memdev=mem1,unarmed=off diff --git a/docs/pcie_pci_bridge.txt b/docs/pcie_pci_bridge.txt index ab35ebf3cae..1aa08fc5f0c 100644 --- a/docs/pcie_pci_bridge.txt +++ b/docs/pcie_pci_bridge.txt @@ -70,9 +70,9 @@ A detailed command line would be: [qemu-bin + storage options] \ -m 2G \ --device pcie-root-port,bus=pcie.0,id=rp1 \ --device pcie-root-port,bus=pcie.0,id=rp2 \ --device pcie-root-port,bus=pcie.0,id=rp3,bus-reserve=1 \ +-device pcie-root-port,bus=pcie.0,id=rp1,slot=1 \ +-device pcie-root-port,bus=pcie.0,id=rp2,slot=2 \ +-device pcie-root-port,bus=pcie.0,id=rp3,slot=3,bus-reserve=1 \ -device pcie-pci-bridge,id=br1,bus=rp1 \ -device pcie-pci-bridge,id=br2,bus=rp2 \ -device e1000,bus=br1,addr=8 diff --git a/docs/specs/_templates/editpage.html b/docs/specs/_templates/editpage.html deleted file mode 100644 index aaa468aa98d..00000000000 --- a/docs/specs/_templates/editpage.html +++ /dev/null @@ -1,5 +0,0 @@ -
diff --git a/docs/specs/acpi_cpu_hotplug.rst b/docs/specs/acpi_cpu_hotplug.rst new file mode 100644 index 00000000000..351057c9676 --- /dev/null +++ b/docs/specs/acpi_cpu_hotplug.rst @@ -0,0 +1,235 @@ +QEMU<->ACPI BIOS CPU hotplug interface +====================================== + +QEMU supports CPU hotplug via ACPI. This document +describes the interface between QEMU and the ACPI BIOS. + +ACPI BIOS GPE.2 handler is dedicated for notifying OS about CPU hot-add +and hot-remove events. + + +Legacy ACPI CPU hotplug interface registers +------------------------------------------- + +CPU present bitmap for: + +- ICH9-LPC (IO port 0x0cd8-0xcf7, 1-byte access) +- PIIX-PM (IO port 0xaf00-0xaf1f, 1-byte access) +- One bit per CPU. Bit position reflects corresponding CPU APIC ID. Read-only. +- The first DWORD in bitmap is used in write mode to switch from legacy + to modern CPU hotplug interface, write 0 into it to do switch. + +QEMU sets corresponding CPU bit on hot-add event and issues SCI +with GPE.2 event set. CPU present map is read by ACPI BIOS GPE.2 handler +to notify OS about CPU hot-add events. CPU hot-remove isn't supported. + + +Modern ACPI CPU hotplug interface registers +------------------------------------------- + +Register block base address: + +- ICH9-LPC IO port 0x0cd8 +- PIIX-PM IO port 0xaf00 + +Register block size: + +- ACPI_CPU_HOTPLUG_REG_LEN = 12 + +All accesses to registers described below, imply little-endian byte order. + +Reserved registers behavior: + +- write accesses are ignored +- read accesses return all bits set to 0. + +The last stored value in 'CPU selector' must refer to a possible CPU, otherwise + +- reads from any register return 0 +- writes to any other register are ignored until valid value is stored into it + +On QEMU start, 'CPU selector' is initialized to a valid value, on reset it +keeps the current value. + +Read access behavior +^^^^^^^^^^^^^^^^^^^^ + +offset [0x0-0x3] + Command data 2: (DWORD access) + + If value last stored in 'Command field' is: + + 0: + reads as 0x0 + 3: + upper 32 bits of architecture specific CPU ID value + other values: + reserved + +offset [0x4] + CPU device status fields: (1 byte access) + + bits: + + 0: + Device is enabled and may be used by guest + 1: + Device insert event, used to distinguish device for which + no device check event to OSPM was issued. + It's valid only when bit 0 is set. + 2: + Device remove event, used to distinguish device for which + no device eject request to OSPM was issued. Firmware must + ignore this bit. + 3: + reserved and should be ignored by OSPM + 4: + if set to 1, OSPM requests firmware to perform device eject. + 5-7: + reserved and should be ignored by OSPM + +offset [0x5-0x7] + reserved + +offset [0x8] + Command data: (DWORD access) + + If value last stored in 'Command field' is one of: + + 0: + contains 'CPU selector' value of a CPU with pending event[s] + 3: + lower 32 bits of architecture specific CPU ID value + (in x86 case: APIC ID) + otherwise: + contains 0 + +Write access behavior +^^^^^^^^^^^^^^^^^^^^^ + +offset [0x0-0x3] + CPU selector: (DWORD access) + + Selects active CPU device. All following accesses to other + registers will read/store data from/to selected CPU. + Valid values: [0 .. max_cpus) + +offset [0x4] + CPU device control fields: (1 byte access) + + bits: + + 0: + reserved, OSPM must clear it before writing to register. 
+ 1: + if set to 1 clears device insert event, set by OSPM + after it has emitted device check event for the + selected CPU device + 2: + if set to 1 clears device remove event, set by OSPM + after it has emitted device eject request for the + selected CPU device. + 3: + if set to 1 initiates device eject, set by OSPM when it + triggers CPU device removal and calls _EJ0 method or by firmware + when bit #4 is set. In case bit #4 were set, it's cleared as + part of device eject. + 4: + if set to 1, OSPM hands over device eject to firmware. + Firmware shall issue device eject request as described above + (bit #3) and OSPM should not touch device eject bit (#3) in case + it's asked firmware to perform CPU device eject. + 5-7: + reserved, OSPM must clear them before writing to register + +offset[0x5] + Command field: (1 byte access) + + value: + + 0: + selects a CPU device with inserting/removing events and + following reads from 'Command data' register return + selected CPU ('CPU selector' value). + If no CPU with events found, the current 'CPU selector' doesn't + change and corresponding insert/remove event flags are not modified. + + 1: + following writes to 'Command data' register set OST event + register in QEMU + 2: + following writes to 'Command data' register set OST status + register in QEMU + 3: + following reads from 'Command data' and 'Command data 2' return + architecture specific CPU ID value for currently selected CPU. + other values: + reserved + +offset [0x6-0x7] + reserved + +offset [0x8] + Command data: (DWORD access) + + If last stored 'Command field' value is: + + 1: + stores value into OST event register + 2: + stores value into OST status register, triggers + ACPI_DEVICE_OST QMP event from QEMU to external applications + with current values of OST event and status registers. + other values: + reserved + +Typical usecases +---------------- + +(x86) Detecting and enabling modern CPU hotplug interface +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +QEMU starts with legacy CPU hotplug interface enabled. Detecting and +switching to modern interface is based on the 2 legacy CPU hotplug features: + +#. Writes into CPU bitmap are ignored. +#. CPU bitmap always has bit #0 set, corresponding to boot CPU. + +Use following steps to detect and enable modern CPU hotplug interface: + +#. Store 0x0 to the 'CPU selector' register, attempting to switch to modern mode +#. Store 0x0 to the 'CPU selector' register, to ensure valid selector value +#. Store 0x0 to the 'Command field' register +#. Read the 'Command data 2' register. + If read value is 0x0, the modern interface is enabled. + Otherwise legacy or no CPU hotplug interface available + +Get a cpu with pending event +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +#. Store 0x0 to the 'CPU selector' register. +#. Store 0x0 to the 'Command field' register. +#. Read the 'CPU device status fields' register. +#. If both bit #1 and bit #2 are clear in the value read, there is no CPU + with a pending event and selected CPU remains unchanged. +#. Otherwise, read the 'Command data' register. The value read is the + selector of the CPU with the pending event (which is already selected). + +Enumerate CPUs present/non present CPUs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +#. Set the present CPU count to 0. +#. Set the iterator to 0. +#. Store 0x0 to the 'CPU selector' register, to ensure that it's in + a valid state and that access to other registers won't be ignored. +#. 
Store 0x0 to the 'Command field' register to make 'Command data' + register return 'CPU selector' value of selected CPU +#. Read the 'CPU device status fields' register. +#. If bit #0 is set, increment the present CPU count. +#. Increment the iterator. +#. Store the iterator to the 'CPU selector' register. +#. Read the 'Command data' register. +#. If the value read is not zero, goto 05. +#. Otherwise store 0x0 to the 'CPU selector' register, to put it + into a valid state and exit. + The iterator at this point equals "max_cpus". diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt deleted file mode 100644 index 9bd59ae0dae..00000000000 --- a/docs/specs/acpi_cpu_hotplug.txt +++ /dev/null @@ -1,160 +0,0 @@ -QEMU<->ACPI BIOS CPU hotplug interface --------------------------------------- - -QEMU supports CPU hotplug via ACPI. This document -describes the interface between QEMU and the ACPI BIOS. - -ACPI BIOS GPE.2 handler is dedicated for notifying OS about CPU hot-add -and hot-remove events. - -============================================ -Legacy ACPI CPU hotplug interface registers: --------------------------------------------- -CPU present bitmap for: - ICH9-LPC (IO port 0x0cd8-0xcf7, 1-byte access) - PIIX-PM (IO port 0xaf00-0xaf1f, 1-byte access) - One bit per CPU. Bit position reflects corresponding CPU APIC ID. Read-only. - The first DWORD in bitmap is used in write mode to switch from legacy - to modern CPU hotplug interface, write 0 into it to do switch. ---------------------------------------------------------------- -QEMU sets corresponding CPU bit on hot-add event and issues SCI -with GPE.2 event set. CPU present map is read by ACPI BIOS GPE.2 handler -to notify OS about CPU hot-add events. CPU hot-remove isn't supported. - -===================================== -Modern ACPI CPU hotplug interface registers: -------------------------------------- -Register block base address: - ICH9-LPC IO port 0x0cd8 - PIIX-PM IO port 0xaf00 -Register block size: - ACPI_CPU_HOTPLUG_REG_LEN = 12 - -All accesses to registers described below, imply little-endian byte order. - -Reserved resisters behavior: - - write accesses are ignored - - read accesses return all bits set to 0. - -The last stored value in 'CPU selector' must refer to a possible CPU, otherwise - - reads from any register return 0 - - writes to any other register are ignored until valid value is stored into it -On QEMU start, 'CPU selector' is initialized to a valid value, on reset it -keeps the current value. - -read access: - offset: - [0x0-0x3] Command data 2: (DWORD access) - if value last stored in 'Command field': - 0: reads as 0x0 - 3: upper 32 bits of architecture specific CPU ID value - other values: reserved - [0x4] CPU device status fields: (1 byte access) - bits: - 0: Device is enabled and may be used by guest - 1: Device insert event, used to distinguish device for which - no device check event to OSPM was issued. - It's valid only when bit 0 is set. - 2: Device remove event, used to distinguish device for which - no device eject request to OSPM was issued. Firmware must - ignore this bit. - 3: reserved and should be ignored by OSPM - 4: if set to 1, OSPM requests firmware to perform device eject. 
- 5-7: reserved and should be ignored by OSPM - [0x5-0x7] reserved - [0x8] Command data: (DWORD access) - contains 0 unless value last stored in 'Command field' is one of: - 0: contains 'CPU selector' value of a CPU with pending event[s] - 3: lower 32 bits of architecture specific CPU ID value - (in x86 case: APIC ID) - -write access: - offset: - [0x0-0x3] CPU selector: (DWORD access) - selects active CPU device. All following accesses to other - registers will read/store data from/to selected CPU. - Valid values: [0 .. max_cpus) - [0x4] CPU device control fields: (1 byte access) - bits: - 0: reserved, OSPM must clear it before writing to register. - 1: if set to 1 clears device insert event, set by OSPM - after it has emitted device check event for the - selected CPU device - 2: if set to 1 clears device remove event, set by OSPM - after it has emitted device eject request for the - selected CPU device. - 3: if set to 1 initiates device eject, set by OSPM when it - triggers CPU device removal and calls _EJ0 method or by firmware - when bit #4 is set. In case bit #4 were set, it's cleared as - part of device eject. - 4: if set to 1, OSPM hands over device eject to firmware. - Firmware shall issue device eject request as described above - (bit #3) and OSPM should not touch device eject bit (#3) in case - it's asked firmware to perform CPU device eject. - 5-7: reserved, OSPM must clear them before writing to register - [0x5] Command field: (1 byte access) - value: - 0: selects a CPU device with inserting/removing events and - following reads from 'Command data' register return - selected CPU ('CPU selector' value). - If no CPU with events found, the current 'CPU selector' doesn't - change and corresponding insert/remove event flags are not modified. - 1: following writes to 'Command data' register set OST event - register in QEMU - 2: following writes to 'Command data' register set OST status - register in QEMU - 3: following reads from 'Command data' and 'Command data 2' return - architecture specific CPU ID value for currently selected CPU. - other values: reserved - [0x6-0x7] reserved - [0x8] Command data: (DWORD access) - if last stored 'Command field' value: - 1: stores value into OST event register - 2: stores value into OST status register, triggers - ACPI_DEVICE_OST QMP event from QEMU to external applications - with current values of OST event and status registers. - other values: reserved - -Typical usecases: - - (x86) Detecting and enabling modern CPU hotplug interface. - QEMU starts with legacy CPU hotplug interface enabled. Detecting and - switching to modern interface is based on the 2 legacy CPU hotplug features: - 1. Writes into CPU bitmap are ignored. - 2. CPU bitmap always has bit#0 set, corresponding to boot CPU. - - Use following steps to detect and enable modern CPU hotplug interface: - 1. Store 0x0 to the 'CPU selector' register, - attempting to switch to modern mode - 2. Store 0x0 to the 'CPU selector' register, - to ensure valid selector value - 3. Store 0x0 to the 'Command field' register, - 4. Read the 'Command data 2' register. - If read value is 0x0, the modern interface is enabled. - Otherwise legacy or no CPU hotplug interface available - - - Get a cpu with pending event - 1. Store 0x0 to the 'CPU selector' register. - 2. Store 0x0 to the 'Command field' register. - 3. Read the 'CPU device status fields' register. - 4. If both bit#1 and bit#2 are clear in the value read, there is no CPU - with a pending event and selected CPU remains unchanged. - 5. 
Otherwise, read the 'Command data' register. The value read is the - selector of the CPU with the pending event (which is already - selected). - - - Enumerate CPUs present/non present CPUs - 01. Set the present CPU count to 0. - 02. Set the iterator to 0. - 03. Store 0x0 to the 'CPU selector' register, to ensure that it's in - a valid state and that access to other registers won't be ignored. - 04. Store 0x0 to the 'Command field' register to make 'Command data' - register return 'CPU selector' value of selected CPU - 05. Read the 'CPU device status fields' register. - 06. If bit#0 is set, increment the present CPU count. - 07. Increment the iterator. - 08. Store the iterator to the 'CPU selector' register. - 09. Read the 'Command data' register. - 10. If the value read is not zero, goto 05. - 11. Otherwise store 0x0 to the 'CPU selector' register, to put it - into a valid state and exit. - The iterator at this point equals "max_cpus". diff --git a/docs/specs/acpi_mem_hotplug.rst b/docs/specs/acpi_mem_hotplug.rst new file mode 100644 index 00000000000..069819bc3e0 --- /dev/null +++ b/docs/specs/acpi_mem_hotplug.rst @@ -0,0 +1,128 @@ +QEMU<->ACPI BIOS memory hotplug interface +========================================= + +ACPI BIOS GPE.3 handler is dedicated for notifying OS about memory hot-add +and hot-remove events. + +Memory hot-plug interface (IO port 0xa00-0xa17, 1-4 byte access) +---------------------------------------------------------------- + +Read access behavior +^^^^^^^^^^^^^^^^^^^^ + +[0x0-0x3] + Lo part of memory device phys address +[0x4-0x7] + Hi part of memory device phys address +[0x8-0xb] + Lo part of memory device size in bytes +[0xc-0xf] + Hi part of memory device size in bytes +[0x10-0x13] + Memory device proximity domain +[0x14] + Memory device status fields + + bits: + + 0: + Device is enabled and may be used by guest + 1: + Device insert event, used to distinguish device for which + no device check event to OSPM was issued. + It's valid only when bit 1 is set. + 2: + Device remove event, used to distinguish device for which + no device eject request to OSPM was issued. + 3-7: + reserved and should be ignored by OSPM + +[0x15-0x17] + reserved + +Write access behavior +^^^^^^^^^^^^^^^^^^^^^ + + +[0x0-0x3] + Memory device slot selector, selects active memory device. + All following accesses to other registers in 0xa00-0xa17 + region will read/store data from/to selected memory device. +[0x4-0x7] + OST event code reported by OSPM +[0x8-0xb] + OST status code reported by OSPM +[0xc-0x13] + reserved, writes into it are ignored +[0x14] + Memory device control fields + + bits: + + 0: + reserved, OSPM must clear it before writing to register. + Due to BUG in versions prior 2.4 that field isn't cleared + when other fields are written. Keep it reserved and don't + try to reuse it. 
+ 1: + if set to 1 clears device insert event, set by OSPM + after it has emitted device check event for the + selected memory device + 2: + if set to 1 clears device remove event, set by OSPM + after it has emitted device eject request for the + selected memory device + 3: + if set to 1 initiates device eject, set by OSPM when it + triggers memory device removal and calls _EJ0 method + 4-7: + reserved, OSPM must clear them before writing to register + +Selecting memory device slot beyond present range has no effect on platform: + +- write accesses to memory hot-plug registers not documented above are ignored +- read accesses to memory hot-plug registers not documented above return + all bits set to 1. + +Memory hot remove process diagram +--------------------------------- + +:: + + +-------------+ +-----------------------+ +------------------+ + | 1. QEMU | | 2. QEMU | |3. QEMU | + | device_del +---->+ device unplug request +----->+Send SCI to guest,| + | | | cb | |return control to | + | | | | |management | + +-------------+ +-----------------------+ +------------------+ + + +---------------------------------------------------------------------+ + + +---------------------+ +-------------------------+ + | OSPM: | remove event | OSPM: | + | send Eject Request, | | Scan memory devices | + | clear remove event +<-------------+ for event flags | + | | | | + +---------------------+ +-------------------------+ + | + | + +---------v--------+ +-----------------------+ + | Guest OS: | success | OSPM: | + | process Ejection +----------->+ Execute _EJ0 method, | + | request | | set eject bit in flags| + +------------------+ +-----------------------+ + |failure | + v v + +------------------------+ +-----------------------+ + | OSPM: | | QEMU: | + | set OST event & status | | call device unplug cb | + | fields | | | + +------------------------+ +-----------------------+ + | | + v v + +------------------+ +-------------------+ + |QEMU: | |QEMU: | + |Send OST QMP event| |Send device deleted| + | | |QMP event | + +------------------+ | | + +-------------------+ diff --git a/docs/specs/acpi_mem_hotplug.txt b/docs/specs/acpi_mem_hotplug.txt deleted file mode 100644 index 3df3620ce42..00000000000 --- a/docs/specs/acpi_mem_hotplug.txt +++ /dev/null @@ -1,94 +0,0 @@ -QEMU<->ACPI BIOS memory hotplug interface --------------------------------------- - -ACPI BIOS GPE.3 handler is dedicated for notifying OS about memory hot-add -and hot-remove events. - -Memory hot-plug interface (IO port 0xa00-0xa17, 1-4 byte access): ---------------------------------------------------------------- -0xa00: - read access: - [0x0-0x3] Lo part of memory device phys address - [0x4-0x7] Hi part of memory device phys address - [0x8-0xb] Lo part of memory device size in bytes - [0xc-0xf] Hi part of memory device size in bytes - [0x10-0x13] Memory device proximity domain - [0x14] Memory device status fields - bits: - 0: Device is enabled and may be used by guest - 1: Device insert event, used to distinguish device for which - no device check event to OSPM was issued. - It's valid only when bit 1 is set. - 2: Device remove event, used to distinguish device for which - no device eject request to OSPM was issued. - 3-7: reserved and should be ignored by OSPM - [0x15-0x17] reserved - - write access: - [0x0-0x3] Memory device slot selector, selects active memory device. - All following accesses to other registers in 0xa00-0xa17 - region will read/store data from/to selected memory device. 
- [0x4-0x7] OST event code reported by OSPM - [0x8-0xb] OST status code reported by OSPM - [0xc-0x13] reserved, writes into it are ignored - [0x14] Memory device control fields - bits: - 0: reserved, OSPM must clear it before writing to register. - Due to BUG in versions prior 2.4 that field isn't cleared - when other fields are written. Keep it reserved and don't - try to reuse it. - 1: if set to 1 clears device insert event, set by OSPM - after it has emitted device check event for the - selected memory device - 2: if set to 1 clears device remove event, set by OSPM - after it has emitted device eject request for the - selected memory device - 3: if set to 1 initiates device eject, set by OSPM when it - triggers memory device removal and calls _EJ0 method - 4-7: reserved, OSPM must clear them before writing to register - -Selecting memory device slot beyond present range has no effect on platform: - - write accesses to memory hot-plug registers not documented above are - ignored - - read accesses to memory hot-plug registers not documented above return - all bits set to 1. - -Memory hot remove process diagram: ----------------------------------- - +-------------+     +-----------------------+      +------------------+      - |  1. QEMU    |     | 2. QEMU               |      |3. QEMU           |      - |  device_del +---->+ device unplug request +----->+Send SCI to guest,|      - |             |     |         cb            |      |return control to |      - +-------------+     +-----------------------+      |management        |      -                                                    +------------------+      -                                                                              - +---------------------------------------------------------------------+      -                                                                              - +---------------------+              +-------------------------+             - | OSPM:               | remove event | OSPM:                   |             - | send Eject Request, |              | Scan memory devices     |             - | clear remove event  +<-------------+ for event flags         |             - |                     |              |                         |             - +---------------------+              +-------------------------+             -           |                                                                  -           |                                                                  - +---------v--------+            +-----------------------+                    - | Guest OS:        |  success   | OSPM:                 |                    - | process Ejection +----------->+ Execute _EJ0 method,  |                    - | request          |            | set eject bit in flags|                    - +------------------+            +-----------------------+                    -           |failure                         |                                 -           v                                v                                 - +------------------------+      +-----------------------+                    - | OSPM:                  |      | QEMU:                 |                    - | set OST event & status |      | call device unplug cb |                    - | fields                 |      |                       |                    - +------------------------+      +-----------------------+                    -          |                                  |                                -          v       
                           v                                - +------------------+              +-------------------+                      - |QEMU:             |              |QEMU:              |                      - |Send OST QMP event|              |Send device deleted|                      - |                  |              |QMP event          |                      - +------------------+              |                   |                      -                                   +-------------------+ diff --git a/docs/specs/acpi_nvdimm.rst b/docs/specs/acpi_nvdimm.rst new file mode 100644 index 00000000000..ab0335253d7 --- /dev/null +++ b/docs/specs/acpi_nvdimm.rst @@ -0,0 +1,228 @@ +QEMU<->ACPI BIOS NVDIMM interface +================================= + +QEMU supports NVDIMM via ACPI. This document describes the basic concepts of +NVDIMM ACPI and the interface between QEMU and the ACPI BIOS. + +NVDIMM ACPI Background +---------------------- + +NVDIMM is introduced in ACPI 6.0 which defines an NVDIMM root device under +_SB scope with a _HID of "ACPI0012". For each NVDIMM present or intended +to be supported by platform, platform firmware also exposes an ACPI +Namespace Device under the root device. + +The NVDIMM child devices under the NVDIMM root device are defined with _ADR +corresponding to the NFIT device handle. The NVDIMM root device and the +NVDIMM devices can have device specific methods (_DSM) to provide additional +functions specific to a particular NVDIMM implementation. + +This is an example from ACPI 6.0, a platform contains one NVDIMM:: + + Scope (\_SB){ + Device (NVDR) // Root device + { + Name (_HID, "ACPI0012") + Method (_STA) {...} + Method (_FIT) {...} + Method (_DSM, ...) {...} + Device (NVD) + { + Name(_ADR, h) //where h is NFIT Device Handle for this NVDIMM + Method (_DSM, ...) {...} + } + } + } + +Methods supported on both NVDIMM root device and NVDIMM device +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +_DSM (Device Specific Method) + It is a control method that enables devices to provide device specific + control functions that are consumed by the device driver. + The NVDIMM DSM specification can be found at + http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf + + Arguments: + + Arg0 + A Buffer containing a UUID (16 Bytes) + Arg1 + An Integer containing the Revision ID (4 Bytes) + Arg2 + An Integer containing the Function Index (4 Bytes) + Arg3 + A package containing parameters for the function specified by the + UUID, Revision ID, and Function Index + + Return Value: + + If Function Index = 0, a Buffer containing a function index bitfield. + Otherwise, the return value and type depends on the UUID, revision ID + and function index which are described in the DSM specification. + +Methods on NVDIMM ROOT Device +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +_FIT(Firmware Interface Table) + It evaluates to a buffer returning data in the format of a series of NFIT + Type Structure. + + Arguments: None + + Return Value: + A Buffer containing a list of NFIT Type structure entries. + + The detailed definition of the structure can be found at ACPI 6.0: 5.2.25 + NVDIMM Firmware Interface Table (NFIT). + +QEMU NVDIMM Implementation +-------------------------- + +QEMU uses 4 bytes IO Port starting from 0x0a18 and a RAM-based memory page +for NVDIMM ACPI. + +Memory: + QEMU uses BIOS Linker/loader feature to ask BIOS to allocate a memory + page and dynamically patch its address into an int32 object named "MEMA" + in ACPI. 
+
+    This page is RAM-based and is used to transfer data between the _DSM
+    method and QEMU. If ACPI has control, this page is owned by ACPI, which
+    writes _DSM input data to it; otherwise, it is owned by QEMU, which
+    emulates _DSM access and writes the output data to it.
+
+    ACPI writes _DSM Input Data (based on the offset in the page):
+
+    [0x0 - 0x3]
+        4 bytes, NVDIMM Device Handle.
+
+        The handle is a purely QEMU-internal value; values in the
+        range [1, 0xFFFF] indicate an nvdimm device. Other values are
+        reserved for other purposes.
+
+        Reserved handles:
+
+        - 0 is reserved for the nvdimm root device named NVDR.
+        - 0x10000 is reserved for the QEMU internal DSM function called on
+          the root device.
+
+    [0x4 - 0x7]
+        4 bytes, Revision ID, that is the Arg1 of the _DSM method.
+
+    [0x8 - 0xB]
+        4 bytes, Function Index, that is the Arg2 of the _DSM method.
+
+    [0xC - 0xFFF]
+        4084 bytes, the Arg3 of the _DSM method.
+
+    QEMU writes Output Data (based on the offset in the page):
+
+    [0x0 - 0x3]
+        4 bytes, the length of the result
+
+    [0x4 - 0xFFF]
+        4092 bytes, the DSM result filled by QEMU
+
+IO Port 0x0a18 - 0xa1b:
+    ACPI writes the address of the memory page allocated by the BIOS to this
+    port, then QEMU takes control and fills in the result in the memory page.
+
+    Write Access:
+
+    [0x0a18 - 0xa1b]
+        4 bytes, the address of the memory page allocated by the BIOS.
+
+_DSM process diagram
+--------------------
+
+"MEMA" indicates the address of the memory page allocated by the BIOS.
+
+::
+
+    +----------------------+      +-----------------------+
+    |  1. OSPM             |      |  2. OSPM              |
+    | save _DSM input data |      | write "MEMA" to       | Exit to QEMU
+    | to the page          +----->| IO port 0x0a18        +------------+
+    | indicated by "MEMA"  |      |                       |            |
+    +----------------------+      +-----------------------+            |
+                                                                       |
+                                                                       v
+    +--------------------+       +-----------+      +------------------+--------+
+    |    5 QEMU          |       | 4 QEMU    |      | 3. QEMU                   |
+    | write _DSM result  |       |  emulate  |      | get _DSM input data from  |
+    | to the page        +<------+   _DSM    +<-----+ the page indicated by the |
+    |                    |       |           |      | value from the IO port    |
+    +--------+-----------+       +-----------+      +---------------------------+
+             |
+             | Enter Guest
+             |
+             v
+    +--------------------------+            +--------------+
+    |   6 OSPM                 |            |   7 OSPM     |
+    | result size is returned  |            | _DSM return  |
+    | by reading DSM           +----------->+              |
+    | result from the page     |            |              |
+    +--------------------------+            +--------------+
+
+NVDIMM hotplug
+--------------
+
+ACPI BIOS GPE.4 handler is dedicated for notifying OS about nvdimm device
+hot-add events.
+
+QEMU internal use only _DSM functions
+-------------------------------------
+
+Read FIT
+^^^^^^^^
+
+The _FIT method uses the _DSM method to fetch the NFIT structures blob from
+QEMU in 1-page-sized increments, which are then concatenated and returned
+as the _FIT method result.
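The generic _DSM dataflow described above (OSPM fills the shared page, writes
its guest-physical address to IO port 0x0a18, then reads the result length and
payload back from the same page) can be illustrated with a minimal C sketch of
the guest side. The names below are assumptions for illustration only:
``dsm_page`` stands in for the BIOS-allocated page patched into "MEMA",
``dsm_page_gpa`` for its guest-physical address, and ``outl()`` is a
placeholder for the firmware's port-write primitive; a little-endian guest
(as on x86) is assumed. The Read FIT input and output layouts follow after
the sketch::

    /*
     * Hedged sketch of the guest-side _DSM call flow described above.
     * Assumed names (not from QEMU or the spec): dsm_page, dsm_page_gpa, outl().
     */
    #include <stdint.h>
    #include <string.h>

    #define NVDIMM_DSM_PORT 0x0a18u          /* 4-byte IO port described above */

    static uint8_t  dsm_page[4096];          /* stand-in for the "MEMA" page   */
    static uint32_t dsm_page_gpa;            /* its guest-physical address     */

    static void outl(uint16_t port, uint32_t val)
    {
        (void)port;                          /* placeholder for a real 32-bit  */
        (void)val;                           /* port write done by firmware    */
    }

    /* Issue one _DSM call and copy the result out of the shared page. */
    static uint32_t nvdimm_dsm_call(uint32_t handle, uint32_t revision,
                                    uint32_t function,
                                    const void *arg3, size_t arg3_len,
                                    void *result, size_t result_max)
    {
        uint32_t len;

        if (arg3_len > 4084) {
            arg3_len = 4084;                 /* Arg3 area is 0xC..0xFFF */
        }

        /* Input layout: handle @0x0, revision @0x4, function index @0x8,
         * Arg3 blob @0xC (little-endian guest assumed). */
        memcpy(dsm_page + 0x0, &handle,   sizeof(handle));
        memcpy(dsm_page + 0x4, &revision, sizeof(revision));
        memcpy(dsm_page + 0x8, &function, sizeof(function));
        if (arg3 && arg3_len) {
            memcpy(dsm_page + 0xC, arg3, arg3_len);
        }

        /* Hand control to QEMU by writing the page address to port 0x0a18. */
        outl(NVDIMM_DSM_PORT, dsm_page_gpa);

        /* Output layout per the text above: result length @0x0, data from 0x4. */
        memcpy(&len, dsm_page + 0x0, sizeof(len));
        if (len > result_max) {
            len = (uint32_t)result_max;
        }
        memcpy(result, dsm_page + 0x4, len);
        return len;
    }

For example, the Read FIT function described below would be invoked with
handle 0x10000, revision 1, function index 0x1, and a 4-byte offset blob as
Arg3.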
+ +Input parameters: + +Arg0 + UUID {set to 648B9CF2-CDA1-4312-8AD9-49C4AF32BD62} +Arg1 + Revision ID (set to 1) +Arg2 + Function Index, 0x1 +Arg3 + A package containing a buffer whose layout is as follows: + + +----------+--------+--------+-------------------------------------------+ + | Field | Length | Offset | Description | + +----------+--------+--------+-------------------------------------------+ + | offset | 4 | 0 | offset in QEMU's NFIT structures blob to | + | | | | read from | + +----------+--------+--------+-------------------------------------------+ + +Output layout in the dsm memory page: + + +----------+--------+--------+-------------------------------------------+ + | Field | Length | Offset | Description | + +----------+--------+--------+-------------------------------------------+ + | length | 4 | 0 | length of entire returned data | + | | | | (including this header) | + +----------+--------+--------+-------------------------------------------+ + | | | | return status codes | + | | | | | + | | | | - 0x0 - success | + | | | | - 0x100 - error caused by NFIT update | + | status | 4 | 4 | while read by _FIT wasn't completed | + | | | | - other codes follow Chapter 3 in | + | | | | DSM Spec Rev1 | + +----------+--------+--------+-------------------------------------------+ + | fit data | Varies | 8 | contains FIT data. This field is present | + | | | | if status field is 0. | + +----------+--------+--------+-------------------------------------------+ + +The FIT offset is maintained by the OSPM itself, current offset plus +the size of the fit data returned by the function is the next offset +OSPM should read. When all FIT data has been read out, zero fit data +size is returned. + +If it returns status code 0x100, OSPM should restart to read FIT (read +from offset 0 again). diff --git a/docs/specs/acpi_nvdimm.txt b/docs/specs/acpi_nvdimm.txt deleted file mode 100644 index 3ec42ecbce4..00000000000 --- a/docs/specs/acpi_nvdimm.txt +++ /dev/null @@ -1,188 +0,0 @@ -QEMU<->ACPI BIOS NVDIMM interface ---------------------------------- - -QEMU supports NVDIMM via ACPI. This document describes the basic concepts of -NVDIMM ACPI and the interface between QEMU and the ACPI BIOS. - -NVDIMM ACPI Background ----------------------- -NVDIMM is introduced in ACPI 6.0 which defines an NVDIMM root device under -_SB scope with a _HID of “ACPI0012”. For each NVDIMM present or intended -to be supported by platform, platform firmware also exposes an ACPI -Namespace Device under the root device. - -The NVDIMM child devices under the NVDIMM root device are defined with _ADR -corresponding to the NFIT device handle. The NVDIMM root device and the -NVDIMM devices can have device specific methods (_DSM) to provide additional -functions specific to a particular NVDIMM implementation. - -This is an example from ACPI 6.0, a platform contains one NVDIMM: - -Scope (\_SB){ - Device (NVDR) // Root device - { - Name (_HID, “ACPI0012”) - Method (_STA) {...} - Method (_FIT) {...} - Method (_DSM, ...) {...} - Device (NVD) - { - Name(_ADR, h) //where h is NFIT Device Handle for this NVDIMM - Method (_DSM, ...) {...} - } - } -} - -Method supported on both NVDIMM root device and NVDIMM device -_DSM (Device Specific Method) - It is a control method that enables devices to provide device specific - control functions that are consumed by the device driver. 
- The NVDIMM DSM specification can be found at: - http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf - - Arguments: - Arg0 – A Buffer containing a UUID (16 Bytes) - Arg1 – An Integer containing the Revision ID (4 Bytes) - Arg2 – An Integer containing the Function Index (4 Bytes) - Arg3 – A package containing parameters for the function specified by the - UUID, Revision ID, and Function Index - - Return Value: - If Function Index = 0, a Buffer containing a function index bitfield. - Otherwise, the return value and type depends on the UUID, revision ID - and function index which are described in the DSM specification. - -Methods on NVDIMM ROOT Device -_FIT(Firmware Interface Table) - It evaluates to a buffer returning data in the format of a series of NFIT - Type Structure. - - Arguments: None - - Return Value: - A Buffer containing a list of NFIT Type structure entries. - - The detailed definition of the structure can be found at ACPI 6.0: 5.2.25 - NVDIMM Firmware Interface Table (NFIT). - -QEMU NVDIMM Implementation -========================== -QEMU uses 4 bytes IO Port starting from 0x0a18 and a RAM-based memory page -for NVDIMM ACPI. - -Memory: - QEMU uses BIOS Linker/loader feature to ask BIOS to allocate a memory - page and dynamically patch its address into an int32 object named "MEMA" - in ACPI. - - This page is RAM-based and it is used to transfer data between _DSM - method and QEMU. If ACPI has control, this pages is owned by ACPI which - writes _DSM input data to it, otherwise, it is owned by QEMU which - emulates _DSM access and writes the output data to it. - - ACPI writes _DSM Input Data (based on the offset in the page): - [0x0 - 0x3]: 4 bytes, NVDIMM Device Handle. - - The handle is completely QEMU internal thing, the values in - range [1, 0xFFFF] indicate nvdimm device. Other values are - reserved for other purposes. - - Reserved handles: - 0 is reserved for nvdimm root device named NVDR. - 0x10000 is reserved for QEMU internal DSM function called on - the root device. - - [0x4 - 0x7]: 4 bytes, Revision ID, that is the Arg1 of _DSM method. - [0x8 - 0xB]: 4 bytes. Function Index, that is the Arg2 of _DSM method. - [0xC - 0xFFF]: 4084 bytes, the Arg3 of _DSM method. - - QEMU Writes Output Data (based on the offset in the page): - [0x0 - 0x3]: 4 bytes, the length of result - [0x4 - 0xFFF]: 4092 bytes, the DSM result filled by QEMU - -IO Port 0x0a18 - 0xa1b: - ACPI writes the address of the memory page allocated by BIOS to this - port then QEMU gets the control and fills the result in the memory page. - - write Access: - [0x0a18 - 0xa1b]: 4 bytes, the address of the memory page allocated - by BIOS. - -_DSM process diagram: ---------------------- -"MEMA" indicates the address of memory page allocated by BIOS. - - +----------------------+   +-----------------------+ - |   1. OSPM   |      | 2. OSPM | - | save _DSM input data | | write "MEMA" to | Exit to QEMU - | to the page +----->| IO port 0x0a18 +------------+ - | indicated by "MEMA" | | | | - +----------------------+ +-----------------------+ | -  | -  v - +------------- ----+ +-----------+ +------------------+--------+ - | 5 QEMU | | 4 QEMU | | 3. 
QEMU | - | write _DSM result | | emulate | | get _DSM input data from | - | to the page +<------+ _DSM +<-----+ the page indicated by the | - | | | | | value from the IO port | - +--------+-----------+ +-----------+ +---------------------------+ - | - | Enter Guest - | - v - +--------------------------+ +--------------+ - | 6 OSPM | | 7 OSPM | - | result size is returned | | _DSM return | - | by reading DSM +----->+ | - | result from the page | | | - +--------------------------+ +--------------+ - -NVDIMM hotplug --------------- -ACPI BIOS GPE.4 handler is dedicated for notifying OS about nvdimm device -hot-add event. - -QEMU internal use only _DSM function ------------------------------------- -1) Read FIT - _FIT method uses _DSM method to fetch NFIT structures blob from QEMU - in 1 page sized increments which are then concatenated and returned - as _FIT method result. - - Input parameters: - Arg0 – UUID {set to 648B9CF2-CDA1-4312-8AD9-49C4AF32BD62} - Arg1 – Revision ID (set to 1) - Arg2 - Function Index, 0x1 - Arg3 - A package containing a buffer whose layout is as follows: - - +----------+--------+--------+-------------------------------------------+ - | Field | Length | Offset | Description | - +----------+--------+--------+-------------------------------------------+ - | offset | 4 | 0 | offset in QEMU's NFIT structures blob to | - | | | | read from | - +----------+--------+--------+-------------------------------------------+ - - Output layout in the dsm memory page: - +----------+--------+--------+-------------------------------------------+ - | Field | Length | Offset | Description | - +----------+--------+--------+-------------------------------------------+ - | length | 4 | 0 | length of entire returned data | - | | | | (including this header) | - +----------+-----------------+-------------------------------------------+ - | | | | return status codes | - | | | | 0x0 - success | - | | | | 0x100 - error caused by NFIT update while | - | status | 4 | 4 | read by _FIT wasn't completed, other | - | | | | codes follow Chapter 3 in DSM Spec Rev1 | - +----------+-----------------+-------------------------------------------+ - | fit data | Varies | 8 | contains FIT data, this field is present | - | | | | if status field is 0; | - +----------+--------+--------+-------------------------------------------+ - - The FIT offset is maintained by the OSPM itself, current offset plus - the size of the fit data returned by the function is the next offset - OSPM should read. When all FIT data has been read out, zero fit data - size is returned. - - If it returns status code 0x100, OSPM should restart to read FIT (read - from offset 0 again). diff --git a/docs/specs/acpi_pci_hotplug.rst b/docs/specs/acpi_pci_hotplug.rst new file mode 100644 index 00000000000..685bc5c322f --- /dev/null +++ b/docs/specs/acpi_pci_hotplug.rst @@ -0,0 +1,48 @@ +QEMU<->ACPI BIOS PCI hotplug interface +====================================== + +QEMU supports PCI hotplug via ACPI, for PCI bus 0. This document +describes the interface between QEMU and the ACPI BIOS. + +ACPI GPE block (IO ports 0xafe0-0xafe3, byte access) +---------------------------------------------------- + +Generic ACPI GPE block. Bit 1 (GPE.1) used to notify PCI hotplug/eject +event to ACPI BIOS, via SCI interrupt. + +PCI slot injection notification pending (IO port 0xae00-0xae03, 4-byte access) +------------------------------------------------------------------------------ + +Slot injection notification pending. One bit per slot. 
+ +Read by ACPI BIOS GPE.1 handler to notify OS of injection +events. Read-only. + +PCI slot removal notification (IO port 0xae04-0xae07, 4-byte access) +-------------------------------------------------------------------- + +Slot removal notification pending. One bit per slot. + +Read by ACPI BIOS GPE.1 handler to notify OS of removal +events. Read-only. + +PCI device eject (IO port 0xae08-0xae0b, 4-byte access) +------------------------------------------------------- + +Write: Used by ACPI BIOS _EJ0 method to request device removal. +One bit per slot. + +Read: Hotplug features register. Used by platform to identify features +available. Current base feature set (no bits set): + +- Read-only "up" register @0xae00, 4-byte access, bit per slot +- Read-only "down" register @0xae04, 4-byte access, bit per slot +- Read/write "eject" register @0xae08, 4-byte access, + write: bit per slot eject, read: hotplug feature set +- Read-only hotplug capable register @0xae0c, 4-byte access, bit per slot + +PCI removability status (IO port 0xae0c-0xae0f, 4-byte access) +-------------------------------------------------------------- + +Used by ACPI BIOS _RMV method to indicate removability status to OS. One +bit per slot. Read-only. diff --git a/docs/specs/acpi_pci_hotplug.txt b/docs/specs/acpi_pci_hotplug.txt deleted file mode 100644 index a839434f313..00000000000 --- a/docs/specs/acpi_pci_hotplug.txt +++ /dev/null @@ -1,45 +0,0 @@ -QEMU<->ACPI BIOS PCI hotplug interface --------------------------------------- - -QEMU supports PCI hotplug via ACPI, for PCI bus 0. This document -describes the interface between QEMU and the ACPI BIOS. - -ACPI GPE block (IO ports 0xafe0-0xafe3, byte access): ------------------------------------------ - -Generic ACPI GPE block. Bit 1 (GPE.1) used to notify PCI hotplug/eject -event to ACPI BIOS, via SCI interrupt. - -PCI slot injection notification pending (IO port 0xae00-0xae03, 4-byte access): ---------------------------------------------------------------- -Slot injection notification pending. One bit per slot. - -Read by ACPI BIOS GPE.1 handler to notify OS of injection -events. Read-only. - -PCI slot removal notification (IO port 0xae04-0xae07, 4-byte access): ------------------------------------------------------ -Slot removal notification pending. One bit per slot. - -Read by ACPI BIOS GPE.1 handler to notify OS of removal -events. Read-only. - -PCI device eject (IO port 0xae08-0xae0b, 4-byte access): ----------------------------------------- - -Write: Used by ACPI BIOS _EJ0 method to request device removal. -One bit per slot. - -Read: Hotplug features register. Used by platform to identify features -available. Current base feature set (no bits set): - - Read-only "up" register @0xae00, 4-byte access, bit per slot - - Read-only "down" register @0xae04, 4-byte access, bit per slot - - Read/write "eject" register @0xae08, 4-byte access, - write: bit per slot eject, read: hotplug feature set - - Read-only hotplug capable register @0xae0c, 4-byte access, bit per slot - -PCI removability status (IO port 0xae0c-0xae0f, 4-byte access): ------------------------------------------------ - -Used by ACPI BIOS _RMV method to indicate removability status to OS. One -bit per slot. Read-only diff --git a/docs/specs/index.rst b/docs/specs/index.rst index 7b08314d334..ecc43896bb2 100644 --- a/docs/specs/index.rst +++ b/docs/specs/index.rst @@ -1,11 +1,9 @@ -.. 
This is the top level page for the 'specs' manual - - +---------------------------------------------- System Emulation Guest Hardware Specifications -============================================== - +---------------------------------------------- -Contents: +This section of the manual contains specifications of +guest hardware that is specific to QEMU. .. toctree:: :maxdepth: 2 @@ -16,3 +14,7 @@ Contents: acpi_hw_reduced_hotplug tpm acpi_hest_ghes + acpi_cpu_hotplug + acpi_mem_hotplug + acpi_pci_hotplug + acpi_nvdimm diff --git a/docs/sphinx-static/custom.js b/docs/sphinx-static/custom.js new file mode 100644 index 00000000000..71a86053051 --- /dev/null +++ b/docs/sphinx-static/custom.js @@ -0,0 +1,9 @@ +document.addEventListener('keydown', (event) => { + // find a better way to look it up? + let search_input = document.getElementsByName('q')[0]; + + if (event.code === 'KeyS' && document.activeElement !== search_input) { + event.preventDefault(); + search_input.focus(); + } +}); diff --git a/docs/sphinx-static/theme_overrides.css b/docs/sphinx-static/theme_overrides.css new file mode 100644 index 00000000000..c70ef951286 --- /dev/null +++ b/docs/sphinx-static/theme_overrides.css @@ -0,0 +1,161 @@ +/* -*- coding: utf-8; mode: css -*- + * + * Sphinx HTML theme customization: read the doc + * Based on Linux Documentation/sphinx-static/theme_overrides.css + */ + +/* Improve contrast and increase size for easier reading. */ + +body { + font-family: serif; + color: black; + font-size: 100%; +} + +h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend { + font-family: sans-serif; +} + +.rst-content dl:not(.docutils) dt { + border-top: none; + border-left: solid 3px #ccc; + background-color: #f0f0f0; + color: black; +} + +.wy-nav-top { + background: #802400; +} + +.wy-side-nav-search input[type="text"] { + border-color: #f60; +} + +.wy-menu-vertical p.caption { + color: white; +} + +.wy-menu-vertical li.current a { + color: #505050; +} + +.wy-menu-vertical li.on a, .wy-menu-vertical li.current > a { + color: #303030; +} + +.fa-gitlab { + box-shadow: 0 4px 8px 0 rgba(0,0,0,0.2), 0 3px 10px 0 rgba(0,0,0,0.19); + border-radius: 5px; +} + +div[class^="highlight"] pre { + font-family: monospace; + color: black; + font-size: 100%; +} + +.wy-menu-vertical { + font-family: sans-serif; +} + +.c { + font-style: normal; +} + +p { + font-size: 100%; +} + +/* Interim: Code-blocks with line nos - lines and line numbers don't line up. + * see: https://github.com/rtfd/sphinx_rtd_theme/issues/419 + */ + +div[class^="highlight"] pre { + line-height: normal; +} +.rst-content .highlight > pre { + line-height: normal; +} + +/* Keep fields from being strangely far apart due to inheirited table CSS. */ +.rst-content table.field-list th.field-name { + padding-top: 1px; + padding-bottom: 1px; +} +.rst-content table.field-list td.field-body { + padding-top: 1px; + padding-bottom: 1px; +} + +@media screen { + + /* content column + * + * RTD theme's default is 800px as max width for the content, but we have + * tables with tons of columns, which need the full width of the view-port. + */ + + .wy-nav-content{max-width: none; } + + /* table: + * + * - Sequences of whitespace should collapse into a single whitespace. 
+ * - make the overflow auto (scrollbar if needed) + * - align caption "left" ("center" is unsuitable on vast tables) + */ + + .wy-table-responsive table td { white-space: normal; } + .wy-table-responsive { overflow: auto; } + .rst-content table.docutils caption { text-align: left; font-size: 100%; } + + /* captions: + * + * - captions should have 100% (not 85%) font size + * - hide the permalink symbol as long as link is not hovered + */ + + .toc-title { + font-size: 150%; + font-weight: bold; + } + + caption, .wy-table caption, .rst-content table.field-list caption { + font-size: 100%; + } + caption a.headerlink { opacity: 0; } + caption a.headerlink:hover { opacity: 1; } + + /* Menu selection and keystrokes */ + + span.menuselection { + color: blue; + font-family: "Courier New", Courier, monospace + } + + code.kbd, code.kbd span { + color: white; + background-color: darkblue; + font-weight: bold; + font-family: "Courier New", Courier, monospace + } + + /* fix bottom margin of lists items */ + + .rst-content .section ul li:last-child, .rst-content .section ul li p:last-child { + margin-bottom: 12px; + } + + /* inline literal: drop the borderbox, padding and red color */ + + code, .rst-content tt, .rst-content code { + color: inherit; + border: none; + padding: unset; + background: inherit; + font-size: 85%; + } + + .rst-content tt.literal,.rst-content tt.literal,.rst-content code.literal { + color: inherit; + } +} diff --git a/docs/sphinx/depfile.py b/docs/sphinx/depfile.py index 277fdf0f568..afdcbcec6e7 100644 --- a/docs/sphinx/depfile.py +++ b/docs/sphinx/depfile.py @@ -12,6 +12,8 @@ import os import sphinx +import sys +from pathlib import Path __version__ = '1.0' @@ -20,8 +22,21 @@ def get_infiles(env): yield env.doc2path(x) yield from ((os.path.join(env.srcdir, dep) for dep in env.dependencies[x])) + for mod in sys.modules.values(): + if hasattr(mod, '__file__'): + if mod.__file__: + yield mod.__file__ + # this is perhaps going to include unused files: + for static_path in env.config.html_static_path + env.config.templates_path: + for path in Path(static_path).rglob('*'): + yield str(path) -def write_depfile(app, env): + +def write_depfile(app, exception): + if exception: + return + + env = app.env if not env.config.depfile: return @@ -42,7 +57,7 @@ def write_depfile(app, env): def setup(app): app.add_config_value('depfile', None, 'env') app.add_config_value('depfile_stamp', None, 'env') - app.connect('env-updated', write_depfile) + app.connect('build-finished', write_depfile) return dict( version = __version__, diff --git a/docs/sphinx/qapidoc.py b/docs/sphinx/qapidoc.py index b7a2d39c105..d791b594923 100644 --- a/docs/sphinx/qapidoc.py +++ b/docs/sphinx/qapidoc.py @@ -34,7 +34,8 @@ from sphinx.util.nodes import nested_parse_with_titles import sphinx from qapi.gen import QAPISchemaVisitor -from qapi.schema import QAPIError, QAPISemError, QAPISchema +from qapi.error import QAPIError, QAPISemError +from qapi.schema import QAPISchema # Sphinx up to 1.6 uses AutodocReporter; 1.7 and later @@ -111,17 +112,19 @@ def _make_section(self, title): def _nodes_for_ifcond(self, ifcond, with_if=True): """Return list of Text, literal nodes for the ifcond - Return a list which gives text like ' (If: cond1, cond2, cond3)', where - the conditions are in literal-text and the commas are not. + Return a list which gives text like ' (If: condition)'. If with_if is False, we don't return the "(If: " and ")". 
""" - condlist = intersperse([nodes.literal('', c) for c in ifcond], - nodes.Text(', ')) + + doc = ifcond.docgen() + if not doc: + return [] + doc = nodes.literal('', doc) if not with_if: - return condlist + return [doc] nodelist = [nodes.Text(' ('), nodes.strong('', 'If: ')] - nodelist.extend(condlist) + nodelist.append(doc) nodelist.append(nodes.Text(')')) return nodelist @@ -138,7 +141,7 @@ def _nodes_for_one_member(self, member): term.append(nodes.literal('', member.type.doc_type())) if member.optional: term.append(nodes.Text(' (optional)')) - if member.ifcond: + if member.ifcond.is_present(): term.extend(self._nodes_for_ifcond(member.ifcond)) return term @@ -153,7 +156,7 @@ def _nodes_for_variant_when(self, variants, variant): nodes.literal('', variants.tag_member.name), nodes.Text(' is '), nodes.literal('', '"%s"' % variant.name)] - if variant.ifcond: + if variant.ifcond.is_present(): term.extend(self._nodes_for_ifcond(variant.ifcond)) return term @@ -208,7 +211,7 @@ def _nodes_for_enum_values(self, doc): dlnode = nodes.definition_list() for section in doc.args.values(): termtext = [nodes.literal('', section.member.name)] - if section.member.ifcond: + if section.member.ifcond.is_present(): termtext.extend(self._nodes_for_ifcond(section.member.ifcond)) # TODO drop fallbacks when undocumented members are outlawed if section.text: @@ -276,7 +279,7 @@ def _nodes_for_sections(self, doc): def _nodes_for_if_section(self, ifcond): """Return list of doctree nodes for the "If" section""" nodelist = [] - if ifcond: + if ifcond.is_present(): snode = self._make_section('If') snode += nodes.paragraph( '', '', *self._nodes_for_ifcond(ifcond, with_if=False) diff --git a/docs/system/_templates/editpage.html b/docs/system/_templates/editpage.html deleted file mode 100644 index 6586b2e2579..00000000000 --- a/docs/system/_templates/editpage.html +++ /dev/null @@ -1,5 +0,0 @@ -
- -
diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst index d1fb8f25b39..cec87e3743d 100644 --- a/docs/system/arm/aspeed.rst +++ b/docs/system/arm/aspeed.rst @@ -5,7 +5,7 @@ The QEMU Aspeed machines model BMCs of various OpenPOWER systems and Aspeed evaluation boards. They are based on different releases of the Aspeed SoC : the AST2400 integrating an ARM926EJ-S CPU (400MHz), the AST2500 with an ARM1176JZS CPU (800MHz) and more recently the AST2600 -with dual cores ARM Cortex A7 CPUs (1.2GHz). +with dual cores ARM Cortex-A7 CPUs (1.2GHz). The SoC comes with RAM, Gigabit ethernet, USB, SD/MMC, USB, SPI, I2C, etc. @@ -13,6 +13,7 @@ etc. AST2400 SoC based machines : - ``palmetto-bmc`` OpenPOWER Palmetto POWER8 BMC +- ``quanta-q71l-bmc`` OpenBMC Quanta BMC AST2500 SoC based machines : @@ -24,7 +25,7 @@ AST2500 SoC based machines : AST2600 SoC based machines : -- ``ast2600-evb`` Aspeed AST2600 Evaluation board (Cortex A7) +- ``ast2600-evb`` Aspeed AST2600 Evaluation board (Cortex-A7) - ``tacoma-bmc`` OpenPOWER Witherspoon POWER9 AST2600 BMC Supported devices @@ -49,6 +50,7 @@ Supported devices * Ethernet controllers * Front LEDs (PCA9552 on I2C bus) * LPC Peripheral Controller (a subset of subdevices are supported) + * Hash/Crypto Engine (HACE) - Hash support only. TODO: HMAC and RSA Missing devices @@ -59,7 +61,6 @@ Missing devices * PWM and Fan Controller * Slave GPIO Controller * Super I/O Controller - * Hash/Crypto Engine * PCI-Express 1 Controller * Graphic Display Controller * PECI Controller diff --git a/docs/system/arm/cpu-features.rst b/docs/system/arm/cpu-features.rst index c455442eaf5..584eb170974 100644 --- a/docs/system/arm/cpu-features.rst +++ b/docs/system/arm/cpu-features.rst @@ -10,22 +10,22 @@ is the Performance Monitoring Unit (PMU). CPU types such as the Cortex-A15 and the Cortex-A57, which respectively implement Arm architecture reference manuals ARMv7-A and ARMv8-A, may both optionally implement PMUs. For example, if a user wants to use a Cortex-A15 without -a PMU, then the `-cpu` parameter should contain `pmu=off` on the QEMU -command line, i.e. `-cpu cortex-a15,pmu=off`. +a PMU, then the ``-cpu`` parameter should contain ``pmu=off`` on the QEMU +command line, i.e. ``-cpu cortex-a15,pmu=off``. As not all CPU types support all optional CPU features, then whether or not a CPU property exists depends on the CPU type. For example, CPUs that implement the ARMv8-A architecture reference manual may optionally support the AArch32 CPU feature, which may be enabled by disabling the -`aarch64` CPU property. A CPU type such as the Cortex-A15, which does -not implement ARMv8-A, will not have the `aarch64` CPU property. +``aarch64`` CPU property. A CPU type such as the Cortex-A15, which does +not implement ARMv8-A, will not have the ``aarch64`` CPU property. QEMU's support may be limited for some CPU features, only partially supporting the feature or only supporting the feature under certain -configurations. For example, the `aarch64` CPU feature, which, when +configurations. For example, the ``aarch64`` CPU feature, which, when disabled, enables the optional AArch32 CPU feature, is only supported when using the KVM accelerator and when running on a host CPU type that -supports the feature. While `aarch64` currently only works with KVM, +supports the feature. While ``aarch64`` currently only works with KVM, it could work with TCG. CPU features that are specific to KVM are prefixed with "kvm-" and are described in "KVM VCPU Features". 
@@ -33,12 +33,12 @@ CPU Feature Probing =================== Determining which CPU features are available and functional for a given -CPU type is possible with the `query-cpu-model-expansion` QMP command. -Below are some examples where `scripts/qmp/qmp-shell` (see the top comment +CPU type is possible with the ``query-cpu-model-expansion`` QMP command. +Below are some examples where ``scripts/qmp/qmp-shell`` (see the top comment block in the script for usage) is used to issue the QMP commands. -1. Determine which CPU features are available for the `max` CPU type - (Note, we started QEMU with qemu-system-aarch64, so `max` is +1. Determine which CPU features are available for the ``max`` CPU type + (Note, we started QEMU with qemu-system-aarch64, so ``max`` is implementing the ARMv8-A reference manual in this case):: (QEMU) query-cpu-model-expansion type=full model={"name":"max"} @@ -51,9 +51,9 @@ block in the script for usage) is used to issue the QMP commands. "sve896": true, "sve1280": true, "sve2048": true }}}} -We see that the `max` CPU type has the `pmu`, `aarch64`, `sve`, and many -`sve` CPU features. We also see that all the CPU features are -enabled, as they are all `true`. (The `sve` CPU features are all +We see that the ``max`` CPU type has the ``pmu``, ``aarch64``, ``sve``, and many +``sve`` CPU features. We also see that all the CPU features are +enabled, as they are all ``true``. (The ``sve`` CPU features are all optional SVE vector lengths (see "SVE CPU Properties"). While with TCG all SVE vector lengths can be supported, when KVM is in use it's more likely that only a few lengths will be supported, if SVE is supported at @@ -71,9 +71,9 @@ all.) "sve896": true, "sve1280": true, "sve2048": true }}}} -We see it worked, as `pmu` is now `false`. +We see it worked, as ``pmu`` is now ``false``. -(3) Let's try to disable `aarch64`, which enables the AArch32 CPU feature:: +(3) Let's try to disable ``aarch64``, which enables the AArch32 CPU feature:: (QEMU) query-cpu-model-expansion type=full model={"name":"max","props":{"aarch64":false}} {"error": { @@ -84,7 +84,7 @@ We see it worked, as `pmu` is now `false`. It looks like this feature is limited to a configuration we do not currently have. -(4) Let's disable `sve` and see what happens to all the optional SVE +(4) Let's disable ``sve`` and see what happens to all the optional SVE vector lengths:: (QEMU) query-cpu-model-expansion type=full model={"name":"max","props":{"sve":false}} @@ -97,14 +97,14 @@ currently have. "sve896": false, "sve1280": false, "sve2048": false }}}} -As expected they are now all `false`. +As expected they are now all ``false``. (5) Let's try probing CPU features for the Cortex-A15 CPU type:: (QEMU) query-cpu-model-expansion type=full model={"name":"cortex-a15"} {"return": {"model": {"name": "cortex-a15", "props": {"pmu": true}}}} -Only the `pmu` CPU feature is available. +Only the ``pmu`` CPU feature is available. A note about CPU feature dependencies ------------------------------------- @@ -123,29 +123,29 @@ A note about CPU models and KVM ------------------------------- Named CPU models generally do not work with KVM. There are a few cases -that do work, e.g. using the named CPU model `cortex-a57` with KVM on a -seattle host, but mostly if KVM is enabled the `host` CPU type must be +that do work, e.g. using the named CPU model ``cortex-a57`` with KVM on a +seattle host, but mostly if KVM is enabled the ``host`` CPU type must be used. 
This means the guest is provided all the same CPU features as the -host CPU type has. And, for this reason, the `host` CPU type should +host CPU type has. And, for this reason, the ``host`` CPU type should enable all CPU features that the host has by default. Indeed it's even a bit strange to allow disabling CPU features that the host has when using -the `host` CPU type, but in the absence of CPU models it's the best we can +the ``host`` CPU type, but in the absence of CPU models it's the best we can do if we want to launch guests without all the host's CPU features enabled. -Enabling KVM also affects the `query-cpu-model-expansion` QMP command. The +Enabling KVM also affects the ``query-cpu-model-expansion`` QMP command. The affect is not only limited to specific features, as pointed out in example (3) of "CPU Feature Probing", but also to which CPU types may be expanded. -When KVM is enabled, only the `max`, `host`, and current CPU type may be +When KVM is enabled, only the ``max``, ``host``, and current CPU type may be expanded. This restriction is necessary as it's not possible to know all CPU types that may work with KVM, but it does impose a small risk of users experiencing unexpected errors. For example on a seattle, as mentioned -above, the `cortex-a57` CPU type is also valid when KVM is enabled. -Therefore a user could use the `host` CPU type for the current type, but -then attempt to query `cortex-a57`, however that query will fail with our +above, the ``cortex-a57`` CPU type is also valid when KVM is enabled. +Therefore a user could use the ``host`` CPU type for the current type, but +then attempt to query ``cortex-a57``, however that query will fail with our restrictions. This shouldn't be an issue though as management layers and -users have been preferring the `host` CPU type for use with KVM for quite +users have been preferring the ``host`` CPU type for use with KVM for quite some time. Additionally, if the KVM-enabled QEMU instance running on a -seattle host is using the `cortex-a57` CPU type, then querying `cortex-a57` +seattle host is using the ``cortex-a57`` CPU type, then querying ``cortex-a57`` will work. Using CPU Features @@ -158,12 +158,12 @@ QEMU command line with that CPU type:: $ qemu-system-aarch64 -M virt -cpu max,pmu=off,sve=on,sve128=on,sve256=on The example above disables the PMU and enables the first two SVE vector -lengths for the `max` CPU type. Note, the `sve=on` isn't actually -necessary, because, as we observed above with our probe of the `max` CPU -type, `sve` is already on by default. Also, based on our probe of +lengths for the ``max`` CPU type. Note, the ``sve=on`` isn't actually +necessary, because, as we observed above with our probe of the ``max`` CPU +type, ``sve`` is already on by default. Also, based on our probe of defaults, it would seem we need to disable many SVE vector lengths, rather than only enabling the two we want. This isn't the case, because, as -disabling many SVE vector lengths would be quite verbose, the `sve` CPU +disabling many SVE vector lengths would be quite verbose, the ``sve`` CPU properties have special semantics (see "SVE CPU Property Parsing Semantics"). @@ -217,11 +217,11 @@ TCG VCPU Features TCG VCPU features are CPU features that are specific to TCG. Below is the list of TCG VCPU features and their descriptions. - pauth Enable or disable `FEAT_Pauth`, pointer + pauth Enable or disable ``FEAT_Pauth``, pointer authentication. By default, the feature is - enabled with `-cpu max`. + enabled with ``-cpu max``. 
- pauth-impdef When `FEAT_Pauth` is enabled, either the + pauth-impdef When ``FEAT_Pauth`` is enabled, either the *impdef* (Implementation Defined) algorithm is enabled or the *architected* QARMA algorithm is enabled. By default the impdef algorithm @@ -235,49 +235,49 @@ Below is the list of TCG VCPU features and their descriptions. SVE CPU Properties ================== -There are two types of SVE CPU properties: `sve` and `sve`. The first -is used to enable or disable the entire SVE feature, just as the `pmu` +There are two types of SVE CPU properties: ``sve`` and ``sve``. The first +is used to enable or disable the entire SVE feature, just as the ``pmu`` CPU property completely enables or disables the PMU. The second type -is used to enable or disable specific vector lengths, where `N` is the -number of bits of the length. The `sve` CPU properties have special +is used to enable or disable specific vector lengths, where ``N`` is the +number of bits of the length. The ``sve`` CPU properties have special dependencies and constraints, see "SVE CPU Property Dependencies and Constraints" below. Additionally, as we want all supported vector lengths to be enabled by default, then, in order to avoid overly verbose command -lines (command lines full of `sve=off`, for all `N` not wanted), we +lines (command lines full of ``sve=off``, for all ``N`` not wanted), we provide the parsing semantics listed in "SVE CPU Property Parsing Semantics". SVE CPU Property Dependencies and Constraints --------------------------------------------- - 1) At least one vector length must be enabled when `sve` is enabled. + 1) At least one vector length must be enabled when ``sve`` is enabled. - 2) If a vector length `N` is enabled, then, when KVM is enabled, all + 2) If a vector length ``N`` is enabled, then, when KVM is enabled, all smaller, host supported vector lengths must also be enabled. If KVM is not enabled, then only all the smaller, power-of-two vector lengths must be enabled. E.g. with KVM if the host supports all - vector lengths up to 512-bits (128, 256, 384, 512), then if `sve512` + vector lengths up to 512-bits (128, 256, 384, 512), then if ``sve512`` is enabled, the 128-bit vector length, 256-bit vector length, and 384-bit vector length must also be enabled. Without KVM, the 384-bit vector length would not be required. 3) If KVM is enabled then only vector lengths that the host CPU type support may be enabled. If SVE is not supported by the host, then - no `sve*` properties may be enabled. + no ``sve*`` properties may be enabled. SVE CPU Property Parsing Semantics ---------------------------------- - 1) If SVE is disabled (`sve=off`), then which SVE vector lengths + 1) If SVE is disabled (``sve=off``), then which SVE vector lengths are enabled or disabled is irrelevant to the guest, as the entire SVE feature is disabled and that disables all vector lengths for - the guest. However QEMU will still track any `sve` CPU - properties provided by the user. If later an `sve=on` is provided, - then the guest will get only the enabled lengths. If no `sve=on` + the guest. However QEMU will still track any ``sve`` CPU + properties provided by the user. If later an ``sve=on`` is provided, + then the guest will get only the enabled lengths. If no ``sve=on`` is provided and there are explicitly enabled vector lengths, then an error is generated. 
- 2) If SVE is enabled (`sve=on`), but no `sve` CPU properties are + 2) If SVE is enabled (``sve=on``), but no ``sve`` CPU properties are provided, then all supported vector lengths are enabled, which when KVM is not in use means including the non-power-of-two lengths, and, when KVM is in use, it means all vector lengths supported by the host @@ -293,7 +293,7 @@ SVE CPU Property Parsing Semantics constraint (2) of "SVE CPU Property Dependencies and Constraints"). 5) When KVM is enabled, if the host does not support SVE, then an error - is generated when attempting to enable any `sve*` properties (see + is generated when attempting to enable any ``sve*`` properties (see constraint (3) of "SVE CPU Property Dependencies and Constraints"). 6) When KVM is enabled, if the host does support SVE, then an error is @@ -301,8 +301,8 @@ SVE CPU Property Parsing Semantics by the host (see constraint (3) of "SVE CPU Property Dependencies and Constraints"). - 7) If one or more `sve` CPU properties are set `off`, but no `sve`, - CPU properties are set `on`, then the specified vector lengths are + 7) If one or more ``sve`` CPU properties are set ``off``, but no ``sve``, + CPU properties are set ``on``, then the specified vector lengths are disabled but the default for any unspecified lengths remains enabled. When KVM is not enabled, disabling a power-of-two vector length also disables all vector lengths larger than the power-of-two length. @@ -310,15 +310,15 @@ SVE CPU Property Parsing Semantics disables all larger vector lengths (see constraint (2) of "SVE CPU Property Dependencies and Constraints"). - 8) If one or more `sve` CPU properties are set to `on`, then they + 8) If one or more ``sve`` CPU properties are set to ``on``, then they are enabled and all unspecified lengths default to disabled, except for the required lengths per constraint (2) of "SVE CPU Property Dependencies and Constraints", which will even be auto-enabled if they were not explicitly enabled. - 9) If SVE was disabled (`sve=off`), allowing all vector lengths to be + 9) If SVE was disabled (``sve=off``), allowing all vector lengths to be explicitly disabled (i.e. avoiding the error specified in (3) of - "SVE CPU Property Parsing Semantics"), then if later an `sve=on` is + "SVE CPU Property Parsing Semantics"), then if later an ``sve=on`` is provided an error will be generated. To avoid this error, one must enable at least one vector length prior to enabling SVE. @@ -329,12 +329,12 @@ SVE CPU Property Examples $ qemu-system-aarch64 -M virt -cpu max,sve=off - 2) Implicitly enable all vector lengths for the `max` CPU type:: + 2) Implicitly enable all vector lengths for the ``max`` CPU type:: $ qemu-system-aarch64 -M virt -cpu max 3) When KVM is enabled, implicitly enable all host CPU supported vector - lengths with the `host` CPU type:: + lengths with the ``host`` CPU type:: $ qemu-system-aarch64 -M virt,accel=kvm -cpu host @@ -376,3 +376,18 @@ verbose command lines. However, the recommended way to select vector lengths is to explicitly enable each desired length. Therefore only example's (1), (4), and (6) exhibit recommended uses of the properties. +SVE User-mode Default Vector Length Property +-------------------------------------------- + +For qemu-aarch64, the cpu property ``sve-default-vector-length=N`` is +defined to mirror the Linux kernel parameter file +``/proc/sys/abi/sve_default_vector_length``. The default length, ``N``, +is in units of bytes and must be between 16 and 8192. 
+If not specified, the default vector length is 64. + +If the default length is larger than the maximum vector length enabled, +the actual vector length will be reduced. Note that the maximum vector +length supported by QEMU is 256. + +If this property is set to ``-1`` then the default vector length +is set to the maximum possible length. diff --git a/docs/system/arm/cubieboard.rst b/docs/system/arm/cubieboard.rst new file mode 100644 index 00000000000..344ff8cef99 --- /dev/null +++ b/docs/system/arm/cubieboard.rst @@ -0,0 +1,16 @@ +Cubietech Cubieboard (``cubieboard``) +===================================== + +The ``cubieboard`` model emulates the Cubietech Cubieboard, +which is a Cortex-A8 based single-board computer using +the AllWinner A10 SoC. + +Emulated devices: + +- Timer +- UART +- RTC +- EMAC +- SDHCI +- USB controller +- SATA controller diff --git a/docs/system/arm/emcraft-sf2.rst b/docs/system/arm/emcraft-sf2.rst new file mode 100644 index 00000000000..377e2487206 --- /dev/null +++ b/docs/system/arm/emcraft-sf2.rst @@ -0,0 +1,15 @@ +Emcraft SmartFusion2 SOM kit (``emcraft-sf2``) +============================================== + +The ``emcraft-sf2`` board emulates the SmartFusion2 SOM kit from +Emcraft (M2S010). This is a System-on-Module from EmCraft systems, +based on the SmartFusion2 SoC FPGA from Microsemi Corporation. +The SoC is based on a Cortex-M4 processor. + +Emulated devices: + +- System timer +- System registers +- SPI controller +- UART +- EMAC diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst new file mode 100644 index 00000000000..144dc491d95 --- /dev/null +++ b/docs/system/arm/emulation.rst @@ -0,0 +1,103 @@ +A-profile CPU architecture support +================================== + +QEMU's TCG emulation includes support for the Armv5, Armv6, Armv7 and +Armv8 versions of the A-profile architecture. 
It also has support for
+the following architecture extensions:
+
+- FEAT_AA32BF16 (AArch32 BFloat16 instructions)
+- FEAT_AA32HPD (AArch32 hierarchical permission disables)
+- FEAT_AA32I8MM (AArch32 Int8 matrix multiplication instructions)
+- FEAT_AES (AESD and AESE instructions)
+- FEAT_BF16 (AArch64 BFloat16 instructions)
+- FEAT_BTI (Branch Target Identification)
+- FEAT_DIT (Data Independent Timing instructions)
+- FEAT_DPB (DC CVAP instruction)
+- FEAT_DotProd (Advanced SIMD dot product instructions)
+- FEAT_FCMA (Floating-point complex number instructions)
+- FEAT_FHM (Floating-point half-precision multiplication instructions)
+- FEAT_FP16 (Half-precision floating-point data processing)
+- FEAT_FRINTTS (Floating-point to integer instructions)
+- FEAT_FlagM (Flag manipulation instructions v2)
+- FEAT_FlagM2 (Enhancements to flag manipulation instructions)
+- FEAT_HPDS (Hierarchical permission disables)
+- FEAT_I8MM (AArch64 Int8 matrix multiplication instructions)
+- FEAT_JSCVT (JavaScript conversion instructions)
+- FEAT_LOR (Limited ordering regions)
+- FEAT_LRCPC (Load-acquire RCpc instructions)
+- FEAT_LRCPC2 (Load-acquire RCpc instructions v2)
+- FEAT_LSE (Large System Extensions)
+- FEAT_MTE (Memory Tagging Extension)
+- FEAT_MTE2 (Memory Tagging Extension)
+- FEAT_MTE3 (MTE Asymmetric Fault Handling)
+- FEAT_PAN (Privileged access never)
+- FEAT_PAN2 (AT S1E1R and AT S1E1W instruction variants affected by PSTATE.PAN)
+- FEAT_PAuth (Pointer authentication)
+- FEAT_PMULL (PMULL, PMULL2 instructions)
+- FEAT_PMUv3p1 (PMU Extensions v3.1)
+- FEAT_PMUv3p4 (PMU Extensions v3.4)
+- FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions)
+- FEAT_RNG (Random number generator)
+- FEAT_SB (Speculation Barrier)
+- FEAT_SEL2 (Secure EL2)
+- FEAT_SHA1 (SHA1 instructions)
+- FEAT_SHA256 (SHA256 instructions)
+- FEAT_SHA3 (Advanced SIMD SHA3 instructions)
+- FEAT_SHA512 (Advanced SIMD SHA512 instructions)
+- FEAT_SM3 (Advanced SIMD SM3 instructions)
+- FEAT_SM4 (Advanced SIMD SM4 instructions)
+- FEAT_SPECRES (Speculation restriction instructions)
+- FEAT_SSBS (Speculative Store Bypass Safe)
+- FEAT_TLBIOS (TLB invalidate instructions in Outer Shareable domain)
+- FEAT_TLBIRANGE (TLB invalidate range instructions)
+- FEAT_TTCNP (Translation table Common not private translations)
+- FEAT_TTST (Small translation tables)
+- FEAT_UAO (Unprivileged Access Override control)
+- FEAT_VHE (Virtualization Host Extensions)
+- FEAT_VMID16 (16-bit VMID)
+- FEAT_XNX (Translation table stage 2 Unprivileged Execute-never)
+- SVE (The Scalable Vector Extension)
+- SVE2 (The Scalable Vector Extension v2)
+
+For information on the specifics of these extensions, please refer
+to the `Armv8-A Arm Architecture Reference Manual
+`_.
+
+When a specific named CPU is being emulated, only those features which
+are present in hardware for that CPU are emulated. (If a feature is
+not in the list above then it is not supported, even if the real
+hardware should have it.) The ``max`` CPU enables all features.
+
+R-profile CPU architecture support
+==================================
+
+QEMU's TCG emulation support for R-profile CPUs is currently limited.
+We emulate only the Cortex-R5 and Cortex-R5F CPUs.
+
+M-profile CPU architecture support
+==================================
+
+QEMU's TCG emulation includes support for Armv6-M, Armv7-M, Armv8-M, and
+Armv8.1-M versions of the M-profile architecture.
It also has support
+for the following architecture extensions:
+
+- FP (Floating-point Extension)
+- FPCXT (FPCXT access instructions)
+- HP (Half-precision floating-point instructions)
+- LOB (Low Overhead loops and Branch future)
+- M (Main Extension)
+- MPU (Memory Protection Unit Extension)
+- PXN (Privileged Execute Never)
+- RAS (Reliability, Serviceability and Availability): "minimum RAS Extension" only
+- S (Security Extension)
+- ST (System Timer Extension)
+
+For information on the specifics of these extensions, please refer
+to the `Armv8-M Arm Architecture Reference Manual
+`_.
+
+When a specific named CPU is being emulated, only those features which
+are present in hardware for that CPU are emulated. (If a feature is
+not in the list above then it is not supported, even if the real
+hardware should have it.) There is no equivalent of the ``max`` CPU for
+M-profile.
diff --git a/docs/system/arm/highbank.rst b/docs/system/arm/highbank.rst
new file mode 100644
index 00000000000..bb4965b367f
--- /dev/null
+++ b/docs/system/arm/highbank.rst
@@ -0,0 +1,19 @@
+Calxeda Highbank and Midway (``highbank``, ``midway``)
+======================================================
+
+``highbank`` is a model of the Calxeda Highbank (ECX-1000) system,
+which has four Cortex-A9 cores.
+
+``midway`` is a model of the Calxeda Midway (ECX-2000) system,
+which has four Cortex-A15 cores.
+
+Emulated devices:
+
+- L2x0 cache controller
+- SP804 dual timer
+- PL011 UART
+- PL061 GPIOs
+- PL031 RTC
+- PL022 synchronous serial port controller
+- AHCI
+- XGMAC ethernet controllers
diff --git a/docs/system/arm/imx25-pdk.rst b/docs/system/arm/imx25-pdk.rst
new file mode 100644
index 00000000000..2a9711e8a79
--- /dev/null
+++ b/docs/system/arm/imx25-pdk.rst
@@ -0,0 +1,19 @@
+NXP i.MX25 PDK board (``imx25-pdk``)
+====================================
+
+The ``imx25-pdk`` board emulates the NXP i.MX25 Product Development Kit
+board, which is based on an i.MX25 SoC which uses an ARM926 CPU.
+
+Emulated devices:
+
+- SD controller
+- AVIC
+- CCM
+- GPT
+- EPIT timers
+- FEC
+- RNGC
+- I2C
+- GPIO controllers
+- Watchdog timer
+- USB controllers
diff --git a/docs/system/arm/kzm.rst b/docs/system/arm/kzm.rst
new file mode 100644
index 00000000000..bb018fbdf7c
--- /dev/null
+++ b/docs/system/arm/kzm.rst
@@ -0,0 +1,18 @@
+Kyoto Microcomputer KZM-ARM11-01 (``kzm``)
+==========================================
+
+The ``kzm`` board emulates the Kyoto Microcomputer KZM-ARM11-01
+evaluation board, which is based on an NXP i.MX31 SoC
+which uses an ARM1136 CPU.
+
+Emulated devices:
+
+- UARTs
+- LAN9118 ethernet
+- AVIC
+- CCM
+- GPT
+- EPIT timers
+- I2C
+- GPIO controllers
+- Watchdog timer
diff --git a/docs/system/arm/mainstone.rst b/docs/system/arm/mainstone.rst
new file mode 100644
index 00000000000..05310f42c7f
--- /dev/null
+++ b/docs/system/arm/mainstone.rst
@@ -0,0 +1,25 @@
+Intel Mainstone II board (``mainstone``)
+========================================
+
+The ``mainstone`` board emulates the Intel Mainstone II development
+board, which uses a PXA270 CPU.
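For a quick smoke test (a sketch only; ``zImage.mainstone`` is a placeholder name for a PXA270 Linux kernel image), the board can be started by passing a kernel directly::

   $ qemu-system-arm -M mainstone -kernel zImage.mainstone -nographic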
+ +Emulated devices: + +- Flash memory +- Keypad +- MMC controller +- 91C111 ethernet +- PIC +- Timer +- DMA +- GPIO +- FIR +- Serial +- LCD controller +- SSP +- USB controller +- RTC +- PCMCIA +- I2C +- I2S diff --git a/docs/system/arm/mps2.rst b/docs/system/arm/mps2.rst index f83b1517871..8a75beb3a08 100644 --- a/docs/system/arm/mps2.rst +++ b/docs/system/arm/mps2.rst @@ -45,3 +45,13 @@ Differences between QEMU and real hardware: flash, but only as simple ROM, so attempting to rewrite the flash from the guest will fail - QEMU does not model the USB controller in MPS3 boards + +Machine-specific options +"""""""""""""""""""""""" + +The following machine-specific options are supported: + +remap + Supported for ``mps3-an524`` only. + Set ``BRAM``/``QSPI`` to select the initial memory mapping. The + default is ``BRAM``. diff --git a/docs/system/arm/nrf.rst b/docs/system/arm/nrf.rst new file mode 100644 index 00000000000..eda87bd7602 --- /dev/null +++ b/docs/system/arm/nrf.rst @@ -0,0 +1,51 @@ +Nordic nRF boards (``microbit``) +================================ + +The `Nordic nRF`_ chips are a family of ARM-based System-on-Chip that +are designed to be used for low-power and short-range wireless solutions. + +.. _Nordic nRF: https://www.nordicsemi.com/Products + +The nRF51 series is the first series for short range wireless applications. +It is superseded by the nRF52 series. +The following machines are based on this chip : + +- ``microbit`` BBC micro:bit board with nRF51822 SoC + +There are other series such as nRF52, nRF53 and nRF91 which are currently not +supported by QEMU. + +Supported devices +----------------- + + * ARM Cortex-M0 (ARMv6-M) + * Serial ports (UART) + * Clock controller + * Timers + * Random Number Generator (RNG) + * GPIO controller + * NVMC + * SWI + +Missing devices +--------------- + + * Watchdog + * Real-Time Clock (RTC) controller + * TWI (i2c) + * SPI controller + * Analog to Digital Converter (ADC) + * Quadrature decoder + * Radio + +Boot options +------------ + +The Micro:bit machine can be started using the ``-device`` option to load a +firmware in `ihex format`_. Example: + +.. _ihex format: https://en.wikipedia.org/wiki/Intel_HEX + +.. code-block:: bash + + $ qemu-system-arm -M microbit -device loader,file=test.hex diff --git a/docs/system/arm/nuvoton.rst b/docs/system/arm/nuvoton.rst index d3cf2d9cd7e..adf497e6791 100644 --- a/docs/system/arm/nuvoton.rst +++ b/docs/system/arm/nuvoton.rst @@ -1,24 +1,26 @@ -Nuvoton iBMC boards (``npcm750-evb``, ``quanta-gsj``) -===================================================== +Nuvoton iBMC boards (``*-bmc``, ``npcm750-evb``, ``quanta-gsj``) +================================================================ The `Nuvoton iBMC`_ chips (NPCM7xx) are a family of ARM-based SoCs that are designed to be used as Baseboard Management Controllers (BMCs) in various -servers. They all feature one or two ARM Cortex A9 CPU cores, as well as an +servers. They all feature one or two ARM Cortex-A9 CPU cores, as well as an assortment of peripherals targeted for either Enterprise or Data Center / Hyperscale applications. The former is a superset of the latter, so NPCM750 has all the peripherals of NPCM730 and more. .. _Nuvoton iBMC: https://www.nuvoton.com/products/cloud-computing/ibmc/ -The NPCM750 SoC has two Cortex A9 cores and is targeted for the Enterprise +The NPCM750 SoC has two Cortex-A9 cores and is targeted for the Enterprise segment. 
The following machines are based on this chip : - ``npcm750-evb`` Nuvoton NPCM750 Evaluation board -The NPCM730 SoC has two Cortex A9 cores and is targeted for Data Center and +The NPCM730 SoC has two Cortex-A9 cores and is targeted for Data Center and Hyperscale applications. The following machines are based on this chip : +- ``quanta-gbs-bmc`` Quanta GBS server BMC - ``quanta-gsj`` Quanta GSJ server BMC +- ``kudo-bmc`` Fii USA Kudo server BMC There are also two more SoCs, NPCM710 and NPCM705, which are single-core variants of NPCM750 and NPCM730, respectively. These are currently not @@ -78,7 +80,7 @@ Boot options ------------ The Nuvoton machines can boot from an OpenBMC firmware image, or directly into -a kernel using the ``-kernel`` option. OpenBMC images for `quanta-gsj` and +a kernel using the ``-kernel`` option. OpenBMC images for ``quanta-gsj`` and possibly others can be downloaded from the OpenPOWER jenkins : https://openpower.xyz/ diff --git a/docs/system/arm/orangepi.rst b/docs/system/arm/orangepi.rst index 6f23907fb69..83c7445197b 100644 --- a/docs/system/arm/orangepi.rst +++ b/docs/system/arm/orangepi.rst @@ -128,7 +128,7 @@ Alternatively, you can also choose to build you own image with buildroot using the orangepi_pc_defconfig. Also see https://buildroot.org for more information. When using an image as an SD card, it must be resized to a power of two. This can be -done with the qemu-img command. It is recommended to only increase the image size +done with the ``qemu-img`` command. It is recommended to only increase the image size instead of shrinking it to a power of two, to avoid loss of data. For example, to prepare a downloaded Armbian image, first extract it and then increase its size to one gigabyte as follows: @@ -250,14 +250,14 @@ and set the following environment variables before booting: Optionally you may save the environment variables to SD card with 'saveenv'. To continue booting simply give the 'boot' command and NetBSD boots. -Orange Pi PC acceptance tests -""""""""""""""""""""""""""""" +Orange Pi PC integration tests +"""""""""""""""""""""""""""""" -The Orange Pi PC machine has several acceptance tests included. +The Orange Pi PC machine has several integration tests included. To run the whole set of tests, build QEMU from source and simply provide the following command: .. 
code-block:: bash $ AVOCADO_ALLOW_LARGE_STORAGE=yes avocado --show=app,console run \ - -t machine:orangepi-pc tests/acceptance/boot_linux_console.py + -t machine:orangepi-pc tests/avocado/boot_linux_console.py diff --git a/docs/system/arm/sabrelite.rst b/docs/system/arm/sabrelite.rst index 71713310e3a..4ccb0560afe 100644 --- a/docs/system/arm/sabrelite.rst +++ b/docs/system/arm/sabrelite.rst @@ -10,7 +10,7 @@ Supported devices The SABRE Lite machine supports the following devices: - * Up to 4 Cortex A9 cores + * Up to 4 Cortex-A9 cores * Generic Interrupt Controller * 1 Clock Controller Module * 1 System Reset Controller diff --git a/docs/system/arm/sbsa.rst b/docs/system/arm/sbsa.rst index b8ecfdb62fd..b499d7e9272 100644 --- a/docs/system/arm/sbsa.rst +++ b/docs/system/arm/sbsa.rst @@ -1,10 +1,10 @@ Arm Server Base System Architecture Reference board (``sbsa-ref``) ================================================================== -While the `virt` board is a generic board platform that doesn't match -any real hardware the `sbsa-ref` board intends to look like real +While the ``virt`` board is a generic board platform that doesn't match +any real hardware the ``sbsa-ref`` board intends to look like real hardware. The `Server Base System Architecture -` defines a +`_ defines a minimum base line of hardware support and importantly how the firmware reports that to any operating system. It is a static system that reports a very minimal DT to the firmware for non-discoverable diff --git a/docs/system/arm/stm32.rst b/docs/system/arm/stm32.rst new file mode 100644 index 00000000000..508b92cf862 --- /dev/null +++ b/docs/system/arm/stm32.rst @@ -0,0 +1,66 @@ +STMicroelectronics STM32 boards (``netduino2``, ``netduinoplus2``, ``stm32vldiscovery``) +======================================================================================== + +The `STM32`_ chips are a family of 32-bit ARM-based microcontroller by +STMicroelectronics. + +.. _STM32: https://www.st.com/en/microcontrollers-microprocessors/stm32-32-bit-arm-cortex-mcus.html + +The STM32F1 series is based on ARM Cortex-M3 core. The following machines are +based on this chip : + +- ``stm32vldiscovery`` STM32VLDISCOVERY board with STM32F100RBT6 microcontroller + +The STM32F2 series is based on ARM Cortex-M3 core. The following machines are +based on this chip : + +- ``netduino2`` Netduino 2 board with STM32F205RFT6 microcontroller + +The STM32F4 series is based on ARM Cortex-M4F core. This series is pin-to-pin +compatible with STM32F2 series. The following machines are based on this chip : + +- ``netduinoplus2`` Netduino Plus 2 board with STM32F405RGT6 microcontroller + +There are many other STM32 series that are currently not supported by QEMU. 
+ +Supported devices +----------------- + + * ARM Cortex-M3, Cortex M4F + * Analog to Digital Converter (ADC) + * EXTI interrupt + * Serial ports (USART) + * SPI controller + * System configuration (SYSCFG) + * Timer controller (TIMER) + +Missing devices +--------------- + + * Camera interface (DCMI) + * Controller Area Network (CAN) + * Cycle Redundancy Check (CRC) calculation unit + * Digital to Analog Converter (DAC) + * DMA controller + * Ethernet controller + * Flash Interface Unit + * GPIO controller + * I2C controller + * Inter-Integrated Sound (I2S) controller + * Power supply configuration (PWR) + * Random Number Generator (RNG) + * Real-Time Clock (RTC) controller + * Reset and Clock Controller (RCC) + * Secure Digital Input/Output (SDIO) interface + * USB OTG + * Watchdog controller (IWDG, WWDG) + +Boot options +------------ + +The STM32 machines can be started using the ``-kernel`` option to load a +firmware. Example: + +.. code-block:: bash + + $ qemu-system-arm -M stm32vldiscovery -kernel firmware.bin diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst index 27652adfae1..850787495be 100644 --- a/docs/system/arm/virt.rst +++ b/docs/system/arm/virt.rst @@ -1,7 +1,7 @@ 'virt' generic virtual platform (``virt``) ========================================== -The `virt` board is a platform which does not correspond to any +The ``virt`` board is a platform which does not correspond to any real hardware; it is designed for use in virtual machines. It is the recommended board type if you simply want to run a guest such as Linux and do not care about reproducing the @@ -55,6 +55,7 @@ Supported guest CPU types: - ``cortex-a53`` (64-bit) - ``cortex-a57`` (64-bit) - ``cortex-a72`` (64-bit) +- ``a64fx`` (64-bit) - ``host`` (with KVM only) - ``max`` (same as ``host`` for KVM; best possible emulation with TCG) diff --git a/docs/system/arm/xlnx-versal-virt.rst b/docs/system/arm/xlnx-versal-virt.rst index 27f73500d95..92ad10d2da4 100644 --- a/docs/system/arm/xlnx-versal-virt.rst +++ b/docs/system/arm/xlnx-versal-virt.rst @@ -32,6 +32,8 @@ Implemented devices: - OCM (256KB of On Chip Memory) - XRAM (4MB of on chip Accelerator RAM) - DDR memory +- BBRAM (36 bytes of Battery-backed RAM) +- eFUSE (3072 bytes of one-time field-programmable bit array) QEMU does not yet model any other devices, including the PL and the AI Engine. @@ -175,3 +177,50 @@ Run the following at the U-Boot prompt: fdt set /chosen/dom0 reg <0x00000000 0x40000000 0x0 0x03100000> booti 30000000 - 20000000 +BBRAM File Backend +"""""""""""""""""" +BBRAM can have an optional file backend, which must be a seekable +binary file with a size of 36 bytes or larger. A file with all +binary 0s is a 'blank'. + +To add a file-backend for the BBRAM: + +.. code-block:: bash + + -drive if=pflash,index=0,file=versal-bbram.bin,format=raw + +To use a different index value, N, from default of 0, add: + +.. code-block:: bash + + -global xlnx,bbram-ctrl.drive-index=N + +eFUSE File Backend +"""""""""""""""""" +eFUSE can have an optional file backend, which must be a seekable +binary file with a size of 3072 bytes or larger. A file with all +binary 0s is a 'blank'. + +To add a file-backend for the eFUSE: + +.. code-block:: bash + + -drive if=pflash,index=1,file=versal-efuse.bin,format=raw + +To use a different index value, N, from default of 1, add: + +.. code-block:: bash + + -global xlnx,efuse.drive-index=N + +.. warning:: + In actual physical Versal, BBRAM and eFUSE contain sensitive data. 
+ The QEMU device models do **not** encrypt nor obfuscate any data + when holding them in models' memory or when writing them to their + file backends. + + Thus, a file backend should be used with caution, and 'format=luks' + is highly recommended (albeit with usage complexity). + + Better yet, do not use actual product data when running guest image + on this Xilinx Versal Virt board. diff --git a/docs/system/authz.rst b/docs/system/authz.rst new file mode 100644 index 00000000000..55b7315e496 --- /dev/null +++ b/docs/system/authz.rst @@ -0,0 +1,257 @@ +.. _client authorization: + +Client authorization +-------------------- + +When configuring a QEMU network backend with either TLS certificates or SASL +authentication, access will be granted if the client successfully proves +their identity. If the authorization identity database is scoped to the QEMU +client this may be sufficient. It is common, however, for the identity database +to be much broader and thus authentication alone does not enable sufficient +access control. In this case QEMU provides a flexible system for enforcing +finer grained authorization on clients post-authentication. + +Identity providers +~~~~~~~~~~~~~~~~~~ + +At the time of writing there are two authentication frameworks used by QEMU +that emit an identity upon completion. + + * TLS x509 certificate distinguished name. + + When configuring the QEMU backend as a network server with TLS, there + are a choice of credentials to use. The most common scenario is to utilize + x509 certificates. The simplest configuration only involves issuing + certificates to the servers, allowing the client to avoid a MITM attack + against their intended server. + + It is possible, however, to enable mutual verification by requiring that + the client provide a certificate to the server to prove its own identity. + This is done by setting the property ``verify-peer=yes`` on the + ``tls-creds-x509`` object, which is in fact the default. + + When peer verification is enabled, client will need to be issued with a + certificate by the same certificate authority as the server. If this is + still not sufficiently strong access control the Distinguished Name of + the certificate can be used as an identity in the QEMU authorization + framework. + + * SASL username. + + When configuring the QEMU backend as a network server with SASL, upon + completion of the SASL authentication mechanism, a username will be + provided. The format of this username will vary depending on the choice + of mechanism configured for SASL. It might be a simple UNIX style user + ``joebloggs``, while if using Kerberos/GSSAPI it can have a realm + attached ``joebloggs@QEMU.ORG``. Whatever format the username is presented + in, it can be used with the QEMU authorization framework. + +Authorization drivers +~~~~~~~~~~~~~~~~~~~~~ + +The QEMU authorization framework is a general purpose design with choice of +user customizable drivers. These are provided as objects that can be +created at startup using the ``-object`` argument, or at runtime using the +``object_add`` monitor command. + +Simple +^^^^^^ + +This authorization driver provides a simple mechanism for granting access +based on an exact match against a single identity. This is useful when it is +known that only a single client is to be allowed access. + +A possible use case would be when configuring QEMU for an incoming live +migration. It is known exactly which source QEMU the migration is expected +to arrive from. 
The x509 certificate associated with this source QEMU would +thus be used as the identity to match against. Alternatively if the virtual +machine is dedicated to a specific tenant, then the VNC server would be +configured with SASL and the username of only that tenant listed. + +To create an instance of this driver via QMP: + +:: + + { + "execute": "object-add", + "arguments": { + "qom-type": "authz-simple", + "id": "authz0", + "identity": "fred" + } + } + + +Or via the command line + +:: + + -object authz-simple,id=authz0,identity=fred + + +List +^^^^ + +In some network backends it will be desirable to grant access to a range of +clients. This authorization driver provides a list mechanism for granting +access by matching identities against a list of permitted one. Each match +rule has an associated policy and a catch all policy applies if no rule +matches. The match can either be done as an exact string comparison, or can +use the shell-like glob syntax, which allows for use of wildcards. + +To create an instance of this class via QMP: + +:: + + { + "execute": "object-add", + "arguments": { + "qom-type": "authz-list", + "id": "authz0", + "rules": [ + { "match": "fred", "policy": "allow", "format": "exact" }, + { "match": "bob", "policy": "allow", "format": "exact" }, + { "match": "danb", "policy": "deny", "format": "exact" }, + { "match": "dan*", "policy": "allow", "format": "glob" } + ], + "policy": "deny" + } + } + + +Due to the way this driver requires setting nested properties, creating +it on the command line will require use of the JSON syntax for ``-object``. +In most cases, however, the next driver will be more suitable. + +List file +^^^^^^^^^ + +This is a variant on the previous driver that allows for a more dynamic +access control policy by storing the match rules in a standalone file +that can be reloaded automatically upon change. + +To create an instance of this class via QMP: + +:: + + { + "execute": "object-add", + "arguments": { + "qom-type": "authz-list-file", + "id": "authz0", + "filename": "/etc/qemu/myvm-vnc.acl", + "refresh": true + } + } + + +If ``refresh`` is ``yes``, inotify is used to monitor for changes +to the file and auto-reload the rules. + +The ``myvm-vnc.acl`` file should contain the match rules in a format that +closely matches the previous driver: + +:: + + { + "rules": [ + { "match": "fred", "policy": "allow", "format": "exact" }, + { "match": "bob", "policy": "allow", "format": "exact" }, + { "match": "danb", "policy": "deny", "format": "exact" }, + { "match": "dan*", "policy": "allow", "format": "glob" } + ], + "policy": "deny" + } + + +The object can be created on the command line using + +:: + + -object authz-list-file,id=authz0,\ + filename=/etc/qemu/myvm-vnc.acl,refresh=on + + +PAM +^^^ + +In some scenarios it might be desirable to integrate with authorization +mechanisms that are implemented outside of QEMU. In order to allow maximum +flexibility, QEMU provides a driver that uses the ``PAM`` framework. + +To create an instance of this class via QMP: + +:: + + { + "execute": "object-add", + "arguments": { + "qom-type": "authz-pam", + "id": "authz0", + "parameters": { + "service": "qemu-vnc-tls" + } + } + } + + +The driver only uses the PAM "account" verification +subsystem. The above config would require a config +file /etc/pam.d/qemu-vnc-tls. For a simple file +lookup it would contain + +:: + + account requisite pam_listfile.so item=user sense=allow \ + file=/etc/qemu/vnc.allow + + +The external file would then contain a list of usernames. 
+If x509 cert was being used as the username, a suitable +entry would match the distinguished name: + +:: + + CN=laptop.berrange.com,O=Berrange Home,L=London,ST=London,C=GB + + +On the command line it can be created using + +:: + + -object authz-pam,id=authz0,service=qemu-vnc-tls + + +There are a variety of PAM plugins that can be used which are not illustrated +here, and it is possible to implement brand new plugins using the PAM API. + + +Connecting backends +~~~~~~~~~~~~~~~~~~~ + +The authorization driver is created using the ``-object`` argument and then +needs to be associated with a network service. The authorization driver object +will be given a unique ID that needs to be referenced. + +The property to set in the network service will vary depending on the type of +identity to verify. By convention, any network server backend that uses TLS +will provide ``tls-authz`` property, while any server using SASL will provide +a ``sasl-authz`` property. + +Thus an example using SASL and authorization for the VNC server would look +like: + +:: + + $QEMU --object authz-simple,id=authz0,identity=fred \ + --vnc 0.0.0.0:1,sasl,sasl-authz=authz0 + +While to validate both the x509 certificate and SASL username: + +:: + + echo "CN=laptop.qemu.org,O=QEMU Project,L=London,ST=London,C=GB" >> tls.acl + $QEMU --object authz-simple,id=authz0,identity=fred \ + --object authz-list-file,id=authz1,filename=tls.acl \ + --object tls-creds-x509,id=tls0,dir=/etc/qemu/tls,verify-peer=yes \ + --vnc 0.0.0.0:1,sasl,sasl-authz=auth0,tls-creds=tls0,tls-authz=authz1 diff --git a/docs/system/barrier.rst b/docs/system/barrier.rst new file mode 100644 index 00000000000..155d7d29013 --- /dev/null +++ b/docs/system/barrier.rst @@ -0,0 +1,44 @@ +QEMU Barrier Client +=================== + +Generally, mouse and keyboard are grabbed through the QEMU video +interface emulation. + +But when we want to use a video graphic adapter via a PCI passthrough +there is no way to provide the keyboard and mouse inputs to the VM +except by plugging a second set of mouse and keyboard to the host +or by installing a KVM software in the guest OS. + +The QEMU Barrier client avoids this by implementing directly the Barrier +protocol into QEMU. + +`Barrier `__ +is a KVM (Keyboard-Video-Mouse) software forked from Symless's +synergy 1.9 codebase. + +This protocol is enabled by adding an input-barrier object to QEMU. + +Syntax:: + + input-barrier,id=,name= + [,server=][,port=] + [,x-origin=][,y-origin=] + [,width=][,height=] + +The object can be added on the QEMU command line, for instance with:: + + -object input-barrier,id=barrier0,name=VM-1 + +where VM-1 is the name the display configured in the Barrier server +on the host providing the mouse and the keyboard events. + +by default ```` is ``localhost``, +```` is ``24800``, ```` and ```` are set to ``0``, +```` and ```` to ``1920`` and ``1080``. 
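For instance (the server address below is an assumed example; point it at whichever host runs the Barrier server), a guest that takes its keyboard and mouse input from a remote Barrier server could be started with::

   $ qemu-system-x86_64 -M q35 \
       -object input-barrier,id=barrier0,name=VM-1,server=192.168.0.2,port=24800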
+ +If the Barrier server is stopped QEMU needs to be reconnected manually, +by removing and re-adding the input-barrier object, for instance +with the help of the HMP monitor:: + + (qemu) object_del barrier0 + (qemu) object_add input-barrier,id=barrier0,name=VM-1 diff --git a/docs/system/bootindex.rst b/docs/system/bootindex.rst new file mode 100644 index 00000000000..8b057f812f2 --- /dev/null +++ b/docs/system/bootindex.rst @@ -0,0 +1,76 @@ +Managing device boot order with bootindex properties +==================================================== + +QEMU can tell QEMU-aware guest firmware (like the x86 PC BIOS) +which order it should look for a bootable OS on which devices. +A simple way to set this order is to use the ``-boot order=`` option, +but you can also do this more flexibly, by setting a ``bootindex`` +property on the individual block or net devices you specify +on the QEMU command line. + +The ``bootindex`` properties are used to determine the order in which +firmware will consider devices for booting the guest OS. If the +``bootindex`` property is not set for a device, it gets the lowest +boot priority. There is no particular order in which devices with no +``bootindex`` property set will be considered for booting, but they +will still be bootable. + +Some guest machine types (for instance the s390x machines) do +not support ``-boot order=``; on those machines you must always +use ``bootindex`` properties. + +There is no way to set a ``bootindex`` property if you are using +a short-form option like ``-hda`` or ``-cdrom``, so to use +``bootindex`` properties you will need to expand out those options +into long-form ``-drive`` and ``-device`` option pairs. + +Example +------- + +Let's assume we have a QEMU machine with two NICs (virtio, e1000) and two +disks (IDE, virtio): + +.. parsed-literal:: + + |qemu_system| -drive file=disk1.img,if=none,id=disk1 \\ + -device ide-hd,drive=disk1,bootindex=4 \\ + -drive file=disk2.img,if=none,id=disk2 \\ + -device virtio-blk-pci,drive=disk2,bootindex=3 \\ + -netdev type=user,id=net0 \\ + -device virtio-net-pci,netdev=net0,bootindex=2 \\ + -netdev type=user,id=net1 \\ + -device e1000,netdev=net1,bootindex=1 + +Given the command above, firmware should try to boot from the e1000 NIC +first. If this fails, it should try the virtio NIC next; if this fails +too, it should try the virtio disk, and then the IDE disk. + +Limitations +----------- + +Some firmware has limitations on which devices can be considered for +booting. For instance, the PC BIOS boot specification allows only one +disk to be bootable. If boot from disk fails for some reason, the BIOS +won't retry booting from other disk. It can still try to boot from +floppy or net, though. + +Sometimes, firmware cannot map the device path QEMU wants firmware to +boot from to a boot method. It doesn't happen for devices the firmware +can natively boot from, but if firmware relies on an option ROM for +booting, and the same option ROM is used for booting from more then one +device, the firmware may not be able to ask the option ROM to boot from +a particular device reliably. For instance with the PC BIOS, if a SCSI HBA +has three bootable devices target1, target3, target5 connected to it, +the option ROM will have a boot method for each of them, but it is not +possible to map from boot method back to a specific target. This is a +shortcoming of the PC BIOS boot specification. 
+ +Mixing bootindex and boot order parameters +------------------------------------------ + +Note that it does not make sense to use the bootindex property together +with the ``-boot order=...`` (or ``-boot once=...``) parameter. The guest +firmware implementations normally either support the one or the other, +but not both parameters at the same time. Mixing them will result in +undefined behavior, and thus the guest firmware will likely not boot +from the expected devices. diff --git a/docs/system/build-platforms.rst b/docs/system/build-platforms.rst deleted file mode 100644 index 692323609e7..00000000000 --- a/docs/system/build-platforms.rst +++ /dev/null @@ -1,62 +0,0 @@ -.. _Supported-build-platforms: - -Supported build platforms -========================= - -QEMU aims to support building and executing on multiple host OS -platforms. This appendix outlines which platforms are the major build -targets. These platforms are used as the basis for deciding upon the -minimum required versions of 3rd party software QEMU depends on. The -supported platforms are the targets for automated testing performed by -the project when patches are submitted for review, and tested before and -after merge. - -If a platform is not listed here, it does not imply that QEMU won't -work. If an unlisted platform has comparable software versions to a -listed platform, there is every expectation that it will work. Bug -reports are welcome for problems encountered on unlisted platforms -unless they are clearly older vintage than what is described here. - -Note that when considering software versions shipped in distros as -support targets, QEMU considers only the version number, and assumes the -features in that distro match the upstream release with the same -version. In other words, if a distro backports extra features to the -software in their distro, QEMU upstream code will not add explicit -support for those backports, unless the feature is auto-detectable in a -manner that works for the upstream releases too. - -The `Repology`_ site is a useful resource to identify -currently shipped versions of software in various operating systems, -though it does not cover all distros listed below. - -Linux OS, macOS, FreeBSD, NetBSD, OpenBSD ------------------------------------------ - -The project aims to support the most recent major version at all times. Support -for the previous major version will be dropped 2 years after the new major -version is released or when the vendor itself drops support, whichever comes -first. In this context, third-party efforts to extend the lifetime of a distro -are not considered, even when they are endorsed by the vendor (eg. Debian LTS). - -For the purposes of identifying supported software versions available on Linux, -the project will look at CentOS, Debian, Fedora, openSUSE, RHEL, SLES and -Ubuntu LTS. Other distros will be assumed to ship similar software versions. - -For FreeBSD and OpenBSD, decisions will be made based on the contents of the -respective ports repository, while NetBSD will use the pkgsrc repository. - -For macOS, `HomeBrew`_ will be used, although `MacPorts`_ is expected to carry -similar versions. - -Windows -------- - -The project supports building with current versions of the MinGW toolchain, -hosted on Linux (Debian/Fedora). - -The version of the Windows API that's currently targeted is Vista / Server -2008. - -.. _HomeBrew: https://brew.sh/ -.. _MacPorts: https://www.macports.org/ -.. 
_Repology: https://repology.org/ diff --git a/docs/system/cpu-hotplug.rst b/docs/system/cpu-hotplug.rst index bd0663616e8..015ce2b6ec3 100644 --- a/docs/system/cpu-hotplug.rst +++ b/docs/system/cpu-hotplug.rst @@ -78,7 +78,7 @@ vCPU hotplug } (QEMU) -(5) Optionally, run QMP `query-cpus-fast` for some details about the +(5) Optionally, run QMP ``query-cpus-fast`` for some details about the vCPUs:: (QEMU) query-cpus-fast diff --git a/docs/system/cpu-models-x86-abi.csv b/docs/system/cpu-models-x86-abi.csv new file mode 100644 index 00000000000..f3f3b60be10 --- /dev/null +++ b/docs/system/cpu-models-x86-abi.csv @@ -0,0 +1,67 @@ +Model,baseline,v2,v3,v4 +486-v1,,,, +Broadwell-v1,✅,✅,✅, +Broadwell-v2,✅,✅,✅, +Broadwell-v3,✅,✅,✅, +Broadwell-v4,✅,✅,✅, +Cascadelake-Server-v1,✅,✅,✅,✅ +Cascadelake-Server-v2,✅,✅,✅,✅ +Cascadelake-Server-v3,✅,✅,✅,✅ +Cascadelake-Server-v4,✅,✅,✅,✅ +Conroe-v1,✅,,, +Cooperlake-v1,✅,✅,✅,✅ +Denverton-v1,✅,✅,, +Denverton-v2,✅,✅,, +Dhyana-v1,✅,✅,✅, +EPYC-Milan-v1,✅,✅,✅, +EPYC-Rome-v1,✅,✅,✅, +EPYC-Rome-v2,✅,✅,✅, +EPYC-v1,✅,✅,✅, +EPYC-v2,✅,✅,✅, +EPYC-v3,✅,✅,✅, +Haswell-v1,✅,✅,✅, +Haswell-v2,✅,✅,✅, +Haswell-v3,✅,✅,✅, +Haswell-v4,✅,✅,✅, +Icelake-Client-v1,✅,✅,✅, +Icelake-Client-v2,✅,✅,✅, +Icelake-Server-v1,✅,✅,✅,✅ +Icelake-Server-v2,✅,✅,✅,✅ +Icelake-Server-v3,✅,✅,✅,✅ +Icelake-Server-v4,✅,✅,✅,✅ +IvyBridge-v1,✅,✅,, +IvyBridge-v2,✅,✅,, +KnightsMill-v1,✅,✅,✅, +Nehalem-v1,✅,✅,, +Nehalem-v2,✅,✅,, +Opteron_G1-v1,✅,,, +Opteron_G2-v1,✅,,, +Opteron_G3-v1,✅,,, +Opteron_G4-v1,✅,✅,, +Opteron_G5-v1,✅,✅,, +Penryn-v1,✅,,, +SandyBridge-v1,✅,✅,, +SandyBridge-v2,✅,✅,, +Skylake-Client-v1,✅,✅,✅, +Skylake-Client-v2,✅,✅,✅, +Skylake-Client-v3,✅,✅,✅, +Skylake-Server-v1,✅,✅,✅,✅ +Skylake-Server-v2,✅,✅,✅,✅ +Skylake-Server-v3,✅,✅,✅,✅ +Skylake-Server-v4,✅,✅,✅,✅ +Snowridge-v1,✅,✅,, +Snowridge-v2,✅,✅,, +Westmere-v1,✅,✅,, +Westmere-v2,✅,✅,, +athlon-v1,,,, +core2duo-v1,✅,,, +coreduo-v1,,,, +kvm32-v1,,,, +kvm64-v1,✅,,, +n270-v1,,,, +pentium-v1,,,, +pentium2-v1,,,, +pentium3-v1,,,, +phenom-v1,✅,,, +qemu32-v1,,,, +qemu64-v1,✅,,, diff --git a/docs/system/cpu-models-x86.rst.inc b/docs/system/cpu-models-x86.rst.inc index 867c8216b5a..7f6368f999b 100644 --- a/docs/system/cpu-models-x86.rst.inc +++ b/docs/system/cpu-models-x86.rst.inc @@ -1,5 +1,5 @@ Recommendations for KVM CPU model configuration on x86 hosts -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +============================================================ The information that follows provides recommendations for configuring CPU models on x86 hosts. The goals are to maximise performance, while @@ -39,6 +39,28 @@ CPU, as they would with "Host passthrough", but gives much of the benefit of passthrough, while making live migration safe. +ABI compatibility levels for CPU models +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The x86_64 architecture has a number of `ABI compatibility levels`_ +defined. Traditionally most operating systems and toolchains would +only target the original baseline ABI. It is expected that in +future OS and toolchains are likely to target newer ABIs. The +table that follows illustrates which ABI compatibility levels +can be satisfied by the QEMU CPU models. Note that the table only +lists the long term stable CPU model versions (eg Haswell-v4). +In addition to what is listed, there are also many CPU model +aliases which resolve to a different CPU model version, +depending on the machine type is in use. + +.. _ABI compatibility levels: https://gitlab.com/x86-psABIs/x86-64-ABI/ + +.. 
csv-table:: x86-64 ABI compatibility levels + :file: cpu-models-x86-abi.csv + :widths: 40,15,15,15,15 + :header-rows: 2 + + Preferred CPU models for Intel x86 hosts ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -205,7 +227,7 @@ features are included if using "Host passthrough" or "Host model". Preferred CPU models for AMD x86 hosts ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The following CPU models are preferred for use on Intel hosts. +The following CPU models are preferred for use on AMD hosts. Administrators / applications are recommended to use the CPU model that matches the generation of the host CPUs in use. In a deployment with a mixture of host CPU models between machines, if live migration @@ -346,7 +368,7 @@ featureset, which prevents guests having optimal performance. Syntax for configuring CPU models -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +================================= The examples below illustrate the approach to configuring the various CPU models / features in QEMU and libvirt. diff --git a/docs/system/deprecated.rst b/docs/system/deprecated.rst deleted file mode 100644 index 80cae862528..00000000000 --- a/docs/system/deprecated.rst +++ /dev/null @@ -1,386 +0,0 @@ -Deprecated features -=================== - -In general features are intended to be supported indefinitely once -introduced into QEMU. In the event that a feature needs to be removed, -it will be listed in this section. The feature will remain functional for the -release in which it was deprecated and one further release. After these two -releases, the feature is liable to be removed. Deprecated features may also -generate warnings on the console when QEMU starts up, or if activated via a -monitor command, however, this is not a mandatory requirement. - -Prior to the 2.10.0 release there was no official policy on how -long features would be deprecated prior to their removal, nor -any documented list of which features were deprecated. Thus -any features deprecated prior to 2.10.0 will be treated as if -they were first deprecated in the 2.10.0 release. - -What follows is a list of all features currently marked as -deprecated. - -System emulator command line arguments --------------------------------------- - -``QEMU_AUDIO_`` environment variables and ``-audio-help`` (since 4.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``-audiodev`` argument is now the preferred way to specify audio -backend settings instead of environment variables. To ease migration to -the new format, the ``-audiodev-help`` option can be used to convert -the current values of the environment variables to ``-audiodev`` options. - -Creating sound card devices and vnc without ``audiodev=`` property (since 4.2) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -When not using the deprecated legacy audio config, each sound card -should specify an ``audiodev=`` property. Additionally, when using -vnc, you should specify an ``audiodev=`` property if you plan to -transmit audio through the VNC protocol. - -Creating sound card devices using ``-soundhw`` (since 5.1) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Sound card devices should be created using ``-device`` instead. The -names are the same for most devices. The exceptions are ``hda`` which -needs two devices (``-device intel-hda -device hda-duplex``) and -``pcspk`` which can be activated using ``-machine -pcspk-audiodev=``. 
- -``-chardev`` backend aliases ``tty`` and ``parport`` (since 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -``tty`` and ``parport`` are aliases that will be removed. Instead, the -actual backend names ``serial`` and ``parallel`` should be used. - -RISC-V ``-bios`` (since 5.1) -'''''''''''''''''''''''''''' - -QEMU 4.1 introduced support for the -bios option in QEMU for RISC-V for the -RISC-V virt machine and sifive_u machine. QEMU 4.1 had no changes to the -default behaviour to avoid breakages. - -QEMU 5.1 changes the default behaviour from ``-bios none`` to ``-bios default``. - -QEMU 5.1 has three options: - 1. ``-bios default`` - This is the current default behavior if no -bios option - is included. This option will load the default OpenSBI firmware automatically. - The firmware is included with the QEMU release and no user interaction is - required. All a user needs to do is specify the kernel they want to boot - with the -kernel option - 2. ``-bios none`` - QEMU will not automatically load any firmware. It is up - to the user to load all the images they need. - 3. ``-bios `` - Tells QEMU to load the specified file as the firmwrae. - -Short-form boolean options (since 6.0) -'''''''''''''''''''''''''''''''''''''' - -Boolean options such as ``share=on``/``share=off`` could be written -in short form as ``share`` and ``noshare``. This is now deprecated -and will cause a warning. - -``delay`` option for socket character devices (since 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The replacement for the ``nodelay`` short-form boolean option is ``nodelay=on`` -rather than ``delay=off``. - -``--enable-fips`` (since 6.0) -''''''''''''''''''''''''''''' - -This option restricts usage of certain cryptographic algorithms when -the host is operating in FIPS mode. - -If FIPS compliance is required, QEMU should be built with the ``libgcrypt`` -library enabled as a cryptography provider. - -Neither the ``nettle`` library, or the built-in cryptography provider are -supported on FIPS enabled hosts. - -``-writeconfig`` (since 6.0) -''''''''''''''''''''''''''''' - -The ``-writeconfig`` option is not able to serialize the entire contents -of the QEMU command line. It is thus considered a failed experiment -and deprecated, with no current replacement. - -Userspace local APIC with KVM (x86, since 6.0) -'''''''''''''''''''''''''''''''''''''''''''''' - -Using ``-M kernel-irqchip=off`` with x86 machine types that include a local -APIC is deprecated. The ``split`` setting is supported, as is using -``-M kernel-irqchip=off`` with the ISA PC machine type. - -hexadecimal sizes with scaling multipliers (since 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Input parameters that take a size value should only use a size suffix -(such as 'k' or 'M') when the base is written in decimal, and not when -the value is hexadecimal. That is, '0x20M' is deprecated, and should -be written either as '32M' or as '0x2000000'. - -``-spice password=string`` (since 6.0) -'''''''''''''''''''''''''''''''''''''' - -This option is insecure because the SPICE password remains visible in -the process listing. This is replaced by the new ``password-secret`` -option which lets the password be securely provided on the command -line using a ``secret`` object instance. 
- -``opened`` property of ``rng-*`` objects (since 6.0.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The only effect of specifying ``opened=on`` in the command line or QMP -``object-add`` is that the device is opened immediately, possibly before all -other options have been processed. This will either have no effect (if -``opened`` was the last option) or cause errors. The property is therefore -useless and should not be specified. - -``loaded`` property of ``secret`` and ``secret_keyring`` objects (since 6.0.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The only effect of specifying ``loaded=on`` in the command line or QMP -``object-add`` is that the secret is loaded immediately, possibly before all -other options have been processed. This will either have no effect (if -``loaded`` was the last option) or cause options to be effectively ignored as -if they were not given. The property is therefore useless and should not be -specified. - - -QEMU Machine Protocol (QMP) commands ------------------------------------- - -``blockdev-open-tray``, ``blockdev-close-tray`` argument ``device`` (since 2.8.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use argument ``id`` instead. - -``eject`` argument ``device`` (since 2.8.0) -''''''''''''''''''''''''''''''''''''''''''' - -Use argument ``id`` instead. - -``blockdev-change-medium`` argument ``device`` (since 2.8.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use argument ``id`` instead. - -``block_set_io_throttle`` argument ``device`` (since 2.8.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use argument ``id`` instead. - -``blockdev-add`` empty string argument ``backing`` (since 2.10.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use argument value ``null`` instead. - -``block-commit`` arguments ``base`` and ``top`` (since 3.1.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use arguments ``base-node`` and ``top-node`` instead. - -``nbd-server-add`` and ``nbd-server-remove`` (since 5.2) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use the more generic commands ``block-export-add`` and ``block-export-del`` -instead. As part of this deprecation, where ``nbd-server-add`` used a -single ``bitmap``, the new ``block-export-add`` uses a list of ``bitmaps``. - -System accelerators -------------------- - -MIPS ``Trap-and-Emul`` KVM support (since 6.0) -'''''''''''''''''''''''''''''''''''''''''''''' - -The MIPS ``Trap-and-Emul`` KVM host and guest support has been removed -from Linux upstream kernel, declare it deprecated. - -System emulator CPUS --------------------- - -``moxie`` CPU (since 5.2.0) -''''''''''''''''''''''''''' - -The ``moxie`` guest CPU support is deprecated and will be removed in -a future version of QEMU. It's unclear whether anybody is still using -CPU emulation in QEMU, and there are no test images available to make -sure that the code is still working. - -``lm32`` CPUs (since 5.2.0) -''''''''''''''''''''''''''' - -The ``lm32`` guest CPU support is deprecated and will be removed in -a future version of QEMU. The only public user of this architecture -was the milkymist project, which has been dead for years; there was -never an upstream Linux port. - -``unicore32`` CPUs (since 5.2.0) -'''''''''''''''''''''''''''''''' - -The ``unicore32`` guest CPU support is deprecated and will be removed in -a future version of QEMU. 
Support for this CPU was removed from the -upstream Linux kernel, and there is no available upstream toolchain -to build binaries for it. - -``Icelake-Client`` CPU Model (since 5.2.0) -'''''''''''''''''''''''''''''''''''''''''' - -``Icelake-Client`` CPU Models are deprecated. Use ``Icelake-Server`` CPU -Models instead. - -MIPS ``I7200`` CPU Model (since 5.2) -'''''''''''''''''''''''''''''''''''' - -The ``I7200`` guest CPU relies on the nanoMIPS ISA, which is deprecated -(the ISA has never been upstreamed to a compiler toolchain). Therefore -this CPU is also deprecated. - -System emulator machines ------------------------- - -Raspberry Pi ``raspi2`` and ``raspi3`` machines (since 5.2) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The Raspberry Pi machines come in various models (A, A+, B, B+). To be able -to distinguish which model QEMU is implementing, the ``raspi2`` and ``raspi3`` -machines have been renamed ``raspi2b`` and ``raspi3b``. - -Device options --------------- - -Emulated device options -''''''''''''''''''''''' - -``-device virtio-blk,scsi=on|off`` (since 5.0.0) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The virtio-blk SCSI passthrough feature is a legacy VIRTIO feature. VIRTIO 1.0 -and later do not support it because the virtio-scsi device was introduced for -full SCSI support. Use virtio-scsi instead when SCSI passthrough is required. - -Note this also applies to ``-device virtio-blk-pci,scsi=on|off``, which is an -alias. - -Block device options -'''''''''''''''''''' - -``"backing": ""`` (since 2.12.0) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -In order to prevent QEMU from automatically opening an image's backing -chain, use ``"backing": null`` instead. - -``rbd`` keyvalue pair encoded filenames: ``""`` (since 3.1.0) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Options for ``rbd`` should be specified according to its runtime options, -like other block drivers. Legacy parsing of keyvalue pair encoded -filenames is useful to open images with the old format for backing files; -These image files should be updated to use the current format. - -Example of legacy encoding:: - - json:{"file.driver":"rbd", "file.filename":"rbd:rbd/name"} - -The above, converted to the current supported format:: - - json:{"file.driver":"rbd", "file.pool":"rbd", "file.image":"name"} - -``sheepdog`` driver (since 5.2.0) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The ``sheepdog`` block device driver is deprecated. The corresponding upstream -server project is no longer actively maintained. Users are recommended to switch -to an alternative distributed block device driver such as RBD. The -``qemu-img convert`` command can be used to liberate existing data by moving -it out of sheepdog volumes into an alternative storage backend. - -linux-user mode CPUs --------------------- - -``ppc64abi32`` CPUs (since 5.2.0) -''''''''''''''''''''''''''''''''' - -The ``ppc64abi32`` architecture has a number of issues which regularly -trip up our CI testing and is suspected to be quite broken. For that -reason the maintainers strongly suspect no one actually uses it. - -MIPS ``I7200`` CPU (since 5.2) -'''''''''''''''''''''''''''''' - -The ``I7200`` guest CPU relies on the nanoMIPS ISA, which is deprecated -(the ISA has never been upstreamed to a compiler toolchain). Therefore -this CPU is also deprecated. 
- -Related binaries ----------------- - -qemu-img amend to adjust backing file (since 5.1) -''''''''''''''''''''''''''''''''''''''''''''''''' - -The use of ``qemu-img amend`` to modify the name or format of a qcow2 -backing image is deprecated; this functionality was never fully -documented or tested, and interferes with other amend operations that -need access to the original backing image (such as deciding whether a -v3 zero cluster may be left unallocated when converting to a v2 -image). Rather, any changes to the backing chain should be performed -with ``qemu-img rebase -u`` either before or after the remaining -changes being performed by amend, as appropriate. - -qemu-img backing file without format (since 5.1) -'''''''''''''''''''''''''''''''''''''''''''''''' - -The use of ``qemu-img create``, ``qemu-img rebase``, or ``qemu-img -convert`` to create or modify an image that depends on a backing file -now recommends that an explicit backing format be provided. This is -for safety: if QEMU probes a different format than what you thought, -the data presented to the guest will be corrupt; similarly, presenting -a raw image to a guest allows a potential security exploit if a future -probe sees a non-raw image based on guest writes. - -To avoid the warning message, or even future refusal to create an -unsafe image, you must pass ``-o backing_fmt=`` (or the shorthand -``-F`` during create) to specify the intended backing format. You may -use ``qemu-img rebase -u`` to retroactively add a backing format to an -existing image. However, be aware that there are already potential -security risks to blindly using ``qemu-img info`` to probe the format -of an untrusted backing image, when deciding what format to add into -an existing image. - -Backwards compatibility ------------------------ - -Runnability guarantee of CPU models (since 4.1.0) -''''''''''''''''''''''''''''''''''''''''''''''''' - -Previous versions of QEMU never changed existing CPU models in -ways that introduced additional host software or hardware -requirements to the VM. This allowed management software to -safely change the machine type of an existing VM without -introducing new requirements ("runnability guarantee"). This -prevented CPU models from being updated to include CPU -vulnerability mitigations, leaving guests vulnerable in the -default configuration. - -The CPU model runnability guarantee won't apply anymore to -existing CPU models. Management software that needs runnability -guarantees must resolve the CPU model aliases using the -``alias-of`` field returned by the ``query-cpu-definitions`` QMP -command. - -While those guarantees are kept, the return value of -``query-cpu-definitions`` will have existing CPU model aliases -point to a version that doesn't break runnability guarantees -(specifically, version 1 of those CPU models). In future QEMU -versions, aliases will point to newer CPU model versions -depending on the machine type, so management software must -resolve CPU model aliases before starting a virtual machine. - -Guest Emulator ISAs -------------------- - -nanoMIPS ISA -'''''''''''' - -The ``nanoMIPS`` ISA has never been upstreamed to any compiler toolchain. -As it is hard to generate binaries for it, declare it deprecated. diff --git a/docs/system/device-emulation.rst b/docs/system/device-emulation.rst new file mode 100644 index 00000000000..19944f526ce --- /dev/null +++ b/docs/system/device-emulation.rst @@ -0,0 +1,91 @@ +.. 
_device-emulation: + +Device Emulation +---------------- + +QEMU supports the emulation of a large number of devices from +peripherals such network cards and USB devices to integrated systems +on a chip (SoCs). Configuration of these is often a source of +confusion so it helps to have an understanding of some of the terms +used to describes devices within QEMU. + +Common Terms +~~~~~~~~~~~~ + +Device Front End +================ + +A device front end is how a device is presented to the guest. The type +of device presented should match the hardware that the guest operating +system is expecting to see. All devices can be specified with the +``--device`` command line option. Running QEMU with the command line +options ``--device help`` will list all devices it is aware of. Using +the command line ``--device foo,help`` will list the additional +configuration options available for that device. + +A front end is often paired with a back end, which describes how the +host's resources are used in the emulation. + +Device Buses +============ + +Most devices will exist on a BUS of some sort. Depending on the +machine model you choose (``-M foo``) a number of buses will have been +automatically created. In most cases the BUS a device is attached to +can be inferred, for example PCI devices are generally automatically +allocated to the next free address of first PCI bus found. However in +complicated configurations you can explicitly specify what bus +(``bus=ID``) a device is attached to along with its address +(``addr=N``). + +Some devices, for example a PCI SCSI host controller, will add an +additional buses to the system that other devices can be attached to. +A hypothetical chain of devices might look like: + + --device foo,bus=pci.0,addr=0,id=foo + --device bar,bus=foo.0,addr=1,id=baz + +which would be a bar device (with the ID of baz) which is attached to +the first foo bus (foo.0) at address 1. The foo device which provides +that bus is itself is attached to the first PCI bus (pci.0). + + +Device Back End +=============== + +The back end describes how the data from the emulated device will be +processed by QEMU. The configuration of the back end is usually +specific to the class of device being emulated. For example serial +devices will be backed by a ``--chardev`` which can redirect the data +to a file or socket or some other system. Storage devices are handled +by ``--blockdev`` which will specify how blocks are handled, for +example being stored in a qcow2 file or accessing a raw host disk +partition. Back ends can sometimes be stacked to implement features +like snapshots. + +While the choice of back end is generally transparent to the guest, +there are cases where features will not be reported to the guest if +the back end is unable to support it. + +Device Pass Through +=================== + +Device pass through is where the device is actually given access to +the underlying hardware. This can be as simple as exposing a single +USB device on the host system to the guest or dedicating a video card +in a PCI slot to the exclusive use of the guest. + + +Emulated Devices +~~~~~~~~~~~~~~~~ + +.. 
toctree:: + :maxdepth: 1 + + devices/ivshmem.rst + devices/net.rst + devices/nvme.rst + devices/usb.rst + devices/vhost-user.rst + devices/virtio-pmem.rst + devices/vhost-user-rng.rst diff --git a/docs/system/device-url-syntax.rst.inc b/docs/system/device-url-syntax.rst.inc index 6f6ec8366b7..7dbc525fa80 100644 --- a/docs/system/device-url-syntax.rst.inc +++ b/docs/system/device-url-syntax.rst.inc @@ -15,7 +15,7 @@ These are specified using a special URL syntax. 'iqn.2008-11.org.linux-kvm[:]' but this can also be set from the command line or a configuration file. - Since version Qemu 2.4 it is possible to specify a iSCSI request + Since version QEMU 2.4 it is possible to specify a iSCSI request timeout to detect stalled requests and force a reestablishment of the session. The timeout is specified in seconds. The default is 0 which means no timeout. Libiscsi 1.15.0 or greater is required for this @@ -85,24 +85,6 @@ These are specified using a special URL syntax. Currently authentication must be done using ssh-agent. Other authentication methods may be supported in future. -``Sheepdog`` - Sheepdog is a distributed storage system for QEMU. QEMU supports - using either local sheepdog devices or remote networked devices. - - Syntax for specifying a sheepdog device - - :: - - sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag] - - Example - - .. parsed-literal:: - - |qemu_system| --drive file=sheepdog://192.0.2.1:30000/MyVirtualMachine - - See also https://sheepdog.github.io/sheepdog/. - ``GlusterFS`` GlusterFS is a user space distributed file system. QEMU supports the use of GlusterFS volumes for hosting VM disk images using TCP, Unix diff --git a/docs/system/ivshmem.rst b/docs/system/devices/ivshmem.rst similarity index 100% rename from docs/system/ivshmem.rst rename to docs/system/devices/ivshmem.rst diff --git a/docs/system/net.rst b/docs/system/devices/net.rst similarity index 100% rename from docs/system/net.rst rename to docs/system/devices/net.rst diff --git a/docs/system/nvme.rst b/docs/system/devices/nvme.rst similarity index 82% rename from docs/system/nvme.rst rename to docs/system/devices/nvme.rst index f7f63d6bf61..b5acb2a9c19 100644 --- a/docs/system/nvme.rst +++ b/docs/system/devices/nvme.rst @@ -70,7 +70,7 @@ namespaces and additional features, the ``nvme-ns`` device must be used. The namespaces defined by the ``nvme-ns`` device will attach to the most recently defined ``nvme-bus`` that is created by the ``nvme`` device. Namespace -identifers are allocated automatically, starting from ``1``. +identifiers are allocated automatically, starting from ``1``. There are a number of parameters available: @@ -81,6 +81,12 @@ There are a number of parameters available: Set the UUID of the namespace. This will be reported as a "Namespace UUID" descriptor in the Namespace Identification Descriptor List. +``eui64`` + Set the EUI-64 of the namespace. This will be reported as a "IEEE Extended + Unique Identifier" descriptor in the Namespace Identification Descriptor List. + Since machine type 6.1 a non-zero default value is used if the parameter + is not provided. For earlier machine types the field defaults to 0. + ``bus`` If there are more ``nvme`` devices defined, this parameter may be used to attach the namespace to a specific ``nvme`` device (identified by an ``id`` @@ -104,28 +110,32 @@ multipath I/O. This will create an NVM subsystem with two controllers. 
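+For reference, a two-controller subsystem of this kind might be set up with
+``-device`` options along these lines (a sketch only; the ``id``, ``nqn`` and
+``serial`` values are placeholders):
+
+.. code-block:: console
+
+   -device nvme-subsys,id=nvme-subsys-0,nqn=subsys0
+   -device nvme,serial=deadbeef,subsys=nvme-subsys-0
+   -device nvme,serial=deadc0de,subsys=nvme-subsys-0
+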
Having controllers linked to an ``nvme-subsys`` device allows additional ``nvme-ns`` parameters: -``shared`` (default: ``off``) +``shared`` (default: ``on`` since 6.2) Specifies that the namespace will be attached to all controllers in the - subsystem. If set to ``off`` (the default), the namespace will remain a - private namespace and may only be attached to a single controller at a time. + subsystem. If set to ``off``, the namespace will remain a private namespace + and may only be attached to a single controller at a time. Shared namespaces + are always automatically attached to all controllers (also when controllers + are hotplugged). ``detached`` (default: ``off``) If set to ``on``, the namespace will be be available in the subsystem, but - not attached to any controllers initially. + not attached to any controllers initially. A shared namespace with this set + to ``on`` will never be automatically attached to controllers. Thus, adding .. code-block:: console -drive file=nvm-1.img,if=none,id=nvm-1 - -device nvme-ns,drive=nvm-1,nsid=1,shared=on + -device nvme-ns,drive=nvm-1,nsid=1 -drive file=nvm-2.img,if=none,id=nvm-2 - -device nvme-ns,drive=nvm-2,nsid=3,detached=on + -device nvme-ns,drive=nvm-2,nsid=3,shared=off,detached=on -will cause NSID 1 will be a shared namespace (due to ``shared=on``) that is -initially attached to both controllers. NSID 3 will be a private namespace -(i.e. only attachable to a single controller at a time) and will not be -attached to any controller initially (due to ``detached=on``). +will cause NSID 1 will be a shared namespace that is initially attached to both +controllers. NSID 3 will be a private namespace due to ``shared=off`` and only +attachable to a single controller at a time. Additionally it will not be +attached to any controller initially (due to ``detached=on``) or to hotplugged +controllers. Optional Features ================= @@ -196,6 +206,12 @@ The namespace may be configured with additional parameters allows all zones to be open. If ``zoned.max_active`` is specified, this value must be less than or equal to that. +``zoned.zasl=UINT8`` (default: ``0``) + Set the maximum data transfer size for the Zone Append command. Like + ``mdts``, the value is specified as a power of two (2^n) and is in units of + the minimum memory page size (CAP.MPSMIN). The default value (``0``) + has this property inherit the ``mdts`` value. + Metadata -------- diff --git a/docs/system/devices/usb.rst b/docs/system/devices/usb.rst new file mode 100644 index 00000000000..afb7d6c2268 --- /dev/null +++ b/docs/system/devices/usb.rst @@ -0,0 +1,351 @@ +.. _pcsys_005fusb: + +USB emulation +------------- + +QEMU can emulate a PCI UHCI, OHCI, EHCI or XHCI USB controller. You can +plug virtual USB devices or real host USB devices (only works with +certain host operating systems). QEMU will automatically create and +connect virtual USB hubs as necessary to connect multiple USB devices. + +USB controllers +~~~~~~~~~~~~~~~ + +XHCI controller support +^^^^^^^^^^^^^^^^^^^^^^^ + +QEMU has XHCI host adapter support. The XHCI hardware design is much +more virtualization-friendly when compared to EHCI and UHCI, thus XHCI +emulation uses less resources (especially CPU). So if your guest +supports XHCI (which should be the case for any operating system +released around 2010 or later) we recommend using it: + + qemu -device qemu-xhci + +XHCI supports USB 1.1, USB 2.0 and USB 3.0 devices, so this is the +only controller you need. 
With only a single USB controller (and +therefore only a single USB bus) present in the system there is no +need to use the bus= parameter when adding USB devices. + + +EHCI controller support +^^^^^^^^^^^^^^^^^^^^^^^ + +The QEMU EHCI Adapter supports USB 2.0 devices. It can be used either +standalone or with companion controllers (UHCI, OHCI) for USB 1.1 +devices. The companion controller setup is more convenient to use +because it provides a single USB bus supporting both USB 2.0 and USB +1.1 devices. See next section for details. + +When running EHCI in standalone mode you can add UHCI or OHCI +controllers for USB 1.1 devices too. Each controller creates its own +bus though, so there are two completely separate USB buses: One USB +1.1 bus driven by the UHCI controller and one USB 2.0 bus driven by +the EHCI controller. Devices must be attached to the correct +controller manually. + +The easiest way to add a UHCI controller to a ``pc`` machine is the +``-usb`` switch. QEMU will create the UHCI controller as function of +the PIIX3 chipset. The USB 1.1 bus will carry the name ``usb-bus.0``. + +You can use the standard ``-device`` switch to add a EHCI controller to +your virtual machine. It is strongly recommended to specify an ID for +the controller so the USB 2.0 bus gets an individual name, for example +``-device usb-ehci,id=ehci``. This will give you a USB 2.0 bus named +``ehci.0``. + +When adding USB devices using the ``-device`` switch you can specify the +bus they should be attached to. Here is a complete example: + +.. parsed-literal:: + + |qemu_system| -M pc ${otheroptions} \\ + -drive if=none,id=usbstick,format=raw,file=/path/to/image \\ + -usb \\ + -device usb-ehci,id=ehci \\ + -device usb-tablet,bus=usb-bus.0 \\ + -device usb-storage,bus=ehci.0,drive=usbstick + +This attaches a USB tablet to the UHCI adapter and a USB mass storage +device to the EHCI adapter. + + +Companion controller support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The UHCI and OHCI controllers can attach to a USB bus created by EHCI +as companion controllers. This is done by specifying the ``masterbus`` +and ``firstport`` properties. ``masterbus`` specifies the bus name the +controller should attach to. ``firstport`` specifies the first port the +controller should attach to, which is needed as usually one EHCI +controller with six ports has three UHCI companion controllers with +two ports each. + +There is a config file in docs which will do all this for +you, which you can use like this: + +.. parsed-literal:: + + |qemu_system| -readconfig docs/config/ich9-ehci-uhci.cfg + +Then use ``bus=ehci.0`` to assign your USB devices to that bus. + +Using the ``-usb`` switch for ``q35`` machines will create a similar +USB controller configuration. + + +.. _Connecting USB devices: + +Connecting USB devices +~~~~~~~~~~~~~~~~~~~~~~ + +USB devices can be connected with the ``-device usb-...`` command line +option or the ``device_add`` monitor command. Available devices are: + +``usb-mouse`` + Virtual Mouse. This will override the PS/2 mouse emulation when + activated. + +``usb-tablet`` + Pointer device that uses absolute coordinates (like a touchscreen). + This means QEMU is able to report the mouse position without having + to grab the mouse. Also overrides the PS/2 mouse emulation when + activated. + +``usb-storage,drive=drive_id`` + Mass storage device backed by drive_id (see the :ref:`disk images` + chapter in the System Emulation Users Guide). This is the classic + bulk-only transport protocol used by 99% of USB sticks. 
This + example shows it connected to an XHCI USB controller and with + a drive backed by a raw format disk image: + + .. parsed-literal:: + + |qemu_system| [...] \\ + -drive if=none,id=stick,format=raw,file=/path/to/file.img \\ + -device nec-usb-xhci,id=xhci \\ + -device usb-storage,bus=xhci.0,drive=stick + +``usb-uas`` + USB attached SCSI device. This does not create a SCSI disk, so + you need to explicitly create a ``scsi-hd`` or ``scsi-cd`` device + on the command line, as well as using the ``-drive`` option to + specify what those disks are backed by. One ``usb-uas`` device can + handle multiple logical units (disks). This example creates three + logical units: two disks and one cdrom drive: + + .. parsed-literal:: + + |qemu_system| [...] \\ + -drive if=none,id=uas-disk1,format=raw,file=/path/to/file1.img \\ + -drive if=none,id=uas-disk2,format=raw,file=/path/to/file2.img \\ + -drive if=none,id=uas-cdrom,media=cdrom,format=raw,file=/path/to/image.iso \\ + -device nec-usb-xhci,id=xhci \\ + -device usb-uas,id=uas,bus=xhci.0 \\ + -device scsi-hd,bus=uas.0,scsi-id=0,lun=0,drive=uas-disk1 \\ + -device scsi-hd,bus=uas.0,scsi-id=0,lun=1,drive=uas-disk2 \\ + -device scsi-cd,bus=uas.0,scsi-id=0,lun=5,drive=uas-cdrom + +``usb-bot`` + Bulk-only transport storage device. This presents the guest with the + same USB bulk-only transport protocol interface as ``usb-storage``, but + the QEMU command line option works like ``usb-uas`` and does not + automatically create SCSI disks for you. ``usb-bot`` supports up to + 16 LUNs. Unlike ``usb-uas``, the LUN numbers must be continuous, + i.e. for three devices you must use 0+1+2. The 0+1+5 numbering from the + ``usb-uas`` example above won't work with ``usb-bot``. + +``usb-mtp,rootdir=dir`` + Media transfer protocol device, using dir as root of the file tree + that is presented to the guest. + +``usb-host,hostbus=bus,hostaddr=addr`` + Pass through the host device identified by bus and addr + +``usb-host,vendorid=vendor,productid=product`` + Pass through the host device identified by vendor and product ID + +``usb-wacom-tablet`` + Virtual Wacom PenPartner tablet. This device is similar to the + ``tablet`` above but it can be used with the tslib library because in + addition to touch coordinates it reports touch pressure. + +``usb-kbd`` + Standard USB keyboard. Will override the PS/2 keyboard (if present). + +``usb-serial,chardev=id`` + Serial converter. This emulates an FTDI FT232BM chip connected to + host character device id. + +``usb-braille,chardev=id`` + Braille device. This will use BrlAPI to display the braille output on + a real or fake device referenced by id. + +``usb-net[,netdev=id]`` + Network adapter that supports CDC ethernet and RNDIS protocols. id + specifies a netdev defined with ``-netdev …,id=id``. For instance, + user-mode networking can be used with + + .. parsed-literal:: + + |qemu_system| [...] -netdev user,id=net0 -device usb-net,netdev=net0 + +``usb-ccid`` + Smartcard reader device + +``usb-audio`` + USB audio device + +``u2f-{emulated,passthru}`` + Universal Second Factor device + +Physical port addressing +^^^^^^^^^^^^^^^^^^^^^^^^ + +For all the above USB devices, by default QEMU will plug the device +into the next available port on the specified USB bus, or onto +some available USB bus if you didn't specify one explicitly. +If you need to, you can also specify the physical port where +the device will show up in the guest. This can be done using the +``port`` property. UHCI has two root ports (1,2). 
EHCI has six root +ports (1-6), and the emulated (1.1) USB hub has eight ports. + +Plugging a tablet into UHCI port 1 works like this:: + + -device usb-tablet,bus=usb-bus.0,port=1 + +Plugging a hub into UHCI port 2 works like this:: + + -device usb-hub,bus=usb-bus.0,port=2 + +Plugging a virtual USB stick into port 4 of the hub just plugged works +this way:: + + -device usb-storage,bus=usb-bus.0,port=2.4,drive=... + +In the monitor, the ``device_add` command also accepts a ``port`` +property specification. If you want to unplug devices too you should +specify some unique id which you can use to refer to the device. +You can then use ``device_del`` to unplug the device later. +For example:: + + (qemu) device_add usb-tablet,bus=usb-bus.0,port=1,id=my-tablet + (qemu) device_del my-tablet + +Hotplugging USB storage +~~~~~~~~~~~~~~~~~~~~~~~ + +The ``usb-bot`` and ``usb-uas`` devices can be hotplugged. In the hotplug +case they are added with ``attached = false`` so the guest will not see +the device until the ``attached`` property is explicitly set to true. +That allows you to attach one or more scsi devices before making the +device visible to the guest. The workflow looks like this: + +#. ``device-add usb-bot,id=foo`` +#. ``device-add scsi-{hd,cd},bus=foo.0,lun=0`` +#. optionally add more devices (luns 1 ... 15) +#. ``scripts/qmp/qom-set foo.attached = true`` + +.. _host_005fusb_005fdevices: + +Using host USB devices on a Linux host +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +WARNING: this is an experimental feature. QEMU will slow down when using +it. USB devices requiring real time streaming (i.e. USB Video Cameras) +are not supported yet. + +1. If you use an early Linux 2.4 kernel, verify that no Linux driver is + actually using the USB device. A simple way to do that is simply to + disable the corresponding kernel module by renaming it from + ``mydriver.o`` to ``mydriver.o.disabled``. + +2. Verify that ``/proc/bus/usb`` is working (most Linux distributions + should enable it by default). You should see something like that: + + :: + + ls /proc/bus/usb + 001 devices drivers + +3. Since only root can access to the USB devices directly, you can + either launch QEMU as root or change the permissions of the USB + devices you want to use. For testing, the following suffices: + + :: + + chown -R myuid /proc/bus/usb + +4. Launch QEMU and do in the monitor: + + :: + + info usbhost + Device 1.2, speed 480 Mb/s + Class 00: USB device 1234:5678, USB DISK + + You should see the list of the devices you can use (Never try to use + hubs, it won't work). + +5. Add the device in QEMU by using: + + :: + + device_add usb-host,vendorid=0x1234,productid=0x5678 + + Normally the guest OS should report that a new USB device is plugged. + You can use the option ``-device usb-host,...`` to do the same. + +6. Now you can try to use the host USB device in QEMU. + +When relaunching QEMU, you may have to unplug and plug again the USB +device to make it work again (this is a bug). + +``usb-host`` properties for specifying the host device +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The example above uses the ``vendorid`` and ``productid`` to +specify which host device to pass through, but this is not +the only way to specify the host device. 
``usb-host`` supports +the following properties: + +``hostbus=`` + Specifies the bus number the device must be attached to +``hostaddr=`` + Specifies the device address the device got assigned by the guest os +``hostport=`` + Specifies the physical port the device is attached to +``vendorid=`` + Specifies the vendor ID of the device +``productid=`` + Specifies the product ID of the device. + +In theory you can combine all these properties as you like. In +practice only a few combinations are useful: + +- ``vendorid`` and ``productid`` -- match for a specific device, pass it to + the guest when it shows up somewhere in the host. + +- ``hostbus`` and ``hostport`` -- match for a specific physical port in the + host, any device which is plugged in there gets passed to the + guest. + +- ``hostbus`` and ``hostaddr`` -- most useful for ad-hoc pass through as the + hostaddr isn't stable. The next time you plug the device into the host it + will get a new hostaddr. + +Note that on the host USB 1.1 devices are handled by UHCI/OHCI and USB +2.0 by EHCI. That means different USB devices plugged into the very +same physical port on the host may show up on different host buses +depending on the speed. Supposing that devices plugged into a given +physical port appear as bus 1 + port 1 for 2.0 devices and bus 3 + port 1 +for 1.1 devices, you can pass through any device plugged into that port +and also assign it to the correct USB bus in QEMU like this: + +.. parsed-literal:: + + |qemu_system| -M pc [...] \\ + -usb \\ + -device usb-ehci,id=ehci \\ + -device usb-host,bus=usb-bus.0,hostbus=3,hostport=1 \\ + -device usb-host,bus=ehci.0,hostbus=1,hostport=1 diff --git a/docs/system/devices/vhost-user-rng.rst b/docs/system/devices/vhost-user-rng.rst new file mode 100644 index 00000000000..a145d4105c1 --- /dev/null +++ b/docs/system/devices/vhost-user-rng.rst @@ -0,0 +1,39 @@ +QEMU vhost-user-rng - RNG emulation +=================================== + +Background +---------- + +What follows builds on the material presented in vhost-user.rst - it should +be reviewed before moving forward with the content in this file. + +Description +----------- + +The vhost-user-rng device implementation was designed to work with a random +number generator daemon such as the one found in the vhost-device crate of +the rust-vmm project available on github [1]. + +[1]. https://github.com/rust-vmm/vhost-device + +Examples +-------- + +The daemon should be started first: + +:: + + host# vhost-device-rng --socket-path=rng.sock -c 1 -m 512 -p 1000 + +The QEMU invocation needs to create a chardev socket the device can +use to communicate as well as share the guests memory over a memfd. + +:: + + host# qemu-system \ + -chardev socket,path=$(PATH)/rng.sock,id=rng0 \ + -device vhost-user-rng-pci,chardev=rng0 \ + -m 4096 \ + -object memory-backend-file,id=mem,size=4G,mem-path=/dev/shm,share=on \ + -numa node,memdev=mem \ + ... diff --git a/docs/system/devices/vhost-user.rst b/docs/system/devices/vhost-user.rst new file mode 100644 index 00000000000..86128114fa3 --- /dev/null +++ b/docs/system/devices/vhost-user.rst @@ -0,0 +1,59 @@ +.. _vhost_user: + +vhost-user back ends +-------------------- + +vhost-user back ends are way to service the request of VirtIO devices +outside of QEMU itself. To do this there are a number of things +required. + +vhost-user device +=================== + +These are simple stub devices that ensure the VirtIO device is visible +to the guest. 
The code is mostly boilerplate although each device has +a ``chardev`` option which specifies the ID of the ``--chardev`` +device that connects via a socket to the vhost-user *daemon*. + +vhost-user daemon +================= + +This is a separate process that is connected to by QEMU via a socket +following the :ref:`vhost_user_proto`. There are a number of daemons +that can be built when enabled by the project although any daemon that +meets the specification for a given device can be used. + +Shared memory object +==================== + +In order for the daemon to access the VirtIO queues to process the +requests it needs access to the guest's address space. This is +achieved via the ``memory-backend-file`` or ``memory-backend-memfd`` +objects. A reference to a file-descriptor which can access this object +will be passed via the socket as part of the protocol negotiation. + +Currently the shared memory object needs to match the size of the main +system memory as defined by the ``-m`` argument. + +Example +======= + +First start you daemon. + +.. parsed-literal:: + + $ virtio-foo --socket-path=/var/run/foo.sock $OTHER_ARGS + +The you start your QEMU instance specifying the device, chardev and +memory objects. + +.. parsed-literal:: + + $ |qemu_system| \\ + -m 4096 \\ + -chardev socket,id=ba1,path=/var/run/foo.sock \\ + -device vhost-user-foo,chardev=ba1,$OTHER_ARGS \\ + -object memory-backend-memfd,id=mem,size=4G,share=on \\ + -numa node,memdev=mem \\ + ... + diff --git a/docs/system/virtio-pmem.rst b/docs/system/devices/virtio-pmem.rst similarity index 100% rename from docs/system/virtio-pmem.rst rename to docs/system/devices/virtio-pmem.rst diff --git a/docs/system/gdb.rst b/docs/system/gdb.rst index 144d083df31..453eb73f6c4 100644 --- a/docs/system/gdb.rst +++ b/docs/system/gdb.rst @@ -15,7 +15,8 @@ The ``-s`` option will make QEMU listen for an incoming connection from gdb on TCP port 1234, and ``-S`` will make QEMU not start the guest until you tell it to from gdb. (If you want to specify which TCP port to use or to use something other than TCP for the gdbstub -connection, use the ``-gdb dev`` option instead of ``-s``.) +connection, use the ``-gdb dev`` option instead of ``-s``. See +`Using unix sockets`_ for an example.) .. parsed-literal:: @@ -55,7 +56,7 @@ machine has more than one CPU, QEMU exposes each CPU cluster as a separate "inferior", where each CPU within the cluster is a separate "thread". Most QEMU machine types have identical CPUs, so there is a single cluster which has all the CPUs in it. A few machine types are -heterogenous and have multiple clusters: for example the ``sifive_u`` +heterogeneous and have multiple clusters: for example the ``sifive_u`` machine has a cluster with one E51 core and a second cluster with four U54 cores. Here the E51 is the only thread in the first inferior, and the U54 cores are all threads in the second inferior. @@ -100,6 +101,29 @@ not just those in the cluster you are currently working on:: (gdb) set schedule-multiple on +Using unix sockets +================== + +An alternate method for connecting gdb to the QEMU gdbstub is to use +a unix socket (if supported by your operating system). This is useful when +running several tests in parallel, or if you do not have a known free TCP +port (e.g. when running automated tests). + +First create a chardev with the appropriate options, then +instruct the gdbserver to use that device: + +.. 
parsed-literal:: + + |qemu_system| -chardev socket,path=/tmp/gdb-socket,server=on,wait=off,id=gdb0 -gdb chardev:gdb0 -S ... + +Start gdb as before, but this time connect using the path to +the socket:: + + (gdb) target remote /tmp/gdb-socket + +Note that to use a unix socket for the connection you will need +gdb version 9.0 or newer. + Advanced debugging options ========================== diff --git a/docs/system/generic-loader.rst b/docs/system/generic-loader.rst index 6bf8a4eb486..4f9fb005f1d 100644 --- a/docs/system/generic-loader.rst +++ b/docs/system/generic-loader.rst @@ -1,8 +1,8 @@ .. Copyright (c) 2016, Xilinx Inc. -This work is licensed under the terms of the GNU GPL, version 2 or later. See -the COPYING file in the top-level directory. + This work is licensed under the terms of the GNU GPL, version 2 or later. See + the COPYING file in the top-level directory. Generic Loader -------------- @@ -92,9 +92,12 @@ shown below: specified in the executable format header. This option should only be used for the boot image. This will also cause the image to be written to the specified CPU's address space. If not specified, the - default is CPU 0. - Setting force-raw=on forces the file - to be treated as a raw image. This can be used to load supported - executable formats as if they were raw. + default is CPU 0. + +```` + Setting 'force-raw=on' forces the file to be treated as a raw image. + This can be used to load supported executable formats as if they + were raw. All values are parsed using the standard QemuOpts parsing. This allows the user to specify any values in any format supported. By default the values diff --git a/docs/system/guest-loader.rst b/docs/system/guest-loader.rst index 37d03cbd892..9ef9776bf07 100644 --- a/docs/system/guest-loader.rst +++ b/docs/system/guest-loader.rst @@ -4,7 +4,7 @@ Guest Loader ------------ -The guest loader is similar to the `generic-loader` although it is +The guest loader is similar to the ``generic-loader`` although it is aimed at a particular use case of loading hypervisor guests. This is useful for debugging hypervisors without having to jump through the hoops of firmware and boot-loaders. @@ -27,12 +27,12 @@ multi-boot capability. A typical example would look like: In the above example the Xen hypervisor is loaded by the -kernel parameter and passed it's boot arguments via -append. The Dom0 guest is loaded into the areas of memory. Each blob will get -`/chosen/module@` entry in the FDT to indicate it's location and +``/chosen/module@`` entry in the FDT to indicate it's location and size. Additional information can be passed with by using additional arguments. Currently the only supported machines which use FDT data to boot are -the ARM and RiscV `virt` machines. +the ARM and RiscV ``virt`` machines. Arguments ^^^^^^^^^ @@ -51,4 +51,4 @@ The full syntax of the guest-loader is:: ``bootargs=`` This is an optional field for kernel blobs which will pass command - like via the `/chosen/module@/bootargs` node. + like via the ``/chosen/module@/bootargs`` node. diff --git a/docs/system/i386/cpu.rst b/docs/system/i386/cpu.rst new file mode 100644 index 00000000000..738719da9a2 --- /dev/null +++ b/docs/system/i386/cpu.rst @@ -0,0 +1 @@ +.. 
include:: ../cpu-models-x86.rst.inc diff --git a/docs/system/i386/kvm-pv.rst b/docs/system/i386/kvm-pv.rst new file mode 100644 index 00000000000..1e5a9923ef4 --- /dev/null +++ b/docs/system/i386/kvm-pv.rst @@ -0,0 +1,100 @@ +Paravirtualized KVM features +============================ + +Description +----------- + +In some cases when implementing hardware interfaces in software is slow, ``KVM`` +implements its own paravirtualized interfaces. + +Setup +----- + +Paravirtualized ``KVM`` features are represented as CPU flags. The following +features are enabled by default for any CPU model when ``KVM`` acceleration is +enabled: + +- ``kvmclock`` +- ``kvm-nopiodelay`` +- ``kvm-asyncpf`` +- ``kvm-steal-time`` +- ``kvm-pv-eoi`` +- ``kvmclock-stable-bit`` + +``kvm-msi-ext-dest-id`` feature is enabled by default in x2apic mode with split +irqchip (e.g. "-machine ...,kernel-irqchip=split -cpu ...,x2apic"). + +Note: when CPU model ``host`` is used, QEMU passes through all supported +paravirtualized ``KVM`` features to the guest. + +Existing features +----------------- + +``kvmclock`` + Expose a ``KVM`` specific paravirtualized clocksource to the guest. Supported + since Linux v2.6.26. + +``kvm-nopiodelay`` + The guest doesn't need to perform delays on PIO operations. Supported since + Linux v2.6.26. + +``kvm-mmu`` + This feature is deprecated. + +``kvm-asyncpf`` + Enable asynchronous page fault mechanism. Supported since Linux v2.6.38. + Note: since Linux v5.10 the feature is deprecated and not enabled by ``KVM``. + Use ``kvm-asyncpf-int`` instead. + +``kvm-steal-time`` + Enable stolen (when guest vCPU is not running) time accounting. Supported + since Linux v3.1. + +``kvm-pv-eoi`` + Enable paravirtualized end-of-interrupt signaling. Supported since Linux + v3.10. + +``kvm-pv-unhalt`` + Enable paravirtualized spinlocks support. Supported since Linux v3.12. + +``kvm-pv-tlb-flush`` + Enable paravirtualized TLB flush mechanism. Supported since Linux v4.16. + +``kvm-pv-ipi`` + Enable paravirtualized IPI mechanism. Supported since Linux v4.19. + +``kvm-poll-control`` + Enable host-side polling on HLT control from the guest. Supported since Linux + v5.10. + +``kvm-pv-sched-yield`` + Enable paravirtualized sched yield feature. Supported since Linux v5.10. + +``kvm-asyncpf-int`` + Enable interrupt based asynchronous page fault mechanism. Supported since Linux + v5.10. + +``kvm-msi-ext-dest-id`` + Support 'Extended Destination ID' for external interrupts. The feature allows + to use up to 32768 CPUs without IRQ remapping (but other limits may apply making + the number of supported vCPUs for a given configuration lower). Supported since + Linux v5.10. + +``kvmclock-stable-bit`` + Tell the guest that guest visible TSC value can be fully trusted for kvmclock + computations and no warps are expected. Supported since Linux v2.6.35. + +Supplementary features +---------------------- + +``kvm-pv-enforce-cpuid`` + Limit the supported paravirtualized feature set to the exposed features only. + Note, by default, ``KVM`` allows the guest to use all currently supported + paravirtualized features even when they were not announced in guest visible + CPUIDs. Supported since Linux v5.10. + + +Useful links +------------ + +Please refer to Documentation/virt/kvm in Linux for additional details. 
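+Since the features above are ordinary CPU flags, they can be toggled per guest.
+For example, a guest that should not use stolen-time accounting could be
+started along these lines (a sketch only; the accelerator and feature
+selection are purely illustrative):
+
+.. parsed-literal::
+
+   |qemu_system_x86| -accel kvm -cpu host,-kvm-steal-time
+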
diff --git a/docs/system/i386/sgx.rst b/docs/system/i386/sgx.rst new file mode 100644 index 00000000000..f8fade5ac2d --- /dev/null +++ b/docs/system/i386/sgx.rst @@ -0,0 +1,165 @@ +Software Guard eXtensions (SGX) +=============================== + +Overview +-------- + +Intel Software Guard eXtensions (SGX) is a set of instructions and mechanisms +for memory accesses that provide secure access to sensitive +applications and data. SGX allows an application to use a particular area of +its address space as an *enclave*, which is a protected area that provides confidentiality +and integrity even in the presence of privileged malware. Accesses to the +enclave memory area from any software not resident in the enclave are prevented, +including those from privileged software. + +Virtual SGX +----------- + +The SGX feature is exposed to the guest via SGX CPUID. For most of the SGX CPUID +leaves, QEMU can report the same CPUID info to the guest as on the host. By +reporting the same CPUID, the guest is able to use the full capacity of SGX, and KVM +doesn't need to emulate that information. + +The guest's EPC base and size are determined by QEMU, and KVM needs QEMU to +notify it of this information before it can initialize SGX for the guest. + +Virtual EPC +~~~~~~~~~~~ + +By default, QEMU does not assign EPC to a VM, i.e. fully enabling SGX in a VM +requires explicit allocation of EPC to the VM. Similar to other specialized +memory types, e.g. hugetlbfs, EPC is exposed as a memory backend. + +SGX EPC is enumerated through CPUID, i.e. EPC "devices" need to be realized +prior to realizing the vCPUs themselves, which occurs long before generic +devices are parsed and realized. This limitation means that EPC does not +require -maxmem as EPC is not treated as {cold,hot}plugged memory. + +QEMU does not artificially restrict the number of EPC sections exposed to a +guest, e.g. QEMU will happily allow you to create 64 1M EPC sections. Be aware +that some kernels may not recognize all EPC sections, e.g. the Linux SGX driver +is hardwired to support only 8 EPC sections. + +The following QEMU snippet creates two EPC sections, with 64M pre-allocated +to the VM and an additional 28M mapped but not allocated:: + + -object memory-backend-epc,id=mem1,size=64M,prealloc=on \ + -object memory-backend-epc,id=mem2,size=28M \ + -M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2 + +Note: + +The size and location of the virtual EPC are far less restricted compared +to physical EPC. Because physical EPC is protected via range registers, +the size of the physical EPC must be a power of two (though software sees +a subset of the full EPC, e.g. 92M or 128M) and the EPC must be naturally +aligned. KVM SGX's virtual EPC is purely a software construct and only +requires the size and location to be page aligned. QEMU enforces that the EPC +size is a multiple of 4k and will ensure the base of the EPC is 4k aligned. +To simplify the implementation, EPC is always located above 4g in the guest +physical address space. + +Migration +~~~~~~~~~ + +QEMU/KVM doesn't prevent live migrating SGX VMs, although from hardware's +perspective, SGX doesn't support live migration, since both EPC and the SGX +key hierarchy are bound to the physical platform. However, live migration +can be supported if the guest software stack can recreate +enclaves when it suffers a sudden loss of EPC, and if guest enclaves can detect +SGX keys being changed and handle this gracefully. 
For instance, when ERESUME fails +with #PF.SGX, guest software can gracefully detect it and recreate enclaves; +and when enclave fails to unseal sensitive information from outside, it can +detect such error and sensitive information can be provisioned to it again. + +CPUID +~~~~~ + +Due to its myriad dependencies, SGX is currently not listed as supported +in any of QEMU's built-in CPU configuration. To expose SGX (and SGX Launch +Control) to a guest, you must either use ``-cpu host`` to pass-through the +host CPU model, or explicitly enable SGX when using a built-in CPU model, +e.g. via ``-cpu ,+sgx`` or ``-cpu ,+sgx,+sgxlc``. + +All SGX sub-features enumerated through CPUID, e.g. SGX2, MISCSELECT, +ATTRIBUTES, etc... can be restricted via CPUID flags. Be aware that enforcing +restriction of MISCSELECT, ATTRIBUTES and XFRM requires intercepting ECREATE, +i.e. may marginally reduce SGX performance in the guest. All SGX sub-features +controlled via -cpu are prefixed with "sgx", e.g.:: + + $ qemu-system-x86_64 -cpu help | xargs printf "%s\n" | grep sgx + sgx + sgx-debug + sgx-encls-c + sgx-enclv + sgx-exinfo + sgx-kss + sgx-mode64 + sgx-provisionkey + sgx-tokenkey + sgx1 + sgx2 + sgxlc + +The following QEMU snippet passes through the host CPU but restricts access to +the provision and EINIT token keys:: + + -cpu host,-sgx-provisionkey,-sgx-tokenkey + +SGX sub-features cannot be emulated, i.e. sub-features that are not present +in hardware cannot be forced on via '-cpu'. + +Virtualize SGX Launch Control +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +QEMU SGX support for Launch Control (LC) is passive, in the sense that it +does not actively change the LC configuration. QEMU SGX provides the user +the ability to set/clear the CPUID flag (and by extension the associated +IA32_FEATURE_CONTROL MSR bit in fw_cfg) and saves/restores the LE Hash MSRs +when getting/putting guest state, but QEMU does not add new controls to +directly modify the LC configuration. Similar to hardware behavior, locking +the LC configuration to a non-Intel value is left to guest firmware. Unlike +host bios setting for SGX launch control(LC), there is no special bios setting +for SGX guest by our design. If host is in locked mode, we can still allow +creating VM with SGX. + +Feature Control +~~~~~~~~~~~~~~~ + +QEMU SGX updates the ``etc/msr_feature_control`` fw_cfg entry to set the SGX +(bit 18) and SGX LC (bit 17) flags based on their respective CPUID support, +i.e. existing guest firmware will automatically set SGX and SGX LC accordingly, +assuming said firmware supports fw_cfg.msr_feature_control. + +Launching a guest +----------------- + +To launch a SGX guest: + +.. parsed-literal:: + + |qemu_system_x86| \\ + -cpu host,+sgx-provisionkey \\ + -object memory-backend-epc,id=mem1,size=64M,prealloc=on \\ + -object memory-backend-epc,id=mem2,size=28M \\ + -M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2 + +Utilizing SGX in the guest requires a kernel/OS with SGX support. 
+The support can be determined in guest by:: + + $ grep sgx /proc/cpuinfo + +and SGX epc info by:: + + $ dmesg | grep sgx + [ 1.242142] sgx: EPC section 0x180000000-0x181bfffff + [ 1.242319] sgx: EPC section 0x181c00000-0x1837fffff + +References +---------- + +- `SGX Homepage `__ + +- `SGX SDK `__ + +- SGX specification: Intel SDM Volume 3 diff --git a/docs/system/images.rst b/docs/system/images.rst index 3d9144e6258..d000bd6b6f1 100644 --- a/docs/system/images.rst +++ b/docs/system/images.rst @@ -20,7 +20,7 @@ where myimage.img is the disk image filename and mysize is its size in kilobytes. You can add an ``M`` suffix to give the size in megabytes and a ``G`` suffix for gigabytes. -See the qemu-img invocation documentation for more information. +See the ``qemu-img`` invocation documentation for more information. .. _disk_005fimages_005fsnapshot_005fmode: diff --git a/docs/system/index.rst b/docs/system/index.rst index b05af716a97..73bbedbc22d 100644 --- a/docs/system/index.rst +++ b/docs/system/index.rst @@ -1,44 +1,36 @@ -.. This is the top level page for the 'system' manual. - - +---------------- System Emulation -================ +---------------- -This manual is the overall guide for users using QEMU +This section of the manual is the overall guide for users using QEMU for full system emulation (as opposed to user-mode emulation). This includes working with hypervisors such as KVM, Xen, Hax or Hypervisor.Framework. -Contents: - .. toctree:: :maxdepth: 3 quickstart invocation + device-emulation keys mux-chardev monitor images - net virtio-net-failover - usb - nvme - ivshmem linuxboot generic-loader guest-loader + barrier vnc-security tls + secrets + authz gdb managed-startup + bootindex cpu-hotplug - virtio-pmem pr-manager targets security multi-process - deprecated - removed-features - build-platforms - license diff --git a/docs/system/multi-process.rst b/docs/system/multi-process.rst index 46bb0cafc27..210531ee17d 100644 --- a/docs/system/multi-process.rst +++ b/docs/system/multi-process.rst @@ -45,7 +45,7 @@ Following is a description of command-line used to launch mpqemu. -device lsi53c895a,id=lsi0 \ -drive id=drive_image2,file=/build/ol7-nvme-test-1.qcow2 \ -device scsi-hd,id=drive2,drive=drive_image2,bus=lsi0.0,scsi-id=0 \ - -object x-remote-object,id=robj1,devid=lsi1,fd=4, + -object x-remote-object,id=robj1,devid=lsi0,fd=4, * QEMU: diff --git a/docs/system/ppc/powernv.rst b/docs/system/ppc/powernv.rst index 43c58bc32e7..86186b7d2cb 100644 --- a/docs/system/ppc/powernv.rst +++ b/docs/system/ppc/powernv.rst @@ -48,15 +48,14 @@ Firmware -------- The OPAL firmware (OpenPower Abstraction Layer) for OpenPower systems -includes the runtime services `skiboot` and the bootloader kernel and -initramfs `skiroot`. Source code can be found on GitHub: +includes the runtime services ``skiboot`` and the bootloader kernel and +initramfs ``skiroot``. Source code can be found on GitHub: https://github.com/open-power. -Prebuilt images of `skiboot` and `skiboot` are made available on the `OpenPOWER `__ site. To boot a POWER9 machine, use the `witherspoon `__ images. For POWER8, use -the `palmetto `__ images. +Prebuilt images of ``skiboot`` and ``skiroot`` are made available on the `OpenPOWER `__ site. -QEMU includes a prebuilt image of `skiboot` which is updated when a +QEMU includes a prebuilt image of ``skiboot`` which is updated when a more recent version is required by the models. 
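+A locally built firmware image can be used instead of the bundled one by
+passing it with ``-bios``. A rough sketch (assuming a self-built skiboot.lid;
+the file names and kernel payload are illustrative only):
+
+.. code-block:: bash
+
+   qemu-system-ppc64 -M powernv -bios /path/to/skiboot.lid \
+       -kernel zImage.epapr -initrd rootfs.cpio -nographic
+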
Boot options diff --git a/docs/system/ppc/ppce500.rst b/docs/system/ppc/ppce500.rst new file mode 100644 index 00000000000..9beef391717 --- /dev/null +++ b/docs/system/ppc/ppce500.rst @@ -0,0 +1,164 @@ +ppce500 generic platform (``ppce500``) +====================================== + +QEMU for PPC supports a special ``ppce500`` machine designed for emulation and +virtualization purposes. + +Supported devices +----------------- + +The ``ppce500`` machine supports the following devices: + +* PowerPC e500 series core (e500v2/e500mc/e5500/e6500) +* Configuration, Control, and Status Register (CCSR) +* Multicore Programmable Interrupt Controller (MPIC) with MSI support +* 1 16550A UART device +* 1 Freescale MPC8xxx I2C controller +* 1 Pericom pt7c4338 RTC via I2C +* 1 Freescale MPC8xxx GPIO controller +* Power-off functionality via one GPIO pin +* 1 Freescale MPC8xxx PCI host controller +* VirtIO devices via PCI bus +* 1 Freescale Enhanced Triple Speed Ethernet controller (eTSEC) + +Hardware configuration information +---------------------------------- + +The ``ppce500`` machine automatically generates a device tree blob ("dtb") +which it passes to the guest, if there is no ``-dtb`` option. This provides +information about the addresses, interrupt lines and other configuration of +the various devices in the system. + +If users want to provide their own DTB, they can use the ``-dtb`` option. +These DTBs should have the following requirements: + +* The number of subnodes under /cpus node should match QEMU's ``-smp`` option +* The /memory reg size should match QEMU’s selected ram_size via ``-m`` + +Both ``qemu-system-ppc`` and ``qemu-system-ppc64`` provide emulation for the +following 32-bit PowerPC CPUs: + +* e500v2 +* e500mc + +Additionally ``qemu-system-ppc64`` provides support for the following 64-bit +PowerPC CPUs: + +* e5500 +* e6500 + +The CPU type can be specified via the ``-cpu`` command line. If not specified, +it creates a machine with e500v2 core. The following example shows an e6500 +based machine creation: + +.. code-block:: bash + + $ qemu-system-ppc64 -nographic -M ppce500 -cpu e6500 + +Boot options +------------ + +The ``ppce500`` machine can start using the standard -kernel functionality +for loading a payload like an OS kernel (e.g.: Linux), or U-Boot firmware. + +When -bios is omitted, the default pc-bios/u-boot.e500 firmware image is used +as the BIOS. QEMU follows below truth table to select which payload to execute: + +===== ========== ======= +-bios -kernel payload +===== ========== ======= + N N u-boot + N Y kernel + Y don't care u-boot +===== ========== ======= + +When both -bios and -kernel are present, QEMU loads U-Boot and U-Boot in turns +automatically loads the kernel image specified by the -kernel parameter via +U-Boot's built-in "bootm" command, hence a legacy uImage format is required in +such scenario. + +Running Linux kernel +-------------------- + +Linux mainline v5.11 release is tested at the time of writing. To build a +Linux mainline kernel that can be booted by the ``ppce500`` machine in +64-bit mode, simply configure the kernel using the defconfig configuration: + +.. code-block:: bash + + $ export ARCH=powerpc + $ export CROSS_COMPILE=powerpc-linux- + $ make corenet64_smp_defconfig + $ make menuconfig + +then manually select the following configuration: + + Platform support > Freescale Book-E Machine Type > QEMU generic e500 platform + +To boot the newly built Linux kernel in QEMU with the ``ppce500`` machine: + +.. 
code-block:: bash + + $ qemu-system-ppc64 -M ppce500 -cpu e5500 -smp 4 -m 2G \ + -display none -serial stdio \ + -kernel vmlinux \ + -initrd /path/to/rootfs.cpio \ + -append "root=/dev/ram" + +To build a Linux mainline kernel that can be booted by the ``ppce500`` machine +in 32-bit mode, use the same 64-bit configuration steps except the defconfig +file should use corenet32_smp_defconfig. + +To boot the 32-bit Linux kernel: + +.. code-block:: bash + + $ qemu-system-ppc{64|32} -M ppce500 -cpu e500mc -smp 4 -m 2G \ + -display none -serial stdio \ + -kernel vmlinux \ + -initrd /path/to/rootfs.cpio \ + -append "root=/dev/ram" + +Running U-Boot +-------------- + +U-Boot mainline v2021.07 release is tested at the time of writing. To build a +U-Boot mainline bootloader that can be booted by the ``ppce500`` machine, use +the qemu-ppce500_defconfig with similar commands as described above for Linux: + +.. code-block:: bash + + $ export CROSS_COMPILE=powerpc-linux- + $ make qemu-ppce500_defconfig + +You will get u-boot file in the build tree. + +When U-Boot boots, you will notice the following if using with ``-cpu e6500``: + +.. code-block:: none + + CPU: Unknown, Version: 0.0, (0x00000000) + Core: e6500, Version: 2.0, (0x80400020) + +This is because we only specified a core name to QEMU and it does not have a +meaningful SVR value which represents an actual SoC that integrates such core. +You can specify a real world SoC device that QEMU has built-in support but all +these SoCs are e500v2 based MPC85xx series, hence you cannot test anything +built for P4080 (e500mc), P5020 (e5500) and T2080 (e6500). + +By default a VirtIO standard PCI networking device is connected as an ethernet +interface at PCI address 0.1.0, but we can switch that to an e1000 NIC by: + +.. code-block:: bash + + $ qemu-system-ppc -M ppce500 -smp 4 -m 2G \ + -display none -serial stdio \ + -bios u-boot \ + -nic tap,ifname=tap0,script=no,downscript=no,model=e1000 + +The QEMU ``ppce500`` machine can also dynamically instantiate an eTSEC device +if “-device eTSEC” is given to QEMU: + +.. code-block:: bash + + -netdev tap,ifname=tap0,script=no,downscript=no,id=net0 -device eTSEC,netdev=net0 diff --git a/docs/system/qemu-block-drivers.rst b/docs/system/qemu-block-drivers.rst index bd99d4fa8eb..c2c0114cec4 100644 --- a/docs/system/qemu-block-drivers.rst +++ b/docs/system/qemu-block-drivers.rst @@ -1,18 +1,22 @@ :orphan: +============================ QEMU block drivers reference ============================ +-------- Synopsis -------- QEMU block driver reference manual +----------- Description ----------- .. include:: qemu-block-drivers.rst.inc +-------- See also -------- diff --git a/docs/system/qemu-block-drivers.rst.inc b/docs/system/qemu-block-drivers.rst.inc index 60a064b2327..e313784426d 100644 --- a/docs/system/qemu-block-drivers.rst.inc +++ b/docs/system/qemu-block-drivers.rst.inc @@ -511,13 +511,13 @@ of an inet socket: |qemu_system| linux.img -hdb nbd+unix://?socket=/tmp/my_socket -In this case, the block device must be exported using qemu-nbd: +In this case, the block device must be exported using ``qemu-nbd``: .. parsed-literal:: qemu-nbd --socket=/tmp/my_socket my_disk.qcow2 -The use of qemu-nbd allows sharing of a disk between several guests: +The use of ``qemu-nbd`` allows sharing of a disk between several guests: .. 
parsed-literal:: @@ -530,7 +530,7 @@ and then you can use it with two guests: |qemu_system| linux1.img -hdb nbd+unix://?socket=/tmp/my_socket |qemu_system| linux2.img -hdb nbd+unix://?socket=/tmp/my_socket -If the nbd-server uses named exports (supported since NBD 2.9.18, or with QEMU's +If the ``nbd-server`` uses named exports (supported since NBD 2.9.18, or with QEMU's own embedded NBD server), you must specify an export name in the URI: .. parsed-literal:: @@ -547,75 +547,6 @@ also available. Here are some example of the older syntax: |qemu_system| linux2.img -hdb nbd:unix:/tmp/my_socket |qemu_system| -cdrom nbd:localhost:10809:exportname=debian-500-ppc-netinst - - -Sheepdog disk images -~~~~~~~~~~~~~~~~~~~~ - -Sheepdog is a distributed storage system for QEMU. It provides highly -available block level storage volumes that can be attached to -QEMU-based virtual machines. - -You can create a Sheepdog disk image with the command: - -.. parsed-literal:: - - qemu-img create sheepdog:///IMAGE SIZE - -where *IMAGE* is the Sheepdog image name and *SIZE* is its -size. - -To import the existing *FILENAME* to Sheepdog, you can use a -convert command. - -.. parsed-literal:: - - qemu-img convert FILENAME sheepdog:///IMAGE - -You can boot from the Sheepdog disk image with the command: - -.. parsed-literal:: - - |qemu_system| sheepdog:///IMAGE - -You can also create a snapshot of the Sheepdog image like qcow2. - -.. parsed-literal:: - - qemu-img snapshot -c TAG sheepdog:///IMAGE - -where *TAG* is a tag name of the newly created snapshot. - -To boot from the Sheepdog snapshot, specify the tag name of the -snapshot. - -.. parsed-literal:: - - |qemu_system| sheepdog:///IMAGE#TAG - -You can create a cloned image from the existing snapshot. - -.. parsed-literal:: - - qemu-img create -b sheepdog:///BASE#TAG sheepdog:///IMAGE - -where *BASE* is an image name of the source snapshot and *TAG* -is its tag name. - -You can use an unix socket instead of an inet socket: - -.. parsed-literal:: - - |qemu_system| sheepdog+unix:///IMAGE?socket=PATH - -If the Sheepdog daemon doesn't run on the local host, you need to -specify one of the Sheepdog servers to connect to. - -.. parsed-literal:: - - qemu-img create sheepdog://HOSTNAME:PORT/IMAGE SIZE - |qemu_system| sheepdog://HOSTNAME:PORT/IMAGE - iSCSI LUNs ~~~~~~~~~~ diff --git a/docs/system/qemu-cpu-models.rst b/docs/system/qemu-cpu-models.rst index 53d7538c473..5cf6e46f8ae 100644 --- a/docs/system/qemu-cpu-models.rst +++ b/docs/system/qemu-cpu-models.rst @@ -1,20 +1,24 @@ :orphan: +================================== QEMU / KVM CPU model configuration ================================== +-------- Synopsis -'''''''' +-------- QEMU CPU Modelling Infrastructure manual +----------- Description -''''''''''' +----------- .. include:: cpu-models-x86.rst.inc .. include:: cpu-models-mips.rst.inc +-------- See also -'''''''' +-------- The HTML documentation of QEMU for more precise information and Linux user mode emulator invocation. diff --git a/docs/system/qemu-manpage.rst b/docs/system/qemu-manpage.rst index e9a25d0680f..c47a4127582 100644 --- a/docs/system/qemu-manpage.rst +++ b/docs/system/qemu-manpage.rst @@ -6,9 +6,11 @@ parts of the documentation that go in the manpage as well as the HTML manual. -Title -===== +======================= +QEMU User Documentation +======================= +-------- Synopsis -------- @@ -16,11 +18,13 @@ Synopsis |qemu_system| [options] [disk_image] +----------- Description ----------- .. 
include:: target-i386-desc.rst.inc +------- Options ------- @@ -33,11 +37,13 @@ not need a disk image. .. include:: mux-chardev.rst.inc +----- Notes ----- .. include:: device-url-syntax.rst.inc +-------- See also -------- diff --git a/docs/system/removed-features.rst b/docs/system/removed-features.rst deleted file mode 100644 index 29e90601a51..00000000000 --- a/docs/system/removed-features.rst +++ /dev/null @@ -1,463 +0,0 @@ - -Removed features -================ - -What follows is a record of recently removed, formerly deprecated -features that serves as a record for users who have encountered -trouble after a recent upgrade. - -System emulator command line arguments --------------------------------------- - -``-net ...,name=``\ *name* (removed in 5.1) -''''''''''''''''''''''''''''''''''''''''''' - -The ``name`` parameter of the ``-net`` option was a synonym -for the ``id`` parameter, which should now be used instead. - -``-no-kvm`` (removed in 5.2) -'''''''''''''''''''''''''''' - -The ``-no-kvm`` argument was a synonym for setting ``-machine accel=tcg``. - -``-realtime`` (removed in 6.0) -'''''''''''''''''''''''''''''' - -The ``-realtime mlock=on|off`` argument has been replaced by the -``-overcommit mem-lock=on|off`` argument. - -``-show-cursor`` option (removed in 6.0) -'''''''''''''''''''''''''''''''''''''''' - -Use ``-display sdl,show-cursor=on``, ``-display gtk,show-cursor=on`` -or ``-display default,show-cursor=on`` instead. - -``-tb-size`` option (removed in 6.0) -'''''''''''''''''''''''''''''''''''' - -QEMU 5.0 introduced an alternative syntax to specify the size of the translation -block cache, ``-accel tcg,tb-size=``. - -``-usbdevice audio`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''' - -This option lacked the possibility to specify an audio backend device. -Use ``-device usb-audio`` now instead (and specify a corresponding USB -host controller or ``-usb`` if necessary). - -``-vnc acl`` (removed in 6.0) -''''''''''''''''''''''''''''' - -The ``acl`` option to the ``-vnc`` argument has been replaced -by the ``tls-authz`` and ``sasl-authz`` options. - -``-mon ...,control=readline,pretty=on|off`` (removed in 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``pretty=on|off`` switch has no effect for HMP monitors and -its use is rejected. - -``-drive file=json:{...{'driver':'file'}}`` (removed 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The 'file' driver for drives is no longer appropriate for character or host -devices and will only accept regular files (S_IFREG). The correct driver -for these file types is 'host_cdrom' or 'host_device' as appropriate. - -Floppy controllers' drive properties (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use ``-device floppy,...`` instead. When configuring onboard floppy -controllers -:: - - -global isa-fdc.driveA=... - -global sysbus-fdc.driveA=... - -global SUNW,fdtwo.drive=... - -become -:: - - -device floppy,unit=0,drive=... - -and -:: - - -global isa-fdc.driveB=... - -global sysbus-fdc.driveB=... - -become -:: - - -device floppy,unit=1,drive=... - -When plugging in a floppy controller -:: - - -device isa-fdc,...,driveA=... - -becomes -:: - - -device isa-fdc,... - -device floppy,unit=0,drive=... - -and -:: - - -device isa-fdc,...,driveB=... - -becomes -:: - - -device isa-fdc,... - -device floppy,unit=1,drive=... 
- -``-drive`` with bogus interface type (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Drives with interface types other than ``if=none`` are for onboard -devices. Drives the board doesn't pick up can no longer be used with --device. Use ``if=none`` instead. - -``-usbdevice ccid`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''' - -This option was undocumented and not used in the field. -Use `-device usb-ccid`` instead. - - -QEMU Machine Protocol (QMP) commands ------------------------------------- - -``block-dirty-bitmap-add`` "autoload" parameter (removed in 4.2.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The "autoload" parameter has been ignored since 2.12.0. All bitmaps -are automatically loaded from qcow2 images. - -``cpu-add`` (removed in 5.2) -'''''''''''''''''''''''''''' - -Use ``device_add`` for hotplugging vCPUs instead of ``cpu-add``. See -documentation of ``query-hotpluggable-cpus`` for additional details. - -``change`` (removed in 6.0) -''''''''''''''''''''''''''' - -Use ``blockdev-change-medium`` or ``change-vnc-password`` instead. - -``query-events`` (removed in 6.0) -''''''''''''''''''''''''''''''''' - -The ``query-events`` command has been superseded by the more powerful -and accurate ``query-qmp-schema`` command. - -``migrate_set_cache_size`` and ``query-migrate-cache-size`` (removed in 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use ``migrate_set_parameter`` and ``info migrate_parameters`` instead. - -``migrate_set_downtime`` and ``migrate_set_speed`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use ``migrate_set_parameter`` instead. - -``query-cpus`` (removed in 6.0) -''''''''''''''''''''''''''''''' - -The ``query-cpus`` command is replaced by the ``query-cpus-fast`` command. - -``query-cpus-fast`` ``arch`` output member (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``arch`` output member of the ``query-cpus-fast`` command is -replaced by the ``target`` output member. - -chardev client socket with ``wait`` option (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Character devices creating sockets in client mode should not specify -the 'wait' field, which is only applicable to sockets in server mode - -``query-named-block-nodes`` result ``encryption_key_missing`` (removed in 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Removed with no replacement. - -``query-block`` result ``inserted.encryption_key_missing`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Removed with no replacement. - -``query-named-block-nodes`` and ``query-block`` result dirty-bitmaps[i].status (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``status`` field of the ``BlockDirtyInfo`` structure, returned by -these commands is removed. Two new boolean fields, ``recording`` and -``busy`` effectively replace it. - -``query-block`` result field ``dirty-bitmaps`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``dirty-bitmaps`` field of the ``BlockInfo`` structure, returned by -the query-block command is itself now removed. 
The ``dirty-bitmaps`` -field of the ``BlockDeviceInfo`` struct should be used instead, which is the -type of the ``inserted`` field in query-block replies, as well as the -type of array items in query-named-block-nodes. - -``object-add`` option ``props`` (removed in 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''' - -Specify the properties for the object as top-level arguments instead. - -Human Monitor Protocol (HMP) commands -------------------------------------- - -The ``hub_id`` parameter of ``hostfwd_add`` / ``hostfwd_remove`` (removed in 5.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``[hub_id name]`` parameter tuple of the 'hostfwd_add' and -'hostfwd_remove' HMP commands has been replaced by ``netdev_id``. - -``cpu-add`` (removed in 5.2) -'''''''''''''''''''''''''''' - -Use ``device_add`` for hotplugging vCPUs instead of ``cpu-add``. See -documentation of ``query-hotpluggable-cpus`` for additional details. - -``change vnc TARGET`` (removed in 6.0) -'''''''''''''''''''''''''''''''''''''' - -No replacement. The ``change vnc password`` and ``change DEVICE MEDIUM`` -commands are not affected. - -``acl_show``, ``acl_reset``, ``acl_policy``, ``acl_add``, ``acl_remove`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``acl_show``, ``acl_reset``, ``acl_policy``, ``acl_add``, and -``acl_remove`` commands were removed with no replacement. Authorization -for VNC should be performed using the pluggable QAuthZ objects. - -``migrate-set-cache-size`` and ``info migrate-cache-size`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use ``migrate-set-parameters`` and ``info migrate-parameters`` instead. - -``migrate_set_downtime`` and ``migrate_set_speed`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use ``migrate-set-parameters`` instead. - -Guest Emulator ISAs -------------------- - -RISC-V ISA privilege specification version 1.09.1 (removed in 5.1) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The RISC-V ISA privilege specification version 1.09.1 has been removed. -QEMU supports both the newer version 1.10.0 and the ratified version 1.11.0, these -should be used instead of the 1.09.1 version. - -System emulator CPUS --------------------- - -KVM guest support on 32-bit Arm hosts (removed in 5.2) -'''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The Linux kernel has dropped support for allowing 32-bit Arm systems -to host KVM guests as of the 5.7 kernel. Accordingly, QEMU is deprecating -its support for this configuration and will remove it in a future version. -Running 32-bit guests on a 64-bit Arm host remains supported. - -RISC-V ISA Specific CPUs (removed in 5.1) -''''''''''''''''''''''''''''''''''''''''' - -The RISC-V cpus with the ISA version in the CPU name have been removed. The -four CPUs are: ``rv32gcsu-v1.9.1``, ``rv32gcsu-v1.10.0``, ``rv64gcsu-v1.9.1`` and -``rv64gcsu-v1.10.0``. Instead the version can be specified via the CPU ``priv_spec`` -option when using the ``rv32`` or ``rv64`` CPUs. - -RISC-V no MMU CPUs (removed in 5.1) -''''''''''''''''''''''''''''''''''' - -The RISC-V no MMU cpus have been removed. The two CPUs: ``rv32imacu-nommu`` and -``rv64imacu-nommu`` can no longer be used. Instead the MMU status can be specified -via the CPU ``mmu`` option when using the ``rv32`` or ``rv64`` CPUs. 
- -System emulator machines ------------------------- - -``spike_v1.9.1`` and ``spike_v1.10`` (removed in 5.1) -''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The version specific Spike machines have been removed in favour of the -generic ``spike`` machine. If you need to specify an older version of the RISC-V -spec you can use the ``-cpu rv64gcsu,priv_spec=v1.10.0`` command line argument. - -mips ``r4k`` platform (removed in 5.2) -'''''''''''''''''''''''''''''''''''''' - -This machine type was very old and unmaintained. Users should use the ``malta`` -machine type instead. - -mips ``fulong2e`` machine alias (removed in 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''' - -This machine has been renamed ``fuloong2e``. - -``pc-1.0``, ``pc-1.1``, ``pc-1.2`` and ``pc-1.3`` (removed in 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -These machine types were very old and likely could not be used for live -migration from old QEMU versions anymore. Use a newer machine type instead. - - -linux-user mode CPUs --------------------- - -``tilegx`` CPUs (removed in 6.0) -'''''''''''''''''''''''''''''''' - -The ``tilegx`` guest CPU support has been removed without replacement. It was -only implemented in linux-user mode, but support for this CPU was removed from -the upstream Linux kernel in 2018, and it has also been dropped from glibc, so -there is no new Linux development taking place with this architecture. For -running the old binaries, you can use older versions of QEMU. - -System emulator devices ------------------------ - -``ide-drive`` (removed in 6.0) -'''''''''''''''''''''''''''''' - -The 'ide-drive' device has been removed. Users should use 'ide-hd' or -'ide-cd' as appropriate to get an IDE hard disk or CD-ROM as needed. - -``scsi-disk`` (removed in 6.0) -'''''''''''''''''''''''''''''' - -The 'scsi-disk' device has been removed. Users should use 'scsi-hd' or -'scsi-cd' as appropriate to get a SCSI hard disk or CD-ROM as needed. - -Related binaries ----------------- - -``qemu-nbd --partition`` (removed in 5.0) -''''''''''''''''''''''''''''''''''''''''' - -The ``qemu-nbd --partition $digit`` code (also spelled ``-P``) -could only handle MBR partitions, and never correctly handled logical -partitions beyond partition 5. Exporting a partition can still be -done by utilizing the ``--image-opts`` option with a raw blockdev -using the ``offset`` and ``size`` parameters layered on top of -any other existing blockdev. For example, if partition 1 is 100MiB -long starting at 1MiB, the old command:: - - qemu-nbd -t -P 1 -f qcow2 file.qcow2 - -can be rewritten as:: - - qemu-nbd -t --image-opts driver=raw,offset=1M,size=100M,file.driver=qcow2,file.file.driver=file,file.file.filename=file.qcow2 - -``qemu-img convert -n -o`` (removed in 5.1) -''''''''''''''''''''''''''''''''''''''''''' - -All options specified in ``-o`` are image creation options, so -they are now rejected when used with ``-n`` to skip image creation. - - -``qemu-img create -b bad file $size`` (removed in 5.1) -'''''''''''''''''''''''''''''''''''''''''''''''''''''' - -When creating an image with a backing file that could not be opened, -``qemu-img create`` used to issue a warning about the failure but -proceed with the image creation if an explicit size was provided. 
-However, as the ``-u`` option exists for this purpose, it is safer to -enforce that any failure to open the backing image (including if the -backing file is missing or an incorrect format was specified) is an -error when ``-u`` is not used. - -Command line options --------------------- - -``-smp`` (invalid topologies) (removed 5.2) -''''''''''''''''''''''''''''''''''''''''''' - -CPU topology properties should describe whole machine topology including -possible CPUs. - -However, historically it was possible to start QEMU with an incorrect topology -where *n* <= *sockets* * *cores* * *threads* < *maxcpus*, -which could lead to an incorrect topology enumeration by the guest. -Support for invalid topologies is removed, the user must ensure -topologies described with -smp include all possible cpus, i.e. -*sockets* * *cores* * *threads* = *maxcpus*. - -``-numa`` node (without memory specified) (removed 5.2) -''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Splitting RAM by default between NUMA nodes had the same issues as ``mem`` -parameter with the difference that the role of the user plays QEMU using -implicit generic or board specific splitting rule. -Use ``memdev`` with *memory-backend-ram* backend or ``mem`` (if -it's supported by used machine type) to define mapping explicitly instead. -Users of existing VMs, wishing to preserve the same RAM distribution, should -configure it explicitly using ``-numa node,memdev`` options. Current RAM -distribution can be retrieved using HMP command ``info numa`` and if separate -memory devices (pc|nv-dimm) are present use ``info memory-device`` and subtract -device memory from output of ``info numa``. - -``-numa node,mem=``\ *size* (removed in 5.1) -'''''''''''''''''''''''''''''''''''''''''''' - -The parameter ``mem`` of ``-numa node`` was used to assign a part of -guest RAM to a NUMA node. But when using it, it's impossible to manage a specified -RAM chunk on the host side (like bind it to a host node, setting bind policy, ...), -so the guest ends up with the fake NUMA configuration with suboptiomal performance. -However since 2014 there is an alternative way to assign RAM to a NUMA node -using parameter ``memdev``, which does the same as ``mem`` and adds -means to actually manage node RAM on the host side. Use parameter ``memdev`` -with *memory-backend-ram* backend as replacement for parameter ``mem`` -to achieve the same fake NUMA effect or a properly configured -*memory-backend-file* backend to actually benefit from NUMA configuration. -New machine versions (since 5.1) will not accept the option but it will still -work with old machine types. User can check the QAPI schema to see if the legacy -option is supported by looking at MachineInfo::numa-mem-supported property. - -``-mem-path`` fallback to RAM (removed in 5.0) -'''''''''''''''''''''''''''''''''''''''''''''' - -If guest RAM allocation from file pointed by ``mem-path`` failed, -QEMU was falling back to allocating from RAM, which might have resulted -in unpredictable behavior since the backing file specified by the user -as ignored. Currently, users are responsible for making sure the backing storage -specified with ``-mem-path`` can actually provide the guest RAM configured with -``-m`` and QEMU fails to start up if RAM allocation is unsuccessful. - -``-smp`` (invalid topologies) (removed 5.2) -''''''''''''''''''''''''''''''''''''''''''' - -CPU topology properties should describe whole machine topology including -possible CPUs. 
- -However, historically it was possible to start QEMU with an incorrect topology -where *n* <= *sockets* * *cores* * *threads* < *maxcpus*, -which could lead to an incorrect topology enumeration by the guest. -Support for invalid topologies is removed, the user must ensure -topologies described with -smp include all possible cpus, i.e. -*sockets* * *cores* * *threads* = *maxcpus*. - -``-machine enforce-config-section=on|off`` (removed 5.2) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``enforce-config-section`` property was replaced by the -``-global migration.send-configuration={on|off}`` option. - -Block devices -------------- - -VXHS backend (removed in 5.1) -''''''''''''''''''''''''''''' - -The VXHS code did not compile since v2.12.0. It was removed in 5.1. diff --git a/docs/system/riscv/microchip-icicle-kit.rst b/docs/system/riscv/microchip-icicle-kit.rst index 4fe97bce3f0..40798b1aae5 100644 --- a/docs/system/riscv/microchip-icicle-kit.rst +++ b/docs/system/riscv/microchip-icicle-kit.rst @@ -15,33 +15,53 @@ Supported devices The ``microchip-icicle-kit`` machine supports the following devices: - * 1 E51 core - * 4 U54 cores - * Core Level Interruptor (CLINT) - * Platform-Level Interrupt Controller (PLIC) - * L2 Loosely Integrated Memory (L2-LIM) - * DDR memory controller - * 5 MMUARTs - * 1 DMA controller - * 2 GEM Ethernet controllers - * 1 SDHC storage controller +* 1 E51 core +* 4 U54 cores +* Core Level Interruptor (CLINT) +* Platform-Level Interrupt Controller (PLIC) +* L2 Loosely Integrated Memory (L2-LIM) +* DDR memory controller +* 5 MMUARTs +* 1 DMA controller +* 2 GEM Ethernet controllers +* 1 SDHC storage controller Boot options ------------ The ``microchip-icicle-kit`` machine can start using the standard -bios functionality for loading its BIOS image, aka Hart Software Services (HSS_). -HSS loads the second stage bootloader U-Boot from an SD card. It does not -support direct kernel loading via the -kernel option. One has to load kernel -from U-Boot. +HSS loads the second stage bootloader U-Boot from an SD card. Then a kernel +can be loaded from U-Boot. It also supports direct kernel booting via the +-kernel option along with the device tree blob via -dtb. When direct kernel +boot is used, the OpenSBI fw_dynamic BIOS image is used to boot a payload +like U-Boot or OS kernel directly. + +The user provided DTB should have the following requirements: + +* The /cpus node should contain at least one subnode for E51 and the number + of subnodes should match QEMU's ``-smp`` option +* The /memory reg size should match QEMU’s selected ram_size via ``-m`` +* Should contain a node for the CLINT device with a compatible string + "riscv,clint0" + +QEMU follows below truth table to select which payload to execute: + +===== ========== ========== ======= +-bios -kernel -dtb payload +===== ========== ========== ======= + N N don't care HSS + Y don't care don't care HSS + N Y Y kernel +===== ========== ========== ======= The memory is set to 1537 MiB by default which is the minimum required high memory size by HSS. A sanity check on ram size is performed in the machine init routine to prompt user to increase the RAM size to > 1537 MiB when less than 1537 MiB ram is detected. -Boot the machine ----------------- +Running HSS +----------- HSS 2020.12 release is tested at the time of writing. 
To build an HSS image that can be booted by the ``microchip-icicle-kit`` machine, type the following @@ -75,7 +95,7 @@ Then we can boot the machine by: -serial chardev:serial1 With above command line, current terminal session will be used for the first -serial port. Open another terminal window, and use `minicom` to connect the +serial port. Open another terminal window, and use ``minicom`` to connect the second serial port. .. code-block:: bash @@ -86,4 +106,44 @@ HSS output is on the first serial port (stdio) and U-Boot outputs on the second serial port. U-Boot will automatically load the Linux kernel from the SD card image. +Direct Kernel Boot +------------------ + +Sometimes we just want to test booting a new kernel, and transforming the +kernel image to the format required by the HSS bootflow is tedious. We can +use '-kernel' for direct kernel booting just like other RISC-V machines do. + +In this mode, the OpenSBI fw_dynamic BIOS image for 'generic' platform is +used to boot an S-mode payload like U-Boot or OS kernel directly. + +For example, the following commands show building a U-Boot image from U-Boot +mainline v2021.07 for the Microchip Icicle Kit board: + +.. code-block:: bash + + $ export CROSS_COMPILE=riscv64-linux- + $ make microchip_mpfs_icicle_defconfig + +Then we can boot the machine by: + +.. code-block:: bash + + $ qemu-system-riscv64 -M microchip-icicle-kit -smp 5 -m 2G \ + -sd path/to/sdcard.img \ + -nic user,model=cadence_gem \ + -nic tap,ifname=tap,model=cadence_gem,script=no \ + -display none -serial stdio \ + -kernel path/to/u-boot/build/dir/u-boot.bin \ + -dtb path/to/u-boot/build/dir/u-boot.dtb + +CAVEATS: + +* Check the "stdout-path" property in the /chosen node in the DTB to determine + which serial port is used for the serial console, e.g.: if the console is set + to the second serial port, change to use "-serial null -serial stdio". +* The default U-Boot configuration uses CONFIG_OF_SEPARATE hence the ELF image + ``u-boot`` cannot be passed to "-kernel" as it does not contain the DTB hence + ``u-boot.bin`` has to be used which does contain one. To use the ELF image, + we need to change to CONFIG_OF_EMBED or CONFIG_OF_PRIOR_STAGE. + .. _HSS: https://github.com/polarfire-soc/hart-software-services diff --git a/docs/system/riscv/shakti-c.rst b/docs/system/riscv/shakti-c.rst new file mode 100644 index 00000000000..fea57f7b6ba --- /dev/null +++ b/docs/system/riscv/shakti-c.rst @@ -0,0 +1,82 @@ +Shakti C Reference Platform (``shakti_c``) +========================================== + +Shakti C Reference Platform is a reference platform based on arty a7 100t +for the Shakti SoC. + +Shakti SoC is a SoC based on the Shakti C-class processor core. Shakti C +is a 64bit RV64GCSUN processor core. + +For more details on Shakti SoC, please see: +https://gitlab.com/shaktiproject/cores/shakti-soc/-/blob/master/fpga/boards/artya7-100t/c-class/README.rst + +For more info on the Shakti C-class core, please see: +https://c-class.readthedocs.io/en/latest/ + +Supported devices +----------------- + +The ``shakti_c`` machine supports the following devices: + + * 1 C-class core + * Core Level Interruptor (CLINT) + * Platform-Level Interrupt Controller (PLIC) + * 1 UART + +Boot options +------------ + +The ``shakti_c`` machine can start using the standard -bios +functionality for loading the baremetal application or opensbi. + +Boot the machine +---------------- + +Shakti SDK +~~~~~~~~~~ +Shakti SDK can be used to generate the baremetal example UART applications. + +.. 
code-block:: bash + + $ git clone https://gitlab.com/behindbytes/shakti-sdk.git + $ cd shakti-sdk + $ make software PROGRAM=loopback TARGET=artix7_100t + +Binary would be generated in: + software/examples/uart_applns/loopback/output/loopback.shakti + +You could also download the precompiled example applications using below +commands. + +.. code-block:: bash + + $ wget -c https://gitlab.com/behindbytes/shakti-binaries/-/raw/master/sdk/shakti_sdk_qemu.zip + $ unzip shakti_sdk_qemu.zip + +Then we can run the UART example using: + +.. code-block:: bash + + $ qemu-system-riscv64 -M shakti_c -nographic \ + -bios path/to/shakti_sdk_qemu/loopback.shakti + +OpenSBI +~~~~~~~ +We can also run OpenSBI with Test Payload. + +.. code-block:: bash + + $ git clone https://github.com/riscv/opensbi.git -b v0.9 + $ cd opensbi + $ wget -c https://gitlab.com/behindbytes/shakti-binaries/-/raw/master/dts/shakti.dtb + $ export CROSS_COMPILE=riscv64-unknown-elf- + $ export FW_FDT_PATH=./shakti.dtb + $ make PLATFORM=generic + +fw_payload.elf would be generated in build/platform/generic/firmware/fw_payload.elf. +Boot it using the below qemu command. + +.. code-block:: bash + + $ qemu-system-riscv64 -M shakti_c -nographic \ + -bios path/to/fw_payload.elf diff --git a/docs/system/riscv/sifive_u.rst b/docs/system/riscv/sifive_u.rst index 98e7562848f..7b166567f97 100644 --- a/docs/system/riscv/sifive_u.rst +++ b/docs/system/riscv/sifive_u.rst @@ -9,21 +9,22 @@ Supported devices The ``sifive_u`` machine supports the following devices: - * 1 E51 / E31 core - * Up to 4 U54 / U34 cores - * Core Level Interruptor (CLINT) - * Platform-Level Interrupt Controller (PLIC) - * Power, Reset, Clock, Interrupt (PRCI) - * L2 Loosely Integrated Memory (L2-LIM) - * DDR memory controller - * 2 UARTs - * 1 GEM Ethernet controller - * 1 GPIO controller - * 1 One-Time Programmable (OTP) memory with stored serial number - * 1 DMA controller - * 2 QSPI controllers - * 1 ISSI 25WP256 flash - * 1 SD card in SPI mode +* 1 E51 / E31 core +* Up to 4 U54 / U34 cores +* Core Local Interruptor (CLINT) +* Platform-Level Interrupt Controller (PLIC) +* Power, Reset, Clock, Interrupt (PRCI) +* L2 Loosely Integrated Memory (L2-LIM) +* DDR memory controller +* 2 UARTs +* 1 GEM Ethernet controller +* 1 GPIO controller +* 1 One-Time Programmable (OTP) memory with stored serial number +* 1 DMA controller +* 2 QSPI controllers +* 1 ISSI 25WP256 flash +* 1 SD card in SPI mode +* PWM0 and PWM1 Please note the real world HiFive Unleashed board has a fixed configuration of 1 E51 core and 4 U54 core combination and the RISC-V core boots in 64-bit mode. @@ -36,12 +37,21 @@ Hardware configuration information ---------------------------------- The ``sifive_u`` machine automatically generates a device tree blob ("dtb") -which it passes to the guest. This provides information about the addresses, -interrupt lines and other configuration of the various devices in the system. -Guest software should discover the devices that are present in the generated -DTB instead of using a DTB for the real hardware, as some of the devices are -not modeled by QEMU and trying to access these devices may cause unexpected -behavior. +which it passes to the guest, if there is no ``-dtb`` option. This provides +information about the addresses, interrupt lines and other configuration of +the various devices in the system. 
Guest software should discover the devices +that are present in the generated DTB instead of using a DTB for the real +hardware, as some of the devices are not modeled by QEMU and trying to access +these devices may cause unexpected behavior. + +If users want to provide their own DTB, they can use the ``-dtb`` option. +These DTBs should have the following requirements: + +* The /cpus node should contain at least one subnode for E51 and the number + of subnodes should match QEMU's ``-smp`` option +* The /memory reg size should match QEMU’s selected ram_size via ``-m`` +* Should contain a node for the CLINT device with a compatible string + "riscv,clint0" if using with OpenSBI BIOS images Boot options ------------ @@ -122,6 +132,32 @@ To boot the newly built Linux kernel in QEMU with the ``sifive_u`` machine: -initrd /path/to/rootfs.ext4 \ -append "root=/dev/ram" +Alternatively, we can use a custom DTB to boot the machine by inserting a CLINT +node in fu540-c000.dtsi in the Linux kernel, + +.. code-block:: none + + clint: clint@2000000 { + compatible = "riscv,clint0"; + interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7 + &cpu1_intc 3 &cpu1_intc 7 + &cpu2_intc 3 &cpu2_intc 7 + &cpu3_intc 3 &cpu3_intc 7 + &cpu4_intc 3 &cpu4_intc 7>; + reg = <0x00 0x2000000 0x00 0x10000>; + }; + +with the following command line options: + +.. code-block:: bash + + $ qemu-system-riscv64 -M sifive_u -smp 5 -m 8G \ + -display none -serial stdio \ + -kernel arch/riscv/boot/Image \ + -dtb arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dtb \ + -initrd /path/to/rootfs.ext4 \ + -append "root=/dev/ram" + To build a Linux mainline kernel that can be booted by the ``sifive_u`` machine in 32-bit mode, use the rv32_defconfig configuration. A patch is required to fix the 32-bit boot issue for Linux kernel v5.10. @@ -174,15 +210,16 @@ command line options with ``qemu-system-riscv32``. Running U-Boot -------------- -U-Boot mainline v2021.01 release is tested at the time of writing. To build a +U-Boot mainline v2021.07 release is tested at the time of writing. To build a U-Boot mainline bootloader that can be booted by the ``sifive_u`` machine, use -the sifive_fu540_defconfig with similar commands as described above for Linux: +the sifive_unleashed_defconfig with similar commands as described above for +Linux: .. code-block:: bash $ export CROSS_COMPILE=riscv64-linux- $ export OPENSBI=/path/to/opensbi-riscv64-generic-fw_dynamic.bin - $ make sifive_fu540_defconfig + $ make sifive_unleashed_defconfig You will get spl/u-boot-spl.bin and u-boot.itb file in the build tree. @@ -277,31 +314,29 @@ board on QEMU ``sifive_u`` machine out of the box. This allows users to develop and test the recommended RISC-V boot flow with a real world use case: ZSBL (in QEMU) loads U-Boot SPL from SD card or SPI flash to L2LIM, then U-Boot SPL loads the combined payload image of OpenSBI fw_dynamic -firmware and U-Boot proper. However sometimes we want to have a quick test -of booting U-Boot on QEMU without the needs of preparing the SPI flash or -SD card images, an alternate way can be used, which is to create a U-Boot -S-mode image by modifying the configuration of U-Boot: +firmware and U-Boot proper. + +However sometimes we want to have a quick test of booting U-Boot on QEMU +without the needs of preparing the SPI flash or SD card images, an alternate +way can be used, which is to create a U-Boot S-mode image by modifying the +configuration of U-Boot: .. 
code-block:: bash + $ export CROSS_COMPILE=riscv64-linux- + $ make sifive_unleashed_defconfig $ make menuconfig -then manually select the following configuration in U-Boot: +then manually select the following configuration: - Device Tree Control > Provider of DTB for DT Control > Prior Stage bootloader DTB + * Device Tree Control ---> Provider of DTB for DT Control ---> Prior Stage bootloader DTB -This lets U-Boot to use the QEMU generated device tree blob. During the build, -a build error will be seen below: +and unselect the following configuration: -.. code-block:: none - - MKIMAGE u-boot.img - ./tools/mkimage: Can't open arch/riscv/dts/hifive-unleashed-a00.dtb: No such file or directory - ./tools/mkimage: failed to build FIT - make: *** [Makefile:1440: u-boot.img] Error 1 + * Library routines ---> Allow access to binman information in the device tree -The above errors can be safely ignored as we don't run U-Boot SPL under QEMU -in this alternate configuration. +This changes U-Boot to use the QEMU generated device tree blob, and bypass +running the U-Boot SPL stage. Boot the 64-bit U-Boot S-mode image directly: @@ -316,14 +351,18 @@ It's possible to create a 32-bit U-Boot S-mode image as well. .. code-block:: bash $ export CROSS_COMPILE=riscv64-linux- - $ make sifive_fu540_defconfig + $ make sifive_unleashed_defconfig $ make menuconfig then manually update the following configuration in U-Boot: - Device Tree Control > Provider of DTB for DT Control > Prior Stage bootloader DTB - RISC-V architecture > Base ISA > RV32I - Boot images > Text Base > 0x80400000 + * Device Tree Control ---> Provider of DTB for DT Control ---> Prior Stage bootloader DTB + * RISC-V architecture ---> Base ISA ---> RV32I + * Boot options ---> Boot images ---> Text Base ---> 0x80400000 + +and unselect the following configuration: + + * Library routines ---> Allow access to binman information in the device tree Use the same command line options to boot the 32-bit U-Boot S-mode image: diff --git a/docs/system/riscv/virt.rst b/docs/system/riscv/virt.rst new file mode 100644 index 00000000000..fa016584bf5 --- /dev/null +++ b/docs/system/riscv/virt.rst @@ -0,0 +1,148 @@ +'virt' Generic Virtual Platform (``virt``) +========================================== + +The ``virt`` board is a platform which does not correspond to any real hardware; +it is designed for use in virtual machines. It is the recommended board type +if you simply want to run a guest such as Linux and do not care about +reproducing the idiosyncrasies and limitations of a particular bit of +real-world hardware. + +Supported devices +----------------- + +The ``virt`` machine supports the following devices: + +* Up to 8 generic RV32GC/RV64GC cores, with optional extensions +* Core Local Interruptor (CLINT) +* Platform-Level Interrupt Controller (PLIC) +* CFI parallel NOR flash memory +* 1 NS16550 compatible UART +* 1 Google Goldfish RTC +* 1 SiFive Test device +* 8 virtio-mmio transport devices +* 1 generic PCIe host bridge +* The fw_cfg device that allows a guest to obtain data from QEMU + +Note that the default CPU is a generic RV32GC/RV64GC. Optional extensions +can be enabled via command line parameters, e.g.: ``-cpu rv64,x-h=true`` +enables the hypervisor extension for RV64. + +Hardware configuration information +---------------------------------- + +The ``virt`` machine automatically generates a device tree blob ("dtb") +which it passes to the guest, if there is no ``-dtb`` option. 
This provides +information about the addresses, interrupt lines and other configuration of +the various devices in the system. Guest software should discover the devices +that are present in the generated DTB. + +If users want to provide their own DTB, they can use the ``-dtb`` option. +These DTBs should have the following requirements: + +* The number of subnodes of the /cpus node should match QEMU's ``-smp`` option +* The /memory reg size should match QEMU’s selected ram_size via ``-m`` +* Should contain a node for the CLINT device with a compatible string + "riscv,clint0" if using with OpenSBI BIOS images + +Boot options +------------ + +The ``virt`` machine can start using the standard -kernel functionality +for loading a Linux kernel, a VxWorks kernel, an S-mode U-Boot bootloader +with the default OpenSBI firmware image as the -bios. It also supports +the recommended RISC-V bootflow: U-Boot SPL (M-mode) loads OpenSBI fw_dynamic +firmware and U-Boot proper (S-mode), using the standard -bios functionality. + +Machine-specific options +------------------------ + +The following machine-specific options are supported: + +- aclint=[on|off] + + When this option is "on", ACLINT devices will be emulated instead of + SiFive CLINT. When not specified, this option is assumed to be "off". + +Running Linux kernel +-------------------- + +Linux mainline v5.12 release is tested at the time of writing. To build a +Linux mainline kernel that can be booted by the ``virt`` machine in +64-bit mode, simply configure the kernel using the defconfig configuration: + +.. code-block:: bash + + $ export ARCH=riscv + $ export CROSS_COMPILE=riscv64-linux- + $ make defconfig + $ make + +To boot the newly built Linux kernel in QEMU with the ``virt`` machine: + +.. code-block:: bash + + $ qemu-system-riscv64 -M virt -smp 4 -m 2G \ + -display none -serial stdio \ + -kernel arch/riscv/boot/Image \ + -initrd /path/to/rootfs.cpio \ + -append "root=/dev/ram" + +To build a Linux mainline kernel that can be booted by the ``virt`` machine +in 32-bit mode, use the rv32_defconfig configuration. A patch is required to +fix the 32-bit boot issue for Linux kernel v5.12. + +.. code-block:: bash + + $ export ARCH=riscv + $ export CROSS_COMPILE=riscv64-linux- + $ curl https://patchwork.kernel.org/project/linux-riscv/patch/20210627135117.28641-1-bmeng.cn@gmail.com/mbox/ > riscv.patch + $ git am riscv.patch + $ make rv32_defconfig + $ make + +Replace ``qemu-system-riscv64`` with ``qemu-system-riscv32`` in the command +line above to boot the 32-bit Linux kernel. A rootfs image containing 32-bit +applications shall be used in order for kernel to boot to user space. + +Running U-Boot +-------------- + +U-Boot mainline v2021.04 release is tested at the time of writing. To build an +S-mode U-Boot bootloader that can be booted by the ``virt`` machine, use +the qemu-riscv64_smode_defconfig with similar commands as described above for Linux: + +.. code-block:: bash + + $ export CROSS_COMPILE=riscv64-linux- + $ make qemu-riscv64_smode_defconfig + +Boot the 64-bit U-Boot S-mode image directly: + +.. code-block:: bash + + $ qemu-system-riscv64 -M virt -smp 4 -m 2G \ + -display none -serial stdio \ + -kernel /path/to/u-boot.bin + +To test booting U-Boot SPL which in M-mode, which in turn loads a FIT image +that bundles OpenSBI fw_dynamic firmware and U-Boot proper (S-mode) together, +build the U-Boot images using riscv64_spl_defconfig: + +.. 
code-block:: bash + + $ export CROSS_COMPILE=riscv64-linux- + $ export OPENSBI=/path/to/opensbi-riscv64-generic-fw_dynamic.bin + $ make qemu-riscv64_spl_defconfig + +The minimal QEMU commands to run U-Boot SPL are: + +.. code-block:: bash + + $ qemu-system-riscv64 -M virt -smp 4 -m 2G \ + -display none -serial stdio \ + -bios /path/to/u-boot-spl \ + -device loader,file=/path/to/u-boot.itb,addr=0x80200000 + +To test 32-bit U-Boot images, switch to use qemu-riscv32_smode_defconfig and +riscv32_spl_defconfig builds, and replace ``qemu-system-riscv64`` with +``qemu-system-riscv32`` in the command lines above to boot the 32-bit U-Boot. diff --git a/docs/system/s390x/protvirt.rst b/docs/system/s390x/protvirt.rst index 0f481043d99..aee63ed7ec9 100644 --- a/docs/system/s390x/protvirt.rst +++ b/docs/system/s390x/protvirt.rst @@ -14,11 +14,11 @@ Prerequisites To run PVMs, a machine with the Protected Virtualization feature, as indicated by the Ultravisor Call facility (stfle bit 158), is required. The Ultravisor needs to be initialized at boot by setting -`prot_virt=1` on the host's kernel command line. +``prot_virt=1`` on the host's kernel command line. Running PVMs requires using the KVM hypervisor. -If those requirements are met, the capability `KVM_CAP_S390_PROTECTED` +If those requirements are met, the capability ``KVM_CAP_S390_PROTECTED`` will indicate that KVM can support PVMs on that LPAR. @@ -26,15 +26,15 @@ Running a Protected Virtual Machine ----------------------------------- To run a PVM you will need to select a CPU model which includes the -`Unpack facility` (stfle bit 161 represented by the feature -`unpack`/`S390_FEAT_UNPACK`), and add these options to the command line:: +``Unpack facility`` (stfle bit 161 represented by the feature +``unpack``/``S390_FEAT_UNPACK``), and add these options to the command line:: -object s390-pv-guest,id=pv0 \ -machine confidential-guest-support=pv0 Adding these options will: -* Ensure the `unpack` facility is available +* Ensure the ``unpack`` facility is available * Enable the IOMMU by default for all I/O devices * Initialize the PV mechanism @@ -63,5 +63,5 @@ from the disk boot. This memory layout includes the encrypted components (kernel, initrd, cmdline), the stage3a loader and metadata. In case this boot method is used, the command line options -initrd and -cmdline are ineffective. The preparation of a PVM -image is done via the `genprotimg` tool from the s390-tools +image is done via the ``genprotimg`` tool from the s390-tools collection. diff --git a/docs/system/secrets.rst b/docs/system/secrets.rst new file mode 100644 index 00000000000..4a177369b69 --- /dev/null +++ b/docs/system/secrets.rst @@ -0,0 +1,162 @@ +.. _secret data: + +Providing secret data to QEMU +----------------------------- + +There are a variety of objects in QEMU which require secret data to be provided +by the administrator or management application. For example, network block +devices often require a password, LUKS block devices require a passphrase to +unlock key material, remote desktop services require an access password. +QEMU has a general purpose mechanism for providing secret data to QEMU in a +secure manner, using the ``secret`` object type. + +At startup this can be done using the ``-object secret,...`` command line +argument. At runtime this can be done using the ``object_add`` QMP / HMP +monitor commands. The examples that follow will illustrate use of ``-object`` +command lines, but they all apply equivalentely in QMP / HMP. 
When creating +a ``secret`` object it must be given a unique ID string. This ID is then +used to identify the object when configuring the thing which needs the data. + + +INSECURE: Passing secrets as clear text inline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**The following should never be done in a production environment or on a +multi-user host. Command line arguments are usually visible in the process +listings and are often collected in log files by system monitoring agents +or bug reporting tools. QMP/HMP commands and their arguments are also often +logged and attached to bug reports. This all risks compromising secrets that +are passed inline.** + +For the convenience of people debugging / developing with QEMU, it is possible +to pass secret data inline on the command line. + +:: + + -object secret,id=secvnc0,data=87539319 + + +Again it is possible to provide the data in base64 encoded format, which is +particularly useful if the data contains binary characters that would clash +with argument parsing. + +:: + + -object secret,id=secvnc0,data=ODc1MzkzMTk=,format=base64 + + +**Note: base64 encoding does not provide any security benefit.** + +Passing secrets as clear text via a file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The simplest approach to providing data securely is to use a file to store +the secret: + +:: + + -object secret,id=secvnc0,file=vnc-password.txt + + +In this example the file ``vnc-password.txt`` contains the plain text secret +data. It is important to note that the contents of the file are treated as an +opaque blob. The entire raw file contents is used as the value, thus it is +important not to mistakenly add any trailing newline character in the file if +this newline is not intended to be part of the secret data. + +In some cases it might be more convenient to pass the secret data in base64 +format and have QEMU decode to get the raw bytes before use: + +:: + + -object secret,id=sec0,file=vnc-password.txt,format=base64 + + +The file should generally be given mode ``0600`` or ``0400`` permissions, and +have its user/group ownership set to the same account that the QEMU process +will be launched under. If using mandatory access control such as SELinux, then +the file should be labelled to only grant access to the specific QEMU process +that needs access. This will prevent other processes/users from compromising the +secret data. + + +Passing secrets as cipher text inline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To address the insecurity of passing secrets inline as clear text, it is +possible to configure a second secret as an AES key to use for decrypting +the data. + +The secret used as the AES key must always be configured using the file based +storage mechanism: + +:: + + -object secret,id=secmaster,file=masterkey.data,format=base64 + + +In this case the ``masterkey.data`` file would be initialized with 32 +cryptographically secure random bytes, which are then base64 encoded. +The contents of this file will be used as an AES-256 key to encrypt the +real secret that can now be safely passed to QEMU inline as cipher text + +:: + + -object secret,id=secvnc0,keyid=secmaster,data=BASE64-CIPHERTEXT,iv=BASE64-IV,format=base64 + + +In this example ``BASE64-CIPHERTEXT`` is the result of AES-256-CBC encrypting +the secret with ``masterkey.data`` and then base64 encoding the ciphertext. +The ``BASE64-IV`` data is 16 random bytes which have been base64 encoded. +These bytes are used as the initialization vector for the AES-256-CBC value.
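As an illustration of the encoding steps just described (a sketch only, not part of the patch above; it assumes the ``openssl``, ``base64`` and ``hexdump`` utilities are available, and the file names and the example password ``letmein`` are placeholders), the master key, IV and ciphertext could be prepared like this:

::

   # 32 byte AES-256 master key, stored base64 encoded
   openssl rand -base64 32 > masterkey.data

   # a fresh 16 byte initialization vector for this one secret
   openssl rand -base64 16 > sec-iv.b64

   # encrypt the real secret with the master key and the IV
   KEY=$(base64 -d masterkey.data | hexdump -v -e '/1 "%02X"')
   IV=$(base64 -d sec-iv.b64 | hexdump -v -e '/1 "%02X"')
   SECRET=$(printf letmein | openssl enc -aes-256-cbc -a -K $KEY -iv $IV)

   # the resulting values would then be passed as
   #   -object secret,id=secmaster,file=masterkey.data,format=base64
   #   -object secret,id=secvnc0,keyid=secmaster,format=base64,data=$SECRET,iv=$(<sec-iv.b64)

Any equivalent tooling can be used to perform these steps.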
+ +A single master key can be used to encrypt all subsequent secrets, **but it is +critical that a different initialization vector is used for every secret**. + +Passing secrets via the Linux keyring +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The earlier mechanisms described are platform agnostic. If using QEMU on a Linux +host, it is further possible to pass secrets to QEMU using the Linux keyring: + +:: + + -object secret_keyring,id=secvnc0,serial=1729 + + +This instructs QEMU to load data from the Linux keyring secret identified by +the serial number ``1729``. It is possible to combine use of the keyring with +other features mentioned earlier such as base64 encoding: + +:: + + -object secret_keyring,id=secvnc0,serial=1729,format=base64 + + +and also encryption with a master key: + +:: + + -object secret_keyring,id=secvnc0,keyid=secmaster,serial=1729,iv=BASE64-IV + + +Best practice +~~~~~~~~~~~~~ + +It is recommended for production deployments to use a master key secret, and +then pass all subsequent inline secrets encrypted with the master key. + +Each QEMU instance must have a distinct master key, and that must be generated +from a cryptographically secure random data source. The master key should be +deleted immediately upon QEMU shutdown. If passing the master key as a file, +the key file must have access control rules applied that restrict access to +just the one QEMU process that is intended to use it. Alternatively the Linux +keyring can be used to pass the master key to QEMU. + +The secrets for individual QEMU device backends must all then be encrypted +with this master key. + +This procedure helps ensure that the individual secrets for QEMU backends will +not be compromised, even if ``-object`` CLI args or ``object_add`` monitor +commands are collected in log files and attached to public bug support tickets. +The only item that needs strongly protecting is the master key file. diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst index edd013c7bbd..91ebc26c6db 100644 --- a/docs/system/target-arm.rst +++ b/docs/system/target-arm.rst @@ -85,10 +85,17 @@ undocumented; you can get a complete list by running arm/aspeed arm/sabrelite arm/digic + arm/cubieboard + arm/emcraft-sf2 + arm/highbank arm/musicpal arm/gumstix + arm/mainstone + arm/kzm + arm/nrf arm/nseries arm/nuvoton + arm/imx25-pdk arm/orangepi arm/palm arm/raspi @@ -96,9 +103,16 @@ undocumented; you can get a complete list by running arm/collie arm/sx1 arm/stellaris + arm/stm32 arm/virt arm/xlnx-versal-virt +Emulated CPU architecture support +================================= + +.. toctree:: + arm/emulation + Arm CPU features ================ diff --git a/docs/system/target-i386.rst b/docs/system/target-i386.rst index 22ba5ce2c0f..4daa53c35d8 100644 --- a/docs/system/target-i386.rst +++ b/docs/system/target-i386.rst @@ -19,7 +19,15 @@ Board-specific documentation i386/microvm i386/pc -.. include:: cpu-models-x86.rst.inc +Architectural features +~~~~~~~~~~~~~~~~~~~~~~ + +.. toctree:: + :maxdepth: 1 + + i386/cpu + i386/kvm-pv + i386/sgx .. _pcsys_005freq: diff --git a/docs/system/target-ppc.rst b/docs/system/target-ppc.rst index 67905b8f2a6..4f6eb93b177 100644 --- a/docs/system/target-ppc.rst +++ b/docs/system/target-ppc.rst @@ -20,5 +20,6 @@ help``. 
ppc/embedded ppc/powermac ppc/powernv + ppc/ppce500 ppc/prep ppc/pseries diff --git a/docs/system/target-riscv.rst b/docs/system/target-riscv.rst index 8d5946fbbbc..89a866e4f4f 100644 --- a/docs/system/target-riscv.rst +++ b/docs/system/target-riscv.rst @@ -67,7 +67,20 @@ undocumented; you can get a complete list by running :maxdepth: 1 riscv/microchip-icicle-kit + riscv/shakti-c riscv/sifive_u + riscv/virt -RISC-V CPU features +RISC-V CPU firmware ------------------- + +When using the ``sifive_u`` or ``virt`` machine there are three different +firmware boot options: +1. ``-bios default`` - This is the default behaviour if no -bios option +is included. This option will load the default OpenSBI firmware automatically. +The firmware is included with the QEMU release and no user interaction is +required. All a user needs to do is specify the kernel they want to boot +with the -kernel option +2. ``-bios none`` - QEMU will not automatically load any firmware. It is up +to the user to load all the images they need. +3. ``-bios `` - Tells QEMU to load the specified file as the firmware. diff --git a/docs/system/tls.rst b/docs/system/tls.rst index b0973afe1bf..1a04674362e 100644 --- a/docs/system/tls.rst +++ b/docs/system/tls.rst @@ -311,7 +311,7 @@ containing one or more usernames and random keys:: mkdir -m 0700 /tmp/keys psktool -u rich -p /tmp/keys/keys.psk -TLS-enabled servers such as qemu-nbd can use this directory like so:: +TLS-enabled servers such as ``qemu-nbd`` can use this directory like so:: qemu-nbd \ -t -x / \ diff --git a/docs/system/usb.rst b/docs/system/usb.rst deleted file mode 100644 index eeab78dcfbe..00000000000 --- a/docs/system/usb.rst +++ /dev/null @@ -1,140 +0,0 @@ -.. _pcsys_005fusb: - -USB emulation -------------- - -QEMU can emulate a PCI UHCI, OHCI, EHCI or XHCI USB controller. You can -plug virtual USB devices or real host USB devices (only works with -certain host operating systems). QEMU will automatically create and -connect virtual USB hubs as necessary to connect multiple USB devices. - -.. _Connecting USB devices: - -Connecting USB devices -~~~~~~~~~~~~~~~~~~~~~~ - -USB devices can be connected with the ``-device usb-...`` command line -option or the ``device_add`` monitor command. Available devices are: - -``usb-mouse`` - Virtual Mouse. This will override the PS/2 mouse emulation when - activated. - -``usb-tablet`` - Pointer device that uses absolute coordinates (like a touchscreen). - This means QEMU is able to report the mouse position without having - to grab the mouse. Also overrides the PS/2 mouse emulation when - activated. - -``usb-storage,drive=drive_id`` - Mass storage device backed by drive_id (see the :ref:`disk images` - chapter in the System Emulation Users Guide) - -``usb-uas`` - USB attached SCSI device, see - `usb-storage.txt `__ - for details - -``usb-bot`` - Bulk-only transport storage device, see - `usb-storage.txt `__ - for details here, too - -``usb-mtp,rootdir=dir`` - Media transfer protocol device, using dir as root of the file tree - that is presented to the guest. - -``usb-host,hostbus=bus,hostaddr=addr`` - Pass through the host device identified by bus and addr - -``usb-host,vendorid=vendor,productid=product`` - Pass through the host device identified by vendor and product ID - -``usb-wacom-tablet`` - Virtual Wacom PenPartner tablet. This device is similar to the - ``tablet`` above but it can be used with the tslib library because in - addition to touch coordinates it reports touch pressure. - -``usb-kbd`` - Standard USB keyboard. 
Will override the PS/2 keyboard (if present). - -``usb-serial,chardev=id`` - Serial converter. This emulates an FTDI FT232BM chip connected to - host character device id. - -``usb-braille,chardev=id`` - Braille device. This will use BrlAPI to display the braille output on - a real or fake device referenced by id. - -``usb-net[,netdev=id]`` - Network adapter that supports CDC ethernet and RNDIS protocols. id - specifies a netdev defined with ``-netdev …,id=id``. For instance, - user-mode networking can be used with - - .. parsed-literal:: - - |qemu_system| [...] -netdev user,id=net0 -device usb-net,netdev=net0 - -``usb-ccid`` - Smartcard reader device - -``usb-audio`` - USB audio device - -``u2f-{emulated,passthru}`` - Universal Second Factor device - -.. _host_005fusb_005fdevices: - -Using host USB devices on a Linux host -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -WARNING: this is an experimental feature. QEMU will slow down when using -it. USB devices requiring real time streaming (i.e. USB Video Cameras) -are not supported yet. - -1. If you use an early Linux 2.4 kernel, verify that no Linux driver is - actually using the USB device. A simple way to do that is simply to - disable the corresponding kernel module by renaming it from - ``mydriver.o`` to ``mydriver.o.disabled``. - -2. Verify that ``/proc/bus/usb`` is working (most Linux distributions - should enable it by default). You should see something like that: - - :: - - ls /proc/bus/usb - 001 devices drivers - -3. Since only root can access to the USB devices directly, you can - either launch QEMU as root or change the permissions of the USB - devices you want to use. For testing, the following suffices: - - :: - - chown -R myuid /proc/bus/usb - -4. Launch QEMU and do in the monitor: - - :: - - info usbhost - Device 1.2, speed 480 Mb/s - Class 00: USB device 1234:5678, USB DISK - - You should see the list of the devices you can use (Never try to use - hubs, it won't work). - -5. Add the device in QEMU by using: - - :: - - device_add usb-host,vendorid=0x1234,productid=0x5678 - - Normally the guest OS should report that a new USB device is plugged. - You can use the option ``-device usb-host,...`` to do the same. - -6. Now you can try to use the host USB device in QEMU. - -When relaunching QEMU, you may have to unplug and plug again the USB -device to make it work again (this is a bug). diff --git a/docs/system/vnc-security.rst b/docs/system/vnc-security.rst index 830f6acc738..4c1769eeb86 100644 --- a/docs/system/vnc-security.rst +++ b/docs/system/vnc-security.rst @@ -168,7 +168,7 @@ used is drastically reduced. In fact only the GSSAPI SASL mechanism provides an acceptable level of security by modern standards. Previous versions of QEMU referred to the DIGEST-MD5 mechanism, however, it has multiple serious flaws described in detail in RFC 6331 and thus should -never be used any more. The SCRAM-SHA-1 mechanism provides a simple +never be used any more. The SCRAM-SHA-256 mechanism provides a simple username/password auth facility similar to DIGEST-MD5, but does not support session encryption, so can only be used in combination with TLS. @@ -191,11 +191,12 @@ reasonable configuration is :: - mech_list: scram-sha-1 + mech_list: scram-sha-256 sasldb_path: /etc/qemu/passwd.db The ``saslpasswd2`` program can be used to populate the ``passwd.db`` -file with accounts. +file with accounts. Note that the ``passwd.db`` file stores passwords +in clear text. Other SASL configurations will be left as an exercise for the reader. 
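For reference, accounts for the configuration shown above could be added with ``saslpasswd2`` along these lines (a sketch only; the user name ``fred`` is a placeholder and option spellings may differ between Cyrus SASL releases):

::

   # create (or update) an entry in the database named by sasldb_path
   saslpasswd2 -f /etc/qemu/passwd.db -c fred

   # list the accounts currently held in the database
   sasldblistusers2 -f /etc/qemu/passwd.db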
Note that all mechanisms, except GSSAPI, should be combined with use of diff --git a/docs/throttle.txt b/docs/throttle.txt index b5b78b7326d..0a0453a5ee6 100644 --- a/docs/throttle.txt +++ b/docs/throttle.txt @@ -273,11 +273,9 @@ A group can be created using the object-add QMP function: "arguments": { "qom-type": "throttle-group", "id": "group0", - "props": { - "limits" : { - "iops-total": 1000 - "bps-write": 2097152 - } + "limits" : { + "iops-total": 1000, + "bps-write": 2097152 } } } diff --git a/docs/tools/_templates/editpage.html b/docs/tools/_templates/editpage.html deleted file mode 100644 index 2a9c8fc92b3..00000000000 --- a/docs/tools/_templates/editpage.html +++ /dev/null @@ -1,5 +0,0 @@ -
diff --git a/docs/tools/index.rst b/docs/tools/index.rst index d923834a739..1edd5a8054a 100644 --- a/docs/tools/index.rst +++ b/docs/tools/index.rst @@ -1,11 +1,9 @@ -.. This is the top level page for the 'tools' manual - - +----- Tools -===== - +----- -Contents: +This section of the manual documents QEMU's "tools": its +command line utilities and other standalone programs. .. toctree:: :maxdepth: 2 diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst index c9efcfaefc4..d663dd92bd7 100644 --- a/docs/tools/qemu-img.rst +++ b/docs/tools/qemu-img.rst @@ -1,3 +1,4 @@ +======================= QEMU disk image utility ======================= @@ -126,9 +127,9 @@ by the used format or see the format descriptions below for details. .. option:: -S SIZE Indicates the consecutive number of bytes that must contain only zeros - for qemu-img to create a sparse image during conversion. This value is rounded - down to the nearest 512 bytes. You may use the common size suffixes like - ``k`` for kilobytes. + for ``qemu-img`` to create a sparse image during conversion. This value is + rounded down to the nearest 512 bytes. You may use the common size suffixes + like ``k`` for kilobytes. .. option:: -t CACHE @@ -414,7 +415,7 @@ Command description: 4 Error on reading data -.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME +.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps [--skip-broken-bitmaps]] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE [-F BACKING_FMT]] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME Convert the disk image *FILENAME* or a snapshot *SNAPSHOT_PARAM* to disk image *OUTPUT_FILENAME* using format *OUTPUT_FMT*. It can @@ -430,7 +431,7 @@ Command description: suppressed from the destination image. *SPARSE_SIZE* indicates the consecutive number of bytes (defaults to 4k) - that must contain only zeros for qemu-img to create a sparse image during + that must contain only zeros for ``qemu-img`` to create a sparse image during conversion. If *SPARSE_SIZE* is 0, the source will not be scanned for unallocated or zero sectors, and the destination image will always be fully allocated. @@ -438,7 +439,7 @@ Command description: You can use the *BACKING_FILE* option to force the output image to be created as a copy on write image of the specified base image; the *BACKING_FILE* should have the same content as the input's base image, - however the path, image format, etc may differ. + however the path, image format (as given by *BACKING_FMT*), etc may differ. If a relative path name is given, the backing file is looked up relative to the directory containing *OUTPUT_FILENAME*. @@ -446,7 +447,7 @@ Command description: If the ``-n`` option is specified, the target volume creation will be skipped. This is useful for formats such as ``rbd`` if the target volume has already been created with site specific options that cannot - be supplied through qemu-img. + be supplied through ``qemu-img``. Out of order writes can be enabled with ``-W`` to improve performance. 
This is only recommended for preallocated devices like host devices or other @@ -456,6 +457,12 @@ Command description: *NUM_COROUTINES* specifies how many coroutines work in parallel during the convert process (defaults to 8). + Use of ``--bitmaps`` requests that any persistent bitmaps present in + the original are also copied to the destination. If any bitmap is + inconsistent in the source, the conversion will fail unless + ``--skip-broken-bitmaps`` is also specified to copy only the + consistent bitmaps. + .. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE] [-F BACKING_FMT] [-u] [-o OPTIONS] FILENAME [SIZE] Create the new disk image *FILENAME* of size *SIZE* and format @@ -465,7 +472,7 @@ Command description: If the option *BACKING_FILE* is specified, then the image will record only the differences from *BACKING_FILE*. No size needs to be specified in this case. *BACKING_FILE* will never be modified unless you use the - ``commit`` monitor command (or qemu-img commit). + ``commit`` monitor command (or ``qemu-img commit``). If a relative path name is given, the backing file is looked up relative to the directory containing *FILENAME*. @@ -593,13 +600,16 @@ Command description: the ``start``, ``length``, ``offset`` fields; it will also include other more specific information: - - whether the sectors contain actual data or not (boolean field ``data``; - if false, the sectors are either unallocated or stored as optimized - all-zero clusters); - - whether the data is known to read as zero (boolean field ``zero``); - - in order to make the output shorter, the target file is expressed as - a ``depth``; for example, a depth of 2 refers to the backing file - of the backing file of *FILENAME*. + - boolean field ``data``: true if the sectors contain actual data, + false if the sectors are either unallocated or stored as optimized + all-zero clusters + - boolean field ``zero``: true if the data is known to read as zero + - boolean field ``present``: true if the data belongs to the backing + chain, false if rebasing the backing chain onto a deeper file + would pick up data from the deeper file; + - integer field ``depth``: the depth within the backing chain at + which the data was resolved; for example, a depth of 2 refers to + the backing file of the backing file of *FILENAME*. In JSON format, the ``offset`` field is optional; it is absent in cases where ``human`` format would omit the entry or exit with an error. @@ -674,7 +684,7 @@ Command description: Safe mode This is the default mode and performs a real rebase operation. The - new backing file may differ from the old one and qemu-img rebase + new backing file may differ from the old one and ``qemu-img rebase`` will take care of keeping the guest-visible content of *FILENAME* unchanged. @@ -687,7 +697,7 @@ Command description: exists. Unsafe mode - qemu-img uses the unsafe mode if ``-u`` is specified. In this + ``qemu-img`` uses the unsafe mode if ``-u`` is specified. In this mode, only the backing file name and format of *FILENAME* is changed without any checks on the file contents. The user must take care of specifying the correct new backing file, or the guest-visible @@ -725,7 +735,7 @@ Command description: sizes accordingly. Failure to do so will result in data loss! When shrinking images, the ``--shrink`` option must be given. This informs - qemu-img that the user acknowledges all loss of data beyond the truncated + ``qemu-img`` that the user acknowledges all loss of data beyond the truncated image's end. 
After using this command to grow a disk image, you must use file system and @@ -866,6 +876,37 @@ Supported image file formats: issue ``lsattr filename`` to check if the NOCOW flag is set or not (Capital 'C' is NOCOW flag). + ``data_file`` + Filename where all guest data will be stored. If this option is used, + the qcow2 file will only contain the image's metadata. + + Note: Data loss will occur if the given filename already exists when + using this option with ``qemu-img create`` since ``qemu-img`` will create + the data file anew, overwriting the file's original contents. To simply + update the reference to point to the given pre-existing file, use + ``qemu-img amend``. + + ``data_file_raw`` + If this option is set to ``on``, QEMU will always keep the external data + file consistent as a standalone read-only raw image. + + It does this by forwarding all write accesses to the qcow2 file through to + the raw data file, including their offsets. Therefore, data that is visible + on the qcow2 node (i.e., to the guest) at some offset is visible at the same + offset in the raw data file. This results in a read-only raw image. Writes + that bypass the qcow2 metadata may corrupt the qcow2 metadata because the + out-of-band writes may result in the metadata falling out of sync with the + raw image. + + If this option is ``off``, QEMU will use the data file to store data in an + arbitrary manner. The file’s content will not make sense without the + accompanying qcow2 metadata. Where data is written will have no relation to + its offset as seen by the guest, and some writes (specifically zero writes) + may not be forwarded to the data file at all, but will only be handled by + modifying qcow2 metadata. + + This option can only be enabled if ``data_file`` is set. + ``Other`` QEMU also supports various other image file formats for diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst index ee862fa0bc0..6031f968931 100644 --- a/docs/tools/qemu-nbd.rst +++ b/docs/tools/qemu-nbd.rst @@ -1,3 +1,4 @@ +===================================== QEMU Disk Network Block Device Server ===================================== @@ -30,14 +31,14 @@ driver options if ``--image-opts`` is specified. *dev* is an NBD device. -.. option:: --object type,id=ID,...props... +.. option:: --object type,id=ID,... Define a new instance of the *type* object class identified by *ID*. See the :manpage:`qemu(1)` manual page for full details of the properties supported. The common object types that it makes sense to define are the ``secret`` object, which is used to supply passwords and/or encryption keys, and the ``tls-creds`` object, which is used to supply TLS - credentials for the qemu-nbd server or client. + credentials for the ``qemu-nbd`` server or client. .. option:: -p, --port=PORT @@ -98,8 +99,10 @@ driver options if ``--image-opts`` is specified. .. option:: --cache=CACHE - The cache mode to be used with the file. See the documentation of - the emulator's ``-drive cache=...`` option for allowed values. + The cache mode to be used with the file. Valid values are: + ``none``, ``writeback`` (the default), ``writethrough``, + ``directsync`` and ``unsafe``. See the documentation of + the emulator's ``-drive cache=...`` option for more info. .. option:: -n, --nocache @@ -235,7 +238,7 @@ daemon: Expose the guest-visible contents of a qcow2 file via a block device /dev/nbd0 (and possibly creating /dev/nbd0p1 and friends for partitions found within), then disconnect the device when done. 
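The commands for the example described here are not part of this hunk; a minimal sketch of the sequence being described might look like the following (the image name, device node and mount point are placeholders):

::

   modprobe nbd
   qemu-nbd -c /dev/nbd0 --read-only disk.qcow2
   mount -o ro /dev/nbd0p1 /mnt
   # inspect the files, then clean up
   umount /mnt
   qemu-nbd -d /dev/nbd0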
-Access to bind qemu-nbd to an /dev/nbd device generally requires root +Access to bind ``qemu-nbd`` to a /dev/nbd device generally requires root privileges, and may also require the execution of ``modprobe nbd`` to enable the kernel NBD client module. *CAUTION*: Do not use this method to mount filesystems from an untrusted guest image - a diff --git a/docs/tools/qemu-pr-helper.rst b/docs/tools/qemu-pr-helper.rst index ac036180ac1..eaebe40da0e 100644 --- a/docs/tools/qemu-pr-helper.rst +++ b/docs/tools/qemu-pr-helper.rst @@ -1,3 +1,4 @@ +================================== QEMU persistent reservation helper ================================== diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst index 3ec4bdd9145..3e5a9dc0320 100644 --- a/docs/tools/qemu-storage-daemon.rst +++ b/docs/tools/qemu-storage-daemon.rst @@ -1,3 +1,4 @@ +=================== QEMU Storage Daemon =================== @@ -9,9 +10,10 @@ Synopsis Description ----------- -qemu-storage-daemon provides disk image functionality from QEMU, qemu-img, and -qemu-nbd in a long-running process controlled via QMP commands without running -a virtual machine. It can export disk images, run block job operations, and +``qemu-storage-daemon`` provides disk image functionality from QEMU, +``qemu-img``, and ``qemu-nbd`` in a long-running process controlled via QMP +commands without running a virtual machine. +It can export disk images, run block job operations, and perform other disk-related operations. The daemon is controlled via a QMP monitor and initial configuration from the command-line. diff --git a/docs/tools/qemu-trace-stap.rst b/docs/tools/qemu-trace-stap.rst index fb70445c751..d53073b52b6 100644 --- a/docs/tools/qemu-trace-stap.rst +++ b/docs/tools/qemu-trace-stap.rst @@ -1,3 +1,4 @@ +========================= QEMU SystemTap trace tool ========================= diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst index 00554c75bd7..07ac0be5511 100644 --- a/docs/tools/virtiofsd.rst +++ b/docs/tools/virtiofsd.rst @@ -101,6 +101,9 @@ Options Enable/disable extended attributes (xattr) on files and directories. The default is ``no_xattr``. + * posix_acl|no_posix_acl - + Enable/disable posix acl support. Posix ACLs are disabled by default. + .. option:: --socket-path=PATH Listen on vhost-user UNIX domain socket at PATH. @@ -127,14 +130,17 @@ Options timeout. ``always`` sets a long cache lifetime at the expense of coherency. The default is ``auto``. -xattr-mapping -------------- +Extended attribute (xattr) mapping +---------------------------------- By default the name of xattr's used by the client are passed through to the server file system. This can be a problem where either those xattr names are used by something on the server (e.g. selinux client/server confusion) or if the -virtiofsd is running in a container with restricted privileges where it cannot -access some attributes. +``virtiofsd`` is running in a container with restricted privileges where it +cannot access some attributes. + +Mapping syntax +~~~~~~~~~~~~~~ A mapping of xattr names can be made using -o xattrmap=mapping where the ``mapping`` string consists of a series of rules. @@ -177,6 +183,12 @@ Using ':' as the separator a rule is of the form: 'ok' as either an explicit terminator or for special handling of certain patterns. +- 'unsupported' - If a client tries to use a name matching 'key' it's + denied using ENOTSUP; when the server passes an attribute + name matching 'prepend' it's hidden. 
In many ways it's use is very like + 'ok' as either an explicit terminator or for special handling of certain + patterns. + **key** is a string tested as a prefix on an attribute name originating on the client. It maybe empty in which case a 'client' rule will always match on client names. @@ -232,14 +244,54 @@ Note: When the 'security.capability' xattr is remapped, the daemon has to do extra work to remove it during many operations, which the host kernel normally does itself. -xattr-mapping Examples ----------------------- +Security considerations +~~~~~~~~~~~~~~~~~~~~~~~ + +Operating systems typically partition the xattr namespace using +well defined name prefixes. Each partition may have different +access controls applied. For example, on Linux there are multiple +partitions + + * ``system.*`` - access varies depending on attribute & filesystem + * ``security.*`` - only processes with CAP_SYS_ADMIN + * ``trusted.*`` - only processes with CAP_SYS_ADMIN + * ``user.*`` - any process granted by file permissions / ownership + +While other OS such as FreeBSD have different name prefixes +and access control rules. + +When remapping attributes on the host, it is important to +ensure that the remapping does not allow a guest user to +evade the guest access control rules. + +Consider if ``trusted.*`` from the guest was remapped to +``user.virtiofs.trusted*`` in the host. An unprivileged +user in a Linux guest has the ability to write to xattrs +under ``user.*``. Thus the user can evade the access +control restriction on ``trusted.*`` by instead writing +to ``user.virtiofs.trusted.*``. + +As noted above, the partitions used and access controls +applied, will vary across guest OS, so it is not wise to +try to predict what the guest OS will use. + +The simplest way to avoid an insecure configuration is +to remap all xattrs at once, to a given fixed prefix. +This is shown in example (1) below. + +If selectively mapping only a subset of xattr prefixes, +then rules must be added to explicitly block direct +access to the target of the remapping. This is shown +in example (2) below. + +Mapping examples +~~~~~~~~~~~~~~~~ 1) Prefix all attributes with 'user.virtiofs.' :: --o xattrmap=":prefix:all::user.virtiofs.::bad:all:::" + -o xattrmap=":prefix:all::user.virtiofs.::bad:all:::" This uses two rules, using : as the field separator; @@ -250,7 +302,8 @@ the host set. This is equivalent to the 'map' rule: :: --o xattrmap=":map::user.virtiofs.:" + + -o xattrmap=":map::user.virtiofs.:" 2) Prefix 'trusted.' attributes, allow others through @@ -270,14 +323,17 @@ stripping of 'user.virtiofs.'. The second rule hides unprefixed 'trusted.' attributes on the host. The third rule stops a guest from explicitly setting -the 'user.virtiofs.' path directly. +the 'user.virtiofs.' path directly to prevent access +control bypass on the target of the earlier prefix +remapping. Finally, the fourth rule lets all remaining attributes through. This is equivalent to the 'map' rule: :: --o xattrmap="/map/trusted./user.virtiofs./" + + -o xattrmap="/map/trusted./user.virtiofs./" 3) Hide 'security.' attributes, and allow everything else @@ -298,13 +354,13 @@ Examples Export ``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket ``/var/run/vm001-vhost-fs.sock``: -:: +.. 
parsed-literal:: host# virtiofsd --socket-path=/var/run/vm001-vhost-fs.sock -o source=/var/lib/fs/vm001 - host# qemu-system-x86_64 \ - -chardev socket,id=char0,path=/var/run/vm001-vhost-fs.sock \ - -device vhost-user-fs-pci,chardev=char0,tag=myfs \ - -object memory-backend-memfd,id=mem,size=4G,share=on \ - -numa node,memdev=mem \ - ... + host# |qemu_system| \\ + -chardev socket,id=char0,path=/var/run/vm001-vhost-fs.sock \\ + -device vhost-user-fs-pci,chardev=char0,tag=myfs \\ + -object memory-backend-memfd,id=mem,size=4G,share=on \\ + -numa node,memdev=mem \\ + ... guest# mount -t virtiofs myfs /mnt diff --git a/docs/u2f.txt b/docs/u2f.txt index 8f44994818a..7f5813a0b72 100644 --- a/docs/u2f.txt +++ b/docs/u2f.txt @@ -21,7 +21,7 @@ The second factor is materialized by a device implementing the U2F protocol. In case of a USB U2F security key, it is a USB HID device that implements the U2F protocol. -In Qemu, the USB U2F key device offers a dedicated support of U2F, allowing +In QEMU, the USB U2F key device offers a dedicated support of U2F, allowing guest USB FIDO/U2F security keys operating in two possible modes: pass-through and emulated. diff --git a/docs/usb-storage.txt b/docs/usb-storage.txt deleted file mode 100644 index 551af6f88bb..00000000000 --- a/docs/usb-storage.txt +++ /dev/null @@ -1,59 +0,0 @@ - -qemu usb storage emulation --------------------------- - -QEMU has three devices for usb storage emulation. - -Number one emulates the classic bulk-only transport protocol which is -used by 99% of the usb sticks on the market today and is called -"usb-storage". Usage (hooking up to xhci, other host controllers work -too): - - qemu ${other_vm_args} \ - -drive if=none,id=stick,file=/path/to/file.img \ - -device nec-usb-xhci,id=xhci \ - -device usb-storage,bus=xhci.0,drive=stick - - -Number two is the newer usb attached scsi transport. This one doesn't -automagically create a scsi disk, so you have to explicitly attach one -manually. Multiple logical units are supported. Here is an example -with tree logical units: - - qemu ${other_vm_args} \ - -drive if=none,id=uas-disk1,file=/path/to/file1.img \ - -drive if=none,id=uas-disk2,file=/path/to/file2.img \ - -drive if=none,id=uas-cdrom,media=cdrom,file=/path/to/image.iso \ - -device nec-usb-xhci,id=xhci \ - -device usb-uas,id=uas,bus=xhci.0 \ - -device scsi-hd,bus=uas.0,scsi-id=0,lun=0,drive=uas-disk1 \ - -device scsi-hd,bus=uas.0,scsi-id=0,lun=1,drive=uas-disk2 \ - -device scsi-cd,bus=uas.0,scsi-id=0,lun=5,drive=uas-cdrom - - -Number three emulates the classic bulk-only transport protocol too. -It's called "usb-bot". It shares most code with "usb-storage", and -the guest will not be able to see the difference. The qemu command -line interface is similar to usb-uas though, i.e. no automatic scsi -disk creation. It also features support for up to 16 LUNs. The LUN -numbers must be continuous, i.e. for three devices you must use 0+1+2. -The 0+1+5 numbering from the "usb-uas" example isn't going to work -with "usb-bot". - -Starting with qemu version 2.7 usb-bot and usb-uas devices can be -hotplugged. In the hotplug case they are added with "attached = -false" so the guest will not see the device until the "attached" -property is explicitly set to true. That allows to attach one or more -scsi devices before making the device visible to the guest, i.e. the -workflow looks like this: - - (1) device-add usb-bot,id=foo - (2) device-add scsi-{hd,cd},bus=foo.0,lun=0 - (2b) optionally add more devices (luns 1 ... 15). 
- (3) scripts/qmp/qom-set foo.attached = true - -enjoy, - Gerd - --- -Gerd Hoffmann diff --git a/docs/usb2.txt b/docs/usb2.txt deleted file mode 100644 index 172614d3a7e..00000000000 --- a/docs/usb2.txt +++ /dev/null @@ -1,172 +0,0 @@ - -USB Quick Start -=============== - -XHCI controller support ------------------------ - -QEMU has XHCI host adapter support. The XHCI hardware design is much -more virtualization-friendly when compared to EHCI and UHCI, thus XHCI -emulation uses less resources (especially cpu). So if your guest -supports XHCI (which should be the case for any operating system -released around 2010 or later) we recommend using it: - - qemu -device qemu-xhci - -XHCI supports USB 1.1, USB 2.0 and USB 3.0 devices, so this is the -only controller you need. With only a single USB controller (and -therefore only a single USB bus) present in the system there is no -need to use the bus= parameter when adding USB devices. - - -EHCI controller support ------------------------ - -The QEMU EHCI Adapter supports USB 2.0 devices. It can be used either -standalone or with companion controllers (UHCI, OHCI) for USB 1.1 -devices. The companion controller setup is more convenient to use -because it provides a single USB bus supporting both USB 2.0 and USB -1.1 devices. See next section for details. - -When running EHCI in standalone mode you can add UHCI or OHCI -controllers for USB 1.1 devices too. Each controller creates its own -bus though, so there are two completely separate USB buses: One USB -1.1 bus driven by the UHCI controller and one USB 2.0 bus driven by -the EHCI controller. Devices must be attached to the correct -controller manually. - -The easiest way to add a UHCI controller to a 'pc' machine is the -'-usb' switch. QEMU will create the UHCI controller as function of -the PIIX3 chipset. The USB 1.1 bus will carry the name "usb-bus.0". - -You can use the standard -device switch to add a EHCI controller to -your virtual machine. It is strongly recommended to specify an ID for -the controller so the USB 2.0 bus gets an individual name, for example -'-device usb-ehci,id=ehci". This will give you a USB 2.0 bus named -"ehci.0". - -When adding USB devices using the -device switch you can specify the -bus they should be attached to. Here is a complete example: - - qemu -M pc ${otheroptions} \ - -drive if=none,id=usbstick,file=/path/to/image \ - -usb \ - -device usb-ehci,id=ehci \ - -device usb-tablet,bus=usb-bus.0 \ - -device usb-storage,bus=ehci.0,drive=usbstick - -This attaches a USB tablet to the UHCI adapter and a USB mass storage -device to the EHCI adapter. - - -Companion controller support ----------------------------- - -The UHCI and OHCI controllers can attach to a USB bus created by EHCI -as companion controllers. This is done by specifying the masterbus -and firstport properties. masterbus specifies the bus name the -controller should attach to. firstport specifies the first port the -controller should attach to, which is needed as usually one EHCI -controller with six ports has three UHCI companion controllers with -two ports each. - -There is a config file in docs which will do all this for -you, just try ... - - qemu -readconfig docs/config/ich9-ehci-uhci.cfg - -... then use "bus=ehci.0" to assign your USB devices to that bus. - -Using the '-usb' switch for 'q35' machines will create a similar -USB controller configuration. 
- - -More USB tips & tricks -====================== - -Recently the USB pass through driver (also known as usb-host) and the -QEMU USB subsystem gained a few capabilities which are available only -via qdev properties, i,e. when using '-device'. - - -physical port addressing ------------------------- - -First you can (for all USB devices) specify the physical port where -the device will show up in the guest. This can be done using the -"port" property. UHCI has two root ports (1,2). EHCI has six root -ports (1-6), the emulated (1.1) USB hub has eight ports. - -Plugging a tablet into UHCI port 1 works like this: - - -device usb-tablet,bus=usb-bus.0,port=1 - -Plugging a hub into UHCI port 2 works like this: - - -device usb-hub,bus=usb-bus.0,port=2 - -Plugging a virtual USB stick into port 4 of the hub just plugged works -this way: - - -device usb-storage,bus=usb-bus.0,port=2.4,drive=... - -You can do basically the same in the monitor using the device_add -command. If you want to unplug devices too you should specify some -unique id which you can use to refer to the device ... - - (qemu) device_add usb-tablet,bus=usb-bus.0,port=1,id=my-tablet - (qemu) device_del my-tablet - -... when unplugging it with device_del. - - -USB pass through hints ----------------------- - -The usb-host driver has a bunch of properties to specify the device -which should be passed to the guest: - - hostbus= -- Specifies the bus number the device must be attached - to. - - hostaddr= -- Specifies the device address the device got - assigned by the guest os. - - hostport= -- Specifies the physical port the device is attached - to. - - vendorid= -- Specifies the vendor ID of the device. - productid= -- Specifies the product ID of the device. - -In theory you can combine all these properties as you like. In -practice only a few combinations are useful: - - (1) vendorid+productid -- match for a specific device, pass it to - the guest when it shows up somewhere in the host. - - (2) hostbus+hostport -- match for a specific physical port in the - host, any device which is plugged in there gets passed to the - guest. - - (3) hostbus+hostaddr -- most useful for ad-hoc pass through as the - hostaddr isn't stable, the next time you plug in the device it - gets a new one ... - -Note that USB 1.1 devices are handled by UHCI/OHCI and USB 2.0 by -EHCI. That means a device plugged into the very same physical port -may show up on different buses depending on the speed. The port I'm -using for testing is bus 1 + port 1 for 2.0 devices and bus 3 + port 1 -for 1.1 devices. Passing through any device plugged into that port -and also assign them to the correct bus can be done this way: - - qemu -M pc ${otheroptions} \ - -usb \ - -device usb-ehci,id=ehci \ - -device usb-host,bus=usb-bus.0,hostbus=3,hostport=1 \ - -device usb-host,bus=ehci.0,hostbus=1,hostport=1 - -enjoy, - Gerd - --- -Gerd Hoffmann diff --git a/docs/user/_templates/editpage.html b/docs/user/_templates/editpage.html deleted file mode 100644 index 1f5ee01e606..00000000000 --- a/docs/user/_templates/editpage.html +++ /dev/null @@ -1,5 +0,0 @@ -
- -
diff --git a/docs/user/index.rst b/docs/user/index.rst index a5b47459ec7..2c4e29f3dbc 100644 --- a/docs/user/index.rst +++ b/docs/user/index.rst @@ -1,15 +1,11 @@ -.. This is the top level page for the 'user' manual. - - +------------------- User Mode Emulation -=================== +------------------- -This manual is the overall guide for users using QEMU +This section of the manual is the overall guide for users using QEMU for user-mode emulation. In this mode, QEMU can launch processes compiled for one CPU on another CPU. -Contents: - .. toctree:: :maxdepth: 2 diff --git a/dtc b/dtc index 85e5d839847..b6910bec116 160000 --- a/dtc +++ b/dtc @@ -1 +1 @@ -Subproject commit 85e5d839847af54efab170f2b1331b2a6421e647 +Subproject commit b6910bec11614980a21e46fbccc35934b671bd81 diff --git a/dump/dump.c b/dump/dump.c index 929138e91d0..662d0a62cd9 100644 --- a/dump/dump.c +++ b/dump/dump.c @@ -15,7 +15,6 @@ #include "qemu-common.h" #include "qemu/cutils.h" #include "elf.h" -#include "cpu.h" #include "exec/hwaddr.h" #include "monitor/monitor.h" #include "sysemu/kvm.h" @@ -30,6 +29,7 @@ #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "hw/misc/vmcoreinfo.h" +#include "migration/blocker.h" #ifdef TARGET_X86_64 #include "win_dump.h" @@ -48,6 +48,8 @@ #define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */ +static Error *dump_migration_blocker; + #define ELF_NOTE_SIZE(hdr_size, name_size, desc_size) \ ((DIV_ROUND_UP((hdr_size), 4) + \ DIV_ROUND_UP((name_size), 4) + \ @@ -102,6 +104,7 @@ static int dump_cleanup(DumpState *s) qemu_mutex_unlock_iothread(); } } + migrate_del_blocker(dump_migration_blocker); return 0; } @@ -2006,6 +2009,21 @@ void qmp_dump_guest_memory(bool paging, const char *file, return; } + if (!dump_migration_blocker) { + error_setg(&dump_migration_blocker, + "Live migration disabled: dump-guest-memory in progress"); + } + + /* + * Allows even for -only-migratable, but forbid migration during the + * process of dump guest memory. + */ + if (migrate_add_blocker_internal(dump_migration_blocker, errp)) { + /* Remember to release the fd before passing it over to dump state */ + close(fd); + return; + } + s = &dump_state_global; dump_state_prepare(s); diff --git a/dump/win_dump.c b/dump/win_dump.c index 652c7bad995..c5eb5a9aacd 100644 --- a/dump/win_dump.c +++ b/dump/win_dump.c @@ -12,7 +12,6 @@ #include "qemu-common.h" #include "qemu/cutils.h" #include "elf.h" -#include "cpu.h" #include "exec/hwaddr.h" #include "monitor/monitor.h" #include "sysemu/kvm.h" diff --git a/ebpf/ebpf_rss-stub.c b/ebpf/ebpf_rss-stub.c new file mode 100644 index 00000000000..e71e229190d --- /dev/null +++ b/ebpf/ebpf_rss-stub.c @@ -0,0 +1,40 @@ +/* + * eBPF RSS stub file + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Yuri Benditovich + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "ebpf/ebpf_rss.h" + +void ebpf_rss_init(struct EBPFRSSContext *ctx) +{ + +} + +bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx) +{ + return false; +} + +bool ebpf_rss_load(struct EBPFRSSContext *ctx) +{ + return false; +} + +bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config, + uint16_t *indirections_table, uint8_t *toeplitz_key) +{ + return false; +} + +void ebpf_rss_unload(struct EBPFRSSContext *ctx) +{ + +} diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c new file mode 100644 index 00000000000..118c68da831 --- /dev/null +++ b/ebpf/ebpf_rss.c @@ -0,0 +1,165 @@ +/* + * eBPF RSS loader + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Andrew Melnychenko + * Yuri Benditovich + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" + +#include +#include + +#include "hw/virtio/virtio-net.h" /* VIRTIO_NET_RSS_MAX_TABLE_LEN */ + +#include "ebpf/ebpf_rss.h" +#include "ebpf/rss.bpf.skeleton.h" +#include "trace.h" + +void ebpf_rss_init(struct EBPFRSSContext *ctx) +{ + if (ctx != NULL) { + ctx->obj = NULL; + } +} + +bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx) +{ + return ctx != NULL && ctx->obj != NULL; +} + +bool ebpf_rss_load(struct EBPFRSSContext *ctx) +{ + struct rss_bpf *rss_bpf_ctx; + + if (ctx == NULL) { + return false; + } + + rss_bpf_ctx = rss_bpf__open(); + if (rss_bpf_ctx == NULL) { + trace_ebpf_error("eBPF RSS", "can not open eBPF RSS object"); + goto error; + } + + bpf_program__set_socket_filter(rss_bpf_ctx->progs.tun_rss_steering_prog); + + if (rss_bpf__load(rss_bpf_ctx)) { + trace_ebpf_error("eBPF RSS", "can not load RSS program"); + goto error; + } + + ctx->obj = rss_bpf_ctx; + ctx->program_fd = bpf_program__fd( + rss_bpf_ctx->progs.tun_rss_steering_prog); + ctx->map_configuration = bpf_map__fd( + rss_bpf_ctx->maps.tap_rss_map_configurations); + ctx->map_indirections_table = bpf_map__fd( + rss_bpf_ctx->maps.tap_rss_map_indirection_table); + ctx->map_toeplitz_key = bpf_map__fd( + rss_bpf_ctx->maps.tap_rss_map_toeplitz_key); + + return true; +error: + rss_bpf__destroy(rss_bpf_ctx); + ctx->obj = NULL; + + return false; +} + +static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx, + struct EBPFRSSConfig *config) +{ + uint32_t map_key = 0; + + if (!ebpf_rss_is_loaded(ctx)) { + return false; + } + if (bpf_map_update_elem(ctx->map_configuration, + &map_key, config, 0) < 0) { + return false; + } + return true; +} + +static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx, + uint16_t *indirections_table, + size_t len) +{ + uint32_t i = 0; + + if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL || + len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { + return false; + } + + for (; i < len; ++i) { + if (bpf_map_update_elem(ctx->map_indirections_table, &i, + indirections_table + i, 0) < 0) { + return false; + } + } + return true; +} + +static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx, + uint8_t *toeplitz_key) +{ + uint32_t map_key = 0; + + /* prepare toeplitz key */ + uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {}; + + if (!ebpf_rss_is_loaded(ctx) || toeplitz_key == NULL) { + return false; + } + memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE); + *(uint32_t *)toe = ntohl(*(uint32_t *)toe); + + if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe, + 0) < 0) { + return false; + } + return true; +} + +bool 
ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config, + uint16_t *indirections_table, uint8_t *toeplitz_key) +{ + if (!ebpf_rss_is_loaded(ctx) || config == NULL || + indirections_table == NULL || toeplitz_key == NULL) { + return false; + } + + if (!ebpf_rss_set_config(ctx, config)) { + return false; + } + + if (!ebpf_rss_set_indirections_table(ctx, indirections_table, + config->indirections_len)) { + return false; + } + + if (!ebpf_rss_set_toepliz_key(ctx, toeplitz_key)) { + return false; + } + + return true; +} + +void ebpf_rss_unload(struct EBPFRSSContext *ctx) +{ + if (!ebpf_rss_is_loaded(ctx)) { + return; + } + + rss_bpf__destroy(ctx->obj); + ctx->obj = NULL; +} diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h new file mode 100644 index 00000000000..bf3f2572c7c --- /dev/null +++ b/ebpf/ebpf_rss.h @@ -0,0 +1,44 @@ +/* + * eBPF RSS header + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Andrew Melnychenko + * Yuri Benditovich + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef QEMU_EBPF_RSS_H +#define QEMU_EBPF_RSS_H + +struct EBPFRSSContext { + void *obj; + int program_fd; + int map_configuration; + int map_toeplitz_key; + int map_indirections_table; +}; + +struct EBPFRSSConfig { + uint8_t redirect; + uint8_t populate_hash; + uint32_t hash_types; + uint16_t indirections_len; + uint16_t default_queue; +} __attribute__((packed)); + +void ebpf_rss_init(struct EBPFRSSContext *ctx); + +bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx); + +bool ebpf_rss_load(struct EBPFRSSContext *ctx); + +bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config, + uint16_t *indirections_table, uint8_t *toeplitz_key); + +void ebpf_rss_unload(struct EBPFRSSContext *ctx); + +#endif /* QEMU_EBPF_RSS_H */ diff --git a/ebpf/meson.build b/ebpf/meson.build new file mode 100644 index 00000000000..2dd0fd89480 --- /dev/null +++ b/ebpf/meson.build @@ -0,0 +1 @@ +softmmu_ss.add(when: libbpf, if_true: files('ebpf_rss.c'), if_false: files('ebpf_rss-stub.c')) diff --git a/ebpf/rss.bpf.skeleton.h b/ebpf/rss.bpf.skeleton.h new file mode 100644 index 00000000000..126683eb878 --- /dev/null +++ b/ebpf/rss.bpf.skeleton.h @@ -0,0 +1,431 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* THIS FILE IS AUTOGENERATED! 
*/ +#ifndef __RSS_BPF_SKEL_H__ +#define __RSS_BPF_SKEL_H__ + +#include +#include + +struct rss_bpf { + struct bpf_object_skeleton *skeleton; + struct bpf_object *obj; + struct { + struct bpf_map *tap_rss_map_configurations; + struct bpf_map *tap_rss_map_indirection_table; + struct bpf_map *tap_rss_map_toeplitz_key; + } maps; + struct { + struct bpf_program *tun_rss_steering_prog; + } progs; + struct { + struct bpf_link *tun_rss_steering_prog; + } links; +}; + +static void +rss_bpf__destroy(struct rss_bpf *obj) +{ + if (!obj) + return; + if (obj->skeleton) + bpf_object__destroy_skeleton(obj->skeleton); + free(obj); +} + +static inline int +rss_bpf__create_skeleton(struct rss_bpf *obj); + +static inline struct rss_bpf * +rss_bpf__open_opts(const struct bpf_object_open_opts *opts) +{ + struct rss_bpf *obj; + + obj = (struct rss_bpf *)calloc(1, sizeof(*obj)); + if (!obj) + return NULL; + if (rss_bpf__create_skeleton(obj)) + goto err; + if (bpf_object__open_skeleton(obj->skeleton, opts)) + goto err; + + return obj; +err: + rss_bpf__destroy(obj); + return NULL; +} + +static inline struct rss_bpf * +rss_bpf__open(void) +{ + return rss_bpf__open_opts(NULL); +} + +static inline int +rss_bpf__load(struct rss_bpf *obj) +{ + return bpf_object__load_skeleton(obj->skeleton); +} + +static inline struct rss_bpf * +rss_bpf__open_and_load(void) +{ + struct rss_bpf *obj; + + obj = rss_bpf__open(); + if (!obj) + return NULL; + if (rss_bpf__load(obj)) { + rss_bpf__destroy(obj); + return NULL; + } + return obj; +} + +static inline int +rss_bpf__attach(struct rss_bpf *obj) +{ + return bpf_object__attach_skeleton(obj->skeleton); +} + +static inline void +rss_bpf__detach(struct rss_bpf *obj) +{ + return bpf_object__detach_skeleton(obj->skeleton); +} + +static inline int +rss_bpf__create_skeleton(struct rss_bpf *obj) +{ + struct bpf_object_skeleton *s; + + s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s)); + if (!s) + return -1; + obj->skeleton = s; + + s->sz = sizeof(*s); + s->name = "rss_bpf"; + s->obj = &obj->obj; + + /* maps */ + s->map_cnt = 3; + s->map_skel_sz = sizeof(*s->maps); + s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz); + if (!s->maps) + goto err; + + s->maps[0].name = "tap_rss_map_configurations"; + s->maps[0].map = &obj->maps.tap_rss_map_configurations; + + s->maps[1].name = "tap_rss_map_indirection_table"; + s->maps[1].map = &obj->maps.tap_rss_map_indirection_table; + + s->maps[2].name = "tap_rss_map_toeplitz_key"; + s->maps[2].map = &obj->maps.tap_rss_map_toeplitz_key; + + /* programs */ + s->prog_cnt = 1; + s->prog_skel_sz = sizeof(*s->progs); + s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz); + if (!s->progs) + goto err; + + s->progs[0].name = "tun_rss_steering_prog"; + s->progs[0].prog = &obj->progs.tun_rss_steering_prog; + s->progs[0].link = &obj->links.tun_rss_steering_prog; + + s->data_sz = 8088; + s->data = (void *)"\ +\x7f\x45\x4c\x46\x02\x01\x01\0\0\0\0\0\0\0\0\0\x01\0\xf7\0\x01\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\x18\x1d\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0a\0\ +\x01\0\xbf\x18\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\x4c\xff\0\0\0\0\xbf\xa7\ +\0\0\0\0\0\0\x07\x07\0\0\x4c\xff\xff\xff\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\xbf\x72\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\xbf\x06\0\0\0\0\0\0\x18\x01\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\xbf\x72\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\xbf\x07\0\0\0\0\0\0\ +\x18\0\0\0\xff\xff\xff\xff\0\0\0\0\0\0\0\0\x15\x06\x66\x02\0\0\0\0\xbf\x79\0\0\ 
+\0\0\0\0\x15\x09\x64\x02\0\0\0\0\x71\x61\0\0\0\0\0\0\x55\x01\x01\0\0\0\0\0\x05\ +\0\x5d\x02\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xc0\xff\0\0\0\0\x7b\x1a\xb8\xff\ +\0\0\0\0\x7b\x1a\xb0\xff\0\0\0\0\x7b\x1a\xa8\xff\0\0\0\0\x7b\x1a\xa0\xff\0\0\0\ +\0\x63\x1a\x98\xff\0\0\0\0\x7b\x1a\x90\xff\0\0\0\0\x7b\x1a\x88\xff\0\0\0\0\x7b\ +\x1a\x80\xff\0\0\0\0\x7b\x1a\x78\xff\0\0\0\0\x7b\x1a\x70\xff\0\0\0\0\x7b\x1a\ +\x68\xff\0\0\0\0\x7b\x1a\x60\xff\0\0\0\0\x7b\x1a\x58\xff\0\0\0\0\x7b\x1a\x50\ +\xff\0\0\0\0\x15\x08\x4c\x02\0\0\0\0\x6b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\ +\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\xb7\x02\0\0\x0c\0\0\0\xb7\ +\x04\0\0\x02\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\0\ +\x77\0\0\0\x20\0\0\0\x55\0\x11\0\0\0\0\0\xb7\x02\0\0\x10\0\0\0\x69\xa1\xd0\xff\ +\0\0\0\0\xbf\x13\0\0\0\0\0\0\xdc\x03\0\0\x10\0\0\0\x15\x03\x02\0\0\x81\0\0\x55\ +\x03\x0c\0\xa8\x88\0\0\xb7\x02\0\0\x14\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\ +\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\0\0\0\0\ +\x85\0\0\0\x44\0\0\0\x69\xa1\xd0\xff\0\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\ +\0\0\0\x15\0\x01\0\0\0\0\0\x05\0\x2f\x02\0\0\0\0\x15\x01\x2e\x02\0\0\0\0\x7b\ +\x9a\x30\xff\0\0\0\0\x15\x01\x57\0\x86\xdd\0\0\x55\x01\x3b\0\x08\0\0\0\x7b\x7a\ +\x20\xff\0\0\0\0\xb7\x07\0\0\x01\0\0\0\x73\x7a\x50\xff\0\0\0\0\xb7\x01\0\0\0\0\ +\0\0\x63\x1a\xe0\xff\0\0\0\0\x7b\x1a\xd8\xff\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\ +\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\xb7\x02\0\ +\0\0\0\0\0\xb7\x04\0\0\x14\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\x67\ +\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\x1a\x02\0\0\0\0\x69\xa1\xd6\xff\0\0\ +\0\0\x55\x01\x01\0\0\0\0\0\xb7\x07\0\0\0\0\0\0\x61\xa1\xdc\xff\0\0\0\0\x63\x1a\ +\x5c\xff\0\0\0\0\x61\xa1\xe0\xff\0\0\0\0\x63\x1a\x60\xff\0\0\0\0\x73\x7a\x56\ +\xff\0\0\0\0\x71\xa9\xd9\xff\0\0\0\0\x71\xa1\xd0\xff\0\0\0\0\x67\x01\0\0\x02\0\ +\0\0\x57\x01\0\0\x3c\0\0\0\x7b\x1a\x40\xff\0\0\0\0\x79\xa7\x20\xff\0\0\0\0\xbf\ +\x91\0\0\0\0\0\0\x57\x01\0\0\xff\0\0\0\x15\x01\x19\0\0\0\0\0\x71\xa1\x56\xff\0\ +\0\0\0\x55\x01\x17\0\0\0\0\0\x57\x09\0\0\xff\0\0\0\x15\x09\x7a\x01\x11\0\0\0\ +\x55\x09\x14\0\x06\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x53\xff\0\0\0\0\xb7\x01\ +\0\0\0\0\0\0\x63\x1a\xe0\xff\0\0\0\0\x7b\x1a\xd8\xff\0\0\0\0\x7b\x1a\xd0\xff\0\ +\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\ +\xa2\x40\xff\0\0\0\0\xb7\x04\0\0\x14\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\ +\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\xf4\x01\0\0\0\0\x69\xa1\ +\xd0\xff\0\0\0\0\x6b\x1a\x58\xff\0\0\0\0\x69\xa1\xd2\xff\0\0\0\0\x6b\x1a\x5a\ +\xff\0\0\0\0\x71\xa1\x50\xff\0\0\0\0\x15\x01\xd4\0\0\0\0\0\x71\x62\x03\0\0\0\0\ +\0\x67\x02\0\0\x08\0\0\0\x71\x61\x02\0\0\0\0\0\x4f\x12\0\0\0\0\0\0\x71\x63\x04\ +\0\0\0\0\0\x71\x61\x05\0\0\0\0\0\x67\x01\0\0\x08\0\0\0\x4f\x31\0\0\0\0\0\0\x67\ +\x01\0\0\x10\0\0\0\x4f\x21\0\0\0\0\0\0\x71\xa2\x53\xff\0\0\0\0\x79\xa0\x30\xff\ +\0\0\0\0\x15\x02\x06\x01\0\0\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x02\0\0\0\x15\ +\x02\x03\x01\0\0\0\0\x61\xa1\x5c\xff\0\0\0\0\x63\x1a\xa0\xff\0\0\0\0\x61\xa1\ +\x60\xff\0\0\0\0\x63\x1a\xa4\xff\0\0\0\0\x69\xa1\x58\xff\0\0\0\0\x6b\x1a\xa8\ +\xff\0\0\0\0\x69\xa1\x5a\xff\0\0\0\0\x6b\x1a\xaa\xff\0\0\0\0\x05\0\x65\x01\0\0\ +\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x51\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x7b\x1a\ +\xf0\xff\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\x7b\x1a\xe0\xff\0\0\0\0\x7b\x1a\xd8\ +\xff\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xd0\xff\ 
+\xff\xff\xb7\x01\0\0\x28\0\0\0\x7b\x1a\x40\xff\0\0\0\0\xbf\x81\0\0\0\0\0\0\xb7\ +\x02\0\0\0\0\0\0\xb7\x04\0\0\x28\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\ +\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\x10\x01\0\0\0\0\x79\xa1\xe0\ +\xff\0\0\0\0\x63\x1a\x64\xff\0\0\0\0\x77\x01\0\0\x20\0\0\0\x63\x1a\x68\xff\0\0\ +\0\0\x79\xa1\xd8\xff\0\0\0\0\x63\x1a\x5c\xff\0\0\0\0\x77\x01\0\0\x20\0\0\0\x63\ +\x1a\x60\xff\0\0\0\0\x79\xa1\xe8\xff\0\0\0\0\x63\x1a\x6c\xff\0\0\0\0\x77\x01\0\ +\0\x20\0\0\0\x63\x1a\x70\xff\0\0\0\0\x79\xa1\xf0\xff\0\0\0\0\x63\x1a\x74\xff\0\ +\0\0\0\x77\x01\0\0\x20\0\0\0\x63\x1a\x78\xff\0\0\0\0\x71\xa9\xd6\xff\0\0\0\0\ +\x25\x09\xff\0\x3c\0\0\0\xb7\x01\0\0\x01\0\0\0\x6f\x91\0\0\0\0\0\0\x18\x02\0\0\ +\x01\0\0\0\0\0\0\0\0\x18\0\x1c\x5f\x21\0\0\0\0\0\0\x55\x01\x01\0\0\0\0\0\x05\0\ +\xf8\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x6b\x1a\xfe\xff\0\0\0\0\xb7\x01\0\0\x28\0\0\ +\0\x7b\x1a\x40\xff\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x8c\xff\xff\xff\x7b\ +\x1a\x18\xff\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x7c\xff\xff\xff\x7b\x1a\ +\x10\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x7b\x1a\x28\xff\0\0\0\0\x7b\x7a\x20\xff\0\ +\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xfe\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\ +\xa2\x40\xff\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\ +\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x15\0\x01\0\0\0\0\0\x05\0\x90\ +\x01\0\0\0\0\xbf\x91\0\0\0\0\0\0\x15\x01\x23\0\x3c\0\0\0\x15\x01\x59\0\x2c\0\0\ +\0\x55\x01\x5a\0\x2b\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xf8\xff\0\0\0\0\xbf\xa3\ +\0\0\0\0\0\0\x07\x03\0\0\xf8\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\xa2\x40\xff\0\ +\0\0\0\xb7\x04\0\0\x04\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\xbf\x01\ +\0\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\x03\x01\0\0\0\ +\0\x71\xa1\xfa\xff\0\0\0\0\x55\x01\x4b\0\x02\0\0\0\x71\xa1\xf9\xff\0\0\0\0\x55\ +\x01\x49\0\x02\0\0\0\x71\xa1\xfb\xff\0\0\0\0\x55\x01\x47\0\x01\0\0\0\x79\xa2\ +\x40\xff\0\0\0\0\x07\x02\0\0\x08\0\0\0\xbf\x81\0\0\0\0\0\0\x79\xa3\x18\xff\0\0\ +\0\0\xb7\x04\0\0\x10\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\xbf\x01\0\ +\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\xf2\0\0\0\0\0\ +\xb7\x01\0\0\x01\0\0\0\x73\x1a\x55\xff\0\0\0\0\x05\0\x39\0\0\0\0\0\xb7\x01\0\0\ +\0\0\0\0\x6b\x1a\xf8\xff\0\0\0\0\xb7\x09\0\0\x02\0\0\0\xb7\x07\0\0\x1e\0\0\0\ +\x05\0\x0e\0\0\0\0\0\x79\xa2\x38\xff\0\0\0\0\x0f\x29\0\0\0\0\0\0\xbf\x92\0\0\0\ +\0\0\0\x07\x02\0\0\x01\0\0\0\x71\xa3\xff\xff\0\0\0\0\x67\x03\0\0\x03\0\0\0\x2d\ +\x23\x02\0\0\0\0\0\x79\xa7\x20\xff\0\0\0\0\x05\0\x2b\0\0\0\0\0\x07\x07\0\0\xff\ +\xff\xff\xff\xbf\x72\0\0\0\0\0\0\x67\x02\0\0\x20\0\0\0\x77\x02\0\0\x20\0\0\0\ +\x15\x02\xf9\xff\0\0\0\0\x7b\x9a\x38\xff\0\0\0\0\x79\xa1\x40\xff\0\0\0\0\x0f\ +\x19\0\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xf8\xff\xff\xff\xbf\x81\0\0\0\ +\0\0\0\xbf\x92\0\0\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\ +\0\x44\0\0\0\xbf\x01\0\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\ +\x55\x01\x94\0\0\0\0\0\x71\xa2\xf8\xff\0\0\0\0\x55\x02\x0f\0\xc9\0\0\0\x07\x09\ +\0\0\x02\0\0\0\xbf\x81\0\0\0\0\0\0\xbf\x92\0\0\0\0\0\0\x79\xa3\x10\xff\0\0\0\0\ +\xb7\x04\0\0\x10\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\xbf\x01\0\0\0\ +\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\x87\0\0\0\0\0\xb7\ +\x01\0\0\x01\0\0\0\x73\x1a\x54\xff\0\0\0\0\x79\xa7\x20\xff\0\0\0\0\x05\0\x07\0\ +\0\0\0\0\xb7\x09\0\0\x01\0\0\0\x15\x02\xd1\xff\0\0\0\0\x71\xa9\xf9\xff\0\0\0\0\ +\x07\x09\0\0\x02\0\0\0\x05\0\xce\xff\0\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x56\ 
+\xff\0\0\0\0\x71\xa1\xff\xff\0\0\0\0\x67\x01\0\0\x03\0\0\0\x79\xa2\x40\xff\0\0\ +\0\0\x0f\x12\0\0\0\0\0\0\x07\x02\0\0\x08\0\0\0\x7b\x2a\x40\xff\0\0\0\0\x71\xa9\ +\xfe\xff\0\0\0\0\x25\x09\x0e\0\x3c\0\0\0\xb7\x01\0\0\x01\0\0\0\x6f\x91\0\0\0\0\ +\0\0\x18\x02\0\0\x01\0\0\0\0\0\0\0\0\x18\0\x1c\x5f\x21\0\0\0\0\0\0\x55\x01\x01\ +\0\0\0\0\0\x05\0\x07\0\0\0\0\0\x79\xa1\x28\xff\0\0\0\0\x07\x01\0\0\x01\0\0\0\ +\x7b\x1a\x28\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\ +\x82\xff\x0b\0\0\0\x05\0\x10\xff\0\0\0\0\x15\x09\xf8\xff\x87\0\0\0\x05\0\xfd\ +\xff\0\0\0\0\x71\xa1\x51\xff\0\0\0\0\x79\xa0\x30\xff\0\0\0\0\x15\x01\x17\x01\0\ +\0\0\0\x71\x62\x03\0\0\0\0\0\x67\x02\0\0\x08\0\0\0\x71\x61\x02\0\0\0\0\0\x4f\ +\x12\0\0\0\0\0\0\x71\x63\x04\0\0\0\0\0\x71\x61\x05\0\0\0\0\0\x67\x01\0\0\x08\0\ +\0\0\x4f\x31\0\0\0\0\0\0\x67\x01\0\0\x10\0\0\0\x4f\x21\0\0\0\0\0\0\x71\xa2\x53\ +\xff\0\0\0\0\x15\x02\x3d\0\0\0\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x10\0\0\0\ +\x15\x02\x3a\0\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\x5c\xff\xff\xff\x71\xa4\ +\x54\xff\0\0\0\0\xbf\x23\0\0\0\0\0\0\x15\x04\x02\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\ +\x07\x03\0\0\x7c\xff\xff\xff\x67\x01\0\0\x38\0\0\0\xc7\x01\0\0\x38\0\0\0\x65\ +\x01\x01\0\xff\xff\xff\xff\xbf\x32\0\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\ +\x6c\xff\xff\xff\x71\xa5\x55\xff\0\0\0\0\xbf\x34\0\0\0\0\0\0\x15\x05\x02\0\0\0\ +\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\x8c\xff\xff\xff\x65\x01\x01\0\xff\xff\xff\ +\xff\xbf\x43\0\0\0\0\0\0\x61\x21\x04\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\x24\0\ +\0\0\0\0\0\x4f\x41\0\0\0\0\0\0\x7b\x1a\xa0\xff\0\0\0\0\x61\x21\x08\0\0\0\0\0\ +\x61\x22\x0c\0\0\0\0\0\x67\x02\0\0\x20\0\0\0\x4f\x12\0\0\0\0\0\0\x7b\x2a\xa8\ +\xff\0\0\0\0\x61\x31\0\0\0\0\0\0\x61\x32\x04\0\0\0\0\0\x61\x34\x08\0\0\0\0\0\ +\x61\x33\x0c\0\0\0\0\0\x69\xa5\x5a\xff\0\0\0\0\x6b\x5a\xc2\xff\0\0\0\0\x69\xa5\ +\x58\xff\0\0\0\0\x6b\x5a\xc0\xff\0\0\0\0\x67\x03\0\0\x20\0\0\0\x4f\x43\0\0\0\0\ +\0\0\x7b\x3a\xb8\xff\0\0\0\0\x67\x02\0\0\x20\0\0\0\x4f\x12\0\0\0\0\0\0\x7b\x2a\ +\xb0\xff\0\0\0\0\x05\0\x6b\0\0\0\0\0\x71\xa2\x52\xff\0\0\0\0\x15\x02\x04\0\0\0\ +\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x04\0\0\0\x15\x02\x01\0\0\0\0\0\x05\0\xf7\ +\xfe\0\0\0\0\x57\x01\0\0\x01\0\0\0\x15\x01\xd3\0\0\0\0\0\x61\xa1\x5c\xff\0\0\0\ +\0\x63\x1a\xa0\xff\0\0\0\0\x61\xa1\x60\xff\0\0\0\0\x63\x1a\xa4\xff\0\0\0\0\x05\ +\0\x5e\0\0\0\0\0\x71\xa2\x52\xff\0\0\0\0\x15\x02\x1e\0\0\0\0\0\xbf\x12\0\0\0\0\ +\0\0\x57\x02\0\0\x20\0\0\0\x15\x02\x1b\0\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\ +\0\x5c\xff\xff\xff\x71\xa4\x54\xff\0\0\0\0\xbf\x23\0\0\0\0\0\0\x15\x04\x02\0\0\ +\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\x7c\xff\xff\xff\x57\x01\0\0\0\x01\0\0\ +\x15\x01\x01\0\0\0\0\0\xbf\x32\0\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\x6c\ +\xff\xff\xff\x71\xa5\x55\xff\0\0\0\0\xbf\x34\0\0\0\0\0\0\x15\x05\x02\0\0\0\0\0\ +\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\x8c\xff\xff\xff\x15\x01\xc3\xff\0\0\0\0\x05\0\ +\xc1\xff\0\0\0\0\xb7\x09\0\0\x3c\0\0\0\x79\xa7\x20\xff\0\0\0\0\x67\0\0\0\x20\0\ +\0\0\x77\0\0\0\x20\0\0\0\x15\0\xa5\xfe\0\0\0\0\x05\0\xb0\0\0\0\0\0\x15\x09\x07\ +\xff\x87\0\0\0\x05\0\xa2\xfe\0\0\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x08\0\0\0\ +\x15\x02\xab\0\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\x5c\xff\xff\xff\x71\xa4\ +\x54\xff\0\0\0\0\xbf\x23\0\0\0\0\0\0\x15\x04\x02\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\ +\x07\x03\0\0\x7c\xff\xff\xff\x57\x01\0\0\x40\0\0\0\x15\x01\x01\0\0\0\0\0\xbf\ +\x32\0\0\0\0\0\0\x61\x23\x04\0\0\0\0\0\x67\x03\0\0\x20\0\0\0\x61\x24\0\0\0\0\0\ +\0\x4f\x43\0\0\0\0\0\0\x7b\x3a\xa0\xff\0\0\0\0\x61\x23\x08\0\0\0\0\0\x61\x22\ 
+\x0c\0\0\0\0\0\x67\x02\0\0\x20\0\0\0\x4f\x32\0\0\0\0\0\0\x7b\x2a\xa8\xff\0\0\0\ +\0\x15\x01\x1c\0\0\0\0\0\x71\xa1\x55\xff\0\0\0\0\x15\x01\x1a\0\0\0\0\0\x61\xa1\ +\x98\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\xa2\x94\xff\0\0\0\0\x4f\x21\0\0\0\0\ +\0\0\x7b\x1a\xb8\xff\0\0\0\0\x61\xa1\x90\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\ +\xa2\x8c\xff\0\0\0\0\x05\0\x19\0\0\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x52\xff\ +\0\0\0\0\xb7\x01\0\0\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\ +\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\xa2\x40\xff\0\0\0\0\xb7\x04\0\ +\0\x08\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\0\x77\ +\0\0\0\x20\0\0\0\x55\0\x7d\0\0\0\0\0\x05\0\x88\xfe\0\0\0\0\xb7\x09\0\0\x2b\0\0\ +\0\x05\0\xc6\xff\0\0\0\0\x61\xa1\x78\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\xa2\ +\x74\xff\0\0\0\0\x4f\x21\0\0\0\0\0\0\x7b\x1a\xb8\xff\0\0\0\0\x61\xa1\x70\xff\0\ +\0\0\0\x67\x01\0\0\x20\0\0\0\x61\xa2\x6c\xff\0\0\0\0\x4f\x21\0\0\0\0\0\0\x7b\ +\x1a\xb0\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x07\x07\0\0\x04\0\0\0\x61\x03\0\0\0\0\ +\0\0\xb7\x05\0\0\0\0\0\0\x05\0\x4e\0\0\0\0\0\xaf\x52\0\0\0\0\0\0\xbf\x75\0\0\0\ +\0\0\0\x0f\x15\0\0\0\0\0\0\x71\x55\0\0\0\0\0\0\x67\x03\0\0\x01\0\0\0\xbf\x50\0\ +\0\0\0\0\0\x77\0\0\0\x07\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\ +\0\x39\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\ +\x50\0\0\0\0\0\0\x77\0\0\0\x06\0\0\0\x57\0\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\ +\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x3a\0\0\0\xc7\0\0\0\x3f\0\0\ +\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\x67\x03\0\0\x01\0\0\0\xbf\x50\0\0\0\ +\0\0\0\x77\0\0\0\x05\0\0\0\x57\0\0\0\x01\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\ +\0\0\0\0\x67\0\0\0\x3b\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\ +\0\0\0\0\0\x67\x03\0\0\x01\0\0\0\xbf\x50\0\0\0\0\0\0\x77\0\0\0\x04\0\0\0\x57\0\ +\0\0\x01\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x3c\0\0\0\xc7\ +\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\x50\0\0\0\0\0\0\ +\x77\0\0\0\x03\0\0\0\x57\0\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\x4f\x03\0\0\0\0\ +\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x3d\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\ +\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\x50\0\0\0\0\0\0\x77\0\0\0\x02\0\0\0\x57\0\0\0\ +\x01\0\0\0\x67\x03\0\0\x01\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\ +\0\0\x3e\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\ +\x50\0\0\0\0\0\0\x77\0\0\0\x01\0\0\0\x57\0\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\ +\x4f\x03\0\0\0\0\0\0\x57\x04\0\0\x01\0\0\0\x87\x04\0\0\0\0\0\0\x5f\x34\0\0\0\0\ +\0\0\xaf\x42\0\0\0\0\0\0\x57\x05\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\x4f\x53\0\ +\0\0\0\0\0\x07\x01\0\0\x01\0\0\0\xbf\x25\0\0\0\0\0\0\x15\x01\x0b\0\x24\0\0\0\ +\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\xa0\xff\xff\xff\x0f\x12\0\0\0\0\0\0\x71\x24\0\ +\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x38\0\0\0\xc7\0\0\0\x38\0\0\0\xb7\x02\ +\0\0\0\0\0\0\x65\0\xa9\xff\xff\xff\xff\xff\xbf\x32\0\0\0\0\0\0\x05\0\xa7\xff\0\ +\0\0\0\xbf\x21\0\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x15\x01\ +\x0e\0\0\0\0\0\x71\x63\x06\0\0\0\0\0\x71\x64\x07\0\0\0\0\0\x67\x04\0\0\x08\0\0\ +\0\x4f\x34\0\0\0\0\0\0\x3f\x41\0\0\0\0\0\0\x2f\x41\0\0\0\0\0\0\x1f\x12\0\0\0\0\ +\0\0\x63\x2a\x50\xff\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\x50\xff\xff\xff\ +\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\x55\0\x05\0\0\0\0\0\ +\x71\x61\x08\0\0\0\0\0\x71\x60\x09\0\0\0\0\0\x67\0\0\0\x08\0\0\0\x4f\x10\0\0\0\ +\0\0\0\x95\0\0\0\0\0\0\0\x69\0\0\0\0\0\0\0\x05\0\xfd\xff\0\0\0\0\x02\0\0\0\x04\ 
+\0\0\0\x0a\0\0\0\x01\0\0\0\0\0\0\0\x02\0\0\0\x04\0\0\0\x28\0\0\0\x01\0\0\0\0\0\ +\0\0\x02\0\0\0\x04\0\0\0\x02\0\0\0\x80\0\0\0\0\0\0\0\x47\x50\x4c\x20\x76\x32\0\ +\0\0\0\0\0\x10\0\0\0\0\0\0\0\x01\x7a\x52\0\x08\x7c\x0b\x01\x0c\0\0\0\x18\0\0\0\ +\x18\0\0\0\0\0\0\0\0\0\0\0\xd8\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\xa0\0\0\0\x04\0\xf1\xff\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\x60\x02\0\0\0\0\x03\0\x20\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x3f\x02\0\0\0\0\ +\x03\0\xd0\x0f\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xed\x01\0\0\0\0\x03\0\x10\x10\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\xd4\x01\0\0\0\0\x03\0\x20\x10\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\xa3\x01\0\0\0\0\x03\0\xb8\x12\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x63\x01\0\0\0\0\ +\x03\0\x48\x10\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x2a\x01\0\0\0\0\x03\0\x10\x13\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\xe1\0\0\0\0\0\x03\0\xa0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\x2e\x02\0\0\0\0\x03\0\x28\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x68\x02\0\0\0\0\x03\ +\0\xc0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x36\x02\0\0\0\0\x03\0\xc8\x13\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\x22\x01\0\0\0\0\x03\0\xe8\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\x02\x01\0\0\0\0\x03\0\x40\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd9\0\0\0\0\0\x03\0\ +\xf8\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x26\x02\0\0\0\0\x03\0\x20\x0e\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\xcc\x01\0\0\0\0\x03\0\x60\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x9b\ +\x01\0\0\0\0\x03\0\xc8\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x5b\x01\0\0\0\0\x03\0\ +\x20\x07\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x7c\x01\0\0\0\0\x03\0\x48\x08\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\x53\x01\0\0\0\0\x03\0\xb8\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x1a\ +\x01\0\0\0\0\x03\0\xe0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x84\x01\0\0\0\0\x03\0\ +\xb8\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x1e\x02\0\0\0\0\x03\0\xd8\x09\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\xc4\x01\0\0\0\0\x03\0\x70\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x93\ +\x01\0\0\0\0\x03\0\xa8\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x74\x01\0\0\0\0\x03\0\ +\xf0\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x4b\x01\0\0\0\0\x03\0\0\x0a\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\x12\x01\0\0\0\0\x03\0\x10\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xfa\0\ +\0\0\0\0\x03\0\xc0\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x58\x02\0\0\0\0\x03\0\x88\ +\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x16\x02\0\0\0\0\x03\0\xb8\x0a\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\xe5\x01\0\0\0\0\x03\0\xc0\x0f\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xbc\x01\ +\0\0\0\0\x03\0\0\x0e\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x8b\x01\0\0\0\0\x03\0\x18\x0e\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd1\0\0\0\0\0\x03\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\x50\x02\0\0\0\0\x03\0\x20\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0e\x02\0\0\0\0\ +\x03\0\x48\x0f\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x6c\x01\0\0\0\0\x03\0\xb0\x04\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\x43\x01\0\0\0\0\x03\0\xc8\x0c\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\xc9\0\0\0\0\0\x03\0\xf8\x0c\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x06\x02\0\0\0\0\x03\ +\0\xd0\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x3b\x01\0\0\0\0\x03\0\x98\x0b\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\xf2\0\0\0\0\0\x03\0\xb8\x0b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x48\ +\x02\0\0\0\0\x03\0\xf0\x0b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xfe\x01\0\0\0\0\x03\0\ +\xf8\x0b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xdd\x01\0\0\0\0\x03\0\0\x0c\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\xb4\x01\0\0\0\0\x03\0\x30\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0a\ +\x01\0\0\0\0\x03\0\x90\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xc1\0\0\0\0\0\x03\0\xa8\ +\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xba\0\0\0\0\0\x03\0\xd0\x01\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\xf6\x01\0\0\0\0\x03\0\xe0\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xac\x01\0\ +\0\0\0\x03\0\x30\x0e\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x33\x01\0\0\0\0\x03\0\x80\x0e\ 
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xea\0\0\0\0\0\x03\0\x98\x0e\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\x03\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x6b\0\0\0\x11\0\x06\ +\0\0\0\0\0\0\0\0\0\x07\0\0\0\0\0\0\0\x25\0\0\0\x11\0\x05\0\0\0\0\0\0\0\0\0\x14\ +\0\0\0\0\0\0\0\x82\0\0\0\x11\0\x05\0\x28\0\0\0\0\0\0\0\x14\0\0\0\0\0\0\0\x01\0\ +\0\0\x11\0\x05\0\x14\0\0\0\0\0\0\0\x14\0\0\0\0\0\0\0\x40\0\0\0\x12\0\x03\0\0\0\ +\0\0\0\0\0\0\xd8\x13\0\0\0\0\0\0\x28\0\0\0\0\0\0\0\x01\0\0\0\x3a\0\0\0\x50\0\0\ +\0\0\0\0\0\x01\0\0\0\x3c\0\0\0\x80\x13\0\0\0\0\0\0\x01\0\0\0\x3b\0\0\0\x1c\0\0\ +\0\0\0\0\0\x01\0\0\0\x38\0\0\0\0\x74\x61\x70\x5f\x72\x73\x73\x5f\x6d\x61\x70\ +\x5f\x74\x6f\x65\x70\x6c\x69\x74\x7a\x5f\x6b\x65\x79\0\x2e\x74\x65\x78\x74\0\ +\x6d\x61\x70\x73\0\x74\x61\x70\x5f\x72\x73\x73\x5f\x6d\x61\x70\x5f\x63\x6f\x6e\ +\x66\x69\x67\x75\x72\x61\x74\x69\x6f\x6e\x73\0\x74\x75\x6e\x5f\x72\x73\x73\x5f\ +\x73\x74\x65\x65\x72\x69\x6e\x67\x5f\x70\x72\x6f\x67\0\x2e\x72\x65\x6c\x74\x75\ +\x6e\x5f\x72\x73\x73\x5f\x73\x74\x65\x65\x72\x69\x6e\x67\0\x5f\x6c\x69\x63\x65\ +\x6e\x73\x65\0\x2e\x72\x65\x6c\x2e\x65\x68\x5f\x66\x72\x61\x6d\x65\0\x74\x61\ +\x70\x5f\x72\x73\x73\x5f\x6d\x61\x70\x5f\x69\x6e\x64\x69\x72\x65\x63\x74\x69\ +\x6f\x6e\x5f\x74\x61\x62\x6c\x65\0\x72\x73\x73\x2e\x62\x70\x66\x2e\x63\0\x2e\ +\x73\x74\x72\x74\x61\x62\0\x2e\x73\x79\x6d\x74\x61\x62\0\x4c\x42\x42\x30\x5f\ +\x39\0\x4c\x42\x42\x30\x5f\x38\x39\0\x4c\x42\x42\x30\x5f\x36\x39\0\x4c\x42\x42\ +\x30\x5f\x35\x39\0\x4c\x42\x42\x30\x5f\x31\x39\0\x4c\x42\x42\x30\x5f\x31\x30\ +\x39\0\x4c\x42\x42\x30\x5f\x39\x38\0\x4c\x42\x42\x30\x5f\x37\x38\0\x4c\x42\x42\ +\x30\x5f\x34\x38\0\x4c\x42\x42\x30\x5f\x31\x38\0\x4c\x42\x42\x30\x5f\x38\x37\0\ +\x4c\x42\x42\x30\x5f\x34\x37\0\x4c\x42\x42\x30\x5f\x33\x37\0\x4c\x42\x42\x30\ +\x5f\x31\x37\0\x4c\x42\x42\x30\x5f\x31\x30\x37\0\x4c\x42\x42\x30\x5f\x39\x36\0\ +\x4c\x42\x42\x30\x5f\x37\x36\0\x4c\x42\x42\x30\x5f\x36\x36\0\x4c\x42\x42\x30\ +\x5f\x34\x36\0\x4c\x42\x42\x30\x5f\x33\x36\0\x4c\x42\x42\x30\x5f\x32\x36\0\x4c\ +\x42\x42\x30\x5f\x31\x30\x36\0\x4c\x42\x42\x30\x5f\x36\x35\0\x4c\x42\x42\x30\ +\x5f\x34\x35\0\x4c\x42\x42\x30\x5f\x33\x35\0\x4c\x42\x42\x30\x5f\x34\0\x4c\x42\ +\x42\x30\x5f\x35\x34\0\x4c\x42\x42\x30\x5f\x34\x34\0\x4c\x42\x42\x30\x5f\x32\ +\x34\0\x4c\x42\x42\x30\x5f\x31\x30\x34\0\x4c\x42\x42\x30\x5f\x39\x33\0\x4c\x42\ +\x42\x30\x5f\x38\x33\0\x4c\x42\x42\x30\x5f\x35\x33\0\x4c\x42\x42\x30\x5f\x34\ +\x33\0\x4c\x42\x42\x30\x5f\x32\x33\0\x4c\x42\x42\x30\x5f\x31\x30\x33\0\x4c\x42\ +\x42\x30\x5f\x38\x32\0\x4c\x42\x42\x30\x5f\x35\x32\0\x4c\x42\x42\x30\x5f\x31\ +\x30\x32\0\x4c\x42\x42\x30\x5f\x39\x31\0\x4c\x42\x42\x30\x5f\x38\x31\0\x4c\x42\ +\x42\x30\x5f\x37\x31\0\x4c\x42\x42\x30\x5f\x36\x31\0\x4c\x42\x42\x30\x5f\x35\ +\x31\0\x4c\x42\x42\x30\x5f\x34\x31\0\x4c\x42\x42\x30\x5f\x32\x31\0\x4c\x42\x42\ +\x30\x5f\x31\x31\0\x4c\x42\x42\x30\x5f\x31\x31\x31\0\x4c\x42\x42\x30\x5f\x31\ +\x30\x31\0\x4c\x42\x42\x30\x5f\x38\x30\0\x4c\x42\x42\x30\x5f\x36\x30\0\x4c\x42\ +\x42\x30\x5f\x35\x30\0\x4c\x42\x42\x30\x5f\x31\x30\0\x4c\x42\x42\x30\x5f\x31\ +\x31\x30\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xaa\ +\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa0\x1a\0\0\0\0\0\0\x71\x02\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x1a\0\0\0\x01\0\0\ +\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x5a\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\ 
+\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\xd8\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x56\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\x60\x1a\0\0\0\0\0\0\x30\0\0\0\0\0\0\0\x09\0\0\0\x03\0\0\0\x08\0\0\0\0\0\0\0\ +\x10\0\0\0\0\0\0\0\x20\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x18\ +\x14\0\0\0\0\0\0\x3c\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\x6c\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x54\x14\0\0\0\0\0\ +\0\x07\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x78\0\0\ +\0\x01\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x60\x14\0\0\0\0\0\0\x30\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x74\0\0\0\x09\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x90\x1a\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x09\0\0\0\ +\x07\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\xb2\0\0\0\x02\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\x90\x14\0\0\0\0\0\0\xd0\x05\0\0\0\0\0\0\x01\0\0\0\x39\0\0\ +\0\x08\0\0\0\0\0\0\0\x18\0\0\0\0\0\0\0"; + + return 0; +err: + bpf_object__destroy_skeleton(s); + return -1; +} + +#endif /* __RSS_BPF_SKEL_H__ */ diff --git a/ebpf/trace-events b/ebpf/trace-events new file mode 100644 index 00000000000..411b1e2be39 --- /dev/null +++ b/ebpf/trace-events @@ -0,0 +1,4 @@ +# See docs/devel/tracing.txt for syntax documentation. + +# ebpf-rss.c +ebpf_error(const char *s1, const char *s2) "error in %s: %s" diff --git a/ebpf/trace.h b/ebpf/trace.h new file mode 100644 index 00000000000..abefc46ab10 --- /dev/null +++ b/ebpf/trace.h @@ -0,0 +1 @@ +#include "trace/trace-ebpf.h" diff --git a/fpu/meson.build b/fpu/meson.build new file mode 100644 index 00000000000..1a9992ded56 --- /dev/null +++ b/fpu/meson.build @@ -0,0 +1 @@ +specific_ss.add(when: 'CONFIG_TCG', if_true: files('softfloat.c')) diff --git a/fpu/softfloat-parts-addsub.c.inc b/fpu/softfloat-parts-addsub.c.inc new file mode 100644 index 00000000000..ae5c1017c5b --- /dev/null +++ b/fpu/softfloat-parts-addsub.c.inc @@ -0,0 +1,62 @@ +/* + * Floating point arithmetic implementation + * + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. + */ + +static void partsN(add_normal)(FloatPartsN *a, FloatPartsN *b) +{ + int exp_diff = a->exp - b->exp; + + if (exp_diff > 0) { + frac_shrjam(b, exp_diff); + } else if (exp_diff < 0) { + frac_shrjam(a, -exp_diff); + a->exp = b->exp; + } + + if (frac_add(a, a, b)) { + frac_shrjam(a, 1); + a->frac_hi |= DECOMPOSED_IMPLICIT_BIT; + a->exp += 1; + } +} + +static bool partsN(sub_normal)(FloatPartsN *a, FloatPartsN *b) +{ + int exp_diff = a->exp - b->exp; + int shift; + + if (exp_diff > 0) { + frac_shrjam(b, exp_diff); + frac_sub(a, a, b); + } else if (exp_diff < 0) { + a->exp = b->exp; + a->sign ^= 1; + frac_shrjam(a, -exp_diff); + frac_sub(a, b, a); + } else if (frac_sub(a, a, b)) { + /* Overflow means that A was less than B. 
*/ + frac_neg(a); + a->sign ^= 1; + } + + shift = frac_normalize(a); + if (likely(shift < N)) { + a->exp -= shift; + return true; + } + a->cls = float_class_zero; + return false; +} diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc new file mode 100644 index 00000000000..41d4b17e419 --- /dev/null +++ b/fpu/softfloat-parts.c.inc @@ -0,0 +1,1513 @@ +/* + * QEMU float support + * + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. + */ + +static void partsN(return_nan)(FloatPartsN *a, float_status *s) +{ + switch (a->cls) { + case float_class_snan: + float_raise(float_flag_invalid, s); + if (s->default_nan_mode) { + parts_default_nan(a, s); + } else { + parts_silence_nan(a, s); + } + break; + case float_class_qnan: + if (s->default_nan_mode) { + parts_default_nan(a, s); + } + break; + default: + g_assert_not_reached(); + } +} + +static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b, + float_status *s) +{ + if (is_snan(a->cls) || is_snan(b->cls)) { + float_raise(float_flag_invalid, s); + } + + if (s->default_nan_mode) { + parts_default_nan(a, s); + } else { + int cmp = frac_cmp(a, b); + if (cmp == 0) { + cmp = a->sign < b->sign; + } + + if (pickNaN(a->cls, b->cls, cmp > 0, s)) { + a = b; + } + if (is_snan(a->cls)) { + parts_silence_nan(a, s); + } + } + return a; +} + +static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b, + FloatPartsN *c, float_status *s, + int ab_mask, int abc_mask) +{ + int which; + + if (unlikely(abc_mask & float_cmask_snan)) { + float_raise(float_flag_invalid, s); + } + + which = pickNaNMulAdd(a->cls, b->cls, c->cls, + ab_mask == float_cmask_infzero, s); + + if (s->default_nan_mode || which == 3) { + /* + * Note that this check is after pickNaNMulAdd so that function + * has an opportunity to set the Invalid flag for infzero. + */ + parts_default_nan(a, s); + return a; + } + + switch (which) { + case 0: + break; + case 1: + a = b; + break; + case 2: + a = c; + break; + default: + g_assert_not_reached(); + } + if (is_snan(a->cls)) { + parts_silence_nan(a, s); + } + return a; +} + +/* + * Canonicalize the FloatParts structure. Determine the class, + * unbias the exponent, and normalize the fraction. 
+ */ +static void partsN(canonicalize)(FloatPartsN *p, float_status *status, + const FloatFmt *fmt) +{ + if (unlikely(p->exp == 0)) { + if (likely(frac_eqz(p))) { + p->cls = float_class_zero; + } else if (status->flush_inputs_to_zero) { + float_raise(float_flag_input_denormal, status); + p->cls = float_class_zero; + frac_clear(p); + } else { + int shift = frac_normalize(p); + p->cls = float_class_normal; + p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1; + } + } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) { + p->cls = float_class_normal; + p->exp -= fmt->exp_bias; + frac_shl(p, fmt->frac_shift); + p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; + } else if (likely(frac_eqz(p))) { + p->cls = float_class_inf; + } else { + frac_shl(p, fmt->frac_shift); + p->cls = (parts_is_snan_frac(p->frac_hi, status) + ? float_class_snan : float_class_qnan); + } +} + +/* + * Round and uncanonicalize a floating-point number by parts. There + * are FRAC_SHIFT bits that may require rounding at the bottom of the + * fraction; these bits will be removed. The exponent will be biased + * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0]. + */ +static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, + const FloatFmt *fmt) +{ + const int exp_max = fmt->exp_max; + const int frac_shift = fmt->frac_shift; + const uint64_t round_mask = fmt->round_mask; + const uint64_t frac_lsb = round_mask + 1; + const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1); + const uint64_t roundeven_mask = round_mask | frac_lsb; + uint64_t inc; + bool overflow_norm = false; + int exp, flags = 0; + + switch (s->float_rounding_mode) { + case float_round_nearest_even: + if (N > 64 && frac_lsb == 0) { + inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1 + ? frac_lsbm1 : 0); + } else { + inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 + ? frac_lsbm1 : 0); + } + break; + case float_round_ties_away: + inc = frac_lsbm1; + break; + case float_round_to_zero: + overflow_norm = true; + inc = 0; + break; + case float_round_up: + inc = p->sign ? 0 : round_mask; + overflow_norm = p->sign; + break; + case float_round_down: + inc = p->sign ? round_mask : 0; + overflow_norm = !p->sign; + break; + case float_round_to_odd: + overflow_norm = true; + /* fall through */ + case float_round_to_odd_inf: + if (N > 64 && frac_lsb == 0) { + inc = p->frac_hi & 1 ? 0 : round_mask; + } else { + inc = p->frac_lo & frac_lsb ? 0 : round_mask; + } + break; + default: + g_assert_not_reached(); + } + + exp = p->exp + fmt->exp_bias; + if (likely(exp > 0)) { + if (p->frac_lo & round_mask) { + flags |= float_flag_inexact; + if (frac_addi(p, p, inc)) { + frac_shr(p, 1); + p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; + exp++; + } + p->frac_lo &= ~round_mask; + } + + if (fmt->arm_althp) { + /* ARM Alt HP eschews Inf and NaN for a wider exponent. */ + if (unlikely(exp > exp_max)) { + /* Overflow. Return the maximum normal. 
*/ + flags = float_flag_invalid; + exp = exp_max; + frac_allones(p); + p->frac_lo &= ~round_mask; + } + } else if (unlikely(exp >= exp_max)) { + flags |= float_flag_overflow | float_flag_inexact; + if (overflow_norm) { + exp = exp_max - 1; + frac_allones(p); + p->frac_lo &= ~round_mask; + } else { + p->cls = float_class_inf; + exp = exp_max; + frac_clear(p); + } + } + frac_shr(p, frac_shift); + } else if (s->flush_to_zero) { + flags |= float_flag_output_denormal; + p->cls = float_class_zero; + exp = 0; + frac_clear(p); + } else { + bool is_tiny = s->tininess_before_rounding || exp < 0; + + if (!is_tiny) { + FloatPartsN discard; + is_tiny = !frac_addi(&discard, p, inc); + } + + frac_shrjam(p, 1 - exp); + + if (p->frac_lo & round_mask) { + /* Need to recompute round-to-even/round-to-odd. */ + switch (s->float_rounding_mode) { + case float_round_nearest_even: + if (N > 64 && frac_lsb == 0) { + inc = ((p->frac_hi & 1) || + (p->frac_lo & round_mask) != frac_lsbm1 + ? frac_lsbm1 : 0); + } else { + inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 + ? frac_lsbm1 : 0); + } + break; + case float_round_to_odd: + case float_round_to_odd_inf: + if (N > 64 && frac_lsb == 0) { + inc = p->frac_hi & 1 ? 0 : round_mask; + } else { + inc = p->frac_lo & frac_lsb ? 0 : round_mask; + } + break; + default: + break; + } + flags |= float_flag_inexact; + frac_addi(p, p, inc); + p->frac_lo &= ~round_mask; + } + + exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0; + frac_shr(p, frac_shift); + + if (is_tiny && (flags & float_flag_inexact)) { + flags |= float_flag_underflow; + } + if (exp == 0 && frac_eqz(p)) { + p->cls = float_class_zero; + } + } + p->exp = exp; + float_raise(flags, s); +} + +static void partsN(uncanon)(FloatPartsN *p, float_status *s, + const FloatFmt *fmt) +{ + if (likely(p->cls == float_class_normal)) { + parts_uncanon_normal(p, s, fmt); + } else { + switch (p->cls) { + case float_class_zero: + p->exp = 0; + frac_clear(p); + return; + case float_class_inf: + g_assert(!fmt->arm_althp); + p->exp = fmt->exp_max; + frac_clear(p); + return; + case float_class_qnan: + case float_class_snan: + g_assert(!fmt->arm_althp); + p->exp = fmt->exp_max; + frac_shr(p, fmt->frac_shift); + return; + default: + break; + } + g_assert_not_reached(); + } +} + +/* + * Returns the result of adding or subtracting the values of the + * floating-point values `a' and `b'. The operation is performed + * according to the IEC/IEEE Standard for Binary Floating-Point + * Arithmetic. + */ +static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, + float_status *s, bool subtract) +{ + bool b_sign = b->sign ^ subtract; + int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); + + if (a->sign != b_sign) { + /* Subtraction */ + if (likely(ab_mask == float_cmask_normal)) { + if (parts_sub_normal(a, b)) { + return a; + } + /* Subtract was exact, fall through to set sign. 
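+             * Note that an exact zero difference takes the sign +0 in
+             * all rounding directions except float_round_down, where
+             * it is -0 (IEEE 754-2008, section 6.3); that is what the
+             * assignment below implements.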
*/ + ab_mask = float_cmask_zero; + } + + if (ab_mask == float_cmask_zero) { + a->sign = s->float_rounding_mode == float_round_down; + return a; + } + + if (unlikely(ab_mask & float_cmask_anynan)) { + goto p_nan; + } + + if (ab_mask & float_cmask_inf) { + if (a->cls != float_class_inf) { + /* N - Inf */ + goto return_b; + } + if (b->cls != float_class_inf) { + /* Inf - N */ + return a; + } + /* Inf - Inf */ + float_raise(float_flag_invalid, s); + parts_default_nan(a, s); + return a; + } + } else { + /* Addition */ + if (likely(ab_mask == float_cmask_normal)) { + parts_add_normal(a, b); + return a; + } + + if (ab_mask == float_cmask_zero) { + return a; + } + + if (unlikely(ab_mask & float_cmask_anynan)) { + goto p_nan; + } + + if (ab_mask & float_cmask_inf) { + a->cls = float_class_inf; + return a; + } + } + + if (b->cls == float_class_zero) { + g_assert(a->cls == float_class_normal); + return a; + } + + g_assert(a->cls == float_class_zero); + g_assert(b->cls == float_class_normal); + return_b: + b->sign = b_sign; + return b; + + p_nan: + return parts_pick_nan(a, b, s); +} + +/* + * Returns the result of multiplying the floating-point values `a' and + * `b'. The operation is performed according to the IEC/IEEE Standard + * for Binary Floating-Point Arithmetic. + */ +static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, + float_status *s) +{ + int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); + bool sign = a->sign ^ b->sign; + + if (likely(ab_mask == float_cmask_normal)) { + FloatPartsW tmp; + + frac_mulw(&tmp, a, b); + frac_truncjam(a, &tmp); + + a->exp += b->exp + 1; + if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { + frac_add(a, a, a); + a->exp -= 1; + } + + a->sign = sign; + return a; + } + + /* Inf * Zero == NaN */ + if (unlikely(ab_mask == float_cmask_infzero)) { + float_raise(float_flag_invalid, s); + parts_default_nan(a, s); + return a; + } + + if (unlikely(ab_mask & float_cmask_anynan)) { + return parts_pick_nan(a, b, s); + } + + /* Multiply by 0 or Inf */ + if (ab_mask & float_cmask_inf) { + a->cls = float_class_inf; + a->sign = sign; + return a; + } + + g_assert(ab_mask & float_cmask_zero); + a->cls = float_class_zero; + a->sign = sign; + return a; +} + +/* + * Returns the result of multiplying the floating-point values `a' and + * `b' then adding 'c', with no intermediate rounding step after the + * multiplication. The operation is performed according to the + * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008. + * The flags argument allows the caller to select negation of the + * addend, the intermediate product, or the final result. (The + * difference between this and having the caller do a separate + * negation is that negating externally will flip the sign bit on NaNs.) + * + * Requires A and C extracted into a double-sized structure to provide the + * extra space for the widening multiply. + */ +static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b, + FloatPartsN *c, int flags, float_status *s) +{ + int ab_mask, abc_mask; + FloatPartsW p_widen, c_widen; + + ab_mask = float_cmask(a->cls) | float_cmask(b->cls); + abc_mask = float_cmask(c->cls) | ab_mask; + + /* + * It is implementation-defined whether the cases of (0,inf,qnan) + * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN + * they return if they do), so we have to hand this information + * off to the target-specific pick-a-NaN routine. 
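+     *
+     * pickNaNMulAdd returns 0, 1 or 2 to select a, b or c
+     * respectively, or 3 to request the default NaN; RISC-V, for
+     * instance, always returns 3.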
+ */ + if (unlikely(abc_mask & float_cmask_anynan)) { + return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask); + } + + if (flags & float_muladd_negate_c) { + c->sign ^= 1; + } + + /* Compute the sign of the product into A. */ + a->sign ^= b->sign; + if (flags & float_muladd_negate_product) { + a->sign ^= 1; + } + + if (unlikely(ab_mask != float_cmask_normal)) { + if (unlikely(ab_mask == float_cmask_infzero)) { + goto d_nan; + } + + if (ab_mask & float_cmask_inf) { + if (c->cls == float_class_inf && a->sign != c->sign) { + goto d_nan; + } + goto return_inf; + } + + g_assert(ab_mask & float_cmask_zero); + if (c->cls == float_class_normal) { + *a = *c; + goto return_normal; + } + if (c->cls == float_class_zero) { + if (a->sign != c->sign) { + goto return_sub_zero; + } + goto return_zero; + } + g_assert(c->cls == float_class_inf); + } + + if (unlikely(c->cls == float_class_inf)) { + a->sign = c->sign; + goto return_inf; + } + + /* Perform the multiplication step. */ + p_widen.sign = a->sign; + p_widen.exp = a->exp + b->exp + 1; + frac_mulw(&p_widen, a, b); + if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) { + frac_add(&p_widen, &p_widen, &p_widen); + p_widen.exp -= 1; + } + + /* Perform the addition step. */ + if (c->cls != float_class_zero) { + /* Zero-extend C to less significant bits. */ + frac_widen(&c_widen, c); + c_widen.exp = c->exp; + + if (a->sign == c->sign) { + parts_add_normal(&p_widen, &c_widen); + } else if (!parts_sub_normal(&p_widen, &c_widen)) { + goto return_sub_zero; + } + } + + /* Narrow with sticky bit, for proper rounding later. */ + frac_truncjam(a, &p_widen); + a->sign = p_widen.sign; + a->exp = p_widen.exp; + + return_normal: + if (flags & float_muladd_halve_result) { + a->exp -= 1; + } + finish_sign: + if (flags & float_muladd_negate_result) { + a->sign ^= 1; + } + return a; + + return_sub_zero: + a->sign = s->float_rounding_mode == float_round_down; + return_zero: + a->cls = float_class_zero; + goto finish_sign; + + return_inf: + a->cls = float_class_inf; + goto finish_sign; + + d_nan: + float_raise(float_flag_invalid, s); + parts_default_nan(a, s); + return a; +} + +/* + * Returns the result of dividing the floating-point value `a' by the + * corresponding value `b'. The operation is performed according to + * the IEC/IEEE Standard for Binary Floating-Point Arithmetic. + */ +static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, + float_status *s) +{ + int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); + bool sign = a->sign ^ b->sign; + + if (likely(ab_mask == float_cmask_normal)) { + a->sign = sign; + a->exp -= b->exp + frac_div(a, b); + return a; + } + + /* 0/0 or Inf/Inf => NaN */ + if (unlikely(ab_mask == float_cmask_zero) || + unlikely(ab_mask == float_cmask_inf)) { + float_raise(float_flag_invalid, s); + parts_default_nan(a, s); + return a; + } + + /* All the NaN cases */ + if (unlikely(ab_mask & float_cmask_anynan)) { + return parts_pick_nan(a, b, s); + } + + a->sign = sign; + + /* Inf / X */ + if (a->cls == float_class_inf) { + return a; + } + + /* 0 / X */ + if (a->cls == float_class_zero) { + return a; + } + + /* X / Inf */ + if (b->cls == float_class_inf) { + a->cls = float_class_zero; + return a; + } + + /* X / 0 => Inf */ + g_assert(b->cls == float_class_zero); + float_raise(float_flag_divbyzero, s); + a->cls = float_class_inf; + return a; +} + +/* + * Floating point remainder, per IEC/IEEE, or modulus. 
+ */ +static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, + uint64_t *mod_quot, float_status *s) +{ + int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); + + if (likely(ab_mask == float_cmask_normal)) { + frac_modrem(a, b, mod_quot); + return a; + } + + if (mod_quot) { + *mod_quot = 0; + } + + /* All the NaN cases */ + if (unlikely(ab_mask & float_cmask_anynan)) { + return parts_pick_nan(a, b, s); + } + + /* Inf % N; N % 0 */ + if (a->cls == float_class_inf || b->cls == float_class_zero) { + float_raise(float_flag_invalid, s); + parts_default_nan(a, s); + return a; + } + + /* N % Inf; 0 % N */ + g_assert(b->cls == float_class_inf || a->cls == float_class_zero); + return a; +} + +/* + * Square Root + * + * The base algorithm is lifted from + * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c + * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c + * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c + * and is thus MIT licenced. + */ +static void partsN(sqrt)(FloatPartsN *a, float_status *status, + const FloatFmt *fmt) +{ + const uint32_t three32 = 3u << 30; + const uint64_t three64 = 3ull << 62; + uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */ + uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */ + uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */ + uint64_t d0h, d0l, d1h, d1l, d2h, d2l; + uint64_t discard; + bool exp_odd; + size_t index; + + if (unlikely(a->cls != float_class_normal)) { + switch (a->cls) { + case float_class_snan: + case float_class_qnan: + parts_return_nan(a, status); + return; + case float_class_zero: + return; + case float_class_inf: + if (unlikely(a->sign)) { + goto d_nan; + } + return; + default: + g_assert_not_reached(); + } + } + + if (unlikely(a->sign)) { + goto d_nan; + } + + /* + * Argument reduction. + * x = 4^e frac; with integer e, and frac in [1, 4) + * m = frac fixed point at bit 62, since we're in base 4. + * If base-2 exponent is odd, exchange that for multiply by 2, + * which results in no shift. + */ + exp_odd = a->exp & 1; + index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6); + if (!exp_odd) { + frac_shr(a, 1); + } + + /* + * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4). + * + * Initial estimate: + * 7-bit lookup table (1-bit exponent and 6-bit significand). + * + * The relative error (e = r0*sqrt(m)-1) of a linear estimate + * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best; + * a table lookup is faster and needs one less iteration. + * The 7-bit table gives |e| < 0x1.fdp-9. 
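+     *
+     * The iteration below is Newton-Raphson applied to
+     * f(r) = 1/r^2 - m, whose positive root is r = 1/sqrt(m):
+     *     r' = r - f(r)/f'(r) = r * (3 - m*r^2) / 2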
+ * + * A Newton-Raphson iteration for r is + * s = m*r + * d = s*r + * u = 3 - d + * r = r*u/2 + * + * Fixed point representations: + * m, s, d, u, three are all 2.30; r is 0.32 + */ + m64 = a->frac_hi; + m32 = m64 >> 32; + + r32 = rsqrt_tab[index] << 16; + /* |r*sqrt(m) - 1| < 0x1.FDp-9 */ + + s32 = ((uint64_t)m32 * r32) >> 32; + d32 = ((uint64_t)s32 * r32) >> 32; + u32 = three32 - d32; + + if (N == 64) { + /* float64 or smaller */ + + r32 = ((uint64_t)r32 * u32) >> 31; + /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */ + + s32 = ((uint64_t)m32 * r32) >> 32; + d32 = ((uint64_t)s32 * r32) >> 32; + u32 = three32 - d32; + + if (fmt->frac_size <= 23) { + /* float32 or smaller */ + + s32 = ((uint64_t)s32 * u32) >> 32; /* 3.29 */ + s32 = (s32 - 1) >> 6; /* 9.23 */ + /* s < sqrt(m) < s + 0x1.08p-23 */ + + /* compute nearest rounded result to 2.23 bits */ + uint32_t d0 = (m32 << 16) - s32 * s32; + uint32_t d1 = s32 - d0; + uint32_t d2 = d1 + s32 + 1; + s32 += d1 >> 31; + a->frac_hi = (uint64_t)s32 << (64 - 25); + + /* increment or decrement for inexact */ + if (d2 != 0) { + a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1); + } + goto done; + } + + /* float64 */ + + r64 = (uint64_t)r32 * u32 * 2; + /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */ + mul64To128(m64, r64, &s64, &discard); + mul64To128(s64, r64, &d64, &discard); + u64 = three64 - d64; + + mul64To128(s64, u64, &s64, &discard); /* 3.61 */ + s64 = (s64 - 2) >> 9; /* 12.52 */ + + /* Compute nearest rounded result */ + uint64_t d0 = (m64 << 42) - s64 * s64; + uint64_t d1 = s64 - d0; + uint64_t d2 = d1 + s64 + 1; + s64 += d1 >> 63; + a->frac_hi = s64 << (64 - 54); + + /* increment or decrement for inexact */ + if (d2 != 0) { + a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1); + } + goto done; + } + + r64 = (uint64_t)r32 * u32 * 2; + /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */ + + mul64To128(m64, r64, &s64, &discard); + mul64To128(s64, r64, &d64, &discard); + u64 = three64 - d64; + mul64To128(u64, r64, &r64, &discard); + r64 <<= 1; + /* |r*sqrt(m) - 1| < 0x1.a5p-31 */ + + mul64To128(m64, r64, &s64, &discard); + mul64To128(s64, r64, &d64, &discard); + u64 = three64 - d64; + mul64To128(u64, r64, &rh, &rl); + add128(rh, rl, rh, rl, &rh, &rl); + /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */ + + mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard); + mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard); + sub128(three64, 0, dh, dl, &uh, &ul); + mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */ + /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */ + + sub128(sh, sl, 0, 4, &sh, &sl); + shift128Right(sh, sl, 13, &sh, &sl); /* 16.112 */ + /* s < sqrt(m) < s + 1ulp */ + + /* Compute nearest rounded result */ + mul64To128(sl, sl, &d0h, &d0l); + d0h += 2 * sh * sl; + sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l); + sub128(sh, sl, d0h, d0l, &d1h, &d1l); + add128(sh, sl, 0, 1, &d2h, &d2l); + add128(d2h, d2l, d1h, d1l, &d2h, &d2l); + add128(sh, sl, 0, d1h >> 63, &sh, &sl); + shift128Left(sh, sl, 128 - 114, &sh, &sl); + + /* increment or decrement for inexact */ + if (d2h | d2l) { + if ((int64_t)(d1h ^ d2h) < 0) { + sub128(sh, sl, 0, 1, &sh, &sl); + } else { + add128(sh, sl, 0, 1, &sh, &sl); + } + } + a->frac_lo = sl; + a->frac_hi = sh; + + done: + /* Convert back from base 4 to base 2. 
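+     * Since sqrt(4^e * m) = 2^e * sqrt(m), halving the exponent
+     * undoes the base-4 split; the odd bit of the original exponent
+     * was folded into the fraction during argument reduction, so all
+     * that remains is to renormalize the fraction.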
*/ + a->exp >>= 1; + if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { + frac_add(a, a, a); + } else { + a->exp += 1; + } + return; + + d_nan: + float_raise(float_flag_invalid, status); + parts_default_nan(a, status); +} + +/* + * Rounds the floating-point value `a' to an integer, and returns the + * result as a floating-point value. The operation is performed + * according to the IEC/IEEE Standard for Binary Floating-Point + * Arithmetic. + * + * parts_round_to_int_normal is an internal helper function for + * normal numbers only, returning true for inexact but not directly + * raising float_flag_inexact. + */ +static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode, + int scale, int frac_size) +{ + uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc; + int shift_adj; + + scale = MIN(MAX(scale, -0x10000), 0x10000); + a->exp += scale; + + if (a->exp < 0) { + bool one; + + /* All fractional */ + switch (rmode) { + case float_round_nearest_even: + one = false; + if (a->exp == -1) { + FloatPartsN tmp; + /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */ + frac_add(&tmp, a, a); + /* Anything remaining means frac > 0.5. */ + one = !frac_eqz(&tmp); + } + break; + case float_round_ties_away: + one = a->exp == -1; + break; + case float_round_to_zero: + one = false; + break; + case float_round_up: + one = !a->sign; + break; + case float_round_down: + one = a->sign; + break; + case float_round_to_odd: + one = true; + break; + default: + g_assert_not_reached(); + } + + frac_clear(a); + a->exp = 0; + if (one) { + a->frac_hi = DECOMPOSED_IMPLICIT_BIT; + } else { + a->cls = float_class_zero; + } + return true; + } + + if (a->exp >= frac_size) { + /* All integral */ + return false; + } + + if (N > 64 && a->exp < N - 64) { + /* + * Rounding is not in the low word -- shift lsb to bit 2, + * which leaves room for sticky and rounding bit. + */ + shift_adj = (N - 1) - (a->exp + 2); + frac_shrjam(a, shift_adj); + frac_lsb = 1 << 2; + } else { + shift_adj = 0; + frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63); + } + + frac_lsbm1 = frac_lsb >> 1; + rnd_mask = frac_lsb - 1; + rnd_even_mask = rnd_mask | frac_lsb; + + if (!(a->frac_lo & rnd_mask)) { + /* Fractional bits already clear, undo the shift above. */ + frac_shl(a, shift_adj); + return false; + } + + switch (rmode) { + case float_round_nearest_even: + inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0); + break; + case float_round_ties_away: + inc = frac_lsbm1; + break; + case float_round_to_zero: + inc = 0; + break; + case float_round_up: + inc = a->sign ? 0 : rnd_mask; + break; + case float_round_down: + inc = a->sign ? rnd_mask : 0; + break; + case float_round_to_odd: + inc = a->frac_lo & frac_lsb ? 
0 : rnd_mask; + break; + default: + g_assert_not_reached(); + } + + if (shift_adj == 0) { + if (frac_addi(a, a, inc)) { + frac_shr(a, 1); + a->frac_hi |= DECOMPOSED_IMPLICIT_BIT; + a->exp++; + } + a->frac_lo &= ~rnd_mask; + } else { + frac_addi(a, a, inc); + a->frac_lo &= ~rnd_mask; + /* Be careful shifting back, not to overflow */ + frac_shl(a, shift_adj - 1); + if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) { + a->exp++; + } else { + frac_add(a, a, a); + } + } + return true; +} + +static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, + int scale, float_status *s, + const FloatFmt *fmt) +{ + switch (a->cls) { + case float_class_qnan: + case float_class_snan: + parts_return_nan(a, s); + break; + case float_class_zero: + case float_class_inf: + break; + case float_class_normal: + if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { + float_raise(float_flag_inexact, s); + } + break; + default: + g_assert_not_reached(); + } +} + +/* + * Returns the result of converting the floating-point value `a' to + * the two's complement integer format. The conversion is performed + * according to the IEC/IEEE Standard for Binary Floating-Point + * Arithmetic---which means in particular that the conversion is + * rounded according to the current rounding mode. If `a' is a NaN, + * the largest positive integer is returned. Otherwise, if the + * conversion overflows, the largest integer with the same sign as `a' + * is returned. + */ +static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, + int scale, int64_t min, int64_t max, + float_status *s) +{ + int flags = 0; + uint64_t r; + + switch (p->cls) { + case float_class_snan: + case float_class_qnan: + flags = float_flag_invalid; + r = max; + break; + + case float_class_inf: + flags = float_flag_invalid; + r = p->sign ? min : max; + break; + + case float_class_zero: + return 0; + + case float_class_normal: + /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ + if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { + flags = float_flag_inexact; + } + + if (p->exp <= DECOMPOSED_BINARY_POINT) { + r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); + } else { + r = UINT64_MAX; + } + if (p->sign) { + if (r <= -(uint64_t)min) { + r = -r; + } else { + flags = float_flag_invalid; + r = min; + } + } else if (r > max) { + flags = float_flag_invalid; + r = max; + } + break; + + default: + g_assert_not_reached(); + } + + float_raise(flags, s); + return r; +} + +/* + * Returns the result of converting the floating-point value `a' to + * the unsigned integer format. The conversion is performed according + * to the IEC/IEEE Standard for Binary Floating-Point + * Arithmetic---which means in particular that the conversion is + * rounded according to the current rounding mode. If `a' is a NaN, + * the largest unsigned integer is returned. Otherwise, if the + * conversion overflows, the largest unsigned integer is returned. If + * the 'a' is negative, the result is rounded and zero is returned; + * values that do not round to zero will raise the inexact exception + * flag. + */ +static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, + int scale, uint64_t max, float_status *s) +{ + int flags = 0; + uint64_t r; + + switch (p->cls) { + case float_class_snan: + case float_class_qnan: + flags = float_flag_invalid; + r = max; + break; + + case float_class_inf: + flags = float_flag_invalid; + r = p->sign ? 
0 : max; + break; + + case float_class_zero: + return 0; + + case float_class_normal: + /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ + if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { + flags = float_flag_inexact; + if (p->cls == float_class_zero) { + r = 0; + break; + } + } + + if (p->sign) { + flags = float_flag_invalid; + r = 0; + } else if (p->exp > DECOMPOSED_BINARY_POINT) { + flags = float_flag_invalid; + r = max; + } else { + r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); + if (r > max) { + flags = float_flag_invalid; + r = max; + } + } + break; + + default: + g_assert_not_reached(); + } + + float_raise(flags, s); + return r; +} + +/* + * Integer to float conversions + * + * Returns the result of converting the two's complement integer `a' + * to the floating-point format. The conversion is performed according + * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. + */ +static void partsN(sint_to_float)(FloatPartsN *p, int64_t a, + int scale, float_status *s) +{ + uint64_t f = a; + int shift; + + memset(p, 0, sizeof(*p)); + + if (a == 0) { + p->cls = float_class_zero; + return; + } + + p->cls = float_class_normal; + if (a < 0) { + f = -f; + p->sign = true; + } + shift = clz64(f); + scale = MIN(MAX(scale, -0x10000), 0x10000); + + p->exp = DECOMPOSED_BINARY_POINT - shift + scale; + p->frac_hi = f << shift; +} + +/* + * Unsigned Integer to float conversions + * + * Returns the result of converting the unsigned integer `a' to the + * floating-point format. The conversion is performed according to the + * IEC/IEEE Standard for Binary Floating-Point Arithmetic. + */ +static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a, + int scale, float_status *status) +{ + memset(p, 0, sizeof(*p)); + + if (a == 0) { + p->cls = float_class_zero; + } else { + int shift = clz64(a); + scale = MIN(MAX(scale, -0x10000), 0x10000); + p->cls = float_class_normal; + p->exp = DECOMPOSED_BINARY_POINT - shift + scale; + p->frac_hi = a << shift; + } +} + +/* + * Float min/max. + */ +static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, + float_status *s, int flags) +{ + int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); + int a_exp, b_exp, cmp; + + if (unlikely(ab_mask & float_cmask_anynan)) { + /* + * For minNum/maxNum (IEEE 754-2008) + * or minimumNumber/maximumNumber (IEEE 754-2019), + * if one operand is a QNaN, and the other + * operand is numerical, then return numerical argument. + */ + if ((flags & (minmax_isnum | minmax_isnumber)) + && !(ab_mask & float_cmask_snan) + && (ab_mask & ~float_cmask_qnan)) { + return is_nan(a->cls) ? b : a; + } + + /* + * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag + * are removed and replaced with minimum, minimumNumber, maximum + * and maximumNumber. + * minimumNumber/maximumNumber behavior for SNaN is changed to: + * If both operands are NaNs, a QNaN is returned. + * If either operand is a SNaN, + * an invalid operation exception is signaled, + * but unless both operands are NaNs, + * the SNaN is otherwise ignored and not converted to a QNaN. + */ + if ((flags & minmax_isnumber) + && (ab_mask & float_cmask_snan) + && (ab_mask & ~float_cmask_anynan)) { + float_raise(float_flag_invalid, s); + return is_nan(a->cls) ? 
b : a; + } + + return parts_pick_nan(a, b, s); + } + + a_exp = a->exp; + b_exp = b->exp; + + if (unlikely(ab_mask != float_cmask_normal)) { + switch (a->cls) { + case float_class_normal: + break; + case float_class_inf: + a_exp = INT16_MAX; + break; + case float_class_zero: + a_exp = INT16_MIN; + break; + default: + g_assert_not_reached(); + break; + } + switch (b->cls) { + case float_class_normal: + break; + case float_class_inf: + b_exp = INT16_MAX; + break; + case float_class_zero: + b_exp = INT16_MIN; + break; + default: + g_assert_not_reached(); + break; + } + } + + /* Compare magnitudes. */ + cmp = a_exp - b_exp; + if (cmp == 0) { + cmp = frac_cmp(a, b); + } + + /* + * Take the sign into account. + * For ismag, only do this if the magnitudes are equal. + */ + if (!(flags & minmax_ismag) || cmp == 0) { + if (a->sign != b->sign) { + /* For differing signs, the negative operand is less. */ + cmp = a->sign ? -1 : 1; + } else if (a->sign) { + /* For two negative operands, invert the magnitude comparison. */ + cmp = -cmp; + } + } + + if (flags & minmax_ismin) { + cmp = -cmp; + } + return cmp < 0 ? b : a; +} + +/* + * Floating point compare + */ +static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, + float_status *s, bool is_quiet) +{ + int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); + int cmp; + + if (likely(ab_mask == float_cmask_normal)) { + if (a->sign != b->sign) { + goto a_sign; + } + if (a->exp != b->exp) { + cmp = a->exp < b->exp ? -1 : 1; + } else { + cmp = frac_cmp(a, b); + } + if (a->sign) { + cmp = -cmp; + } + return cmp; + } + + if (unlikely(ab_mask & float_cmask_anynan)) { + if (!is_quiet || (ab_mask & float_cmask_snan)) { + float_raise(float_flag_invalid, s); + } + return float_relation_unordered; + } + + if (ab_mask & float_cmask_zero) { + if (ab_mask == float_cmask_zero) { + return float_relation_equal; + } else if (a->cls == float_class_zero) { + goto b_sign; + } else { + goto a_sign; + } + } + + if (ab_mask == float_cmask_inf) { + if (a->sign == b->sign) { + return float_relation_equal; + } + } else if (b->cls == float_class_inf) { + goto b_sign; + } else { + g_assert(a->cls == float_class_inf); + } + + a_sign: + return a->sign ? float_relation_less : float_relation_greater; + b_sign: + return b->sign ? float_relation_greater : float_relation_less; +} + +/* + * Multiply A by 2 raised to the power N. + */ +static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) +{ + switch (a->cls) { + case float_class_snan: + case float_class_qnan: + parts_return_nan(a, s); + break; + case float_class_zero: + case float_class_inf: + break; + case float_class_normal: + a->exp += MIN(MAX(n, -0x10000), 0x10000); + break; + default: + g_assert_not_reached(); + } +} + +/* + * Return log2(A) + */ +static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) +{ + uint64_t a0, a1, r, t, ign; + FloatPartsN f; + int i, n, a_exp, f_exp; + + if (unlikely(a->cls != float_class_normal)) { + switch (a->cls) { + case float_class_snan: + case float_class_qnan: + parts_return_nan(a, s); + return; + case float_class_zero: + /* log2(0) = -inf */ + a->cls = float_class_inf; + a->sign = 1; + return; + case float_class_inf: + if (unlikely(a->sign)) { + goto d_nan; + } + return; + default: + break; + } + g_assert_not_reached(); + } + if (unlikely(a->sign)) { + goto d_nan; + } + + /* TODO: This algorithm looses bits too quickly for float128. 
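+     *
+     * The fraction of the result is generated one bit per iteration
+     * of the loop below: the mantissa f (in [1,2)) is squared; if f^2
+     * lands in [2,4) the next bit of log2(f) is 1 and f^2/2 becomes
+     * the new mantissa, otherwise the next bit is 0 and f^2 is kept.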
*/ + g_assert(N == 64); + + a_exp = a->exp; + f_exp = -1; + + r = 0; + t = DECOMPOSED_IMPLICIT_BIT; + a0 = a->frac_hi; + a1 = 0; + + n = fmt->frac_size + 2; + if (unlikely(a_exp == -1)) { + /* + * When a_exp == -1, we're computing the log2 of a value [0.5,1.0). + * When the value is very close to 1.0, there are lots of 1's in + * the msb parts of the fraction. At the end, when we subtract + * this value from -1.0, we can see a catastrophic loss of precision, + * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the + * bits of y in the final result. To minimize this, compute as many + * digits as we can. + * ??? This case needs another algorithm to avoid this. + */ + n = fmt->frac_size * 2 + 2; + /* Don't compute a value overlapping the sticky bit */ + n = MIN(n, 62); + } + + for (i = 0; i < n; i++) { + if (a1) { + mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign); + } else if (a0 & 0xffffffffull) { + mul64To128(a0, a0, &a0, &a1); + } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) { + a0 >>= 32; + a0 *= a0; + } else { + goto exact; + } + + if (a0 & DECOMPOSED_IMPLICIT_BIT) { + if (unlikely(a_exp == 0 && r == 0)) { + /* + * When a_exp == 0, we're computing the log2 of a value + * [1.0,2.0). When the value is very close to 1.0, there + * are lots of 0's in the msb parts of the fraction. + * We need to compute more digits to produce a correct + * result -- restart at the top of the fraction. + * ??? This is likely to lose precision quickly, as for + * float128; we may need another method. + */ + f_exp -= i; + t = r = DECOMPOSED_IMPLICIT_BIT; + i = 0; + } else { + r |= t; + } + } else { + add128(a0, a1, a0, a1, &a0, &a1); + } + t >>= 1; + } + + /* Set sticky for inexact. */ + r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT); + + exact: + parts_sint_to_float(a, a_exp, 0, s); + if (r == 0) { + return; + } + + memset(&f, 0, sizeof(f)); + f.cls = float_class_normal; + f.frac_hi = r; + f.exp = f_exp - frac_normalize(&f); + + if (a_exp < 0) { + parts_sub_normal(a, &f); + } else if (a_exp > 0) { + parts_add_normal(a, &f); + } else { + *a = f; + } + return; + + d_nan: + float_raise(float_flag_invalid, s); + parts_default_nan(a, s); +} diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index c2f87addb25..f2ad0f335e6 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -103,7 +103,7 @@ static inline bool snan_bit_is_one(float_status *status) { #if defined(TARGET_MIPS) return status->snan_bit_is_one; -#elif defined(TARGET_HPPA) || defined(TARGET_UNICORE32) || defined(TARGET_SH4) +#elif defined(TARGET_HPPA) || defined(TARGET_SH4) return 1; #else return 0; @@ -129,7 +129,7 @@ static bool parts_is_snan_frac(uint64_t frac, float_status *status) | The pattern for a default generated deconstructed floating-point NaN. *----------------------------------------------------------------------------*/ -static FloatParts parts_default_nan(float_status *status) +static void parts64_default_nan(FloatParts64 *p, float_status *status) { bool sign = 0; uint64_t frac; @@ -145,12 +145,14 @@ static FloatParts parts_default_nan(float_status *status) #elif defined(TARGET_HPPA) /* snan_bit_is_one, set msb-1. */ frac = 1ULL << (DECOMPOSED_BINARY_POINT - 2); +#elif defined(TARGET_HEXAGON) + sign = 1; + frac = ~0ULL; #else - /* This case is true for Alpha, ARM, MIPS, OpenRISC, PPC, RISC-V, - * S390, SH4, TriCore, and Xtensa. I cannot find documentation - * for Unicore32; the choice from the original commit is unchanged. 
- * Our other supported targets, CRIS, LM32, Moxie, Nios2, and Tile, - * do not have floating-point. + /* + * This case is true for Alpha, ARM, MIPS, OpenRISC, PPC, RISC-V, + * S390, SH4, TriCore, and Xtensa. Our other supported targets, + * CRIS, Nios2, and Tile, do not have floating-point. */ if (snan_bit_is_one(status)) { /* set all bits other than msb */ @@ -161,7 +163,7 @@ static FloatParts parts_default_nan(float_status *status) } #endif - return (FloatParts) { + *p = (FloatParts64) { .cls = float_class_qnan, .sign = sign, .exp = INT_MAX, @@ -169,26 +171,54 @@ static FloatParts parts_default_nan(float_status *status) }; } +static void parts128_default_nan(FloatParts128 *p, float_status *status) +{ + /* + * Extrapolate from the choices made by parts64_default_nan to fill + * in the quad-floating format. If the low bit is set, assume we + * want to set all non-snan bits. + */ + FloatParts64 p64; + parts64_default_nan(&p64, status); + + *p = (FloatParts128) { + .cls = float_class_qnan, + .sign = p64.sign, + .exp = INT_MAX, + .frac_hi = p64.frac, + .frac_lo = -(p64.frac & 1) + }; +} + /*---------------------------------------------------------------------------- | Returns a quiet NaN from a signalling NaN for the deconstructed | floating-point parts. *----------------------------------------------------------------------------*/ -static FloatParts parts_silence_nan(FloatParts a, float_status *status) +static uint64_t parts_silence_nan_frac(uint64_t frac, float_status *status) { g_assert(!no_signaling_nans(status)); -#if defined(TARGET_HPPA) - a.frac &= ~(1ULL << (DECOMPOSED_BINARY_POINT - 1)); - a.frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 2); -#else + + /* The only snan_bit_is_one target without default_nan_mode is HPPA. */ if (snan_bit_is_one(status)) { - return parts_default_nan(status); + frac &= ~(1ULL << (DECOMPOSED_BINARY_POINT - 1)); + frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 2); } else { - a.frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 1); + frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 1); } -#endif - a.cls = float_class_qnan; - return a; + return frac; +} + +static void parts64_silence_nan(FloatParts64 *p, float_status *status) +{ + p->frac = parts_silence_nan_frac(p->frac, status); + p->cls = float_class_qnan; +} + +static void parts128_silence_nan(FloatParts128 *p, float_status *status) +{ + p->frac_hi = parts_silence_nan_frac(p->frac_hi, status); + p->cls = float_class_qnan; } /*---------------------------------------------------------------------------- @@ -225,26 +255,6 @@ floatx80 floatx80_default_nan(float_status *status) const floatx80 floatx80_infinity = make_floatx80_init(floatx80_infinity_high, floatx80_infinity_low); -/*---------------------------------------------------------------------------- -| Raises the exceptions specified by `flags'. Floating-point traps can be -| defined here if desired. It is currently not possible for such a trap -| to substitute a result value. If traps are not implemented, this routine -| should be simply `float_exception_flags |= flags;'. -*----------------------------------------------------------------------------*/ - -void float_raise(uint8_t flags, float_status *status) -{ - status->float_exception_flags |= flags; -} - -/*---------------------------------------------------------------------------- -| Internal canonical NaN format. 
-*----------------------------------------------------------------------------*/ -typedef struct { - bool sign; - uint64_t high, low; -} commonNaNT; - /*---------------------------------------------------------------------------- | Returns 1 if the half-precision floating-point value `a' is a quiet | NaN; otherwise returns 0. @@ -360,46 +370,6 @@ bool float32_is_signaling_nan(float32 a_, float_status *status) } } -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point NaN -| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid -| exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT float32ToCommonNaN(float32 a, float_status *status) -{ - commonNaNT z; - - if (float32_is_signaling_nan(a, status)) { - float_raise(float_flag_invalid, status); - } - z.sign = float32_val(a) >> 31; - z.low = 0; - z.high = ((uint64_t)float32_val(a)) << 41; - return z; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the single- -| precision floating-point format. -*----------------------------------------------------------------------------*/ - -static float32 commonNaNToFloat32(commonNaNT a, float_status *status) -{ - uint32_t mantissa = a.high >> 41; - - if (status->default_nan_mode) { - return float32_default_nan(status); - } - - if (mantissa) { - return make_float32( - (((uint32_t)a.sign) << 31) | 0x7F800000 | (a.high >> 41)); - } else { - return float32_default_nan(status); - } -} - /*---------------------------------------------------------------------------- | Select which NaN to propagate for a two-input operation. | IEEE754 doesn't specify all the details of this, so the @@ -624,6 +594,12 @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls, } else { return 1; } +#elif defined(TARGET_RISCV) + /* For RISC-V, InvalidOp is set when multiplicands are Inf and zero */ + if (infzero) { + float_raise(float_flag_invalid, status); + } + return 3; /* default NaN */ #elif defined(TARGET_XTENSA) /* * For Xtensa, the (inf,zero,nan) case sets InvalidOp and returns @@ -664,62 +640,6 @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls, #endif } -/*---------------------------------------------------------------------------- -| Takes two single-precision floating-point values `a' and `b', one of which -| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a -| signaling NaN, the invalid exception is raised. -*----------------------------------------------------------------------------*/ - -static float32 propagateFloat32NaN(float32 a, float32 b, float_status *status) -{ - bool aIsLargerSignificand; - uint32_t av, bv; - FloatClass a_cls, b_cls; - - /* This is not complete, but is good enough for pickNaN. */ - a_cls = (!float32_is_any_nan(a) - ? float_class_normal - : float32_is_signaling_nan(a, status) - ? float_class_snan - : float_class_qnan); - b_cls = (!float32_is_any_nan(b) - ? float_class_normal - : float32_is_signaling_nan(b, status) - ? 
float_class_snan - : float_class_qnan); - - av = float32_val(a); - bv = float32_val(b); - - if (is_snan(a_cls) || is_snan(b_cls)) { - float_raise(float_flag_invalid, status); - } - - if (status->default_nan_mode) { - return float32_default_nan(status); - } - - if ((uint32_t)(av << 1) < (uint32_t)(bv << 1)) { - aIsLargerSignificand = 0; - } else if ((uint32_t)(bv << 1) < (uint32_t)(av << 1)) { - aIsLargerSignificand = 1; - } else { - aIsLargerSignificand = (av < bv) ? 1 : 0; - } - - if (pickNaN(a_cls, b_cls, aIsLargerSignificand, status)) { - if (is_snan(b_cls)) { - return float32_silence_nan(b, status); - } - return b; - } else { - if (is_snan(a_cls)) { - return float32_silence_nan(a, status); - } - return a; - } -} - /*---------------------------------------------------------------------------- | Returns 1 if the double-precision floating-point value `a' is a quiet | NaN; otherwise returns 0. @@ -760,104 +680,6 @@ bool float64_is_signaling_nan(float64 a_, float_status *status) } } -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point NaN -| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid -| exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT float64ToCommonNaN(float64 a, float_status *status) -{ - commonNaNT z; - - if (float64_is_signaling_nan(a, status)) { - float_raise(float_flag_invalid, status); - } - z.sign = float64_val(a) >> 63; - z.low = 0; - z.high = float64_val(a) << 12; - return z; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the double- -| precision floating-point format. -*----------------------------------------------------------------------------*/ - -static float64 commonNaNToFloat64(commonNaNT a, float_status *status) -{ - uint64_t mantissa = a.high >> 12; - - if (status->default_nan_mode) { - return float64_default_nan(status); - } - - if (mantissa) { - return make_float64( - (((uint64_t) a.sign) << 63) - | UINT64_C(0x7FF0000000000000) - | (a.high >> 12)); - } else { - return float64_default_nan(status); - } -} - -/*---------------------------------------------------------------------------- -| Takes two double-precision floating-point values `a' and `b', one of which -| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a -| signaling NaN, the invalid exception is raised. -*----------------------------------------------------------------------------*/ - -static float64 propagateFloat64NaN(float64 a, float64 b, float_status *status) -{ - bool aIsLargerSignificand; - uint64_t av, bv; - FloatClass a_cls, b_cls; - - /* This is not complete, but is good enough for pickNaN. */ - a_cls = (!float64_is_any_nan(a) - ? float_class_normal - : float64_is_signaling_nan(a, status) - ? float_class_snan - : float_class_qnan); - b_cls = (!float64_is_any_nan(b) - ? float_class_normal - : float64_is_signaling_nan(b, status) - ? 
float_class_snan - : float_class_qnan); - - av = float64_val(a); - bv = float64_val(b); - - if (is_snan(a_cls) || is_snan(b_cls)) { - float_raise(float_flag_invalid, status); - } - - if (status->default_nan_mode) { - return float64_default_nan(status); - } - - if ((uint64_t)(av << 1) < (uint64_t)(bv << 1)) { - aIsLargerSignificand = 0; - } else if ((uint64_t)(bv << 1) < (uint64_t)(av << 1)) { - aIsLargerSignificand = 1; - } else { - aIsLargerSignificand = (av < bv) ? 1 : 0; - } - - if (pickNaN(a_cls, b_cls, aIsLargerSignificand, status)) { - if (is_snan(b_cls)) { - return float64_silence_nan(b, status); - } - return b; - } else { - if (is_snan(a_cls)) { - return float64_silence_nan(a, status); - } - return a; - } -} - /*---------------------------------------------------------------------------- | Returns 1 if the extended double-precision floating-point value `a' is a | quiet NaN; otherwise returns 0. This slightly differs from the same @@ -921,55 +743,6 @@ floatx80 floatx80_silence_nan(floatx80 a, float_status *status) return a; } -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the -| invalid exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT floatx80ToCommonNaN(floatx80 a, float_status *status) -{ - floatx80 dflt; - commonNaNT z; - - if (floatx80_is_signaling_nan(a, status)) { - float_raise(float_flag_invalid, status); - } - if (a.low >> 63) { - z.sign = a.high >> 15; - z.low = 0; - z.high = a.low << 1; - } else { - dflt = floatx80_default_nan(status); - z.sign = dflt.high >> 15; - z.low = 0; - z.high = dflt.low << 1; - } - return z; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the extended -| double-precision floating-point format. -*----------------------------------------------------------------------------*/ - -static floatx80 commonNaNToFloatx80(commonNaNT a, float_status *status) -{ - floatx80 z; - - if (status->default_nan_mode) { - return floatx80_default_nan(status); - } - - if (a.high >> 1) { - z.low = UINT64_C(0x8000000000000000) | a.high >> 1; - z.high = (((uint16_t)a.sign) << 15) | 0x7FFF; - } else { - z = floatx80_default_nan(status); - } - return z; -} - /*---------------------------------------------------------------------------- | Takes two extended double-precision floating-point values `a' and `b', one | of which is a NaN, and returns the appropriate NaN result. If either `a' or @@ -1061,111 +834,3 @@ bool float128_is_signaling_nan(float128 a, float_status *status) } } } - -/*---------------------------------------------------------------------------- -| Returns a quiet NaN from a signalling NaN for the quadruple-precision -| floating point value `a'. -*----------------------------------------------------------------------------*/ - -float128 float128_silence_nan(float128 a, float_status *status) -{ - if (no_signaling_nans(status)) { - g_assert_not_reached(); - } else { - if (snan_bit_is_one(status)) { - return float128_default_nan(status); - } else { - a.high |= UINT64_C(0x0000800000000000); - return a; - } - } -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point NaN -| `a' to the canonical NaN format. 
If `a' is a signaling NaN, the invalid -| exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT float128ToCommonNaN(float128 a, float_status *status) -{ - commonNaNT z; - - if (float128_is_signaling_nan(a, status)) { - float_raise(float_flag_invalid, status); - } - z.sign = a.high >> 63; - shortShift128Left(a.high, a.low, 16, &z.high, &z.low); - return z; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the quadruple- -| precision floating-point format. -*----------------------------------------------------------------------------*/ - -static float128 commonNaNToFloat128(commonNaNT a, float_status *status) -{ - float128 z; - - if (status->default_nan_mode) { - return float128_default_nan(status); - } - - shift128Right(a.high, a.low, 16, &z.high, &z.low); - z.high |= (((uint64_t)a.sign) << 63) | UINT64_C(0x7FFF000000000000); - return z; -} - -/*---------------------------------------------------------------------------- -| Takes two quadruple-precision floating-point values `a' and `b', one of -| which is a NaN, and returns the appropriate NaN result. If either `a' or -| `b' is a signaling NaN, the invalid exception is raised. -*----------------------------------------------------------------------------*/ - -static float128 propagateFloat128NaN(float128 a, float128 b, - float_status *status) -{ - bool aIsLargerSignificand; - FloatClass a_cls, b_cls; - - /* This is not complete, but is good enough for pickNaN. */ - a_cls = (!float128_is_any_nan(a) - ? float_class_normal - : float128_is_signaling_nan(a, status) - ? float_class_snan - : float_class_qnan); - b_cls = (!float128_is_any_nan(b) - ? float_class_normal - : float128_is_signaling_nan(b, status) - ? float_class_snan - : float_class_qnan); - - if (is_snan(a_cls) || is_snan(b_cls)) { - float_raise(float_flag_invalid, status); - } - - if (status->default_nan_mode) { - return float128_default_nan(status); - } - - if (lt128(a.high << 1, a.low, b.high << 1, b.low)) { - aIsLargerSignificand = 0; - } else if (lt128(b.high << 1, b.low, a.high << 1, a.low)) { - aIsLargerSignificand = 1; - } else { - aIsLargerSignificand = (a.high < b.high) ? 1 : 0; - } - - if (pickNaN(a_cls, b_cls, aIsLargerSignificand, status)) { - if (is_snan(b_cls)) { - return float128_silence_nan(b, status); - } - return b; - } else { - if (is_snan(a_cls)) { - return float128_silence_nan(a, status); - } - return a; - } -} diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 67cfa0fd82c..9a28720d82a 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -132,7 +132,7 @@ this code that are retained. 
if (unlikely(soft_t ## _is_denormal(*a))) { \ *a = soft_t ## _set_sign(soft_t ## _zero, \ soft_t ## _is_neg(*a)); \ - s->float_exception_flags |= float_flag_input_denormal; \ + float_raise(float_flag_input_denormal, s); \ } \ } @@ -360,7 +360,7 @@ float32_gen2(float32 xa, float32 xb, float_status *s, ur.h = hard(ua.h, ub.h); if (unlikely(f32_is_inf(ur))) { - s->float_exception_flags |= float_flag_overflow; + float_raise(float_flag_overflow, s); } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) { goto soft; } @@ -391,7 +391,7 @@ float64_gen2(float64 xa, float64 xb, float_status *s, ur.h = hard(ua.h, ub.h); if (unlikely(f64_is_inf(ur))) { - s->float_exception_flags |= float_flag_overflow; + float_raise(float_flag_overflow, s); } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) { goto soft; } @@ -401,60 +401,6 @@ float64_gen2(float64 xa, float64 xb, float_status *s, return soft(ua.s, ub.s, s); } -/*---------------------------------------------------------------------------- -| Returns the fraction bits of the single-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline uint32_t extractFloat32Frac(float32 a) -{ - return float32_val(a) & 0x007FFFFF; -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the single-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline int extractFloat32Exp(float32 a) -{ - return (float32_val(a) >> 23) & 0xFF; -} - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the single-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline bool extractFloat32Sign(float32 a) -{ - return float32_val(a) >> 31; -} - -/*---------------------------------------------------------------------------- -| Returns the fraction bits of the double-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline uint64_t extractFloat64Frac(float64 a) -{ - return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF); -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the double-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline int extractFloat64Exp(float64 a) -{ - return (float64_val(a) >> 52) & 0x7FF; -} - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the double-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline bool extractFloat64Sign(float64 a) -{ - return float64_val(a) >> 63; -} - /* * Classify a floating point number. Everything above float_class_qnan * is a NaN so cls >= float_class_qnan is any NaN. 
@@ -469,6 +415,34 @@ typedef enum __attribute__ ((__packed__)) { float_class_snan, } FloatClass; +#define float_cmask(bit) (1u << (bit)) + +enum { + float_cmask_zero = float_cmask(float_class_zero), + float_cmask_normal = float_cmask(float_class_normal), + float_cmask_inf = float_cmask(float_class_inf), + float_cmask_qnan = float_cmask(float_class_qnan), + float_cmask_snan = float_cmask(float_class_snan), + + float_cmask_infzero = float_cmask_zero | float_cmask_inf, + float_cmask_anynan = float_cmask_qnan | float_cmask_snan, +}; + +/* Flags for parts_minmax. */ +enum { + /* Set for minimum; clear for maximum. */ + minmax_ismin = 1, + /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */ + minmax_isnum = 2, + /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */ + minmax_ismag = 4, + /* + * Set for the IEEE 754-2019 minimumNumber() and maximumNumber() + * operations. + */ + minmax_isnumber = 8, +}; + /* Simple helpers for checking if, or what kind of, NaN we have */ static inline __attribute__((unused)) bool is_nan(FloatClass c) { @@ -486,26 +460,52 @@ static inline __attribute__((unused)) bool is_qnan(FloatClass c) } /* - * Structure holding all of the decomposed parts of a float. The - * exponent is unbiased and the fraction is normalized. All - * calculations are done with a 64 bit fraction and then rounded as - * appropriate for the final format. + * Structure holding all of the decomposed parts of a float. + * The exponent is unbiased and the fraction is normalized. * - * Thanks to the packed FloatClass a decent compiler should be able to - * fit the whole structure into registers and avoid using the stack - * for parameter passing. + * The fraction words are stored in big-endian word ordering, + * so that truncation from a larger format to a smaller format + * can be done simply by ignoring subsequent elements. */ typedef struct { - uint64_t frac; - int32_t exp; FloatClass cls; bool sign; -} FloatParts; + int32_t exp; + union { + /* Routines that know the structure may reference the singular name. */ + uint64_t frac; + /* + * Routines expanded with multiple structures reference "hi" and "lo" + * depending on the operation. In FloatParts64, "hi" and "lo" are + * both the same word and aliased here. + */ + uint64_t frac_hi; + uint64_t frac_lo; + }; +} FloatParts64; + +typedef struct { + FloatClass cls; + bool sign; + int32_t exp; + uint64_t frac_hi; + uint64_t frac_lo; +} FloatParts128; -#define DECOMPOSED_BINARY_POINT (64 - 2) +typedef struct { + FloatClass cls; + bool sign; + int32_t exp; + uint64_t frac_hi; + uint64_t frac_hm; /* high-middle */ + uint64_t frac_lm; /* low-middle */ + uint64_t frac_lo; +} FloatParts256; + +/* These apply to the most significant word of each FloatPartsN. */ +#define DECOMPOSED_BINARY_POINT 63 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT) -#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1) /* Structure holding all of the relevant parameters for a format. 
* exp_size: the size of the exponent field @@ -514,9 +514,7 @@ typedef struct { * frac_size: the size of the fraction field * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT * The following are computed based the size of fraction - * frac_lsb: least significant bit of fraction - * frac_lsbm1: the bit below the least significant bit (for rounding) - * round_mask/roundeven_mask: masks used for rounding + * round_mask: bits below lsb which must be rounded * The following optional modifiers are available: * arm_althp: handle ARM Alternative Half Precision */ @@ -526,24 +524,21 @@ typedef struct { int exp_max; int frac_size; int frac_shift; - uint64_t frac_lsb; - uint64_t frac_lsbm1; - uint64_t round_mask; - uint64_t roundeven_mask; bool arm_althp; + uint64_t round_mask; } FloatFmt; /* Expand fields based on the size of exponent and fraction */ -#define FLOAT_PARAMS(E, F) \ - .exp_size = E, \ - .exp_bias = ((1 << E) - 1) >> 1, \ - .exp_max = (1 << E) - 1, \ - .frac_size = F, \ - .frac_shift = DECOMPOSED_BINARY_POINT - F, \ - .frac_lsb = 1ull << (DECOMPOSED_BINARY_POINT - F), \ - .frac_lsbm1 = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1), \ - .round_mask = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1, \ - .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1 +#define FLOAT_PARAMS_(E) \ + .exp_size = E, \ + .exp_bias = ((1 << E) - 1) >> 1, \ + .exp_max = (1 << E) - 1 + +#define FLOAT_PARAMS(E, F) \ + FLOAT_PARAMS_(E), \ + .frac_size = F, \ + .frac_shift = (-F - 1) & 63, \ + .round_mask = (1ull << ((-F - 1) & 63)) - 1 static const FloatFmt float16_params = { FLOAT_PARAMS(5, 10) @@ -566,65 +561,123 @@ static const FloatFmt float64_params = { FLOAT_PARAMS(11, 52) }; +static const FloatFmt float128_params = { + FLOAT_PARAMS(15, 112) +}; + +#define FLOATX80_PARAMS(R) \ + FLOAT_PARAMS_(15), \ + .frac_size = R == 64 ? 63 : R, \ + .frac_shift = 0, \ + .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1 + +static const FloatFmt floatx80_params[3] = { + [floatx80_precision_s] = { FLOATX80_PARAMS(23) }, + [floatx80_precision_d] = { FLOATX80_PARAMS(52) }, + [floatx80_precision_x] = { FLOATX80_PARAMS(64) }, +}; + /* Unpack a float to parts, but do not canonicalize. 
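  * For example, the float32 value 0x40490fdb (pi) unpacks to
  * sign = 0, exp = 128 and frac = 0x490fdb: the exponent is still
  * biased and the implicit bit has not yet been added; that is the
  * job of canonicalization.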
*/ -static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw) +static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw) { - const int sign_pos = fmt.frac_size + fmt.exp_size; + const int f_size = fmt->frac_size; + const int e_size = fmt->exp_size; - return (FloatParts) { + *r = (FloatParts64) { .cls = float_class_unclassified, - .sign = extract64(raw, sign_pos, 1), - .exp = extract64(raw, fmt.frac_size, fmt.exp_size), - .frac = extract64(raw, 0, fmt.frac_size), + .sign = extract64(raw, f_size + e_size, 1), + .exp = extract64(raw, f_size, e_size), + .frac = extract64(raw, 0, f_size) }; } -static inline FloatParts float16_unpack_raw(float16 f) +static inline void float16_unpack_raw(FloatParts64 *p, float16 f) +{ + unpack_raw64(p, &float16_params, f); +} + +static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f) +{ + unpack_raw64(p, &bfloat16_params, f); +} + +static inline void float32_unpack_raw(FloatParts64 *p, float32 f) { - return unpack_raw(float16_params, f); + unpack_raw64(p, &float32_params, f); } -static inline FloatParts bfloat16_unpack_raw(bfloat16 f) +static inline void float64_unpack_raw(FloatParts64 *p, float64 f) { - return unpack_raw(bfloat16_params, f); + unpack_raw64(p, &float64_params, f); } -static inline FloatParts float32_unpack_raw(float32 f) +static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f) { - return unpack_raw(float32_params, f); + *p = (FloatParts128) { + .cls = float_class_unclassified, + .sign = extract32(f.high, 15, 1), + .exp = extract32(f.high, 0, 15), + .frac_hi = f.low + }; } -static inline FloatParts float64_unpack_raw(float64 f) +static void float128_unpack_raw(FloatParts128 *p, float128 f) { - return unpack_raw(float64_params, f); + const int f_size = float128_params.frac_size - 64; + const int e_size = float128_params.exp_size; + + *p = (FloatParts128) { + .cls = float_class_unclassified, + .sign = extract64(f.high, f_size + e_size, 1), + .exp = extract64(f.high, f_size, e_size), + .frac_hi = extract64(f.high, 0, f_size), + .frac_lo = f.low, + }; } /* Pack a float from parts, but do not canonicalize. 
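  * For example, float64 parts with sign = 1, exp = 0x400 and frac = 0
  * pack to 0xc000000000000000, i.e. -2.0.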
*/ -static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p) +static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt) +{ + const int f_size = fmt->frac_size; + const int e_size = fmt->exp_size; + uint64_t ret; + + ret = (uint64_t)p->sign << (f_size + e_size); + ret = deposit64(ret, f_size, e_size, p->exp); + ret = deposit64(ret, 0, f_size, p->frac); + return ret; +} + +static inline float16 float16_pack_raw(const FloatParts64 *p) { - const int sign_pos = fmt.frac_size + fmt.exp_size; - uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp); - return deposit64(ret, sign_pos, 1, p.sign); + return make_float16(pack_raw64(p, &float16_params)); } -static inline float16 float16_pack_raw(FloatParts p) +static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p) { - return make_float16(pack_raw(float16_params, p)); + return pack_raw64(p, &bfloat16_params); } -static inline bfloat16 bfloat16_pack_raw(FloatParts p) +static inline float32 float32_pack_raw(const FloatParts64 *p) { - return pack_raw(bfloat16_params, p); + return make_float32(pack_raw64(p, &float32_params)); } -static inline float32 float32_pack_raw(FloatParts p) +static inline float64 float64_pack_raw(const FloatParts64 *p) { - return make_float32(pack_raw(float32_params, p)); + return make_float64(pack_raw64(p, &float64_params)); } -static inline float64 float64_pack_raw(FloatParts p) +static float128 float128_pack_raw(const FloatParts128 *p) { - return make_float64(pack_raw(float64_params, p)); + const int f_size = float128_params.frac_size - 64; + const int e_size = float128_params.exp_size; + uint64_t hi; + + hi = (uint64_t)p->sign << (f_size + e_size); + hi = deposit64(hi, f_size, e_size, p->exp); + hi = deposit64(hi, 0, f_size, p->frac_hi); + return make_float128(hi, p->frac_lo); } /*---------------------------------------------------------------------------- @@ -637,7003 +690,4175 @@ static inline float64 float64_pack_raw(FloatParts p) *----------------------------------------------------------------------------*/ #include "softfloat-specialize.c.inc" -/* Canonicalize EXP and FRAC, setting CLS. */ -static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm, - float_status *status) -{ - if (part.exp == parm->exp_max && !parm->arm_althp) { - if (part.frac == 0) { - part.cls = float_class_inf; - } else { - part.frac <<= parm->frac_shift; - part.cls = (parts_is_snan_frac(part.frac, status) - ? float_class_snan : float_class_qnan); - } - } else if (part.exp == 0) { - if (likely(part.frac == 0)) { - part.cls = float_class_zero; - } else if (status->flush_inputs_to_zero) { - float_raise(float_flag_input_denormal, status); - part.cls = float_class_zero; - part.frac = 0; - } else { - int shift = clz64(part.frac) - 1; - part.cls = float_class_normal; - part.exp = parm->frac_shift - parm->exp_bias - shift + 1; - part.frac <<= shift; - } - } else { - part.cls = float_class_normal; - part.exp -= parm->exp_bias; - part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift); - } - return part; -} +#define PARTS_GENERIC_64_128(NAME, P) \ + _Generic((P), FloatParts64 *: parts64_##NAME, \ + FloatParts128 *: parts128_##NAME) -/* Round and uncanonicalize a floating-point number by parts. There - * are FRAC_SHIFT bits that may require rounding at the bottom of the - * fraction; these bits will be removed. The exponent will be biased - * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0]. 
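The PARTS_GENERIC_64_128 / _256 macros above are plain C11 _Generic dispatch: the worker function is chosen from the static type of the pointer argument, so callers can invoke parts_canonicalize() and friends on either width without spelling out the size. A self-contained sketch of the same pattern (the types and functions here are invented purely for illustration):

#include <stdio.h>

typedef struct { unsigned long long frac; }    Parts64;
typedef struct { unsigned long long hi, lo; }  Parts128;

static void parts64_dump(Parts64 *p)   { printf("64:  %llx\n", p->frac); }
static void parts128_dump(Parts128 *p) { printf("128: %llx:%llx\n", p->hi, p->lo); }

/* Same shape as PARTS_GENERIC_64_128: select the callee by pointer type. */
#define parts_dump(P) \
    _Generic((P), Parts64 *: parts64_dump, Parts128 *: parts128_dump)(P)

int main(void)
{
    Parts64  a = { 1 };
    Parts128 b = { 2, 3 };

    parts_dump(&a);     /* resolves to parts64_dump at compile time */
    parts_dump(&b);     /* resolves to parts128_dump */
    return 0;
}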
- */ +#define PARTS_GENERIC_64_128_256(NAME, P) \ + _Generic((P), FloatParts64 *: parts64_##NAME, \ + FloatParts128 *: parts128_##NAME, \ + FloatParts256 *: parts256_##NAME) -static FloatParts round_canonical(FloatParts p, float_status *s, - const FloatFmt *parm) -{ - const uint64_t frac_lsb = parm->frac_lsb; - const uint64_t frac_lsbm1 = parm->frac_lsbm1; - const uint64_t round_mask = parm->round_mask; - const uint64_t roundeven_mask = parm->roundeven_mask; - const int exp_max = parm->exp_max; - const int frac_shift = parm->frac_shift; - uint64_t frac, inc; - int exp, flags = 0; - bool overflow_norm; +#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S) +#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S) - frac = p.frac; - exp = p.exp; +static void parts64_return_nan(FloatParts64 *a, float_status *s); +static void parts128_return_nan(FloatParts128 *a, float_status *s); - switch (p.cls) { - case float_class_normal: - switch (s->float_rounding_mode) { - case float_round_nearest_even: - overflow_norm = false; - inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0); - break; - case float_round_ties_away: - overflow_norm = false; - inc = frac_lsbm1; - break; - case float_round_to_zero: - overflow_norm = true; - inc = 0; - break; - case float_round_up: - inc = p.sign ? 0 : round_mask; - overflow_norm = p.sign; - break; - case float_round_down: - inc = p.sign ? round_mask : 0; - overflow_norm = !p.sign; - break; - case float_round_to_odd: - overflow_norm = true; - inc = frac & frac_lsb ? 0 : round_mask; - break; - default: - g_assert_not_reached(); - } +#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S) - exp += parm->exp_bias; - if (likely(exp > 0)) { - if (frac & round_mask) { - flags |= float_flag_inexact; - frac += inc; - if (frac & DECOMPOSED_OVERFLOW_BIT) { - frac >>= 1; - exp++; - } - } - frac >>= frac_shift; - - if (parm->arm_althp) { - /* ARM Alt HP eschews Inf and NaN for a wider exponent. */ - if (unlikely(exp > exp_max)) { - /* Overflow. Return the maximum normal. */ - flags = float_flag_invalid; - exp = exp_max; - frac = -1; - } - } else if (unlikely(exp >= exp_max)) { - flags |= float_flag_overflow | float_flag_inexact; - if (overflow_norm) { - exp = exp_max - 1; - frac = -1; - } else { - p.cls = float_class_inf; - goto do_inf; - } - } - } else if (s->flush_to_zero) { - flags |= float_flag_output_denormal; - p.cls = float_class_zero; - goto do_zero; - } else { - bool is_tiny = s->tininess_before_rounding - || (exp < 0) - || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT); - - shift64RightJamming(frac, 1 - exp, &frac); - if (frac & round_mask) { - /* Need to recompute round-to-even. */ - switch (s->float_rounding_mode) { - case float_round_nearest_even: - inc = ((frac & roundeven_mask) != frac_lsbm1 - ? frac_lsbm1 : 0); - break; - case float_round_to_odd: - inc = frac & frac_lsb ? 0 : round_mask; - break; - default: - break; - } - flags |= float_flag_inexact; - frac += inc; - } +static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b, + float_status *s); +static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b, + float_status *s); - exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 
1 : 0); - frac >>= frac_shift; +#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S) - if (is_tiny && (flags & float_flag_inexact)) { - flags |= float_flag_underflow; - } - if (exp == 0 && frac == 0) { - p.cls = float_class_zero; - } - } - break; +static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b, + FloatParts64 *c, float_status *s, + int ab_mask, int abc_mask); +static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a, + FloatParts128 *b, + FloatParts128 *c, + float_status *s, + int ab_mask, int abc_mask); - case float_class_zero: - do_zero: - exp = 0; - frac = 0; - break; +#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \ + PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM) - case float_class_inf: - do_inf: - assert(!parm->arm_althp); - exp = exp_max; - frac = 0; - break; +static void parts64_canonicalize(FloatParts64 *p, float_status *status, + const FloatFmt *fmt); +static void parts128_canonicalize(FloatParts128 *p, float_status *status, + const FloatFmt *fmt); - case float_class_qnan: - case float_class_snan: - assert(!parm->arm_althp); - exp = exp_max; - frac >>= parm->frac_shift; - break; +#define parts_canonicalize(A, S, F) \ + PARTS_GENERIC_64_128(canonicalize, A)(A, S, F) - default: - g_assert_not_reached(); - } +static void parts64_uncanon_normal(FloatParts64 *p, float_status *status, + const FloatFmt *fmt); +static void parts128_uncanon_normal(FloatParts128 *p, float_status *status, + const FloatFmt *fmt); - float_raise(flags, s); - p.exp = exp; - p.frac = frac; - return p; -} +#define parts_uncanon_normal(A, S, F) \ + PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F) + +static void parts64_uncanon(FloatParts64 *p, float_status *status, + const FloatFmt *fmt); +static void parts128_uncanon(FloatParts128 *p, float_status *status, + const FloatFmt *fmt); + +#define parts_uncanon(A, S, F) \ + PARTS_GENERIC_64_128(uncanon, A)(A, S, F) + +static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b); +static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b); +static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b); + +#define parts_add_normal(A, B) \ + PARTS_GENERIC_64_128_256(add_normal, A)(A, B) -/* Explicit FloatFmt version */ -static FloatParts float16a_unpack_canonical(float16 f, float_status *s, - const FloatFmt *params) +static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b); +static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b); +static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b); + +#define parts_sub_normal(A, B) \ + PARTS_GENERIC_64_128_256(sub_normal, A)(A, B) + +static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b, + float_status *s, bool subtract); +static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b, + float_status *s, bool subtract); + +#define parts_addsub(A, B, S, Z) \ + PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z) + +static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b, + float_status *s); +static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b, + float_status *s); + +#define parts_mul(A, B, S) \ + PARTS_GENERIC_64_128(mul, A)(A, B, S) + +static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b, + FloatParts64 *c, int flags, + float_status *s); +static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b, + FloatParts128 *c, int flags, + float_status *s); + +#define parts_muladd(A, B, C, Z, S) \ + PARTS_GENERIC_64_128(muladd, A)(A, B, C, 
Z, S) + +static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b, + float_status *s); +static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b, + float_status *s); + +#define parts_div(A, B, S) \ + PARTS_GENERIC_64_128(div, A)(A, B, S) + +static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b, + uint64_t *mod_quot, float_status *s); +static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b, + uint64_t *mod_quot, float_status *s); + +#define parts_modrem(A, B, Q, S) \ + PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S) + +static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f); +static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f); + +#define parts_sqrt(A, S, F) \ + PARTS_GENERIC_64_128(sqrt, A)(A, S, F) + +static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm, + int scale, int frac_size); +static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r, + int scale, int frac_size); + +#define parts_round_to_int_normal(A, R, C, F) \ + PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F) + +static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm, + int scale, float_status *s, + const FloatFmt *fmt); +static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r, + int scale, float_status *s, + const FloatFmt *fmt); + +#define parts_round_to_int(A, R, C, S, F) \ + PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F) + +static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode, + int scale, int64_t min, int64_t max, + float_status *s); +static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode, + int scale, int64_t min, int64_t max, + float_status *s); + +#define parts_float_to_sint(P, R, Z, MN, MX, S) \ + PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S) + +static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode, + int scale, uint64_t max, + float_status *s); +static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode, + int scale, uint64_t max, + float_status *s); + +#define parts_float_to_uint(P, R, Z, M, S) \ + PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S) + +static void parts64_sint_to_float(FloatParts64 *p, int64_t a, + int scale, float_status *s); +static void parts128_sint_to_float(FloatParts128 *p, int64_t a, + int scale, float_status *s); + +#define parts_sint_to_float(P, I, Z, S) \ + PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S) + +static void parts64_uint_to_float(FloatParts64 *p, uint64_t a, + int scale, float_status *s); +static void parts128_uint_to_float(FloatParts128 *p, uint64_t a, + int scale, float_status *s); + +#define parts_uint_to_float(P, I, Z, S) \ + PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S) + +static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b, + float_status *s, int flags); +static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b, + float_status *s, int flags); + +#define parts_minmax(A, B, S, F) \ + PARTS_GENERIC_64_128(minmax, A)(A, B, S, F) + +static int parts64_compare(FloatParts64 *a, FloatParts64 *b, + float_status *s, bool q); +static int parts128_compare(FloatParts128 *a, FloatParts128 *b, + float_status *s, bool q); + +#define parts_compare(A, B, S, Q) \ + PARTS_GENERIC_64_128(compare, A)(A, B, S, Q) + +static void parts64_scalbn(FloatParts64 *a, int n, float_status *s); +static void parts128_scalbn(FloatParts128 *a, int n, float_status *s); + +#define parts_scalbn(A, 
N, S) \ + PARTS_GENERIC_64_128(scalbn, A)(A, N, S) + +static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f); +static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f); + +#define parts_log2(A, S, F) \ + PARTS_GENERIC_64_128(log2, A)(A, S, F) + +/* + * Helper functions for softfloat-parts.c.inc, per-size operations. + */ + +#define FRAC_GENERIC_64_128(NAME, P) \ + _Generic((P), FloatParts64 *: frac64_##NAME, \ + FloatParts128 *: frac128_##NAME) + +#define FRAC_GENERIC_64_128_256(NAME, P) \ + _Generic((P), FloatParts64 *: frac64_##NAME, \ + FloatParts128 *: frac128_##NAME, \ + FloatParts256 *: frac256_##NAME) + +static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b) { - return sf_canonicalize(float16_unpack_raw(f), params, s); + return uadd64_overflow(a->frac, b->frac, &r->frac); } -static FloatParts float16_unpack_canonical(float16 f, float_status *s) +static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b) { - return float16a_unpack_canonical(f, s, &float16_params); + bool c = 0; + r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c); + r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c); + return c; } -static FloatParts bfloat16_unpack_canonical(bfloat16 f, float_status *s) +static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b) { - return sf_canonicalize(bfloat16_unpack_raw(f), &bfloat16_params, s); + bool c = 0; + r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c); + r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c); + r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c); + r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c); + return c; } -static float16 float16a_round_pack_canonical(FloatParts p, float_status *s, - const FloatFmt *params) +#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B) + +static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c) { - return float16_pack_raw(round_canonical(p, s, params)); + return uadd64_overflow(a->frac, c, &r->frac); } -static float16 float16_round_pack_canonical(FloatParts p, float_status *s) +static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c) { - return float16a_round_pack_canonical(p, s, &float16_params); + c = uadd64_overflow(a->frac_lo, c, &r->frac_lo); + return uadd64_overflow(a->frac_hi, c, &r->frac_hi); } -static bfloat16 bfloat16_round_pack_canonical(FloatParts p, float_status *s) +#define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C) + +static void frac64_allones(FloatParts64 *a) { - return bfloat16_pack_raw(round_canonical(p, s, &bfloat16_params)); + a->frac = -1; } -static FloatParts float32_unpack_canonical(float32 f, float_status *s) +static void frac128_allones(FloatParts128 *a) { - return sf_canonicalize(float32_unpack_raw(f), &float32_params, s); + a->frac_hi = a->frac_lo = -1; } -static float32 float32_round_pack_canonical(FloatParts p, float_status *s) +#define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A) + +static int frac64_cmp(FloatParts64 *a, FloatParts64 *b) { - return float32_pack_raw(round_canonical(p, s, &float32_params)); + return a->frac == b->frac ? 0 : a->frac < b->frac ? 
-1 : 1; } -static FloatParts float64_unpack_canonical(float64 f, float_status *s) +static int frac128_cmp(FloatParts128 *a, FloatParts128 *b) { - return sf_canonicalize(float64_unpack_raw(f), &float64_params, s); + uint64_t ta = a->frac_hi, tb = b->frac_hi; + if (ta == tb) { + ta = a->frac_lo, tb = b->frac_lo; + if (ta == tb) { + return 0; + } + } + return ta < tb ? -1 : 1; } -static float64 float64_round_pack_canonical(FloatParts p, float_status *s) +#define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B) + +static void frac64_clear(FloatParts64 *a) { - return float64_pack_raw(round_canonical(p, s, &float64_params)); + a->frac = 0; } -static FloatParts return_nan(FloatParts a, float_status *s) +static void frac128_clear(FloatParts128 *a) { - switch (a.cls) { - case float_class_snan: - s->float_exception_flags |= float_flag_invalid; - a = parts_silence_nan(a, s); - /* fall through */ - case float_class_qnan: - if (s->default_nan_mode) { - return parts_default_nan(s); - } - break; - - default: - g_assert_not_reached(); - } - return a; + a->frac_hi = a->frac_lo = 0; } -static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s) +#define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A) + +static bool frac64_div(FloatParts64 *a, FloatParts64 *b) { - if (is_snan(a.cls) || is_snan(b.cls)) { - s->float_exception_flags |= float_flag_invalid; - } + uint64_t n1, n0, r, q; + bool ret; - if (s->default_nan_mode) { - return parts_default_nan(s); + /* + * We want a 2*N / N-bit division to produce exactly an N-bit + * result, so that we do not lose any precision and so that we + * do not have to renormalize afterward. If A.frac < B.frac, + * then division would produce an (N-1)-bit result; shift A left + * by one to produce the an N-bit result, and return true to + * decrement the exponent to match. + * + * The udiv_qrnnd algorithm that we're using requires normalization, + * i.e. the msb of the denominator must be set, which is already true. + */ + ret = a->frac < b->frac; + if (ret) { + n0 = a->frac; + n1 = 0; } else { - if (pickNaN(a.cls, b.cls, - a.frac > b.frac || - (a.frac == b.frac && a.sign < b.sign), s)) { - a = b; - } - if (is_snan(a.cls)) { - return parts_silence_nan(a, s); - } + n0 = a->frac >> 1; + n1 = a->frac << 63; } - return a; + q = udiv_qrnnd(&r, n0, n1, b->frac); + + /* Set lsb if there is a remainder, to set inexact. */ + a->frac = q | (r != 0); + + return ret; } -static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c, - bool inf_zero, float_status *s) +static bool frac128_div(FloatParts128 *a, FloatParts128 *b) { - int which; - - if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) { - s->float_exception_flags |= float_flag_invalid; - } + uint64_t q0, q1, a0, a1, b0, b1; + uint64_t r0, r1, r2, r3, t0, t1, t2, t3; + bool ret = false; - which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s); + a0 = a->frac_hi, a1 = a->frac_lo; + b0 = b->frac_hi, b1 = b->frac_lo; - if (s->default_nan_mode) { - /* Note that this check is after pickNaNMulAdd so that function - * has an opportunity to set the Invalid flag. - */ - which = 3; + ret = lt128(a0, a1, b0, b1); + if (!ret) { + a1 = shr_double(a0, a1, 1); + a0 = a0 >> 1; } - switch (which) { - case 0: - break; - case 1: - a = b; - break; - case 2: - a = c; - break; - case 3: - return parts_default_nan(s); - default: - g_assert_not_reached(); - } + /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. 
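The pre-shift in frac64_div above is what guarantees a full 64-bit quotient: both fractions are normalized (msb set), so a/b lies in [1/2, 2); scaling the dividend by 2^64 when a < b and by 2^63 otherwise puts the quotient in [2^63, 2^64). A rough equivalent using the GCC/Clang unsigned __int128 extension in place of udiv_qrnnd, purely as an editor's sketch:

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool frac_div_sketch(uint64_t *a, uint64_t b)
{
    bool exp_dec = *a < b;                   /* need one extra doubling */
    unsigned __int128 n = (unsigned __int128)*a << (exp_dec ? 64 : 63);
    uint64_t q = (uint64_t)(n / b);
    uint64_t r = (uint64_t)(n % b);

    *a = q | (r != 0);                       /* sticky lsb records inexactness */
    return exp_dec;                          /* caller decrements the exponent */
}

int main(void)
{
    uint64_t f = 0x8000000000000000ull;                      /* 1.0 */
    bool dec = frac_div_sketch(&f, 0xc000000000000000ull);   /* / 1.5 */

    printf("%d 0x%016" PRIx64 "\n", dec, f);  /* 1 0xaaaaaaaaaaaaaaab */
    return 0;
}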
*/ + q0 = estimateDiv128To64(a0, a1, b0); - if (is_snan(a.cls)) { - return parts_silence_nan(a, s); + /* + * Estimate is high because B1 was not included (unless B1 == 0). + * Reduce quotient and increase remainder until remainder is non-negative. + * This loop will execute 0 to 2 times. + */ + mul128By64To192(b0, b1, q0, &t0, &t1, &t2); + sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2); + while (r0 != 0) { + q0--; + add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2); } - return a; -} -/* - * Returns the result of adding or subtracting the values of the - * floating-point values `a' and `b'. The operation is performed - * according to the IEC/IEEE Standard for Binary Floating-Point - * Arithmetic. - */ + /* Repeat using the remainder, producing a second word of quotient. */ + q1 = estimateDiv128To64(r1, r2, b0); + mul128By64To192(b0, b1, q1, &t1, &t2, &t3); + sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3); + while (r1 != 0) { + q1--; + add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3); + } -static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract, - float_status *s) -{ - bool a_sign = a.sign; - bool b_sign = b.sign ^ subtract; - - if (a_sign != b_sign) { - /* Subtraction */ - - if (a.cls == float_class_normal && b.cls == float_class_normal) { - if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) { - shift64RightJamming(b.frac, a.exp - b.exp, &b.frac); - a.frac = a.frac - b.frac; - } else { - shift64RightJamming(a.frac, b.exp - a.exp, &a.frac); - a.frac = b.frac - a.frac; - a.exp = b.exp; - a_sign ^= 1; - } + /* Any remainder indicates inexact; set sticky bit. */ + q1 |= (r2 | r3) != 0; - if (a.frac == 0) { - a.cls = float_class_zero; - a.sign = s->float_rounding_mode == float_round_down; - } else { - int shift = clz64(a.frac) - 1; - a.frac = a.frac << shift; - a.exp = a.exp - shift; - a.sign = a_sign; - } - return a; - } - if (is_nan(a.cls) || is_nan(b.cls)) { - return pick_nan(a, b, s); - } - if (a.cls == float_class_inf) { - if (b.cls == float_class_inf) { - float_raise(float_flag_invalid, s); - return parts_default_nan(s); - } - return a; - } - if (a.cls == float_class_zero && b.cls == float_class_zero) { - a.sign = s->float_rounding_mode == float_round_down; - return a; - } - if (a.cls == float_class_zero || b.cls == float_class_inf) { - b.sign = a_sign ^ 1; - return b; - } - if (b.cls == float_class_zero) { - return a; - } - } else { - /* Addition */ - if (a.cls == float_class_normal && b.cls == float_class_normal) { - if (a.exp > b.exp) { - shift64RightJamming(b.frac, a.exp - b.exp, &b.frac); - } else if (a.exp < b.exp) { - shift64RightJamming(a.frac, b.exp - a.exp, &a.frac); - a.exp = b.exp; - } - a.frac += b.frac; - if (a.frac & DECOMPOSED_OVERFLOW_BIT) { - shift64RightJamming(a.frac, 1, &a.frac); - a.exp += 1; - } - return a; - } - if (is_nan(a.cls) || is_nan(b.cls)) { - return pick_nan(a, b, s); - } - if (a.cls == float_class_inf || b.cls == float_class_zero) { - return a; - } - if (b.cls == float_class_inf || a.cls == float_class_zero) { - b.sign = b_sign; - return b; - } - } - g_assert_not_reached(); + a->frac_hi = q0; + a->frac_lo = q1; + return ret; } -/* - * Returns the result of adding or subtracting the floating-point - * values `a' and `b'. The operation is performed according to the - * IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
- */ +#define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B) -float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status) +static bool frac64_eqz(FloatParts64 *a) { - FloatParts pa = float16_unpack_canonical(a, status); - FloatParts pb = float16_unpack_canonical(b, status); - FloatParts pr = addsub_floats(pa, pb, false, status); - - return float16_round_pack_canonical(pr, status); + return a->frac == 0; } -float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status) +static bool frac128_eqz(FloatParts128 *a) { - FloatParts pa = float16_unpack_canonical(a, status); - FloatParts pb = float16_unpack_canonical(b, status); - FloatParts pr = addsub_floats(pa, pb, true, status); - - return float16_round_pack_canonical(pr, status); + return (a->frac_hi | a->frac_lo) == 0; } -static float32 QEMU_SOFTFLOAT_ATTR -soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status) -{ - FloatParts pa = float32_unpack_canonical(a, status); - FloatParts pb = float32_unpack_canonical(b, status); - FloatParts pr = addsub_floats(pa, pb, subtract, status); - - return float32_round_pack_canonical(pr, status); -} +#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A) -static inline float32 soft_f32_add(float32 a, float32 b, float_status *status) +static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b) { - return soft_f32_addsub(a, b, false, status); + mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac); } -static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status) +static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b) { - return soft_f32_addsub(a, b, true, status); + mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo, + &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo); } -static float64 QEMU_SOFTFLOAT_ATTR -soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status) -{ - FloatParts pa = float64_unpack_canonical(a, status); - FloatParts pb = float64_unpack_canonical(b, status); - FloatParts pr = addsub_floats(pa, pb, subtract, status); +#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B) - return float64_round_pack_canonical(pr, status); +static void frac64_neg(FloatParts64 *a) +{ + a->frac = -a->frac; } -static inline float64 soft_f64_add(float64 a, float64 b, float_status *status) +static void frac128_neg(FloatParts128 *a) { - return soft_f64_addsub(a, b, false, status); + bool c = 0; + a->frac_lo = usub64_borrow(0, a->frac_lo, &c); + a->frac_hi = usub64_borrow(0, a->frac_hi, &c); } -static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status) +static void frac256_neg(FloatParts256 *a) { - return soft_f64_addsub(a, b, true, status); + bool c = 0; + a->frac_lo = usub64_borrow(0, a->frac_lo, &c); + a->frac_lm = usub64_borrow(0, a->frac_lm, &c); + a->frac_hm = usub64_borrow(0, a->frac_hm, &c); + a->frac_hi = usub64_borrow(0, a->frac_hi, &c); } -static float hard_f32_add(float a, float b) +#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A) + +static int frac64_normalize(FloatParts64 *a) { - return a + b; + if (a->frac) { + int shift = clz64(a->frac); + a->frac <<= shift; + return shift; + } + return 64; } -static float hard_f32_sub(float a, float b) +static int frac128_normalize(FloatParts128 *a) { - return a - b; + if (a->frac_hi) { + int shl = clz64(a->frac_hi); + a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl); + a->frac_lo <<= shl; + return shl; + } else if (a->frac_lo) { + int shl = clz64(a->frac_lo); + a->frac_hi = a->frac_lo << shl; + 
a->frac_lo = 0; + return shl + 64; + } + return 128; } -static double hard_f64_add(double a, double b) +static int frac256_normalize(FloatParts256 *a) { - return a + b; -} + uint64_t a0 = a->frac_hi, a1 = a->frac_hm; + uint64_t a2 = a->frac_lm, a3 = a->frac_lo; + int ret, shl; -static double hard_f64_sub(double a, double b) -{ - return a - b; + if (likely(a0)) { + shl = clz64(a0); + if (shl == 0) { + return 0; + } + ret = shl; + } else { + if (a1) { + ret = 64; + a0 = a1, a1 = a2, a2 = a3, a3 = 0; + } else if (a2) { + ret = 128; + a0 = a2, a1 = a3, a2 = 0, a3 = 0; + } else if (a3) { + ret = 192; + a0 = a3, a1 = 0, a2 = 0, a3 = 0; + } else { + ret = 256; + a0 = 0, a1 = 0, a2 = 0, a3 = 0; + goto done; + } + shl = clz64(a0); + if (shl == 0) { + goto done; + } + ret += shl; + } + + a0 = shl_double(a0, a1, shl); + a1 = shl_double(a1, a2, shl); + a2 = shl_double(a2, a3, shl); + a3 <<= shl; + + done: + a->frac_hi = a0; + a->frac_hm = a1; + a->frac_lm = a2; + a->frac_lo = a3; + return ret; } -static bool f32_addsubmul_post(union_float32 a, union_float32 b) +#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A) + +static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot) { - if (QEMU_HARDFLOAT_2F32_USE_FP) { - return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); + uint64_t a0, a1, b0, t0, t1, q, quot; + int exp_diff = a->exp - b->exp; + int shift; + + a0 = a->frac; + a1 = 0; + + if (exp_diff < -1) { + if (mod_quot) { + *mod_quot = 0; + } + return; + } + if (exp_diff == -1) { + a0 >>= 1; + exp_diff = 0; } - return !(float32_is_zero(a.s) && float32_is_zero(b.s)); -} -static bool f64_addsubmul_post(union_float64 a, union_float64 b) -{ - if (QEMU_HARDFLOAT_2F64_USE_FP) { - return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); + b0 = b->frac; + quot = q = b0 <= a0; + if (q) { + a0 -= b0; + } + + exp_diff -= 64; + while (exp_diff > 0) { + q = estimateDiv128To64(a0, a1, b0); + q = q > 2 ? q - 2 : 0; + mul64To128(b0, q, &t0, &t1); + sub128(a0, a1, t0, t1, &a0, &a1); + shortShift128Left(a0, a1, 62, &a0, &a1); + exp_diff -= 62; + quot = (quot << 62) + q; + } + + exp_diff += 64; + if (exp_diff > 0) { + q = estimateDiv128To64(a0, a1, b0); + q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0; + mul64To128(b0, q << (64 - exp_diff), &t0, &t1); + sub128(a0, a1, t0, t1, &a0, &a1); + shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1); + while (le128(t0, t1, a0, a1)) { + ++q; + sub128(a0, a1, t0, t1, &a0, &a1); + } + quot = (exp_diff < 64 ? 
quot << exp_diff : 0) + q; } else { - return !(float64_is_zero(a.s) && float64_is_zero(b.s)); + t0 = b0; + t1 = 0; } -} -static float32 float32_addsub(float32 a, float32 b, float_status *s, - hard_f32_op2_fn hard, soft_f32_op2_fn soft) -{ - return float32_gen2(a, b, s, hard, soft, - f32_is_zon2, f32_addsubmul_post); -} + if (mod_quot) { + *mod_quot = quot; + } else { + sub128(t0, t1, a0, a1, &t0, &t1); + if (lt128(t0, t1, a0, a1) || + (eq128(t0, t1, a0, a1) && (q & 1))) { + a0 = t0; + a1 = t1; + a->sign = !a->sign; + } + } + + if (likely(a0)) { + shift = clz64(a0); + shortShift128Left(a0, a1, shift, &a0, &a1); + } else if (likely(a1)) { + shift = clz64(a1); + a0 = a1 << shift; + a1 = 0; + shift += 64; + } else { + a->cls = float_class_zero; + return; + } -static float64 float64_addsub(float64 a, float64 b, float_status *s, - hard_f64_op2_fn hard, soft_f64_op2_fn soft) -{ - return float64_gen2(a, b, s, hard, soft, - f64_is_zon2, f64_addsubmul_post); + a->exp = b->exp + exp_diff - shift; + a->frac = a0 | (a1 != 0); } -float32 QEMU_FLATTEN -float32_add(float32 a, float32 b, float_status *s) +static void frac128_modrem(FloatParts128 *a, FloatParts128 *b, + uint64_t *mod_quot) { - return float32_addsub(a, b, s, hard_f32_add, soft_f32_add); + uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot; + int exp_diff = a->exp - b->exp; + int shift; + + a0 = a->frac_hi; + a1 = a->frac_lo; + a2 = 0; + + if (exp_diff < -1) { + if (mod_quot) { + *mod_quot = 0; + } + return; + } + if (exp_diff == -1) { + shift128Right(a0, a1, 1, &a0, &a1); + exp_diff = 0; + } + + b0 = b->frac_hi; + b1 = b->frac_lo; + + quot = q = le128(b0, b1, a0, a1); + if (q) { + sub128(a0, a1, b0, b1, &a0, &a1); + } + + exp_diff -= 64; + while (exp_diff > 0) { + q = estimateDiv128To64(a0, a1, b0); + q = q > 4 ? q - 4 : 0; + mul128By64To192(b0, b1, q, &t0, &t1, &t2); + sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2); + shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2); + exp_diff -= 61; + quot = (quot << 61) + q; + } + + exp_diff += 64; + if (exp_diff > 0) { + q = estimateDiv128To64(a0, a1, b0); + q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0; + mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2); + sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2); + shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2); + while (le192(t0, t1, t2, a0, a1, a2)) { + ++q; + sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2); + } + quot = (exp_diff < 64 ? 
quot << exp_diff : 0) + q; + } else { + t0 = b0; + t1 = b1; + t2 = 0; + } + + if (mod_quot) { + *mod_quot = quot; + } else { + sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2); + if (lt192(t0, t1, t2, a0, a1, a2) || + (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) { + a0 = t0; + a1 = t1; + a2 = t2; + a->sign = !a->sign; + } + } + + if (likely(a0)) { + shift = clz64(a0); + shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2); + } else if (likely(a1)) { + shift = clz64(a1); + shortShift128Left(a1, a2, shift, &a0, &a1); + a2 = 0; + shift += 64; + } else if (likely(a2)) { + shift = clz64(a2); + a0 = a2 << shift; + a1 = a2 = 0; + shift += 128; + } else { + a->cls = float_class_zero; + return; + } + + a->exp = b->exp + exp_diff - shift; + a->frac_hi = a0; + a->frac_lo = a1 | (a2 != 0); } -float32 QEMU_FLATTEN -float32_sub(float32 a, float32 b, float_status *s) +#define frac_modrem(A, B, Q) FRAC_GENERIC_64_128(modrem, A)(A, B, Q) + +static void frac64_shl(FloatParts64 *a, int c) { - return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub); + a->frac <<= c; } -float64 QEMU_FLATTEN -float64_add(float64 a, float64 b, float_status *s) +static void frac128_shl(FloatParts128 *a, int c) { - return float64_addsub(a, b, s, hard_f64_add, soft_f64_add); + uint64_t a0 = a->frac_hi, a1 = a->frac_lo; + + if (c & 64) { + a0 = a1, a1 = 0; + } + + c &= 63; + if (c) { + a0 = shl_double(a0, a1, c); + a1 = a1 << c; + } + + a->frac_hi = a0; + a->frac_lo = a1; } -float64 QEMU_FLATTEN -float64_sub(float64 a, float64 b, float_status *s) +#define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C) + +static void frac64_shr(FloatParts64 *a, int c) { - return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub); + a->frac >>= c; } -/* - * Returns the result of adding or subtracting the bfloat16 - * values `a' and `b'. - */ -bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status) +static void frac128_shr(FloatParts128 *a, int c) { - FloatParts pa = bfloat16_unpack_canonical(a, status); - FloatParts pb = bfloat16_unpack_canonical(b, status); - FloatParts pr = addsub_floats(pa, pb, false, status); + uint64_t a0 = a->frac_hi, a1 = a->frac_lo; - return bfloat16_round_pack_canonical(pr, status); -} + if (c & 64) { + a1 = a0, a0 = 0; + } -bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status) -{ - FloatParts pa = bfloat16_unpack_canonical(a, status); - FloatParts pb = bfloat16_unpack_canonical(b, status); - FloatParts pr = addsub_floats(pa, pb, true, status); + c &= 63; + if (c) { + a1 = shr_double(a0, a1, c); + a0 = a0 >> c; + } - return bfloat16_round_pack_canonical(pr, status); + a->frac_hi = a0; + a->frac_lo = a1; } -/* - * Returns the result of multiplying the floating-point values `a' and - * `b'. The operation is performed according to the IEC/IEEE Standard - * for Binary Floating-Point Arithmetic. 
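The frac*_shr helpers above simply discard the bits shifted out; the frac*_shrjam variants that follow instead fold them into the least significant bit (the traditional softfloat "jamming" or sticky shift), so a later rounding step can still tell that low-order bits were lost. A one-word illustration of the difference (editor's sketch, not the QEMU helpers):

#include <assert.h>
#include <stdint.h>

/* Shift right by c (0 < c < 64), ORing any shifted-out bits into bit 0. */
static uint64_t shr_sticky(uint64_t x, int c)
{
    uint64_t dropped = x << (64 - c);        /* the bits that fall off the end */
    return (x >> c) | (dropped != 0);
}

int main(void)
{
    /* The lost low bit survives as a sticky lsb ... */
    assert(shr_sticky(0x8000000000000001ull, 4) == 0x0800000000000001ull);
    /* ... while an exact shift stays exact. */
    assert(shr_sticky(0x8000000000000000ull, 4) == 0x0800000000000000ull);
    return 0;
}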
- */ +#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C) -static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s) +static void frac64_shrjam(FloatParts64 *a, int c) { - bool sign = a.sign ^ b.sign; + uint64_t a0 = a->frac; - if (a.cls == float_class_normal && b.cls == float_class_normal) { - uint64_t hi, lo; - int exp = a.exp + b.exp; - - mul64To128(a.frac, b.frac, &hi, &lo); - shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo); - if (lo & DECOMPOSED_OVERFLOW_BIT) { - shift64RightJamming(lo, 1, &lo); - exp += 1; + if (likely(c != 0)) { + if (likely(c < 64)) { + a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0); + } else { + a0 = a0 != 0; } - - /* Re-use a */ - a.exp = exp; - a.sign = sign; - a.frac = lo; - return a; - } - /* handle all the NaN cases */ - if (is_nan(a.cls) || is_nan(b.cls)) { - return pick_nan(a, b, s); - } - /* Inf * Zero == NaN */ - if ((a.cls == float_class_inf && b.cls == float_class_zero) || - (a.cls == float_class_zero && b.cls == float_class_inf)) { - s->float_exception_flags |= float_flag_invalid; - return parts_default_nan(s); - } - /* Multiply by 0 or Inf */ - if (a.cls == float_class_inf || a.cls == float_class_zero) { - a.sign = sign; - return a; + a->frac = a0; } - if (b.cls == float_class_inf || b.cls == float_class_zero) { - b.sign = sign; - return b; - } - g_assert_not_reached(); } -float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status) +static void frac128_shrjam(FloatParts128 *a, int c) { - FloatParts pa = float16_unpack_canonical(a, status); - FloatParts pb = float16_unpack_canonical(b, status); - FloatParts pr = mul_floats(pa, pb, status); + uint64_t a0 = a->frac_hi, a1 = a->frac_lo; + uint64_t sticky = 0; - return float16_round_pack_canonical(pr, status); + if (unlikely(c == 0)) { + return; + } else if (likely(c < 64)) { + /* nothing */ + } else if (likely(c < 128)) { + sticky = a1; + a1 = a0; + a0 = 0; + c &= 63; + if (c == 0) { + goto done; + } + } else { + sticky = a0 | a1; + a0 = a1 = 0; + goto done; + } + + sticky |= shr_double(a1, 0, c); + a1 = shr_double(a0, a1, c); + a0 = a0 >> c; + + done: + a->frac_lo = a1 | (sticky != 0); + a->frac_hi = a0; } -static float32 QEMU_SOFTFLOAT_ATTR -soft_f32_mul(float32 a, float32 b, float_status *status) +static void frac256_shrjam(FloatParts256 *a, int c) { - FloatParts pa = float32_unpack_canonical(a, status); - FloatParts pb = float32_unpack_canonical(b, status); - FloatParts pr = mul_floats(pa, pb, status); + uint64_t a0 = a->frac_hi, a1 = a->frac_hm; + uint64_t a2 = a->frac_lm, a3 = a->frac_lo; + uint64_t sticky = 0; - return float32_round_pack_canonical(pr, status); + if (unlikely(c == 0)) { + return; + } else if (likely(c < 64)) { + /* nothing */ + } else if (likely(c < 256)) { + if (unlikely(c & 128)) { + sticky |= a2 | a3; + a3 = a1, a2 = a0, a1 = 0, a0 = 0; + } + if (unlikely(c & 64)) { + sticky |= a3; + a3 = a2, a2 = a1, a1 = a0, a0 = 0; + } + c &= 63; + if (c == 0) { + goto done; + } + } else { + sticky = a0 | a1 | a2 | a3; + a0 = a1 = a2 = a3 = 0; + goto done; + } + + sticky |= shr_double(a3, 0, c); + a3 = shr_double(a2, a3, c); + a2 = shr_double(a1, a2, c); + a1 = shr_double(a0, a1, c); + a0 = a0 >> c; + + done: + a->frac_lo = a3 | (sticky != 0); + a->frac_lm = a2; + a->frac_hm = a1; + a->frac_hi = a0; } -static float64 QEMU_SOFTFLOAT_ATTR -soft_f64_mul(float64 a, float64 b, float_status *status) -{ - FloatParts pa = float64_unpack_canonical(a, status); - FloatParts pb = float64_unpack_canonical(b, status); - FloatParts pr = mul_floats(pa, pb, 
status); +#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C) - return float64_round_pack_canonical(pr, status); +static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b) +{ + return usub64_overflow(a->frac, b->frac, &r->frac); } -static float hard_f32_mul(float a, float b) +static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b) { - return a * b; + bool c = 0; + r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c); + r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c); + return c; } -static double hard_f64_mul(double a, double b) +static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b) { - return a * b; + bool c = 0; + r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c); + r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c); + r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c); + r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c); + return c; } -float32 QEMU_FLATTEN -float32_mul(float32 a, float32 b, float_status *s) +#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B) + +static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a) { - return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul, - f32_is_zon2, f32_addsubmul_post); + r->frac = a->frac_hi | (a->frac_lo != 0); } -float64 QEMU_FLATTEN -float64_mul(float64 a, float64 b, float_status *s) +static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a) { - return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul, - f64_is_zon2, f64_addsubmul_post); + r->frac_hi = a->frac_hi; + r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0); } -/* - * Returns the result of multiplying the bfloat16 - * values `a' and `b'. - */ +#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A) -bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status) +static void frac64_widen(FloatParts128 *r, FloatParts64 *a) { - FloatParts pa = bfloat16_unpack_canonical(a, status); - FloatParts pb = bfloat16_unpack_canonical(b, status); - FloatParts pr = mul_floats(pa, pb, status); + r->frac_hi = a->frac; + r->frac_lo = 0; +} - return bfloat16_round_pack_canonical(pr, status); +static void frac128_widen(FloatParts256 *r, FloatParts128 *a) +{ + r->frac_hi = a->frac_hi; + r->frac_hm = a->frac_lo; + r->frac_lm = 0; + r->frac_lo = 0; } +#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B) + /* - * Returns the result of multiplying the floating-point values `a' and - * `b' then adding 'c', with no intermediate rounding step after the - * multiplication. The operation is performed according to the - * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008. - * The flags argument allows the caller to select negation of the - * addend, the intermediate product, or the final result. (The - * difference between this and having the caller do a separate - * negation is that negating externally will flip the sign bit on - * NaNs.) + * Reciprocal sqrt table. 1 bit of exponent, 6-bits of mantessa. + * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c + * and thus MIT licenced. 
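The table above only supplies initial reciprocal-square-root estimates (its musl origin is stated in the comment); how it is indexed and refined is not visible in this hunk. The usual pattern with such a table is to sharpen the estimate with Newton-Raphson steps, r' = r * (3 - d*r*r) / 2, each of which roughly doubles the number of correct bits. A floating-point illustration of that refinement step only, unrelated to the fixed-point arithmetic the softfloat code itself uses:

#include <stdio.h>

int main(void)
{
    double d = 2.0;
    double r = 0.7;                          /* crude estimate of 1/sqrt(2) */

    for (int i = 0; i < 4; i++) {
        r = r * (1.5 - 0.5 * d * r * r);     /* Newton-Raphson step for 1/sqrt(d) */
    }
    printf("1/sqrt(2) ~ %.17g, sqrt(2) ~ %.17g\n", r, r * d);
    return 0;
}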
*/ +static const uint16_t rsqrt_tab[128] = { + 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43, + 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b, + 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1, + 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430, + 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59, + 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925, + 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479, + 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040, + 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234, + 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2, + 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1, + 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192, + 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f, + 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4, + 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59, + 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560, +}; -static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c, - int flags, float_status *s) -{ - bool inf_zero = ((1 << a.cls) | (1 << b.cls)) == - ((1 << float_class_inf) | (1 << float_class_zero)); - bool p_sign; - bool sign_flip = flags & float_muladd_negate_result; - FloatClass p_class; - uint64_t hi, lo; - int p_exp; - - /* It is implementation-defined whether the cases of (0,inf,qnan) - * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN - * they return if they do), so we have to hand this information - * off to the target-specific pick-a-NaN routine. - */ - if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) { - return pick_nan_muladd(a, b, c, inf_zero, s); - } - - if (inf_zero) { - s->float_exception_flags |= float_flag_invalid; - return parts_default_nan(s); - } - - if (flags & float_muladd_negate_c) { - c.sign ^= 1; - } - - p_sign = a.sign ^ b.sign; - - if (flags & float_muladd_negate_product) { - p_sign ^= 1; - } - - if (a.cls == float_class_inf || b.cls == float_class_inf) { - p_class = float_class_inf; - } else if (a.cls == float_class_zero || b.cls == float_class_zero) { - p_class = float_class_zero; - } else { - p_class = float_class_normal; - } - - if (c.cls == float_class_inf) { - if (p_class == float_class_inf && p_sign != c.sign) { - s->float_exception_flags |= float_flag_invalid; - return parts_default_nan(s); - } else { - a.cls = float_class_inf; - a.sign = c.sign ^ sign_flip; - return a; - } - } - - if (p_class == float_class_inf) { - a.cls = float_class_inf; - a.sign = p_sign ^ sign_flip; - return a; - } - - if (p_class == float_class_zero) { - if (c.cls == float_class_zero) { - if (p_sign != c.sign) { - p_sign = s->float_rounding_mode == float_round_down; - } - c.sign = p_sign; - } else if (flags & float_muladd_halve_result) { - c.exp -= 1; - } - c.sign ^= sign_flip; - return c; - } +#define partsN(NAME) glue(glue(glue(parts,N),_),NAME) +#define FloatPartsN glue(FloatParts,N) +#define FloatPartsW glue(FloatParts,W) - /* a & b should be normals now... */ - assert(a.cls == float_class_normal && - b.cls == float_class_normal); +#define N 64 +#define W 128 - p_exp = a.exp + b.exp; +#include "softfloat-parts-addsub.c.inc" +#include "softfloat-parts.c.inc" - /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit - * result. 
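The partsN / FloatPartsN block above is the template mechanism that replaces the hand-written per-width code being deleted in this hunk: N and W are set, softfloat-parts-addsub.c.inc and softfloat-parts.c.inc are included, and the glue() token pasting turns every partsN(NAME) into parts64_NAME, parts128_NAME or parts256_NAME. A stripped-down, single-file sketch of the same multiple-expansion idea (glue/xglue below are local stand-ins, and the repeated function definition stands in for re-including the .c.inc file):

#include <stdio.h>

#define xglue(a, b) a##b
#define glue(a, b)  xglue(a, b)
#define widthN      glue(width, N)      /* same trick as partsN/FloatPartsN */

#define N 64
static int widthN(void) { return N; }   /* expands to width64(void) */
#undef N

#define N 128
static int widthN(void) { return N; }   /* expands to width128(void) */
#undef N

int main(void)
{
    printf("%d %d\n", width64(), width128());   /* prints "64 128" */
    return 0;
}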
- */ - mul64To128(a.frac, b.frac, &hi, &lo); - /* binary point now at bit 124 */ +#undef N +#undef W +#define N 128 +#define W 256 - /* check for overflow */ - if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) { - shift128RightJamming(hi, lo, 1, &hi, &lo); - p_exp += 1; - } +#include "softfloat-parts-addsub.c.inc" +#include "softfloat-parts.c.inc" - /* + add/sub */ - if (c.cls == float_class_zero) { - /* move binary point back to 62 */ - shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo); - } else { - int exp_diff = p_exp - c.exp; - if (p_sign == c.sign) { - /* Addition */ - if (exp_diff <= 0) { - shift128RightJamming(hi, lo, - DECOMPOSED_BINARY_POINT - exp_diff, - &hi, &lo); - lo += c.frac; - p_exp = c.exp; - } else { - uint64_t c_hi, c_lo; - /* shift c to the same binary point as the product (124) */ - c_hi = c.frac >> 2; - c_lo = 0; - shift128RightJamming(c_hi, c_lo, - exp_diff, - &c_hi, &c_lo); - add128(hi, lo, c_hi, c_lo, &hi, &lo); - /* move binary point back to 62 */ - shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo); - } +#undef N +#undef W +#define N 256 - if (lo & DECOMPOSED_OVERFLOW_BIT) { - shift64RightJamming(lo, 1, &lo); - p_exp += 1; - } +#include "softfloat-parts-addsub.c.inc" - } else { - /* Subtraction */ - uint64_t c_hi, c_lo; - /* make C binary point match product at bit 124 */ - c_hi = c.frac >> 2; - c_lo = 0; - - if (exp_diff <= 0) { - shift128RightJamming(hi, lo, -exp_diff, &hi, &lo); - if (exp_diff == 0 - && - (hi > c_hi || (hi == c_hi && lo >= c_lo))) { - sub128(hi, lo, c_hi, c_lo, &hi, &lo); - } else { - sub128(c_hi, c_lo, hi, lo, &hi, &lo); - p_sign ^= 1; - p_exp = c.exp; - } - } else { - shift128RightJamming(c_hi, c_lo, - exp_diff, - &c_hi, &c_lo); - sub128(hi, lo, c_hi, c_lo, &hi, &lo); - } +#undef N +#undef W +#undef partsN +#undef FloatPartsN +#undef FloatPartsW - if (hi == 0 && lo == 0) { - a.cls = float_class_zero; - a.sign = s->float_rounding_mode == float_round_down; - a.sign ^= sign_flip; - return a; - } else { - int shift; - if (hi != 0) { - shift = clz64(hi); - } else { - shift = clz64(lo) + 64; - } - /* Normalizing to a binary point of 124 is the - correct adjust for the exponent. However since we're - shifting, we might as well put the binary point back - at 62 where we really want it. Therefore shift as - if we're leaving 1 bit at the top of the word, but - adjust the exponent as if we're leaving 3 bits. */ - shift -= 1; - if (shift >= 64) { - lo = lo << (shift - 64); - } else { - hi = (hi << shift) | (lo >> (64 - shift)); - lo = hi | ((lo << shift) != 0); - } - p_exp -= shift - 2; - } - } - } +/* + * Pack/unpack routines with a specific FloatFmt. 
+ */ - if (flags & float_muladd_halve_result) { - p_exp -= 1; - } +static void float16a_unpack_canonical(FloatParts64 *p, float16 f, + float_status *s, const FloatFmt *params) +{ + float16_unpack_raw(p, f); + parts_canonicalize(p, s, params); +} - /* finally prepare our result */ - a.cls = float_class_normal; - a.sign = p_sign ^ sign_flip; - a.exp = p_exp; - a.frac = lo; +static void float16_unpack_canonical(FloatParts64 *p, float16 f, + float_status *s) +{ + float16a_unpack_canonical(p, f, s, &float16_params); +} - return a; +static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f, + float_status *s) +{ + bfloat16_unpack_raw(p, f); + parts_canonicalize(p, s, &bfloat16_params); } -float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c, - int flags, float_status *status) +static float16 float16a_round_pack_canonical(FloatParts64 *p, + float_status *s, + const FloatFmt *params) { - FloatParts pa = float16_unpack_canonical(a, status); - FloatParts pb = float16_unpack_canonical(b, status); - FloatParts pc = float16_unpack_canonical(c, status); - FloatParts pr = muladd_floats(pa, pb, pc, flags, status); + parts_uncanon(p, s, params); + return float16_pack_raw(p); +} - return float16_round_pack_canonical(pr, status); +static float16 float16_round_pack_canonical(FloatParts64 *p, + float_status *s) +{ + return float16a_round_pack_canonical(p, s, &float16_params); } -static float32 QEMU_SOFTFLOAT_ATTR -soft_f32_muladd(float32 a, float32 b, float32 c, int flags, - float_status *status) +static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p, + float_status *s) { - FloatParts pa = float32_unpack_canonical(a, status); - FloatParts pb = float32_unpack_canonical(b, status); - FloatParts pc = float32_unpack_canonical(c, status); - FloatParts pr = muladd_floats(pa, pb, pc, flags, status); + parts_uncanon(p, s, &bfloat16_params); + return bfloat16_pack_raw(p); +} - return float32_round_pack_canonical(pr, status); +static void float32_unpack_canonical(FloatParts64 *p, float32 f, + float_status *s) +{ + float32_unpack_raw(p, f); + parts_canonicalize(p, s, &float32_params); } -static float64 QEMU_SOFTFLOAT_ATTR -soft_f64_muladd(float64 a, float64 b, float64 c, int flags, - float_status *status) +static float32 float32_round_pack_canonical(FloatParts64 *p, + float_status *s) { - FloatParts pa = float64_unpack_canonical(a, status); - FloatParts pb = float64_unpack_canonical(b, status); - FloatParts pc = float64_unpack_canonical(c, status); - FloatParts pr = muladd_floats(pa, pb, pc, flags, status); + parts_uncanon(p, s, &float32_params); + return float32_pack_raw(p); +} - return float64_round_pack_canonical(pr, status); +static void float64_unpack_canonical(FloatParts64 *p, float64 f, + float_status *s) +{ + float64_unpack_raw(p, f); + parts_canonicalize(p, s, &float64_params); } -static bool force_soft_fma; +static float64 float64_round_pack_canonical(FloatParts64 *p, + float_status *s) +{ + parts_uncanon(p, s, &float64_params); + return float64_pack_raw(p); +} -float32 QEMU_FLATTEN -float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s) +static void float128_unpack_canonical(FloatParts128 *p, float128 f, + float_status *s) { - union_float32 ua, ub, uc, ur; + float128_unpack_raw(p, f); + parts_canonicalize(p, s, &float128_params); +} - ua.s = xa; - ub.s = xb; - uc.s = xc; +static float128 float128_round_pack_canonical(FloatParts128 *p, + float_status *s) +{ + parts_uncanon(p, s, &float128_params); + return float128_pack_raw(p); +} - if 
(unlikely(!can_use_fpu(s))) { - goto soft; - } - if (unlikely(flags & float_muladd_halve_result)) { - goto soft; +/* Returns false if the encoding is invalid. */ +static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f, + float_status *s) +{ + /* Ensure rounding precision is set before beginning. */ + switch (s->floatx80_rounding_precision) { + case floatx80_precision_x: + case floatx80_precision_d: + case floatx80_precision_s: + break; + default: + g_assert_not_reached(); } - float32_input_flush3(&ua.s, &ub.s, &uc.s, s); - if (unlikely(!f32_is_zon3(ua, ub, uc))) { - goto soft; + if (unlikely(floatx80_invalid_encoding(f))) { + float_raise(float_flag_invalid, s); + return false; } - if (unlikely(force_soft_fma)) { - goto soft; + floatx80_unpack_raw(p, f); + + if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) { + parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]); + } else { + /* The explicit integer bit is ignored, after invalid checks. */ + p->frac_hi &= MAKE_64BIT_MASK(0, 63); + p->cls = (p->frac_hi == 0 ? float_class_inf + : parts_is_snan_frac(p->frac_hi, s) + ? float_class_snan : float_class_qnan); } + return true; +} - /* - * When (a || b) == 0, there's no need to check for under/over flow, - * since we know the addend is (normal || 0) and the product is 0. - */ - if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) { - union_float32 up; - bool prod_sign; - - prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s); - prod_sign ^= !!(flags & float_muladd_negate_product); - up.s = float32_set_sign(float32_zero, prod_sign); +static floatx80 floatx80_round_pack_canonical(FloatParts128 *p, + float_status *s) +{ + const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision]; + uint64_t frac; + int exp; - if (flags & float_muladd_negate_c) { - uc.h = -uc.h; - } - ur.h = up.h + uc.h; - } else { - union_float32 ua_orig = ua; - union_float32 uc_orig = uc; + switch (p->cls) { + case float_class_normal: + if (s->floatx80_rounding_precision == floatx80_precision_x) { + parts_uncanon_normal(p, s, fmt); + frac = p->frac_hi; + exp = p->exp; + } else { + FloatParts64 p64; - if (flags & float_muladd_negate_product) { - ua.h = -ua.h; + p64.sign = p->sign; + p64.exp = p->exp; + frac_truncjam(&p64, p); + parts_uncanon_normal(&p64, s, fmt); + frac = p64.frac; + exp = p64.exp; } - if (flags & float_muladd_negate_c) { - uc.h = -uc.h; - } - - ur.h = fmaf(ua.h, ub.h, uc.h); - - if (unlikely(f32_is_inf(ur))) { - s->float_exception_flags |= float_flag_overflow; - } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) { - ua = ua_orig; - uc = uc_orig; - goto soft; + if (exp != fmt->exp_max) { + break; } - } - if (flags & float_muladd_negate_result) { - return float32_chs(ur.s); - } - return ur.s; - - soft: - return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s); -} - -float64 QEMU_FLATTEN -float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s) -{ - union_float64 ua, ub, uc, ur; + /* rounded to inf -- fall through to set frac correctly */ - ua.s = xa; - ub.s = xb; - uc.s = xc; + case float_class_inf: + /* x86 and m68k differ in the setting of the integer bit. 
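Unlike the other formats handled here, floatx80 keeps its integer bit explicit as bit 63 of the 64-bit significand, which is why floatx80_round_pack_canonical fills it in by hand: floatx80_infinity_low for infinities (the x86/m68k difference noted above) and a forced-on bit 63 for NaNs below. A small sketch of the encoding using plain integers rather than QEMU's floatx80 type (the 1.0 pattern is standard x87, not taken from the patch):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* 1.0 in 80-bit extended precision: sign/exponent word and significand. */
    uint16_t se   = 0x3fff;                  /* sign 0, biased exponent 16383 */
    uint64_t frac = 0x8000000000000000ull;   /* explicit integer bit is bit 63 */

    assert(frac >> 63);      /* normal numbers must have the integer bit set */
    (void)se;
    return 0;
}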
*/ + frac = floatx80_infinity_low; + exp = fmt->exp_max; + break; - if (unlikely(!can_use_fpu(s))) { - goto soft; - } - if (unlikely(flags & float_muladd_halve_result)) { - goto soft; - } + case float_class_zero: + frac = 0; + exp = 0; + break; - float64_input_flush3(&ua.s, &ub.s, &uc.s, s); - if (unlikely(!f64_is_zon3(ua, ub, uc))) { - goto soft; - } + case float_class_snan: + case float_class_qnan: + /* NaNs have the integer bit set. */ + frac = p->frac_hi | (1ull << 63); + exp = fmt->exp_max; + break; - if (unlikely(force_soft_fma)) { - goto soft; + default: + g_assert_not_reached(); } - /* - * When (a || b) == 0, there's no need to check for under/over flow, - * since we know the addend is (normal || 0) and the product is 0. - */ - if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) { - union_float64 up; - bool prod_sign; - - prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s); - prod_sign ^= !!(flags & float_muladd_negate_product); - up.s = float64_set_sign(float64_zero, prod_sign); + return packFloatx80(p->sign, exp, frac); +} - if (flags & float_muladd_negate_c) { - uc.h = -uc.h; - } - ur.h = up.h + uc.h; - } else { - union_float64 ua_orig = ua; - union_float64 uc_orig = uc; +/* + * Addition and subtraction + */ - if (flags & float_muladd_negate_product) { - ua.h = -ua.h; - } - if (flags & float_muladd_negate_c) { - uc.h = -uc.h; - } +static float16 QEMU_FLATTEN +float16_addsub(float16 a, float16 b, float_status *status, bool subtract) +{ + FloatParts64 pa, pb, *pr; - ur.h = fma(ua.h, ub.h, uc.h); + float16_unpack_canonical(&pa, a, status); + float16_unpack_canonical(&pb, b, status); + pr = parts_addsub(&pa, &pb, status, subtract); - if (unlikely(f64_is_inf(ur))) { - s->float_exception_flags |= float_flag_overflow; - } else if (unlikely(fabs(ur.h) <= FLT_MIN)) { - ua = ua_orig; - uc = uc_orig; - goto soft; - } - } - if (flags & float_muladd_negate_result) { - return float64_chs(ur.s); - } - return ur.s; + return float16_round_pack_canonical(pr, status); +} - soft: - return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s); +float16 float16_add(float16 a, float16 b, float_status *status) +{ + return float16_addsub(a, b, status, false); } -/* - * Returns the result of multiplying the bfloat16 values `a' - * and `b' then adding 'c', with no intermediate rounding step after the - * multiplication. - */ +float16 float16_sub(float16 a, float16 b, float_status *status) +{ + return float16_addsub(a, b, status, true); +} -bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c, - int flags, float_status *status) +static float32 QEMU_SOFTFLOAT_ATTR +soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract) { - FloatParts pa = bfloat16_unpack_canonical(a, status); - FloatParts pb = bfloat16_unpack_canonical(b, status); - FloatParts pc = bfloat16_unpack_canonical(c, status); - FloatParts pr = muladd_floats(pa, pb, pc, flags, status); + FloatParts64 pa, pb, *pr; - return bfloat16_round_pack_canonical(pr, status); + float32_unpack_canonical(&pa, a, status); + float32_unpack_canonical(&pb, b, status); + pr = parts_addsub(&pa, &pb, status, subtract); + + return float32_round_pack_canonical(pr, status); } -/* - * Returns the result of dividing the floating-point value `a' by the - * corresponding value `b'. The operation is performed according to - * the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
- */ +static float32 soft_f32_add(float32 a, float32 b, float_status *status) +{ + return soft_f32_addsub(a, b, status, false); +} -static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s) +static float32 soft_f32_sub(float32 a, float32 b, float_status *status) { - bool sign = a.sign ^ b.sign; + return soft_f32_addsub(a, b, status, true); +} - if (a.cls == float_class_normal && b.cls == float_class_normal) { - uint64_t n0, n1, q, r; - int exp = a.exp - b.exp; +static float64 QEMU_SOFTFLOAT_ATTR +soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract) +{ + FloatParts64 pa, pb, *pr; - /* - * We want a 2*N / N-bit division to produce exactly an N-bit - * result, so that we do not lose any precision and so that we - * do not have to renormalize afterward. If A.frac < B.frac, - * then division would produce an (N-1)-bit result; shift A left - * by one to produce the an N-bit result, and decrement the - * exponent to match. - * - * The udiv_qrnnd algorithm that we're using requires normalization, - * i.e. the msb of the denominator must be set. Since we know that - * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left - * by one (more), and the remainder must be shifted right by one. - */ - if (a.frac < b.frac) { - exp -= 1; - shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0); - } else { - shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0); - } - q = udiv_qrnnd(&r, n1, n0, b.frac << 1); + float64_unpack_canonical(&pa, a, status); + float64_unpack_canonical(&pb, b, status); + pr = parts_addsub(&pa, &pb, status, subtract); - /* - * Set lsb if there is a remainder, to set inexact. - * As mentioned above, to find the actual value of the remainder we - * would need to shift right, but (1) we are only concerned about - * non-zero-ness, and (2) the remainder will always be even because - * both inputs to the division primitive are even. 
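 * (The shifts above leave the low bit of the dividend clear, and
 * b.frac << 1 is even as well; since n == q * (b.frac << 1) + r,
 * the remainder r must be even too.)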
- */ - a.frac = q | (r != 0); - a.sign = sign; - a.exp = exp; - return a; - } - /* handle all the NaN cases */ - if (is_nan(a.cls) || is_nan(b.cls)) { - return pick_nan(a, b, s); - } - /* 0/0 or Inf/Inf */ - if (a.cls == b.cls - && - (a.cls == float_class_inf || a.cls == float_class_zero)) { - s->float_exception_flags |= float_flag_invalid; - return parts_default_nan(s); - } - /* Inf / x or 0 / x */ - if (a.cls == float_class_inf || a.cls == float_class_zero) { - a.sign = sign; - return a; - } - /* Div 0 => Inf */ - if (b.cls == float_class_zero) { - s->float_exception_flags |= float_flag_divbyzero; - a.cls = float_class_inf; - a.sign = sign; - return a; - } - /* Div by Inf */ - if (b.cls == float_class_inf) { - a.cls = float_class_zero; - a.sign = sign; - return a; - } - g_assert_not_reached(); + return float64_round_pack_canonical(pr, status); } -float16 float16_div(float16 a, float16 b, float_status *status) +static float64 soft_f64_add(float64 a, float64 b, float_status *status) { - FloatParts pa = float16_unpack_canonical(a, status); - FloatParts pb = float16_unpack_canonical(b, status); - FloatParts pr = div_floats(pa, pb, status); - - return float16_round_pack_canonical(pr, status); + return soft_f64_addsub(a, b, status, false); } -static float32 QEMU_SOFTFLOAT_ATTR -soft_f32_div(float32 a, float32 b, float_status *status) +static float64 soft_f64_sub(float64 a, float64 b, float_status *status) { - FloatParts pa = float32_unpack_canonical(a, status); - FloatParts pb = float32_unpack_canonical(b, status); - FloatParts pr = div_floats(pa, pb, status); - - return float32_round_pack_canonical(pr, status); + return soft_f64_addsub(a, b, status, true); } -static float64 QEMU_SOFTFLOAT_ATTR -soft_f64_div(float64 a, float64 b, float_status *status) +static float hard_f32_add(float a, float b) { - FloatParts pa = float64_unpack_canonical(a, status); - FloatParts pb = float64_unpack_canonical(b, status); - FloatParts pr = div_floats(pa, pb, status); + return a + b; +} - return float64_round_pack_canonical(pr, status); +static float hard_f32_sub(float a, float b) +{ + return a - b; } -static float hard_f32_div(float a, float b) +static double hard_f64_add(double a, double b) { - return a / b; + return a + b; } -static double hard_f64_div(double a, double b) +static double hard_f64_sub(double a, double b) { - return a / b; + return a - b; } -static bool f32_div_pre(union_float32 a, union_float32 b) +static bool f32_addsubmul_post(union_float32 a, union_float32 b) { if (QEMU_HARDFLOAT_2F32_USE_FP) { - return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) && - fpclassify(b.h) == FP_NORMAL; + return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); } - return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s); + return !(float32_is_zero(a.s) && float32_is_zero(b.s)); } -static bool f64_div_pre(union_float64 a, union_float64 b) +static bool f64_addsubmul_post(union_float64 a, union_float64 b) { if (QEMU_HARDFLOAT_2F64_USE_FP) { - return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) && - fpclassify(b.h) == FP_NORMAL; + return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); + } else { + return !(float64_is_zero(a.s) && float64_is_zero(b.s)); } - return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s); } -static bool f32_div_post(union_float32 a, union_float32 b) +static float32 float32_addsub(float32 a, float32 b, float_status *s, + hard_f32_op2_fn hard, soft_f32_op2_fn soft) { - if (QEMU_HARDFLOAT_2F32_USE_FP) { - return 
fpclassify(a.h) != FP_ZERO; - } - return !float32_is_zero(a.s); + return float32_gen2(a, b, s, hard, soft, + f32_is_zon2, f32_addsubmul_post); } -static bool f64_div_post(union_float64 a, union_float64 b) +static float64 float64_addsub(float64 a, float64 b, float_status *s, + hard_f64_op2_fn hard, soft_f64_op2_fn soft) { - if (QEMU_HARDFLOAT_2F64_USE_FP) { - return fpclassify(a.h) != FP_ZERO; - } - return !float64_is_zero(a.s); + return float64_gen2(a, b, s, hard, soft, + f64_is_zon2, f64_addsubmul_post); } float32 QEMU_FLATTEN -float32_div(float32 a, float32 b, float_status *s) +float32_add(float32 a, float32 b, float_status *s) { - return float32_gen2(a, b, s, hard_f32_div, soft_f32_div, - f32_div_pre, f32_div_post); + return float32_addsub(a, b, s, hard_f32_add, soft_f32_add); +} + +float32 QEMU_FLATTEN +float32_sub(float32 a, float32 b, float_status *s) +{ + return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub); } float64 QEMU_FLATTEN -float64_div(float64 a, float64 b, float_status *s) +float64_add(float64 a, float64 b, float_status *s) { - return float64_gen2(a, b, s, hard_f64_div, soft_f64_div, - f64_div_pre, f64_div_post); + return float64_addsub(a, b, s, hard_f64_add, soft_f64_add); } -/* - * Returns the result of dividing the bfloat16 - * value `a' by the corresponding value `b'. - */ +float64 QEMU_FLATTEN +float64_sub(float64 a, float64 b, float_status *s) +{ + return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub); +} -bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status) +static bfloat16 QEMU_FLATTEN +bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract) { - FloatParts pa = bfloat16_unpack_canonical(a, status); - FloatParts pb = bfloat16_unpack_canonical(b, status); - FloatParts pr = div_floats(pa, pb, status); + FloatParts64 pa, pb, *pr; + + bfloat16_unpack_canonical(&pa, a, status); + bfloat16_unpack_canonical(&pb, b, status); + pr = parts_addsub(&pa, &pb, status, subtract); return bfloat16_round_pack_canonical(pr, status); } -/* - * Float to Float conversions - * - * Returns the result of converting one float format to another. The - * conversion is performed according to the IEC/IEEE Standard for - * Binary Floating-Point Arithmetic. - * - * The float_to_float helper only needs to take care of raising - * invalid exceptions and handling the conversion on NaNs. - */ - -static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf, - float_status *s) +bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status) { - if (dstf->arm_althp) { - switch (a.cls) { - case float_class_qnan: - case float_class_snan: - /* There is no NaN in the destination format. Raise Invalid - * and return a zero with the sign of the input NaN. - */ - s->float_exception_flags |= float_flag_invalid; - a.cls = float_class_zero; - a.frac = 0; - a.exp = 0; - break; - - case float_class_inf: - /* There is no Inf in the destination format. Raise Invalid - * and return the maximum normal with the correct sign. 
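 * (For the Arm alternative half-precision destination this is 0x7fff,
 * i.e. a magnitude of 131008.)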
- */ - s->float_exception_flags |= float_flag_invalid; - a.cls = float_class_normal; - a.exp = dstf->exp_max; - a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift; - break; - - default: - break; - } - } else if (is_nan(a.cls)) { - if (is_snan(a.cls)) { - s->float_exception_flags |= float_flag_invalid; - a = parts_silence_nan(a, s); - } - if (s->default_nan_mode) { - return parts_default_nan(s); - } - } - return a; + return bfloat16_addsub(a, b, status, false); } -float32 float16_to_float32(float16 a, bool ieee, float_status *s) +bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status) { - const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; - FloatParts p = float16a_unpack_canonical(a, s, fmt16); - FloatParts pr = float_to_float(p, &float32_params, s); - return float32_round_pack_canonical(pr, s); + return bfloat16_addsub(a, b, status, true); } -float64 float16_to_float64(float16 a, bool ieee, float_status *s) +static float128 QEMU_FLATTEN +float128_addsub(float128 a, float128 b, float_status *status, bool subtract) { - const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; - FloatParts p = float16a_unpack_canonical(a, s, fmt16); - FloatParts pr = float_to_float(p, &float64_params, s); - return float64_round_pack_canonical(pr, s); -} + FloatParts128 pa, pb, *pr; -float16 float32_to_float16(float32 a, bool ieee, float_status *s) -{ - const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; - FloatParts p = float32_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, fmt16, s); - return float16a_round_pack_canonical(pr, s, fmt16); -} + float128_unpack_canonical(&pa, a, status); + float128_unpack_canonical(&pb, b, status); + pr = parts_addsub(&pa, &pb, status, subtract); -static float64 QEMU_SOFTFLOAT_ATTR -soft_float32_to_float64(float32 a, float_status *s) -{ - FloatParts p = float32_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, &float64_params, s); - return float64_round_pack_canonical(pr, s); + return float128_round_pack_canonical(pr, status); } -float64 float32_to_float64(float32 a, float_status *s) +float128 float128_add(float128 a, float128 b, float_status *status) { - if (likely(float32_is_normal(a))) { - /* Widening conversion can never produce inexact results. */ - union_float32 uf; - union_float64 ud; - uf.s = a; - ud.h = uf.h; - return ud.s; - } else if (float32_is_zero(a)) { - return float64_set_sign(float64_zero, float32_is_neg(a)); - } else { - return soft_float32_to_float64(a, s); - } + return float128_addsub(a, b, status, false); } -float16 float64_to_float16(float64 a, bool ieee, float_status *s) +float128 float128_sub(float128 a, float128 b, float_status *status) { - const FloatFmt *fmt16 = ieee ? 
&float16_params : &float16_params_ahp; - FloatParts p = float64_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, fmt16, s); - return float16a_round_pack_canonical(pr, s, fmt16); + return float128_addsub(a, b, status, true); } -float32 float64_to_float32(float64 a, float_status *s) +static floatx80 QEMU_FLATTEN +floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract) { - FloatParts p = float64_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, &float32_params, s); - return float32_round_pack_canonical(pr, s); -} + FloatParts128 pa, pb, *pr; -float32 bfloat16_to_float32(bfloat16 a, float_status *s) -{ - FloatParts p = bfloat16_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, &float32_params, s); - return float32_round_pack_canonical(pr, s); -} + if (!floatx80_unpack_canonical(&pa, a, status) || + !floatx80_unpack_canonical(&pb, b, status)) { + return floatx80_default_nan(status); + } -float64 bfloat16_to_float64(bfloat16 a, float_status *s) -{ - FloatParts p = bfloat16_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, &float64_params, s); - return float64_round_pack_canonical(pr, s); + pr = parts_addsub(&pa, &pb, status, subtract); + return floatx80_round_pack_canonical(pr, status); } -bfloat16 float32_to_bfloat16(float32 a, float_status *s) +floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) { - FloatParts p = float32_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, &bfloat16_params, s); - return bfloat16_round_pack_canonical(pr, s); + return floatx80_addsub(a, b, status, false); } -bfloat16 float64_to_bfloat16(float64 a, float_status *s) +floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) { - FloatParts p = float64_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, &bfloat16_params, s); - return bfloat16_round_pack_canonical(pr, s); + return floatx80_addsub(a, b, status, true); } /* - * Rounds the floating-point value `a' to an integer, and returns the - * result as a floating-point value. The operation is performed - * according to the IEC/IEEE Standard for Binary Floating-Point - * Arithmetic. 
+ * Multiplication */ -static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode, - int scale, float_status *s) +float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status) { - switch (a.cls) { - case float_class_qnan: - case float_class_snan: - return return_nan(a, s); - - case float_class_zero: - case float_class_inf: - /* already "integral" */ - break; - - case float_class_normal: - scale = MIN(MAX(scale, -0x10000), 0x10000); - a.exp += scale; - - if (a.exp >= DECOMPOSED_BINARY_POINT) { - /* already integral */ - break; - } - if (a.exp < 0) { - bool one; - /* all fractional */ - s->float_exception_flags |= float_flag_inexact; - switch (rmode) { - case float_round_nearest_even: - one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT; - break; - case float_round_ties_away: - one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT; - break; - case float_round_to_zero: - one = false; - break; - case float_round_up: - one = !a.sign; - break; - case float_round_down: - one = a.sign; - break; - case float_round_to_odd: - one = true; - break; - default: - g_assert_not_reached(); - } - - if (one) { - a.frac = DECOMPOSED_IMPLICIT_BIT; - a.exp = 0; - } else { - a.cls = float_class_zero; - } - } else { - uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp; - uint64_t frac_lsbm1 = frac_lsb >> 1; - uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb; - uint64_t rnd_mask = rnd_even_mask >> 1; - uint64_t inc; + FloatParts64 pa, pb, *pr; - switch (rmode) { - case float_round_nearest_even: - inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0); - break; - case float_round_ties_away: - inc = frac_lsbm1; - break; - case float_round_to_zero: - inc = 0; - break; - case float_round_up: - inc = a.sign ? 0 : rnd_mask; - break; - case float_round_down: - inc = a.sign ? rnd_mask : 0; - break; - case float_round_to_odd: - inc = a.frac & frac_lsb ? 0 : rnd_mask; - break; - default: - g_assert_not_reached(); - } + float16_unpack_canonical(&pa, a, status); + float16_unpack_canonical(&pb, b, status); + pr = parts_mul(&pa, &pb, status); - if (a.frac & rnd_mask) { - s->float_exception_flags |= float_flag_inexact; - a.frac += inc; - a.frac &= ~rnd_mask; - if (a.frac & DECOMPOSED_OVERFLOW_BIT) { - a.frac >>= 1; - a.exp++; - } - } - } - break; - default: - g_assert_not_reached(); - } - return a; + return float16_round_pack_canonical(pr, status); } -float16 float16_round_to_int(float16 a, float_status *s) +static float32 QEMU_SOFTFLOAT_ATTR +soft_f32_mul(float32 a, float32 b, float_status *status) { - FloatParts pa = float16_unpack_canonical(a, s); - FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s); - return float16_round_pack_canonical(pr, s); -} + FloatParts64 pa, pb, *pr; -float32 float32_round_to_int(float32 a, float_status *s) -{ - FloatParts pa = float32_unpack_canonical(a, s); - FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s); - return float32_round_pack_canonical(pr, s); -} + float32_unpack_canonical(&pa, a, status); + float32_unpack_canonical(&pb, b, status); + pr = parts_mul(&pa, &pb, status); -float64 float64_round_to_int(float64 a, float_status *s) -{ - FloatParts pa = float64_unpack_canonical(a, s); - FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s); - return float64_round_pack_canonical(pr, s); + return float32_round_pack_canonical(pr, status); } -/* - * Rounds the bfloat16 value `a' to an integer, and returns the - * result as a bfloat16 value. 
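 * (Under the default nearest-even rounding, 2.5 rounds to 2.0 while
 * 3.5 rounds to 4.0, and the inexact flag is raised in both cases.)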
- */ - -bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s) +static float64 QEMU_SOFTFLOAT_ATTR +soft_f64_mul(float64 a, float64 b, float_status *status) { - FloatParts pa = bfloat16_unpack_canonical(a, s); - FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s); - return bfloat16_round_pack_canonical(pr, s); -} - -/* - * Returns the result of converting the floating-point value `a' to - * the two's complement integer format. The conversion is performed - * according to the IEC/IEEE Standard for Binary Floating-Point - * Arithmetic---which means in particular that the conversion is - * rounded according to the current rounding mode. If `a' is a NaN, - * the largest positive integer is returned. Otherwise, if the - * conversion overflows, the largest integer with the same sign as `a' - * is returned. -*/ + FloatParts64 pa, pb, *pr; -static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode, - int scale, int64_t min, int64_t max, - float_status *s) -{ - uint64_t r; - int orig_flags = get_float_exception_flags(s); - FloatParts p = round_to_int(in, rmode, scale, s); + float64_unpack_canonical(&pa, a, status); + float64_unpack_canonical(&pb, b, status); + pr = parts_mul(&pa, &pb, status); - switch (p.cls) { - case float_class_snan: - case float_class_qnan: - s->float_exception_flags = orig_flags | float_flag_invalid; - return max; - case float_class_inf: - s->float_exception_flags = orig_flags | float_flag_invalid; - return p.sign ? min : max; - case float_class_zero: - return 0; - case float_class_normal: - if (p.exp < DECOMPOSED_BINARY_POINT) { - r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp); - } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) { - r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT); - } else { - r = UINT64_MAX; - } - if (p.sign) { - if (r <= -(uint64_t) min) { - return -r; - } else { - s->float_exception_flags = orig_flags | float_flag_invalid; - return min; - } - } else { - if (r <= max) { - return r; - } else { - s->float_exception_flags = orig_flags | float_flag_invalid; - return max; - } - } - default: - g_assert_not_reached(); - } + return float64_round_pack_canonical(pr, status); } -int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +static float hard_f32_mul(float a, float b) { - return round_to_int_and_pack(float16_unpack_canonical(a, s), - rmode, scale, INT8_MIN, INT8_MAX, s); + return a * b; } -int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +static double hard_f64_mul(double a, double b) { - return round_to_int_and_pack(float16_unpack_canonical(a, s), - rmode, scale, INT16_MIN, INT16_MAX, s); + return a * b; } -int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +float32 QEMU_FLATTEN +float32_mul(float32 a, float32 b, float_status *s) { - return round_to_int_and_pack(float16_unpack_canonical(a, s), - rmode, scale, INT32_MIN, INT32_MAX, s); + return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul, + f32_is_zon2, f32_addsubmul_post); } -int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +float64 QEMU_FLATTEN +float64_mul(float64 a, float64 b, float_status *s) { - return round_to_int_and_pack(float16_unpack_canonical(a, s), - rmode, scale, INT64_MIN, INT64_MAX, s); + return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul, + f64_is_zon2, f64_addsubmul_post); } -int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale, - float_status *s) +bfloat16 
QEMU_FLATTEN +bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status) { - return round_to_int_and_pack(float32_unpack_canonical(a, s), - rmode, scale, INT16_MIN, INT16_MAX, s); -} + FloatParts64 pa, pb, *pr; -int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale, - float_status *s) -{ - return round_to_int_and_pack(float32_unpack_canonical(a, s), - rmode, scale, INT32_MIN, INT32_MAX, s); -} + bfloat16_unpack_canonical(&pa, a, status); + bfloat16_unpack_canonical(&pb, b, status); + pr = parts_mul(&pa, &pb, status); -int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale, - float_status *s) -{ - return round_to_int_and_pack(float32_unpack_canonical(a, s), - rmode, scale, INT64_MIN, INT64_MAX, s); + return bfloat16_round_pack_canonical(pr, status); } -int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale, - float_status *s) +float128 QEMU_FLATTEN +float128_mul(float128 a, float128 b, float_status *status) { - return round_to_int_and_pack(float64_unpack_canonical(a, s), - rmode, scale, INT16_MIN, INT16_MAX, s); -} + FloatParts128 pa, pb, *pr; -int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale, - float_status *s) -{ - return round_to_int_and_pack(float64_unpack_canonical(a, s), - rmode, scale, INT32_MIN, INT32_MAX, s); -} + float128_unpack_canonical(&pa, a, status); + float128_unpack_canonical(&pb, b, status); + pr = parts_mul(&pa, &pb, status); -int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale, - float_status *s) -{ - return round_to_int_and_pack(float64_unpack_canonical(a, s), - rmode, scale, INT64_MIN, INT64_MAX, s); + return float128_round_pack_canonical(pr, status); } -int8_t float16_to_int8(float16 a, float_status *s) +floatx80 QEMU_FLATTEN +floatx80_mul(floatx80 a, floatx80 b, float_status *status) { - return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s); -} + FloatParts128 pa, pb, *pr; -int16_t float16_to_int16(float16 a, float_status *s) -{ - return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); -} + if (!floatx80_unpack_canonical(&pa, a, status) || + !floatx80_unpack_canonical(&pb, b, status)) { + return floatx80_default_nan(status); + } -int32_t float16_to_int32(float16 a, float_status *s) -{ - return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s); + pr = parts_mul(&pa, &pb, status); + return floatx80_round_pack_canonical(pr, status); } -int64_t float16_to_int64(float16 a, float_status *s) -{ - return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); -} +/* + * Fused multiply-add + */ -int16_t float32_to_int16(float32 a, float_status *s) +float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c, + int flags, float_status *status) { - return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s); -} + FloatParts64 pa, pb, pc, *pr; -int32_t float32_to_int32(float32 a, float_status *s) -{ - return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s); -} + float16_unpack_canonical(&pa, a, status); + float16_unpack_canonical(&pb, b, status); + float16_unpack_canonical(&pc, c, status); + pr = parts_muladd(&pa, &pb, &pc, flags, status); -int64_t float32_to_int64(float32 a, float_status *s) -{ - return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s); + return float16_round_pack_canonical(pr, status); } -int16_t float64_to_int16(float64 a, float_status *s) +static float32 QEMU_SOFTFLOAT_ATTR +soft_f32_muladd(float32 a, float32 b, float32 c, int flags, + float_status *status) { - return float64_to_int16_scalbn(a, 
s->float_rounding_mode, 0, s); -} + FloatParts64 pa, pb, pc, *pr; -int32_t float64_to_int32(float64 a, float_status *s) -{ - return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s); -} + float32_unpack_canonical(&pa, a, status); + float32_unpack_canonical(&pb, b, status); + float32_unpack_canonical(&pc, c, status); + pr = parts_muladd(&pa, &pb, &pc, flags, status); -int64_t float64_to_int64(float64 a, float_status *s) -{ - return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s); + return float32_round_pack_canonical(pr, status); } -int16_t float16_to_int16_round_to_zero(float16 a, float_status *s) +static float64 QEMU_SOFTFLOAT_ATTR +soft_f64_muladd(float64 a, float64 b, float64 c, int flags, + float_status *status) { - return float16_to_int16_scalbn(a, float_round_to_zero, 0, s); -} + FloatParts64 pa, pb, pc, *pr; -int32_t float16_to_int32_round_to_zero(float16 a, float_status *s) -{ - return float16_to_int32_scalbn(a, float_round_to_zero, 0, s); -} + float64_unpack_canonical(&pa, a, status); + float64_unpack_canonical(&pb, b, status); + float64_unpack_canonical(&pc, c, status); + pr = parts_muladd(&pa, &pb, &pc, flags, status); -int64_t float16_to_int64_round_to_zero(float16 a, float_status *s) -{ - return float16_to_int64_scalbn(a, float_round_to_zero, 0, s); + return float64_round_pack_canonical(pr, status); } -int16_t float32_to_int16_round_to_zero(float32 a, float_status *s) -{ - return float32_to_int16_scalbn(a, float_round_to_zero, 0, s); -} +static bool force_soft_fma; -int32_t float32_to_int32_round_to_zero(float32 a, float_status *s) +float32 QEMU_FLATTEN +float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s) { - return float32_to_int32_scalbn(a, float_round_to_zero, 0, s); -} + union_float32 ua, ub, uc, ur; -int64_t float32_to_int64_round_to_zero(float32 a, float_status *s) -{ - return float32_to_int64_scalbn(a, float_round_to_zero, 0, s); -} + ua.s = xa; + ub.s = xb; + uc.s = xc; -int16_t float64_to_int16_round_to_zero(float64 a, float_status *s) -{ - return float64_to_int16_scalbn(a, float_round_to_zero, 0, s); -} + if (unlikely(!can_use_fpu(s))) { + goto soft; + } + if (unlikely(flags & float_muladd_halve_result)) { + goto soft; + } -int32_t float64_to_int32_round_to_zero(float64 a, float_status *s) -{ - return float64_to_int32_scalbn(a, float_round_to_zero, 0, s); -} + float32_input_flush3(&ua.s, &ub.s, &uc.s, s); + if (unlikely(!f32_is_zon3(ua, ub, uc))) { + goto soft; + } -int64_t float64_to_int64_round_to_zero(float64 a, float_status *s) -{ - return float64_to_int64_scalbn(a, float_round_to_zero, 0, s); -} + if (unlikely(force_soft_fma)) { + goto soft; + } -/* - * Returns the result of converting the floating-point value `a' to - * the two's complement integer format. - */ + /* + * When (a || b) == 0, there's no need to check for under/over flow, + * since we know the addend is (normal || 0) and the product is 0. 
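 * Adding a finite addend to a signed zero is always exact, so neither
 * overflow nor underflow can arise on this path; the only subtlety is
 * the sign of an exact zero result, which is why prod_sign is computed
 * explicitly just below.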
+ */ + if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) { + union_float32 up; + bool prod_sign; -int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, - float_status *s) -{ - return round_to_int_and_pack(bfloat16_unpack_canonical(a, s), - rmode, scale, INT16_MIN, INT16_MAX, s); -} + prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s); + prod_sign ^= !!(flags & float_muladd_negate_product); + up.s = float32_set_sign(float32_zero, prod_sign); -int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, - float_status *s) -{ - return round_to_int_and_pack(bfloat16_unpack_canonical(a, s), - rmode, scale, INT32_MIN, INT32_MAX, s); -} + if (flags & float_muladd_negate_c) { + uc.h = -uc.h; + } + ur.h = up.h + uc.h; + } else { + union_float32 ua_orig = ua; + union_float32 uc_orig = uc; -int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, - float_status *s) -{ - return round_to_int_and_pack(bfloat16_unpack_canonical(a, s), - rmode, scale, INT64_MIN, INT64_MAX, s); -} + if (flags & float_muladd_negate_product) { + ua.h = -ua.h; + } + if (flags & float_muladd_negate_c) { + uc.h = -uc.h; + } -int16_t bfloat16_to_int16(bfloat16 a, float_status *s) -{ - return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); -} + ur.h = fmaf(ua.h, ub.h, uc.h); -int32_t bfloat16_to_int32(bfloat16 a, float_status *s) -{ - return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s); -} + if (unlikely(f32_is_inf(ur))) { + float_raise(float_flag_overflow, s); + } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) { + ua = ua_orig; + uc = uc_orig; + goto soft; + } + } + if (flags & float_muladd_negate_result) { + return float32_chs(ur.s); + } + return ur.s; -int64_t bfloat16_to_int64(bfloat16 a, float_status *s) -{ - return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); + soft: + return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s); } -int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s) +float64 QEMU_FLATTEN +float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s) { - return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s); -} + union_float64 ua, ub, uc, ur; -int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s) -{ - return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s); -} + ua.s = xa; + ub.s = xb; + uc.s = xc; -int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s) -{ - return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s); -} + if (unlikely(!can_use_fpu(s))) { + goto soft; + } + if (unlikely(flags & float_muladd_halve_result)) { + goto soft; + } -/* - * Returns the result of converting the floating-point value `a' to - * the unsigned integer format. The conversion is performed according - * to the IEC/IEEE Standard for Binary Floating-Point - * Arithmetic---which means in particular that the conversion is - * rounded according to the current rounding mode. If `a' is a NaN, - * the largest unsigned integer is returned. Otherwise, if the - * conversion overflows, the largest unsigned integer is returned. If - * the 'a' is negative, the result is rounded and zero is returned; - * values that do not round to zero will raise the inexact exception - * flag. 
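 * (For instance, under nearest-even rounding -0.25 converts to 0 and
 * raises only the inexact flag.)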
- */ + float64_input_flush3(&ua.s, &ub.s, &uc.s, s); + if (unlikely(!f64_is_zon3(ua, ub, uc))) { + goto soft; + } -static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode, - int scale, uint64_t max, - float_status *s) -{ - int orig_flags = get_float_exception_flags(s); - FloatParts p = round_to_int(in, rmode, scale, s); - uint64_t r; + if (unlikely(force_soft_fma)) { + goto soft; + } - switch (p.cls) { - case float_class_snan: - case float_class_qnan: - s->float_exception_flags = orig_flags | float_flag_invalid; - return max; - case float_class_inf: - s->float_exception_flags = orig_flags | float_flag_invalid; - return p.sign ? 0 : max; - case float_class_zero: - return 0; - case float_class_normal: - if (p.sign) { - s->float_exception_flags = orig_flags | float_flag_invalid; - return 0; + /* + * When (a || b) == 0, there's no need to check for under/over flow, + * since we know the addend is (normal || 0) and the product is 0. + */ + if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) { + union_float64 up; + bool prod_sign; + + prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s); + prod_sign ^= !!(flags & float_muladd_negate_product); + up.s = float64_set_sign(float64_zero, prod_sign); + + if (flags & float_muladd_negate_c) { + uc.h = -uc.h; } + ur.h = up.h + uc.h; + } else { + union_float64 ua_orig = ua; + union_float64 uc_orig = uc; - if (p.exp < DECOMPOSED_BINARY_POINT) { - r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp); - } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) { - r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT); - } else { - s->float_exception_flags = orig_flags | float_flag_invalid; - return max; + if (flags & float_muladd_negate_product) { + ua.h = -ua.h; + } + if (flags & float_muladd_negate_c) { + uc.h = -uc.h; } - /* For uint64 this will never trip, but if p.exp is too large - * to shift a decomposed fraction we shall have exited via the - * 3rd leg above. 
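 * (The r > max test below therefore only fires for the narrower
 * destinations, e.g. converting 70000.0 to a uint16_t result.)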
- */ - if (r > max) { - s->float_exception_flags = orig_flags | float_flag_invalid; - return max; + ur.h = fma(ua.h, ub.h, uc.h); + + if (unlikely(f64_is_inf(ur))) { + float_raise(float_flag_overflow, s); + } else if (unlikely(fabs(ur.h) <= FLT_MIN)) { + ua = ua_orig; + uc = uc_orig; + goto soft; } - return r; - default: - g_assert_not_reached(); } + if (flags & float_muladd_negate_result) { + return float64_chs(ur.s); + } + return ur.s; + + soft: + return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s); } -uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c, + int flags, float_status *status) { - return round_to_uint_and_pack(float16_unpack_canonical(a, s), - rmode, scale, UINT8_MAX, s); + FloatParts64 pa, pb, pc, *pr; + + bfloat16_unpack_canonical(&pa, a, status); + bfloat16_unpack_canonical(&pb, b, status); + bfloat16_unpack_canonical(&pc, c, status); + pr = parts_muladd(&pa, &pb, &pc, flags, status); + + return bfloat16_round_pack_canonical(pr, status); } -uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c, + int flags, float_status *status) { - return round_to_uint_and_pack(float16_unpack_canonical(a, s), - rmode, scale, UINT16_MAX, s); + FloatParts128 pa, pb, pc, *pr; + + float128_unpack_canonical(&pa, a, status); + float128_unpack_canonical(&pb, b, status); + float128_unpack_canonical(&pc, c, status); + pr = parts_muladd(&pa, &pb, &pc, flags, status); + + return float128_round_pack_canonical(pr, status); } -uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +/* + * Division + */ + +float16 float16_div(float16 a, float16 b, float_status *status) { - return round_to_uint_and_pack(float16_unpack_canonical(a, s), - rmode, scale, UINT32_MAX, s); + FloatParts64 pa, pb, *pr; + + float16_unpack_canonical(&pa, a, status); + float16_unpack_canonical(&pb, b, status); + pr = parts_div(&pa, &pb, status); + + return float16_round_pack_canonical(pr, status); } -uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +static float32 QEMU_SOFTFLOAT_ATTR +soft_f32_div(float32 a, float32 b, float_status *status) { - return round_to_uint_and_pack(float16_unpack_canonical(a, s), - rmode, scale, UINT64_MAX, s); + FloatParts64 pa, pb, *pr; + + float32_unpack_canonical(&pa, a, status); + float32_unpack_canonical(&pb, b, status); + pr = parts_div(&pa, &pb, status); + + return float32_round_pack_canonical(pr, status); } -uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale, - float_status *s) +static float64 QEMU_SOFTFLOAT_ATTR +soft_f64_div(float64 a, float64 b, float_status *status) { - return round_to_uint_and_pack(float32_unpack_canonical(a, s), - rmode, scale, UINT16_MAX, s); + FloatParts64 pa, pb, *pr; + + float64_unpack_canonical(&pa, a, status); + float64_unpack_canonical(&pb, b, status); + pr = parts_div(&pa, &pb, status); + + return float64_round_pack_canonical(pr, status); } -uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale, - float_status *s) +static float hard_f32_div(float a, float b) { - return round_to_uint_and_pack(float32_unpack_canonical(a, s), - rmode, scale, UINT32_MAX, s); + return a / b; } -uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale, - float_status *s) +static double hard_f64_div(double 
a, double b) { - return round_to_uint_and_pack(float32_unpack_canonical(a, s), - rmode, scale, UINT64_MAX, s); + return a / b; } -uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale, - float_status *s) +static bool f32_div_pre(union_float32 a, union_float32 b) { - return round_to_uint_and_pack(float64_unpack_canonical(a, s), - rmode, scale, UINT16_MAX, s); + if (QEMU_HARDFLOAT_2F32_USE_FP) { + return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) && + fpclassify(b.h) == FP_NORMAL; + } + return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s); } -uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale, - float_status *s) +static bool f64_div_pre(union_float64 a, union_float64 b) { - return round_to_uint_and_pack(float64_unpack_canonical(a, s), - rmode, scale, UINT32_MAX, s); + if (QEMU_HARDFLOAT_2F64_USE_FP) { + return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) && + fpclassify(b.h) == FP_NORMAL; + } + return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s); } -uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale, - float_status *s) +static bool f32_div_post(union_float32 a, union_float32 b) { - return round_to_uint_and_pack(float64_unpack_canonical(a, s), - rmode, scale, UINT64_MAX, s); + if (QEMU_HARDFLOAT_2F32_USE_FP) { + return fpclassify(a.h) != FP_ZERO; + } + return !float32_is_zero(a.s); } -uint8_t float16_to_uint8(float16 a, float_status *s) +static bool f64_div_post(union_float64 a, union_float64 b) { - return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s); + if (QEMU_HARDFLOAT_2F64_USE_FP) { + return fpclassify(a.h) != FP_ZERO; + } + return !float64_is_zero(a.s); } -uint16_t float16_to_uint16(float16 a, float_status *s) +float32 QEMU_FLATTEN +float32_div(float32 a, float32 b, float_status *s) { - return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); + return float32_gen2(a, b, s, hard_f32_div, soft_f32_div, + f32_div_pre, f32_div_post); } -uint32_t float16_to_uint32(float16 a, float_status *s) +float64 QEMU_FLATTEN +float64_div(float64 a, float64 b, float_status *s) { - return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); + return float64_gen2(a, b, s, hard_f64_div, soft_f64_div, + f64_div_pre, f64_div_post); } -uint64_t float16_to_uint64(float16 a, float_status *s) +bfloat16 QEMU_FLATTEN +bfloat16_div(bfloat16 a, bfloat16 b, float_status *status) { - return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); -} + FloatParts64 pa, pb, *pr; -uint16_t float32_to_uint16(float32 a, float_status *s) -{ - return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); -} + bfloat16_unpack_canonical(&pa, a, status); + bfloat16_unpack_canonical(&pb, b, status); + pr = parts_div(&pa, &pb, status); -uint32_t float32_to_uint32(float32 a, float_status *s) -{ - return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); + return bfloat16_round_pack_canonical(pr, status); } -uint64_t float32_to_uint64(float32 a, float_status *s) +float128 QEMU_FLATTEN +float128_div(float128 a, float128 b, float_status *status) { - return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); -} + FloatParts128 pa, pb, *pr; -uint16_t float64_to_uint16(float64 a, float_status *s) -{ - return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); -} + float128_unpack_canonical(&pa, a, status); + float128_unpack_canonical(&pb, b, status); + pr = parts_div(&pa, &pb, status); -uint32_t float64_to_uint32(float64 a, float_status *s) -{ - return 
float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); + return float128_round_pack_canonical(pr, status); } -uint64_t float64_to_uint64(float64 a, float_status *s) +floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) { - return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); -} + FloatParts128 pa, pb, *pr; -uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s) -{ - return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s); -} + if (!floatx80_unpack_canonical(&pa, a, status) || + !floatx80_unpack_canonical(&pb, b, status)) { + return floatx80_default_nan(status); + } -uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s) -{ - return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s); + pr = parts_div(&pa, &pb, status); + return floatx80_round_pack_canonical(pr, status); } -uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s) -{ - return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s); -} +/* + * Remainder + */ -uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s) +float32 float32_rem(float32 a, float32 b, float_status *status) { - return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s); + FloatParts64 pa, pb, *pr; + + float32_unpack_canonical(&pa, a, status); + float32_unpack_canonical(&pb, b, status); + pr = parts_modrem(&pa, &pb, NULL, status); + + return float32_round_pack_canonical(pr, status); } -uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s) +float64 float64_rem(float64 a, float64 b, float_status *status) { - return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s); + FloatParts64 pa, pb, *pr; + + float64_unpack_canonical(&pa, a, status); + float64_unpack_canonical(&pb, b, status); + pr = parts_modrem(&pa, &pb, NULL, status); + + return float64_round_pack_canonical(pr, status); } -uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s) +float128 float128_rem(float128 a, float128 b, float_status *status) { - return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s); + FloatParts128 pa, pb, *pr; + + float128_unpack_canonical(&pa, a, status); + float128_unpack_canonical(&pb, b, status); + pr = parts_modrem(&pa, &pb, NULL, status); + + return float128_round_pack_canonical(pr, status); } -uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s) +/* + * Returns the remainder of the extended double-precision floating-point value + * `a' with respect to the corresponding value `b'. + * If 'mod' is false, the operation is performed according to the IEC/IEEE + * Standard for Binary Floating-Point Arithmetic. If 'mod' is true, return + * the remainder based on truncating the quotient toward zero instead and + * *quotient is set to the low 64 bits of the absolute value of the integer + * quotient. + */ +floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, + uint64_t *quotient, float_status *status) { - return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s); + FloatParts128 pa, pb, *pr; + + *quotient = 0; + if (!floatx80_unpack_canonical(&pa, a, status) || + !floatx80_unpack_canonical(&pb, b, status)) { + return floatx80_default_nan(status); + } + pr = parts_modrem(&pa, &pb, mod ? 
quotient : NULL, status); + + return floatx80_round_pack_canonical(pr, status); } -uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s) +floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) { - return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s); + uint64_t quotient; + return floatx80_modrem(a, b, false, "ient, status); } -uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s) +floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) { - return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s); + uint64_t quotient; + return floatx80_modrem(a, b, true, "ient, status); } /* - * Returns the result of converting the bfloat16 value `a' to - * the unsigned integer format. + * Float to Float conversions + * + * Returns the result of converting one float format to another. The + * conversion is performed according to the IEC/IEEE Standard for + * Binary Floating-Point Arithmetic. + * + * Usually this only needs to take care of raising invalid exceptions + * and handling the conversion on NaNs. */ -uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode, - int scale, float_status *s) +static void parts_float_to_ahp(FloatParts64 *a, float_status *s) { - return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s), - rmode, scale, UINT16_MAX, s); -} + switch (a->cls) { + case float_class_qnan: + case float_class_snan: + /* + * There is no NaN in the destination format. Raise Invalid + * and return a zero with the sign of the input NaN. + */ + float_raise(float_flag_invalid, s); + a->cls = float_class_zero; + break; -uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode, - int scale, float_status *s) -{ - return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s), - rmode, scale, UINT32_MAX, s); -} + case float_class_inf: + /* + * There is no Inf in the destination format. Raise Invalid + * and return the maximum normal with the correct sign. + */ + float_raise(float_flag_invalid, s); + a->cls = float_class_normal; + a->exp = float16_params_ahp.exp_max; + a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift, + float16_params_ahp.frac_size + 1); + break; -uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode, - int scale, float_status *s) -{ - return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s), - rmode, scale, UINT64_MAX, s); + case float_class_normal: + case float_class_zero: + break; + + default: + g_assert_not_reached(); + } } -uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s) +static void parts64_float_to_float(FloatParts64 *a, float_status *s) { - return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); + if (is_nan(a->cls)) { + parts_return_nan(a, s); + } } -uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s) +static void parts128_float_to_float(FloatParts128 *a, float_status *s) { - return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); + if (is_nan(a->cls)) { + parts_return_nan(a, s); + } } -uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s) +#define parts_float_to_float(P, S) \ + PARTS_GENERIC_64_128(float_to_float, P)(P, S) + +static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, + float_status *s) { - return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); + a->cls = b->cls; + a->sign = b->sign; + a->exp = b->exp; + + if (a->cls == float_class_normal) { + frac_truncjam(a, b); + } else if (is_nan(a->cls)) { + /* Discard the low bits of the NaN. 
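 * The quiet vs. signalling distinction is encoded in the most
 * significant fraction bit, so keeping frac_hi preserves everything
 * parts_return_nan needs.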
*/ + a->frac = b->frac_hi; + parts_return_nan(a, s); + } } -uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s) +static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b, + float_status *s) { - return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s); + a->cls = b->cls; + a->sign = b->sign; + a->exp = b->exp; + frac_widen(a, b); + + if (is_nan(a->cls)) { + parts_return_nan(a, s); + } } -uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s) +float32 float16_to_float32(float16 a, bool ieee, float_status *s) { - return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s); + const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; + FloatParts64 p; + + float16a_unpack_canonical(&p, a, s, fmt16); + parts_float_to_float(&p, s); + return float32_round_pack_canonical(&p, s); } -uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s) +float64 float16_to_float64(float16 a, bool ieee, float_status *s) { - return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s); -} + const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; + FloatParts64 p; -/* - * Integer to float conversions - * - * Returns the result of converting the two's complement integer `a' - * to the floating-point format. The conversion is performed according - * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. - */ + float16a_unpack_canonical(&p, a, s, fmt16); + parts_float_to_float(&p, s); + return float64_round_pack_canonical(&p, s); +} -static FloatParts int_to_float(int64_t a, int scale, float_status *status) +float16 float32_to_float16(float32 a, bool ieee, float_status *s) { - FloatParts r = { .sign = false }; + FloatParts64 p; + const FloatFmt *fmt; - if (a == 0) { - r.cls = float_class_zero; + float32_unpack_canonical(&p, a, s); + if (ieee) { + parts_float_to_float(&p, s); + fmt = &float16_params; } else { - uint64_t f = a; - int shift; - - r.cls = float_class_normal; - if (a < 0) { - f = -f; - r.sign = true; - } - shift = clz64(f) - 1; - scale = MIN(MAX(scale, -0x10000), 0x10000); - - r.exp = DECOMPOSED_BINARY_POINT - shift + scale; - r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift); + parts_float_to_ahp(&p, s); + fmt = &float16_params_ahp; } - - return r; + return float16a_round_pack_canonical(&p, s, fmt); } -float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status) +static float64 QEMU_SOFTFLOAT_ATTR +soft_float32_to_float64(float32 a, float_status *s) { - FloatParts pa = int_to_float(a, scale, status); - return float16_round_pack_canonical(pa, status); -} + FloatParts64 p; -float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status) -{ - return int64_to_float16_scalbn(a, scale, status); + float32_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return float64_round_pack_canonical(&p, s); } -float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status) +float64 float32_to_float64(float32 a, float_status *s) { - return int64_to_float16_scalbn(a, scale, status); + if (likely(float32_is_normal(a))) { + /* Widening conversion can never produce inexact results. 
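 * A float32 significand (24 bits) fits exactly within float64's 53 bits
 * and every float32 exponent is in float64's range, so the host cast
 * below is exact and raises no exception flags.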
*/ + union_float32 uf; + union_float64 ud; + uf.s = a; + ud.h = uf.h; + return ud.s; + } else if (float32_is_zero(a)) { + return float64_set_sign(float64_zero, float32_is_neg(a)); + } else { + return soft_float32_to_float64(a, s); + } } -float16 int64_to_float16(int64_t a, float_status *status) +float16 float64_to_float16(float64 a, bool ieee, float_status *s) { - return int64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + const FloatFmt *fmt; + + float64_unpack_canonical(&p, a, s); + if (ieee) { + parts_float_to_float(&p, s); + fmt = &float16_params; + } else { + parts_float_to_ahp(&p, s); + fmt = &float16_params_ahp; + } + return float16a_round_pack_canonical(&p, s, fmt); } -float16 int32_to_float16(int32_t a, float_status *status) +float32 float64_to_float32(float64 a, float_status *s) { - return int64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + + float64_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return float32_round_pack_canonical(&p, s); } -float16 int16_to_float16(int16_t a, float_status *status) +float32 bfloat16_to_float32(bfloat16 a, float_status *s) { - return int64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return float32_round_pack_canonical(&p, s); } -float16 int8_to_float16(int8_t a, float_status *status) +float64 bfloat16_to_float64(bfloat16 a, float_status *s) { - return int64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return float64_round_pack_canonical(&p, s); } -float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status) +bfloat16 float32_to_bfloat16(float32 a, float_status *s) { - FloatParts pa = int_to_float(a, scale, status); - return float32_round_pack_canonical(pa, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return bfloat16_round_pack_canonical(&p, s); } -float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status) +bfloat16 float64_to_bfloat16(float64 a, float_status *s) { - return int64_to_float32_scalbn(a, scale, status); + FloatParts64 p; + + float64_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return bfloat16_round_pack_canonical(&p, s); } -float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status) +float32 float128_to_float32(float128 a, float_status *s) { - return int64_to_float32_scalbn(a, scale, status); + FloatParts64 p64; + FloatParts128 p128; + + float128_unpack_canonical(&p128, a, s); + parts_float_to_float_narrow(&p64, &p128, s); + return float32_round_pack_canonical(&p64, s); } -float32 int64_to_float32(int64_t a, float_status *status) +float64 float128_to_float64(float128 a, float_status *s) { - return int64_to_float32_scalbn(a, 0, status); + FloatParts64 p64; + FloatParts128 p128; + + float128_unpack_canonical(&p128, a, s); + parts_float_to_float_narrow(&p64, &p128, s); + return float64_round_pack_canonical(&p64, s); } -float32 int32_to_float32(int32_t a, float_status *status) +float128 float32_to_float128(float32 a, float_status *s) { - return int64_to_float32_scalbn(a, 0, status); + FloatParts64 p64; + FloatParts128 p128; + + float32_unpack_canonical(&p64, a, s); + parts_float_to_float_widen(&p128, &p64, s); + return float128_round_pack_canonical(&p128, s); } -float32 int16_to_float32(int16_t a, float_status *status) +float128 float64_to_float128(float64 a, float_status *s) { - return int64_to_float32_scalbn(a, 0, status); + 
FloatParts64 p64; + FloatParts128 p128; + + float64_unpack_canonical(&p64, a, s); + parts_float_to_float_widen(&p128, &p64, s); + return float128_round_pack_canonical(&p128, s); } -float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status) +float32 floatx80_to_float32(floatx80 a, float_status *s) { - FloatParts pa = int_to_float(a, scale, status); - return float64_round_pack_canonical(pa, status); + FloatParts64 p64; + FloatParts128 p128; + + if (floatx80_unpack_canonical(&p128, a, s)) { + parts_float_to_float_narrow(&p64, &p128, s); + } else { + parts_default_nan(&p64, s); + } + return float32_round_pack_canonical(&p64, s); } -float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status) +float64 floatx80_to_float64(floatx80 a, float_status *s) { - return int64_to_float64_scalbn(a, scale, status); + FloatParts64 p64; + FloatParts128 p128; + + if (floatx80_unpack_canonical(&p128, a, s)) { + parts_float_to_float_narrow(&p64, &p128, s); + } else { + parts_default_nan(&p64, s); + } + return float64_round_pack_canonical(&p64, s); } -float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status) +float128 floatx80_to_float128(floatx80 a, float_status *s) { - return int64_to_float64_scalbn(a, scale, status); + FloatParts128 p; + + if (floatx80_unpack_canonical(&p, a, s)) { + parts_float_to_float(&p, s); + } else { + parts_default_nan(&p, s); + } + return float128_round_pack_canonical(&p, s); } -float64 int64_to_float64(int64_t a, float_status *status) +floatx80 float32_to_floatx80(float32 a, float_status *s) { - return int64_to_float64_scalbn(a, 0, status); + FloatParts64 p64; + FloatParts128 p128; + + float32_unpack_canonical(&p64, a, s); + parts_float_to_float_widen(&p128, &p64, s); + return floatx80_round_pack_canonical(&p128, s); } -float64 int32_to_float64(int32_t a, float_status *status) +floatx80 float64_to_floatx80(float64 a, float_status *s) { - return int64_to_float64_scalbn(a, 0, status); + FloatParts64 p64; + FloatParts128 p128; + + float64_unpack_canonical(&p64, a, s); + parts_float_to_float_widen(&p128, &p64, s); + return floatx80_round_pack_canonical(&p128, s); } -float64 int16_to_float64(int16_t a, float_status *status) +floatx80 float128_to_floatx80(float128 a, float_status *s) { - return int64_to_float64_scalbn(a, 0, status); + FloatParts128 p; + + float128_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return floatx80_round_pack_canonical(&p, s); } /* - * Returns the result of converting the two's complement integer `a' - * to the bfloat16 format. 
+ * Round to integral value */ -bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status) +float16 float16_round_to_int(float16 a, float_status *s) { - FloatParts pa = int_to_float(a, scale, status); - return bfloat16_round_pack_canonical(pa, status); + FloatParts64 p; + + float16_unpack_canonical(&p, a, s); + parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params); + return float16_round_pack_canonical(&p, s); } -bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status) +float32 float32_round_to_int(float32 a, float_status *s) { - return int64_to_bfloat16_scalbn(a, scale, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params); + return float32_round_pack_canonical(&p, s); } -bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status) +float64 float64_round_to_int(float64 a, float_status *s) { - return int64_to_bfloat16_scalbn(a, scale, status); + FloatParts64 p; + + float64_unpack_canonical(&p, a, s); + parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params); + return float64_round_pack_canonical(&p, s); } -bfloat16 int64_to_bfloat16(int64_t a, float_status *status) +bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s) { - return int64_to_bfloat16_scalbn(a, 0, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params); + return bfloat16_round_pack_canonical(&p, s); } -bfloat16 int32_to_bfloat16(int32_t a, float_status *status) +float128 float128_round_to_int(float128 a, float_status *s) { - return int64_to_bfloat16_scalbn(a, 0, status); + FloatParts128 p; + + float128_unpack_canonical(&p, a, s); + parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params); + return float128_round_pack_canonical(&p, s); } -bfloat16 int16_to_bfloat16(int16_t a, float_status *status) +floatx80 floatx80_round_to_int(floatx80 a, float_status *status) { - return int64_to_bfloat16_scalbn(a, 0, status); + FloatParts128 p; + + if (!floatx80_unpack_canonical(&p, a, status)) { + return floatx80_default_nan(status); + } + + parts_round_to_int(&p, status->float_rounding_mode, 0, status, + &floatx80_params[status->floatx80_rounding_precision]); + return floatx80_round_pack_canonical(&p, status); } /* - * Unsigned Integer to float conversions - * - * Returns the result of converting the unsigned integer `a' to the - * floating-point format. The conversion is performed according to the - * IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+ * Floating-point to signed integer conversions */ -static FloatParts uint_to_float(uint64_t a, int scale, float_status *status) +int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) { - FloatParts r = { .sign = false }; - - if (a == 0) { - r.cls = float_class_zero; - } else { - scale = MIN(MAX(scale, -0x10000), 0x10000); - r.cls = float_class_normal; - if ((int64_t)a < 0) { - r.exp = DECOMPOSED_BINARY_POINT + 1 + scale; - shift64RightJamming(a, 1, &a); - r.frac = a; - } else { - int shift = clz64(a) - 1; - r.exp = DECOMPOSED_BINARY_POINT - shift + scale; - r.frac = a << shift; - } - } + FloatParts64 p; - return r; + float16_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s); } -float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status) +int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) { - FloatParts pa = uint_to_float(a, scale, status); - return float16_round_pack_canonical(pa, status); + FloatParts64 p; + + float16_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); } -float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status) +int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float16_scalbn(a, scale, status); + FloatParts64 p; + + float16_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); } -float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status) +int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float16_scalbn(a, scale, status); + FloatParts64 p; + + float16_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); } -float16 uint64_to_float16(uint64_t a, float_status *status) +int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); } -float16 uint32_to_float16(uint32_t a, float_status *status) +int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); } -float16 uint16_to_float16(uint16_t a, float_status *status) +int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); } -float16 uint8_to_float16(uint8_t a, float_status *status) +int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + + float64_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); } -float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status) +int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale, + float_status *s) { - FloatParts pa = uint_to_float(a, scale, status); - return float32_round_pack_canonical(pa, status); 
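The *_to_int*_scalbn helpers round under an explicit FloatRoundMode and saturate to the destination range passed as INT*_MIN/INT*_MAX (raising the invalid flag on overflow); per the scalbn naming, the input is effectively scaled by 2^scale before rounding. A simplified sketch of the round-and-saturate step only, with a hypothetical helper name and NaN/flag handling left out:

#include <stdint.h>
#include <math.h>
#include <stdio.h>

/* Hypothetical helper: round under the current host rounding mode, then
 * clamp to the int16_t range, roughly what parts_float_to_sint does when
 * called with INT16_MIN/INT16_MAX bounds. */
static int16_t to_int16_saturate(double x)
{
    double r = nearbyint(x);
    if (r < INT16_MIN) {
        return INT16_MIN;
    }
    if (r > INT16_MAX) {
        return INT16_MAX;
    }
    return (int16_t)r;
}

int main(void)
{
    printf("%d\n", to_int16_saturate(1.5));     /* 2 under nearest-even */
    printf("%d\n", to_int16_saturate(1e9));     /* saturates to 32767 */
    printf("%d\n", to_int16_saturate(-1e9));    /* saturates to -32768 */
    return 0;
}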
+ FloatParts64 p; + + float64_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); } -float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status) +int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float32_scalbn(a, scale, status); + FloatParts64 p; + + float64_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); } -float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status) +int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float32_scalbn(a, scale, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); } -float32 uint64_to_float32(uint64_t a, float_status *status) +int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float32_scalbn(a, 0, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); } -float32 uint32_to_float32(uint32_t a, float_status *status) +int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float32_scalbn(a, 0, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); } -float32 uint16_to_float32(uint16_t a, float_status *status) +static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode, + int scale, float_status *s) { - return uint64_to_float32_scalbn(a, 0, status); + FloatParts128 p; + + float128_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); } -float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status) +static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode, + int scale, float_status *s) { - FloatParts pa = uint_to_float(a, scale, status); - return float64_round_pack_canonical(pa, status); + FloatParts128 p; + + float128_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); } -float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status) +static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode, + int scale, float_status *s) { - return uint64_to_float64_scalbn(a, scale, status); + FloatParts128 p; + + if (!floatx80_unpack_canonical(&p, a, s)) { + parts_default_nan(&p, s); + } + return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); } -float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status) +static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode, + int scale, float_status *s) { - return uint64_to_float64_scalbn(a, scale, status); + FloatParts128 p; + + if (!floatx80_unpack_canonical(&p, a, s)) { + parts_default_nan(&p, s); + } + return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); } -float64 uint64_to_float64(uint64_t a, float_status *status) +int8_t float16_to_int8(float16 a, float_status *s) { - return uint64_to_float64_scalbn(a, 0, status); + return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s); } -float64 uint32_to_float64(uint32_t a, float_status *status) +int16_t float16_to_int16(float16 a, float_status *s) { - return uint64_to_float64_scalbn(a, 0, status); + 
return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); } -float64 uint16_to_float64(uint16_t a, float_status *status) +int32_t float16_to_int32(float16 a, float_status *s) { - return uint64_to_float64_scalbn(a, 0, status); + return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s); } -/* - * Returns the result of converting the unsigned integer `a' to the - * bfloat16 format. - */ - -bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status) +int64_t float16_to_int64(float16 a, float_status *s) { - FloatParts pa = uint_to_float(a, scale, status); - return bfloat16_round_pack_canonical(pa, status); + return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status) +int16_t float32_to_int16(float32 a, float_status *s) { - return uint64_to_bfloat16_scalbn(a, scale, status); + return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status) +int32_t float32_to_int32(float32 a, float_status *s) { - return uint64_to_bfloat16_scalbn(a, scale, status); + return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status) +int64_t float32_to_int64(float32 a, float_status *s) { - return uint64_to_bfloat16_scalbn(a, 0, status); + return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status) +int16_t float64_to_int16(float64 a, float_status *s) { - return uint64_to_bfloat16_scalbn(a, 0, status); + return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status) +int32_t float64_to_int32(float64 a, float_status *s) { - return uint64_to_bfloat16_scalbn(a, 0, status); + return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s); } -/* Float Min/Max */ -/* min() and max() functions. These can't be implemented as - * 'compare and pick one input' because that would mishandle - * NaNs and +0 vs -0. - * - * minnum() and maxnum() functions. These are similar to the min() - * and max() functions but if one of the arguments is a QNaN and - * the other is numerical then the numerical argument is returned. - * SNaNs will get quietened before being returned. - * minnum() and maxnum correspond to the IEEE 754-2008 minNum() - * and maxNum() operations. min() and max() are the typical min/max - * semantics provided by many CPUs which predate that specification. - * - * minnummag() and maxnummag() functions correspond to minNumMag() - * and minNumMag() from the IEEE-754 2008. - */ -static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin, - bool ieee, bool ismag, float_status *s) -{ - if (unlikely(is_nan(a.cls) || is_nan(b.cls))) { - if (ieee) { - /* Takes two floating-point values `a' and `b', one of - * which is a NaN, and returns the appropriate NaN - * result. If either `a' or `b' is a signaling NaN, - * the invalid exception is raised. 
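The removed comment block above is the heart of it: min()/max() cannot be a plain compare-and-select, because NaNs and signed zeros break that, while minnum()/maxnum() (IEEE 754-2008 minNum/maxNum) prefer the numeric operand over a quiet NaN. A small host-side demonstration; C's fmin follows the minNum-like rule that a single NaN argument is treated as missing data:

#include <math.h>
#include <stdio.h>

static double naive_min(double a, double b)
{
    return a < b ? a : b;       /* "compare and pick one input" */
}

int main(void)
{
    /* NaN handling depends on operand order with the naive version. */
    printf("naive_min(NaN, 1.0) = %g\n", naive_min(NAN, 1.0));   /* 1   */
    printf("naive_min(1.0, NaN) = %g\n", naive_min(1.0, NAN));   /* nan */
    printf("fmin(1.0, NaN)      = %g\n", fmin(1.0, NAN));        /* 1   */

    /* The sign of zero is also order dependent. */
    printf("naive_min(-0.0, +0.0) signbit = %d\n",
           !!signbit(naive_min(-0.0, +0.0)));                    /* 0: +0 wins */
    printf("naive_min(+0.0, -0.0) signbit = %d\n",
           !!signbit(naive_min(+0.0, -0.0)));                    /* 1: -0 wins */
    return 0;
}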
- */ - if (is_snan(a.cls) || is_snan(b.cls)) { - return pick_nan(a, b, s); - } else if (is_nan(a.cls) && !is_nan(b.cls)) { - return b; - } else if (is_nan(b.cls) && !is_nan(a.cls)) { - return a; - } - } - return pick_nan(a, b, s); - } else { - int a_exp, b_exp; - - switch (a.cls) { - case float_class_normal: - a_exp = a.exp; - break; - case float_class_inf: - a_exp = INT_MAX; - break; - case float_class_zero: - a_exp = INT_MIN; - break; - default: - g_assert_not_reached(); - break; - } - switch (b.cls) { - case float_class_normal: - b_exp = b.exp; - break; - case float_class_inf: - b_exp = INT_MAX; - break; - case float_class_zero: - b_exp = INT_MIN; - break; - default: - g_assert_not_reached(); - break; - } - - if (ismag && (a_exp != b_exp || a.frac != b.frac)) { - bool a_less = a_exp < b_exp; - if (a_exp == b_exp) { - a_less = a.frac < b.frac; - } - return a_less ^ ismin ? b : a; - } - - if (a.sign == b.sign) { - bool a_less = a_exp < b_exp; - if (a_exp == b_exp) { - a_less = a.frac < b.frac; - } - return a.sign ^ a_less ^ ismin ? b : a; - } else { - return a.sign ^ ismin ? b : a; - } - } +int64_t float64_to_int64(float64 a, float_status *s) +{ + return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s); } -#define MINMAX(sz, name, ismin, isiee, ismag) \ -float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \ - float_status *s) \ -{ \ - FloatParts pa = float ## sz ## _unpack_canonical(a, s); \ - FloatParts pb = float ## sz ## _unpack_canonical(b, s); \ - FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \ - \ - return float ## sz ## _round_pack_canonical(pr, s); \ -} - -MINMAX(16, min, true, false, false) -MINMAX(16, minnum, true, true, false) -MINMAX(16, minnummag, true, true, true) -MINMAX(16, max, false, false, false) -MINMAX(16, maxnum, false, true, false) -MINMAX(16, maxnummag, false, true, true) - -MINMAX(32, min, true, false, false) -MINMAX(32, minnum, true, true, false) -MINMAX(32, minnummag, true, true, true) -MINMAX(32, max, false, false, false) -MINMAX(32, maxnum, false, true, false) -MINMAX(32, maxnummag, false, true, true) - -MINMAX(64, min, true, false, false) -MINMAX(64, minnum, true, true, false) -MINMAX(64, minnummag, true, true, true) -MINMAX(64, max, false, false, false) -MINMAX(64, maxnum, false, true, false) -MINMAX(64, maxnummag, false, true, true) - -#undef MINMAX - -#define BF16_MINMAX(name, ismin, isiee, ismag) \ -bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \ -{ \ - FloatParts pa = bfloat16_unpack_canonical(a, s); \ - FloatParts pb = bfloat16_unpack_canonical(b, s); \ - FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \ - \ - return bfloat16_round_pack_canonical(pr, s); \ -} - -BF16_MINMAX(min, true, false, false) -BF16_MINMAX(minnum, true, true, false) -BF16_MINMAX(minnummag, true, true, true) -BF16_MINMAX(max, false, false, false) -BF16_MINMAX(maxnum, false, true, false) -BF16_MINMAX(maxnummag, false, true, true) - -#undef BF16_MINMAX - -/* Floating point compare */ -static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet, - float_status *s) -{ - if (is_nan(a.cls) || is_nan(b.cls)) { - if (!is_quiet || - a.cls == float_class_snan || - b.cls == float_class_snan) { - s->float_exception_flags |= float_flag_invalid; - } - return float_relation_unordered; - } - - if (a.cls == float_class_zero) { - if (b.cls == float_class_zero) { - return float_relation_equal; - } - return b.sign ? 
float_relation_greater : float_relation_less; - } else if (b.cls == float_class_zero) { - return a.sign ? float_relation_less : float_relation_greater; - } - - /* The only really important thing about infinity is its sign. If - * both are infinities the sign marks the smallest of the two. - */ - if (a.cls == float_class_inf) { - if ((b.cls == float_class_inf) && (a.sign == b.sign)) { - return float_relation_equal; - } - return a.sign ? float_relation_less : float_relation_greater; - } else if (b.cls == float_class_inf) { - return b.sign ? float_relation_greater : float_relation_less; - } - - if (a.sign != b.sign) { - return a.sign ? float_relation_less : float_relation_greater; - } - - if (a.exp == b.exp) { - if (a.frac == b.frac) { - return float_relation_equal; - } - if (a.sign) { - return a.frac > b.frac ? - float_relation_less : float_relation_greater; - } else { - return a.frac > b.frac ? - float_relation_greater : float_relation_less; - } - } else { - if (a.sign) { - return a.exp > b.exp ? float_relation_less : float_relation_greater; - } else { - return a.exp > b.exp ? float_relation_greater : float_relation_less; - } - } +int32_t float128_to_int32(float128 a, float_status *s) +{ + return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s); } -#define COMPARE(name, attr, sz) \ -static int attr \ -name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \ -{ \ - FloatParts pa = float ## sz ## _unpack_canonical(a, s); \ - FloatParts pb = float ## sz ## _unpack_canonical(b, s); \ - return compare_floats(pa, pb, is_quiet, s); \ +int64_t float128_to_int64(float128 a, float_status *s) +{ + return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s); } -COMPARE(soft_f16_compare, QEMU_FLATTEN, 16) -COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32) -COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64) - -#undef COMPARE - -FloatRelation float16_compare(float16 a, float16 b, float_status *s) +int32_t floatx80_to_int32(floatx80 a, float_status *s) { - return soft_f16_compare(a, b, false, s); + return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s); } -FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s) +int64_t floatx80_to_int64(floatx80 a, float_status *s) { - return soft_f16_compare(a, b, true, s); + return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s); } -static FloatRelation QEMU_FLATTEN -f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s) +int16_t float16_to_int16_round_to_zero(float16 a, float_status *s) { - union_float32 ua, ub; - - ua.s = xa; - ub.s = xb; - - if (QEMU_NO_HARDFLOAT) { - goto soft; - } - - float32_input_flush2(&ua.s, &ub.s, s); - if (isgreaterequal(ua.h, ub.h)) { - if (isgreater(ua.h, ub.h)) { - return float_relation_greater; - } - return float_relation_equal; - } - if (likely(isless(ua.h, ub.h))) { - return float_relation_less; - } - /* The only condition remaining is unordered. - * Fall through to set flags. 
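The removed hardfloat compare fast path leans on the host's quiet comparison macros: isgreater/isless do not trap on quiet NaNs, and whatever is neither greater, less nor equal must be unordered, the only case still sent to the softfloat slow path for flag handling. The same classification stated standalone:

#include <math.h>
#include <stdio.h>

/* Mirror of the four FloatRelation outcomes, classified with the host's
 * quiet comparison macros (illustration only). */
static const char *relation(double a, double b)
{
    if (isgreater(a, b)) {
        return "greater";
    }
    if (isless(a, b)) {
        return "less";
    }
    if (isunordered(a, b)) {
        return "unordered";     /* the softfloat slow path handles flags here */
    }
    return "equal";
}

int main(void)
{
    printf("%s\n", relation(1.0, 2.0));   /* less */
    printf("%s\n", relation(2.0, 2.0));   /* equal */
    printf("%s\n", relation(NAN, 2.0));   /* unordered */
    return 0;
}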
- */ - soft: - return soft_f32_compare(ua.s, ub.s, is_quiet, s); + return float16_to_int16_scalbn(a, float_round_to_zero, 0, s); } -FloatRelation float32_compare(float32 a, float32 b, float_status *s) +int32_t float16_to_int32_round_to_zero(float16 a, float_status *s) { - return f32_compare(a, b, false, s); + return float16_to_int32_scalbn(a, float_round_to_zero, 0, s); } -FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s) +int64_t float16_to_int64_round_to_zero(float16 a, float_status *s) { - return f32_compare(a, b, true, s); + return float16_to_int64_scalbn(a, float_round_to_zero, 0, s); } -static FloatRelation QEMU_FLATTEN -f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s) +int16_t float32_to_int16_round_to_zero(float32 a, float_status *s) { - union_float64 ua, ub; - - ua.s = xa; - ub.s = xb; - - if (QEMU_NO_HARDFLOAT) { - goto soft; - } + return float32_to_int16_scalbn(a, float_round_to_zero, 0, s); +} - float64_input_flush2(&ua.s, &ub.s, s); - if (isgreaterequal(ua.h, ub.h)) { - if (isgreater(ua.h, ub.h)) { - return float_relation_greater; - } - return float_relation_equal; - } - if (likely(isless(ua.h, ub.h))) { - return float_relation_less; - } - /* The only condition remaining is unordered. - * Fall through to set flags. - */ - soft: - return soft_f64_compare(ua.s, ub.s, is_quiet, s); +int32_t float32_to_int32_round_to_zero(float32 a, float_status *s) +{ + return float32_to_int32_scalbn(a, float_round_to_zero, 0, s); } -FloatRelation float64_compare(float64 a, float64 b, float_status *s) +int64_t float32_to_int64_round_to_zero(float32 a, float_status *s) { - return f64_compare(a, b, false, s); + return float32_to_int64_scalbn(a, float_round_to_zero, 0, s); } -FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s) +int16_t float64_to_int16_round_to_zero(float64 a, float_status *s) { - return f64_compare(a, b, true, s); + return float64_to_int16_scalbn(a, float_round_to_zero, 0, s); } -static FloatRelation QEMU_FLATTEN -soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s) +int32_t float64_to_int32_round_to_zero(float64 a, float_status *s) { - FloatParts pa = bfloat16_unpack_canonical(a, s); - FloatParts pb = bfloat16_unpack_canonical(b, s); - return compare_floats(pa, pb, is_quiet, s); + return float64_to_int32_scalbn(a, float_round_to_zero, 0, s); } -FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s) +int64_t float64_to_int64_round_to_zero(float64 a, float_status *s) { - return soft_bf16_compare(a, b, false, s); + return float64_to_int64_scalbn(a, float_round_to_zero, 0, s); } -FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s) +int32_t float128_to_int32_round_to_zero(float128 a, float_status *s) { - return soft_bf16_compare(a, b, true, s); + return float128_to_int32_scalbn(a, float_round_to_zero, 0, s); } -/* Multiply A by 2 raised to the power N. */ -static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s) +int64_t float128_to_int64_round_to_zero(float128 a, float_status *s) { - if (unlikely(is_nan(a.cls))) { - return return_nan(a, s); - } - if (a.cls == float_class_normal) { - /* The largest float type (even though not supported by FloatParts) - * is float128, which has a 15 bit exponent. Bounding N to 16 bits - * still allows rounding to infinity, without allowing overflow - * within the int32_t that backs FloatParts.exp. 
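scalbn_decomposed only adds N to the unpacked exponent; clamping N to +/-0x10000, as the removed comment explains, keeps the int32 exponent from overflowing while still letting the re-pack step round to infinity or flush to zero. Conceptually this is the same operation as C's scalbn/ldexp:

#include <math.h>
#include <stdio.h>

int main(void)
{
    printf("%g\n", scalbn(1.5, 4));       /* 1.5 * 2^4 = 24     */
    printf("%g\n", scalbn(1.0, 5000));    /* overflows  -> inf  */
    printf("%g\n", scalbn(1.0, -5000));   /* underflows -> 0    */
    return 0;
}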
- */ - n = MIN(MAX(n, -0x10000), 0x10000); - a.exp += n; - } - return a; + return float128_to_int64_scalbn(a, float_round_to_zero, 0, s); } -float16 float16_scalbn(float16 a, int n, float_status *status) +int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s) { - FloatParts pa = float16_unpack_canonical(a, status); - FloatParts pr = scalbn_decomposed(pa, n, status); - return float16_round_pack_canonical(pr, status); + return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s); } -float32 float32_scalbn(float32 a, int n, float_status *status) +int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s) { - FloatParts pa = float32_unpack_canonical(a, status); - FloatParts pr = scalbn_decomposed(pa, n, status); - return float32_round_pack_canonical(pr, status); + return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s); } -float64 float64_scalbn(float64 a, int n, float_status *status) +int16_t bfloat16_to_int16(bfloat16 a, float_status *s) { - FloatParts pa = float64_unpack_canonical(a, status); - FloatParts pr = scalbn_decomposed(pa, n, status); - return float64_round_pack_canonical(pr, status); + return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status) +int32_t bfloat16_to_int32(bfloat16 a, float_status *s) { - FloatParts pa = bfloat16_unpack_canonical(a, status); - FloatParts pr = scalbn_decomposed(pa, n, status); - return bfloat16_round_pack_canonical(pr, status); + return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s); +} + +int64_t bfloat16_to_int64(bfloat16 a, float_status *s) +{ + return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); +} + +int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s) +{ + return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s); +} + +int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s) +{ + return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s); +} + +int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s) +{ + return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s); } /* - * Square Root - * - * The old softfloat code did an approximation step before zeroing in - * on the final result. However for simpleness we just compute the - * square root by iterating down from the implicit bit to enough extra - * bits to ensure we get a correctly rounded result. - * - * This does mean however the calculation is slower than before, - * especially for 64 bit floats. + * Floating-point to unsigned integer conversions */ -static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p) +uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) { - uint64_t a_frac, r_frac, s_frac; - int bit, last_bit; + FloatParts64 p; - if (is_nan(a.cls)) { - return return_nan(a, s); - } - if (a.cls == float_class_zero) { - return a; /* sqrt(+-0) = +-0 */ - } - if (a.sign) { - s->float_exception_flags |= float_flag_invalid; - return parts_default_nan(s); - } - if (a.cls == float_class_inf) { - return a; /* sqrt(+inf) = +inf */ - } + float16_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s); +} - assert(a.cls == float_class_normal); +uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) +{ + FloatParts64 p; - /* We need two overflow bits at the top. Adding room for that is a - * right shift. 
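The removed sqrt_float is the classic restoring, bit-by-bit square root: try each result bit from the top down, subtract when it fits, and fold any leftover remainder into the sticky bit so the final rounding is correct. The same digit recurrence on a bare 64-bit integer (a generic sketch, not QEMU's exact loop):

#include <stdint.h>
#include <stdio.h>

/* Restoring, bit-by-bit square root: returns floor(sqrt(x)). */
static uint32_t isqrt64(uint64_t x)
{
    uint64_t rem = x, root = 0, bit = 1ULL << 62;

    while (bit > rem) {
        bit >>= 2;                    /* highest power of four <= x */
    }
    while (bit) {
        if (rem >= root + bit) {
            rem -= root + bit;        /* this bit belongs in the result */
            root = (root >> 1) + bit;
        } else {
            root >>= 1;
        }
        bit >>= 2;
    }
    return (uint32_t)root;            /* a nonzero 'rem' would set the sticky bit */
}

int main(void)
{
    printf("%u\n", isqrt64(1ULL << 60));   /* 1 << 30 */
    printf("%u\n", isqrt64(99));           /* 9       */
    return 0;
}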
If the exponent is odd, we can discard the low bit - * by multiplying the fraction by 2; that's a left shift. Combine - * those and we shift right if the exponent is even. - */ - a_frac = a.frac; - if (!(a.exp & 1)) { - a_frac >>= 1; - } - a.exp >>= 1; + float16_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); +} - /* Bit-by-bit computation of sqrt. */ - r_frac = 0; - s_frac = 0; +uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) +{ + FloatParts64 p; - /* Iterate from implicit bit down to the 3 extra bits to compute a - * properly rounded result. Remember we've inserted one more bit - * at the top, so these positions are one less. - */ - bit = DECOMPOSED_BINARY_POINT - 1; - last_bit = MAX(p->frac_shift - 4, 0); - do { - uint64_t q = 1ULL << bit; - uint64_t t_frac = s_frac + q; - if (t_frac <= a_frac) { - s_frac = t_frac + q; - a_frac -= t_frac; - r_frac += q; - } - a_frac <<= 1; - } while (--bit >= last_bit); + float16_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); +} - /* Undo the right shift done above. If there is any remaining - * fraction, the result is inexact. Set the sticky bit. - */ - a.frac = (r_frac << 1) + (a_frac != 0); +uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) +{ + FloatParts64 p; - return a; + float16_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); } -float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status) +uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale, + float_status *s) { - FloatParts pa = float16_unpack_canonical(a, status); - FloatParts pr = sqrt_float(pa, status, &float16_params); - return float16_round_pack_canonical(pr, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); } -static float32 QEMU_SOFTFLOAT_ATTR -soft_f32_sqrt(float32 a, float_status *status) +uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale, + float_status *s) { - FloatParts pa = float32_unpack_canonical(a, status); - FloatParts pr = sqrt_float(pa, status, &float32_params); - return float32_round_pack_canonical(pr, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); } -static float64 QEMU_SOFTFLOAT_ATTR -soft_f64_sqrt(float64 a, float_status *status) +uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale, + float_status *s) { - FloatParts pa = float64_unpack_canonical(a, status); - FloatParts pr = sqrt_float(pa, status, &float64_params); - return float64_round_pack_canonical(pr, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); } -float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s) +uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale, + float_status *s) { - union_float32 ua, ur; + FloatParts64 p; - ua.s = xa; - if (unlikely(!can_use_fpu(s))) { - goto soft; - } + float64_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); +} - float32_input_flush1(&ua.s, s); - if (QEMU_HARDFLOAT_1F32_USE_FP) { - if (unlikely(!(fpclassify(ua.h) == FP_NORMAL || - fpclassify(ua.h) == FP_ZERO) || - signbit(ua.h))) { - goto soft; - } - } else if (unlikely(!float32_is_zero_or_normal(ua.s) || - 
float32_is_neg(ua.s))) { - goto soft; - } - ur.h = sqrtf(ua.h); - return ur.s; +uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale, + float_status *s) +{ + FloatParts64 p; - soft: - return soft_f32_sqrt(ua.s, s); + float64_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); } -float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s) +uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale, + float_status *s) { - union_float64 ua, ur; + FloatParts64 p; - ua.s = xa; - if (unlikely(!can_use_fpu(s))) { - goto soft; - } + float64_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); +} - float64_input_flush1(&ua.s, s); - if (QEMU_HARDFLOAT_1F64_USE_FP) { - if (unlikely(!(fpclassify(ua.h) == FP_NORMAL || - fpclassify(ua.h) == FP_ZERO) || - signbit(ua.h))) { - goto soft; - } - } else if (unlikely(!float64_is_zero_or_normal(ua.s) || - float64_is_neg(ua.s))) { - goto soft; - } - ur.h = sqrt(ua.h); - return ur.s; +uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode, + int scale, float_status *s) +{ + FloatParts64 p; - soft: - return soft_f64_sqrt(ua.s, s); + bfloat16_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); } -bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status) +uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode, + int scale, float_status *s) { - FloatParts pa = bfloat16_unpack_canonical(a, status); - FloatParts pr = sqrt_float(pa, status, &bfloat16_params); - return bfloat16_round_pack_canonical(pr, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); } -/*---------------------------------------------------------------------------- -| The pattern for a default generated NaN. 
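The *_default_nan helpers that follow shift the generic parts_default_nan fraction into each format's field layout. Under the most common convention that yields an all-ones exponent with only the top fraction bit set (0x7FC00000 for binary32); the sign bit and the exact pattern are target dependent in QEMU, so the value printed below is the host's choice, not QEMU's:

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    float f = NAN;                  /* host default quiet NaN */
    uint32_t bits;

    memcpy(&bits, &f, sizeof(bits));
    printf("host float NaN bits: 0x%08X\n", bits);   /* typically 0x7FC00000 */
    printf("exponent all ones  : %d\n", ((bits >> 23) & 0xFF) == 0xFF);
    printf("quiet bit set      : %d\n", (bits >> 22) & 1);
    return 0;
}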
-*----------------------------------------------------------------------------*/ +uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode, + int scale, float_status *s) +{ + FloatParts64 p; -float16 float16_default_nan(float_status *status) + bfloat16_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); +} + +static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode, + int scale, float_status *s) { - FloatParts p = parts_default_nan(status); - p.frac >>= float16_params.frac_shift; - return float16_pack_raw(p); + FloatParts128 p; + + float128_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); } -float32 float32_default_nan(float_status *status) +static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode, + int scale, float_status *s) { - FloatParts p = parts_default_nan(status); - p.frac >>= float32_params.frac_shift; - return float32_pack_raw(p); + FloatParts128 p; + + float128_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); } -float64 float64_default_nan(float_status *status) +uint8_t float16_to_uint8(float16 a, float_status *s) { - FloatParts p = parts_default_nan(status); - p.frac >>= float64_params.frac_shift; - return float64_pack_raw(p); + return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s); } -float128 float128_default_nan(float_status *status) +uint16_t float16_to_uint16(float16 a, float_status *s) { - FloatParts p = parts_default_nan(status); - float128 r; + return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); +} - /* Extrapolate from the choices made by parts_default_nan to fill - * in the quad-floating format. If the low bit is set, assume we - * want to set all non-snan bits. - */ - r.low = -(p.frac & 1); - r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48); - r.high |= UINT64_C(0x7FFF000000000000); - r.high |= (uint64_t)p.sign << 63; +uint32_t float16_to_uint32(float16 a, float_status *s) +{ + return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); +} - return r; +uint64_t float16_to_uint64(float16 a, float_status *s) +{ + return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 bfloat16_default_nan(float_status *status) +uint16_t float32_to_uint16(float32 a, float_status *s) { - FloatParts p = parts_default_nan(status); - p.frac >>= bfloat16_params.frac_shift; - return bfloat16_pack_raw(p); + return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); } -/*---------------------------------------------------------------------------- -| Returns a quiet NaN from a signalling NaN for the floating point value `a'. 
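float*_silence_nan unpacks the raw fraction, lets parts_silence_nan flip the quiet bit, and re-packs. On formats using the usual "1 = quiet" convention that is just setting the top fraction bit (some targets invert this, which is why it goes through a per-target helper). A bit-level sketch for binary32 under that assumption:

#include <stdint.h>
#include <stdio.h>

#define F32_QUIET_BIT   (1u << 22)      /* top fraction bit of binary32 */

int main(void)
{
    uint32_t snan = 0x7F800001;         /* signalling NaN: exp all ones, quiet bit clear */
    uint32_t qnan = snan | F32_QUIET_BIT;

    printf("sNaN 0x%08X -> qNaN 0x%08X\n", snan, qnan);   /* 0x7FC00001 */
    return 0;
}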
-*----------------------------------------------------------------------------*/ +uint32_t float32_to_uint32(float32 a, float_status *s) +{ + return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); +} -float16 float16_silence_nan(float16 a, float_status *status) +uint64_t float32_to_uint64(float32 a, float_status *s) { - FloatParts p = float16_unpack_raw(a); - p.frac <<= float16_params.frac_shift; - p = parts_silence_nan(p, status); - p.frac >>= float16_params.frac_shift; - return float16_pack_raw(p); + return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); } -float32 float32_silence_nan(float32 a, float_status *status) +uint16_t float64_to_uint16(float64 a, float_status *s) { - FloatParts p = float32_unpack_raw(a); - p.frac <<= float32_params.frac_shift; - p = parts_silence_nan(p, status); - p.frac >>= float32_params.frac_shift; - return float32_pack_raw(p); + return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); } -float64 float64_silence_nan(float64 a, float_status *status) +uint32_t float64_to_uint32(float64 a, float_status *s) { - FloatParts p = float64_unpack_raw(a); - p.frac <<= float64_params.frac_shift; - p = parts_silence_nan(p, status); - p.frac >>= float64_params.frac_shift; - return float64_pack_raw(p); + return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status) +uint64_t float64_to_uint64(float64 a, float_status *s) { - FloatParts p = bfloat16_unpack_raw(a); - p.frac <<= bfloat16_params.frac_shift; - p = parts_silence_nan(p, status); - p.frac >>= bfloat16_params.frac_shift; - return bfloat16_pack_raw(p); + return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); } -/*---------------------------------------------------------------------------- -| If `a' is denormal and we are in flush-to-zero mode then set the -| input-denormal exception and return zero. Otherwise just return the value. 
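squash_input_denormal implements flush-inputs-to-zero: a nonzero significand with an all-zero exponent field is replaced by a zero of the same sign and float_flag_input_denormal is raised. A host-side equivalent of the detection and replacement, with the flag bookkeeping omitted:

#include <math.h>
#include <stdio.h>

static double flush_input_to_zero(double x)
{
    if (fpclassify(x) == FP_SUBNORMAL) {
        return copysign(0.0, x);        /* keep the sign, drop the value */
    }
    return x;
}

int main(void)
{
    double tiny = -4.9e-324;            /* a negative subnormal double */
    printf("%g -> %g (signbit %d)\n", tiny, flush_input_to_zero(tiny),
           !!signbit(flush_input_to_zero(tiny)));
    return 0;
}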
-*----------------------------------------------------------------------------*/ +uint32_t float128_to_uint32(float128 a, float_status *s) +{ + return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); +} -static bool parts_squash_denormal(FloatParts p, float_status *status) +uint64_t float128_to_uint64(float128 a, float_status *s) { - if (p.exp == 0 && p.frac != 0) { - float_raise(float_flag_input_denormal, status); - return true; - } + return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); +} - return false; +uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s) +{ + return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s); } -float16 float16_squash_input_denormal(float16 a, float_status *status) +uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s) { - if (status->flush_inputs_to_zero) { - FloatParts p = float16_unpack_raw(a); - if (parts_squash_denormal(p, status)) { - return float16_set_sign(float16_zero, p.sign); - } - } - return a; + return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s); } -float32 float32_squash_input_denormal(float32 a, float_status *status) +uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s) { - if (status->flush_inputs_to_zero) { - FloatParts p = float32_unpack_raw(a); - if (parts_squash_denormal(p, status)) { - return float32_set_sign(float32_zero, p.sign); - } - } - return a; + return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s); } -float64 float64_squash_input_denormal(float64 a, float_status *status) +uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s) { - if (status->flush_inputs_to_zero) { - FloatParts p = float64_unpack_raw(a); - if (parts_squash_denormal(p, status)) { - return float64_set_sign(float64_zero, p.sign); - } - } - return a; + return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s); } -bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status) +uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s) { - if (status->flush_inputs_to_zero) { - FloatParts p = bfloat16_unpack_raw(a); - if (parts_squash_denormal(p, status)) { - return bfloat16_set_sign(bfloat16_zero, p.sign); - } - } - return a; + return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s); } -/*---------------------------------------------------------------------------- -| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 -| and 7, and returns the properly rounded 32-bit integer corresponding to the -| input. If `zSign' is 1, the input is negated before being converted to an -| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input -| is simply rounded to an integer, with the inexact exception raised if the -| input cannot be represented exactly as an integer. However, if the fixed- -| point input is too large, the invalid exception is raised and the largest -| positive or negative integer is returned. 
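roundAndPackInt32 keeps seven fraction bits below the integer: it adds a mode-dependent increment (0x40 for the nearest modes, 0 or 0x7F for the directed ones), shifts the fraction out, clears the low bit on an exact tie for nearest-even, and flags inexact whenever the discarded bits were nonzero. The nearest-even case in isolation, as a tiny self-contained helper:

#include <stdint.h>
#include <stdio.h>

/* Round a fixed-point value with 7 fraction bits to an integer,
 * nearest-even, using the same increment/tie rule as the removed code. */
static uint64_t round_q7_nearest_even(uint64_t v)
{
    uint64_t round_bits = v & 0x7F;
    uint64_t z = (v + 0x40) >> 7;

    if (round_bits == 0x40) {
        z &= ~(uint64_t)1;              /* exact tie: round to even */
    }
    /* nonzero round_bits would also raise float_flag_inexact */
    return z;
}

int main(void)
{
    printf("%llu\n", (unsigned long long)round_q7_nearest_even(0x0C0)); /* 1.5  -> 2 */
    printf("%llu\n", (unsigned long long)round_q7_nearest_even(0x140)); /* 2.5  -> 2 */
    printf("%llu\n", (unsigned long long)round_q7_nearest_even(0x0A0)); /* 1.25 -> 1 */
    return 0;
}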
-*----------------------------------------------------------------------------*/ +uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s) +{ + return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s); +} -static int32_t roundAndPackInt32(bool zSign, uint64_t absZ, - float_status *status) +uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s) { - int8_t roundingMode; - bool roundNearestEven; - int8_t roundIncrement, roundBits; - int32_t z; + return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s); +} - roundingMode = status->float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - roundIncrement = 0x40; - break; - case float_round_to_zero: - roundIncrement = 0; - break; - case float_round_up: - roundIncrement = zSign ? 0 : 0x7f; - break; - case float_round_down: - roundIncrement = zSign ? 0x7f : 0; - break; - case float_round_to_odd: - roundIncrement = absZ & 0x80 ? 0 : 0x7f; - break; - default: - abort(); - } - roundBits = absZ & 0x7F; - absZ = ( absZ + roundIncrement )>>7; - if (!(roundBits ^ 0x40) && roundNearestEven) { - absZ &= ~1; - } - z = absZ; - if ( zSign ) z = - z; - if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { - float_raise(float_flag_invalid, status); - return zSign ? INT32_MIN : INT32_MAX; - } - if (roundBits) { - status->float_exception_flags |= float_flag_inexact; - } - return z; +uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s) +{ + return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s); +} +uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s) +{ + return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s); } -/*---------------------------------------------------------------------------- -| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and -| `absZ1', with binary point between bits 63 and 64 (between the input words), -| and returns the properly rounded 64-bit integer corresponding to the input. -| If `zSign' is 1, the input is negated before being converted to an integer. -| Ordinarily, the fixed-point input is simply rounded to an integer, with -| the inexact exception raised if the input cannot be represented exactly as -| an integer. However, if the fixed-point input is too large, the invalid -| exception is raised and the largest positive or negative integer is -| returned. 
-*----------------------------------------------------------------------------*/ +uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s) +{ + return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s); +} -static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1, - float_status *status) +uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s) { - int8_t roundingMode; - bool roundNearestEven, increment; - int64_t z; + return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s); +} - roundingMode = status->float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t) absZ1 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && absZ1; - break; - case float_round_down: - increment = zSign && absZ1; - break; - case float_round_to_odd: - increment = !(absZ0 & 1) && absZ1; - break; - default: - abort(); - } - if ( increment ) { - ++absZ0; - if ( absZ0 == 0 ) goto overflow; - if (!(absZ1 << 1) && roundNearestEven) { - absZ0 &= ~1; - } - } - z = absZ0; - if ( zSign ) z = - z; - if ( z && ( ( z < 0 ) ^ zSign ) ) { - overflow: - float_raise(float_flag_invalid, status); - return zSign ? INT64_MIN : INT64_MAX; - } - if (absZ1) { - status->float_exception_flags |= float_flag_inexact; - } - return z; +uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s) +{ + return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); +} +uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s) +{ + return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); } -/*---------------------------------------------------------------------------- -| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and -| `absZ1', with binary point between bits 63 and 64 (between the input words), -| and returns the properly rounded 64-bit unsigned integer corresponding to the -| input. Ordinarily, the fixed-point input is simply rounded to an integer, -| with the inexact exception raised if the input cannot be represented exactly -| as an integer. However, if the fixed-point input is too large, the invalid -| exception is raised and the largest unsigned integer is returned. 
-*----------------------------------------------------------------------------*/ +uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s) +{ + return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); +} -static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0, - uint64_t absZ1, float_status *status) +uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s) { - int8_t roundingMode; - bool roundNearestEven, increment; + return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s); +} - roundingMode = status->float_rounding_mode; - roundNearestEven = (roundingMode == float_round_nearest_even); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t)absZ1 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && absZ1; - break; - case float_round_down: - increment = zSign && absZ1; - break; - case float_round_to_odd: - increment = !(absZ0 & 1) && absZ1; - break; - default: - abort(); - } - if (increment) { - ++absZ0; - if (absZ0 == 0) { - float_raise(float_flag_invalid, status); - return UINT64_MAX; - } - if (!(absZ1 << 1) && roundNearestEven) { - absZ0 &= ~1; - } - } +uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s) +{ + return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s); +} - if (zSign && absZ0) { - float_raise(float_flag_invalid, status); - return 0; - } +uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s) +{ + return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s); +} - if (absZ1) { - status->float_exception_flags |= float_flag_inexact; - } - return absZ0; +/* + * Signed integer to floating-point conversions + */ + +float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status) +{ + FloatParts64 p; + + parts_sint_to_float(&p, a, scale, status); + return float16_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Normalizes the subnormal single-precision floating-point value represented -| by the denormalized significand `aSig'. The normalized exponent and -| significand are stored at the locations pointed to by `zExpPtr' and -| `zSigPtr', respectively. -*----------------------------------------------------------------------------*/ +float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status) +{ + return int64_to_float16_scalbn(a, scale, status); +} -static void - normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr) +float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status) { - int8_t shiftCount; + return int64_to_float16_scalbn(a, scale, status); +} - shiftCount = clz32(aSig) - 8; - *zSigPtr = aSig<float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - roundIncrement = 0x40; - break; - case float_round_to_zero: - roundIncrement = 0; - break; - case float_round_up: - roundIncrement = zSign ? 0 : 0x7f; - break; - case float_round_down: - roundIncrement = zSign ? 0x7f : 0; - break; - case float_round_to_odd: - roundIncrement = zSig & 0x80 ? 
0 : 0x7f; - break; - default: - abort(); - break; - } - roundBits = zSig & 0x7F; - if ( 0xFD <= (uint16_t) zExp ) { - if ( ( 0xFD < zExp ) - || ( ( zExp == 0xFD ) - && ( (int32_t) ( zSig + roundIncrement ) < 0 ) ) - ) { - bool overflow_to_inf = roundingMode != float_round_to_odd && - roundIncrement != 0; - float_raise(float_flag_overflow | float_flag_inexact, status); - return packFloat32(zSign, 0xFF, -!overflow_to_inf); - } - if ( zExp < 0 ) { - if (status->flush_to_zero) { - float_raise(float_flag_output_denormal, status); - return packFloat32(zSign, 0, 0); - } - isTiny = status->tininess_before_rounding - || (zExp < -1) - || (zSig + roundIncrement < 0x80000000); - shift32RightJamming( zSig, - zExp, &zSig ); - zExp = 0; - roundBits = zSig & 0x7F; - if (isTiny && roundBits) { - float_raise(float_flag_underflow, status); - } - if (roundingMode == float_round_to_odd) { - /* - * For round-to-odd case, the roundIncrement depends on - * zSig which just changed. - */ - roundIncrement = zSig & 0x80 ? 0 : 0x7f; - } - } - } - if (roundBits) { - status->float_exception_flags |= float_flag_inexact; - } - zSig = ( zSig + roundIncrement )>>7; - if (!(roundBits ^ 0x40) && roundNearestEven) { - zSig &= ~1; +float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status) +{ + FloatParts64 p; + + /* Without scaling, there are no overflow concerns. */ + if (likely(scale == 0) && can_use_fpu(status)) { + union_float32 ur; + ur.h = a; + return ur.s; } - if ( zSig == 0 ) zExp = 0; - return packFloat32( zSign, zExp, zSig ); + parts64_sint_to_float(&p, a, scale, status); + return float32_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and significand `zSig', and returns the proper single-precision floating- -| point value corresponding to the abstract input. This routine is just like -| `roundAndPackFloat32' except that `zSig' does not have to be normalized. -| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' -| floating-point exponent. -*----------------------------------------------------------------------------*/ +float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status) +{ + return int64_to_float32_scalbn(a, scale, status); +} -static float32 - normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig, - float_status *status) +float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status) { - int8_t shiftCount; + return int64_to_float32_scalbn(a, scale, status); +} - shiftCount = clz32(zSig) - 1; - return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - roundIncrement = 0x200; - break; - case float_round_to_zero: - roundIncrement = 0; - break; - case float_round_up: - roundIncrement = zSign ? 0 : 0x3ff; - break; - case float_round_down: - roundIncrement = zSign ? 0x3ff : 0; - break; - case float_round_to_odd: - roundIncrement = (zSig & 0x400) ? 
0 : 0x3ff; - break; - default: - abort(); - } - roundBits = zSig & 0x3FF; - if ( 0x7FD <= (uint16_t) zExp ) { - if ( ( 0x7FD < zExp ) - || ( ( zExp == 0x7FD ) - && ( (int64_t) ( zSig + roundIncrement ) < 0 ) ) - ) { - bool overflow_to_inf = roundingMode != float_round_to_odd && - roundIncrement != 0; - float_raise(float_flag_overflow | float_flag_inexact, status); - return packFloat64(zSign, 0x7FF, -(!overflow_to_inf)); - } - if ( zExp < 0 ) { - if (status->flush_to_zero) { - float_raise(float_flag_output_denormal, status); - return packFloat64(zSign, 0, 0); - } - isTiny = status->tininess_before_rounding - || (zExp < -1) - || (zSig + roundIncrement < UINT64_C(0x8000000000000000)); - shift64RightJamming( zSig, - zExp, &zSig ); - zExp = 0; - roundBits = zSig & 0x3FF; - if (isTiny && roundBits) { - float_raise(float_flag_underflow, status); - } - if (roundingMode == float_round_to_odd) { - /* - * For round-to-odd case, the roundIncrement depends on - * zSig which just changed. - */ - roundIncrement = (zSig & 0x400) ? 0 : 0x3ff; - } - } - } - if (roundBits) { - status->float_exception_flags |= float_flag_inexact; - } - zSig = ( zSig + roundIncrement )>>10; - if (!(roundBits ^ 0x200) && roundNearestEven) { - zSig &= ~1; - } - if ( zSig == 0 ) zExp = 0; - return packFloat64( zSign, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and significand `zSig', and returns the proper double-precision floating- -| point value corresponding to the abstract input. This routine is just like -| `roundAndPackFloat64' except that `zSig' does not have to be normalized. -| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' -| floating-point exponent. -*----------------------------------------------------------------------------*/ - -static float64 - normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig, - float_status *status) -{ - int8_t shiftCount; - - shiftCount = clz64(zSig) - 1; - return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - if ( roundingPrecision == 80 ) goto precision80; - if ( roundingPrecision == 64 ) { - roundIncrement = UINT64_C(0x0000000000000400); - roundMask = UINT64_C(0x00000000000007FF); - } - else if ( roundingPrecision == 32 ) { - roundIncrement = UINT64_C(0x0000008000000000); - roundMask = UINT64_C(0x000000FFFFFFFFFF); - } - else { - goto precision80; - } - zSig0 |= ( zSig1 != 0 ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - break; - case float_round_to_zero: - roundIncrement = 0; - break; - case float_round_up: - roundIncrement = zSign ? 0 : roundMask; - break; - case float_round_down: - roundIncrement = zSign ? 
roundMask : 0; - break; - default: - abort(); - } - roundBits = zSig0 & roundMask; - if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { - if ( ( 0x7FFE < zExp ) - || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) - ) { - goto overflow; - } - if ( zExp <= 0 ) { - if (status->flush_to_zero) { - float_raise(float_flag_output_denormal, status); - return packFloatx80(zSign, 0, 0); - } - isTiny = status->tininess_before_rounding - || (zExp < 0 ) - || (zSig0 <= zSig0 + roundIncrement); - shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); - zExp = 0; - roundBits = zSig0 & roundMask; - if (isTiny && roundBits) { - float_raise(float_flag_underflow, status); - } - if (roundBits) { - status->float_exception_flags |= float_flag_inexact; - } - zSig0 += roundIncrement; - if ( (int64_t) zSig0 < 0 ) zExp = 1; - roundIncrement = roundMask + 1; - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { - roundMask |= roundIncrement; - } - zSig0 &= ~ roundMask; - return packFloatx80( zSign, zExp, zSig0 ); - } - } - if (roundBits) { - status->float_exception_flags |= float_flag_inexact; - } - zSig0 += roundIncrement; - if ( zSig0 < roundIncrement ) { - ++zExp; - zSig0 = UINT64_C(0x8000000000000000); - } - roundIncrement = roundMask + 1; - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { - roundMask |= roundIncrement; - } - zSig0 &= ~ roundMask; - if ( zSig0 == 0 ) zExp = 0; - return packFloatx80( zSign, zExp, zSig0 ); - precision80: - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t)zSig1 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && zSig1; - break; - case float_round_down: - increment = zSign && zSig1; - break; - default: - abort(); - } - if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { - if ( ( 0x7FFE < zExp ) - || ( ( zExp == 0x7FFE ) - && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) ) - && increment - ) - ) { - roundMask = 0; - overflow: - float_raise(float_flag_overflow | float_flag_inexact, status); - if ( ( roundingMode == float_round_to_zero ) - || ( zSign && ( roundingMode == float_round_up ) ) - || ( ! 
zSign && ( roundingMode == float_round_down ) ) - ) { - return packFloatx80( zSign, 0x7FFE, ~ roundMask ); - } - return packFloatx80(zSign, - floatx80_infinity_high, - floatx80_infinity_low); - } - if ( zExp <= 0 ) { - isTiny = status->tininess_before_rounding - || (zExp < 0) - || !increment - || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF)); - shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); - zExp = 0; - if (isTiny && zSig1) { - float_raise(float_flag_underflow, status); - } - if (zSig1) { - status->float_exception_flags |= float_flag_inexact; - } - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t)zSig1 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && zSig1; - break; - case float_round_down: - increment = zSign && zSig1; - break; - default: - abort(); - } - if ( increment ) { - ++zSig0; - if (!(zSig1 << 1) && roundNearestEven) { - zSig0 &= ~1; - } - if ( (int64_t) zSig0 < 0 ) zExp = 1; - } - return packFloatx80( zSign, zExp, zSig0 ); - } - } - if (zSig1) { - status->float_exception_flags |= float_flag_inexact; - } - if ( increment ) { - ++zSig0; - if ( zSig0 == 0 ) { - ++zExp; - zSig0 = UINT64_C(0x8000000000000000); - } - else { - if (!(zSig1 << 1) && roundNearestEven) { - zSig0 &= ~1; - } - } - } - else { - if ( zSig0 == 0 ) zExp = 0; - } - return packFloatx80( zSign, zExp, zSig0 ); - -} - -/*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent -| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1', -| and returns the proper extended double-precision floating-point value -| corresponding to the abstract input. This routine is just like -| `roundAndPackFloatx80' except that the input significand does not have to be -| normalized. -*----------------------------------------------------------------------------*/ - -floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, - bool zSign, int32_t zExp, - uint64_t zSig0, uint64_t zSig1, - float_status *status) -{ - int8_t shiftCount; - - if ( zSig0 == 0 ) { - zSig0 = zSig1; - zSig1 = 0; - zExp -= 64; - } - shiftCount = clz64(zSig0); - shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); - zExp -= shiftCount; - return roundAndPackFloatx80(roundingPrecision, zSign, zExp, - zSig0, zSig1, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the least-significant 64 fraction bits of the quadruple-precision -| floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline uint64_t extractFloat128Frac1( float128 a ) -{ - - return a.low; - -} - -/*---------------------------------------------------------------------------- -| Returns the most-significant 48 fraction bits of the quadruple-precision -| floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline uint64_t extractFloat128Frac0( float128 a ) -{ - - return a.high & UINT64_C(0x0000FFFFFFFFFFFF); - -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the quadruple-precision floating-point value -| `a'. 
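The extractFloat128* helpers being deleted just slice the IEEE binary128 fields out of the two 64-bit words: sign in bit 63 of the high word, a 15-bit exponent in bits 48..62, and the 112-bit fraction split 48/64 across the high and low words. The same slicing as standalone code:

#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t high, low; } f128_words;   /* high:low, as in float128 */

static int      f128_sign(f128_words a)    { return a.high >> 63; }
static int32_t  f128_exp(f128_words a)     { return (a.high >> 48) & 0x7FFF; }
static uint64_t f128_frac_hi(f128_words a) { return a.high & 0x0000FFFFFFFFFFFFULL; }
static uint64_t f128_frac_lo(f128_words a) { return a.low; }

int main(void)
{
    f128_words one = { 0x3FFF000000000000ULL, 0 };    /* 1.0 in binary128 */

    printf("sign %d exp 0x%04X frac %012llX:%016llX\n",
           f128_sign(one), (unsigned)f128_exp(one),
           (unsigned long long)f128_frac_hi(one),
           (unsigned long long)f128_frac_lo(one));
    return 0;
}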
-*----------------------------------------------------------------------------*/ - -static inline int32_t extractFloat128Exp( float128 a ) -{ - - return ( a.high>>48 ) & 0x7FFF; - -} - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the quadruple-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline bool extractFloat128Sign(float128 a) -{ - return a.high >> 63; -} - -/*---------------------------------------------------------------------------- -| Normalizes the subnormal quadruple-precision floating-point value -| represented by the denormalized significand formed by the concatenation of -| `aSig0' and `aSig1'. The normalized exponent is stored at the location -| pointed to by `zExpPtr'. The most significant 49 bits of the normalized -| significand are stored at the location pointed to by `zSig0Ptr', and the -| least significant 64 bits of the normalized significand are stored at the -| location pointed to by `zSig1Ptr'. -*----------------------------------------------------------------------------*/ - -static void - normalizeFloat128Subnormal( - uint64_t aSig0, - uint64_t aSig1, - int32_t *zExpPtr, - uint64_t *zSig0Ptr, - uint64_t *zSig1Ptr - ) -{ - int8_t shiftCount; - - if ( aSig0 == 0 ) { - shiftCount = clz64(aSig1) - 15; - if ( shiftCount < 0 ) { - *zSig0Ptr = aSig1>>( - shiftCount ); - *zSig1Ptr = aSig1<<( shiftCount & 63 ); - } - else { - *zSig0Ptr = aSig1<float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t)zSig2 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && zSig2; - break; - case float_round_down: - increment = zSign && zSig2; - break; - case float_round_to_odd: - increment = !(zSig1 & 0x1) && zSig2; - break; - default: - abort(); - } - if ( 0x7FFD <= (uint32_t) zExp ) { - if ( ( 0x7FFD < zExp ) - || ( ( zExp == 0x7FFD ) - && eq128( - UINT64_C(0x0001FFFFFFFFFFFF), - UINT64_C(0xFFFFFFFFFFFFFFFF), - zSig0, - zSig1 - ) - && increment - ) - ) { - float_raise(float_flag_overflow | float_flag_inexact, status); - if ( ( roundingMode == float_round_to_zero ) - || ( zSign && ( roundingMode == float_round_up ) ) - || ( ! 
zSign && ( roundingMode == float_round_down ) ) - || (roundingMode == float_round_to_odd) - ) { - return - packFloat128( - zSign, - 0x7FFE, - UINT64_C(0x0000FFFFFFFFFFFF), - UINT64_C(0xFFFFFFFFFFFFFFFF) - ); - } - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( zExp < 0 ) { - if (status->flush_to_zero) { - float_raise(float_flag_output_denormal, status); - return packFloat128(zSign, 0, 0, 0); - } - isTiny = status->tininess_before_rounding - || (zExp < -1) - || !increment - || lt128(zSig0, zSig1, - UINT64_C(0x0001FFFFFFFFFFFF), - UINT64_C(0xFFFFFFFFFFFFFFFF)); - shift128ExtraRightJamming( - zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); - zExp = 0; - if (isTiny && zSig2) { - float_raise(float_flag_underflow, status); - } - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t)zSig2 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && zSig2; - break; - case float_round_down: - increment = zSign && zSig2; - break; - case float_round_to_odd: - increment = !(zSig1 & 0x1) && zSig2; - break; - default: - abort(); - } - } - } - if (zSig2) { - status->float_exception_flags |= float_flag_inexact; - } - if ( increment ) { - add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); - if ((zSig2 + zSig2 == 0) && roundNearestEven) { - zSig1 &= ~1; - } - } - else { - if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; - } - return packFloat128( zSign, zExp, zSig0, zSig1 ); + FloatParts64 p; + parts_sint_to_float(&p, a, scale, status); + return bfloat16_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and significand formed by the concatenation of `zSig0' and `zSig1', and -| returns the proper quadruple-precision floating-point value corresponding -| to the abstract input. This routine is just like `roundAndPackFloat128' -| except that the input significand has fewer bits and does not have to be -| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- -| point exponent. -*----------------------------------------------------------------------------*/ - -static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp, - uint64_t zSig0, uint64_t zSig1, - float_status *status) +bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status) { - int8_t shiftCount; - uint64_t zSig2; - - if ( zSig0 == 0 ) { - zSig0 = zSig1; - zSig1 = 0; - zExp -= 64; - } - shiftCount = clz64(zSig0) - 15; - if ( 0 <= shiftCount ) { - zSig2 = 0; - shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); - } - else { - shift128ExtraRightJamming( - zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); - } - zExp -= shiftCount; - return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); + return int64_to_bfloat16_scalbn(a, scale, status); +} +bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status) +{ + return int64_to_bfloat16_scalbn(a, scale, status); } +bfloat16 int64_to_bfloat16(int64_t a, float_status *status) +{ + return int64_to_bfloat16_scalbn(a, 0, status); +} -/*---------------------------------------------------------------------------- -| Returns the result of converting the 32-bit two's complement integer `a' -| to the extended double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. 
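All of the narrower signed-integer-to-bfloat16 entry points added here funnel into int64_to_bfloat16_scalbn(), so every width shares one conversion and rounding path; the plain (non-scalbn) names are simply the scale == 0 case. A small usage sketch, assuming the QEMU build environment and the declarations from include/fpu/softfloat.h (count_to_bf16 is a hypothetical helper, not part of the patch):

#include "qemu/osdep.h"
#include "fpu/softfloat.h"

/* Convert a signed count to bfloat16; with a non-zero shift the value
 * converted is count * 2**shift, rounded once at bfloat16 precision. */
static bfloat16 count_to_bf16(int32_t count, int shift, float_status *st)
{
    if (shift == 0) {
        /* Equivalent to the plain int32_to_bfloat16(count, st) wrapper. */
        return int32_to_bfloat16_scalbn(count, 0, st);
    }
    return int64_to_bfloat16_scalbn(count, shift, st);
}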
-*----------------------------------------------------------------------------*/ +bfloat16 int32_to_bfloat16(int32_t a, float_status *status) +{ + return int64_to_bfloat16_scalbn(a, 0, status); +} -floatx80 int32_to_floatx80(int32_t a, float_status *status) +bfloat16 int16_to_bfloat16(int16_t a, float_status *status) { - bool zSign; - uint32_t absA; - int8_t shiftCount; - uint64_t zSig; + return int64_to_bfloat16_scalbn(a, 0, status); +} - if ( a == 0 ) return packFloatx80( 0, 0, 0 ); - zSign = ( a < 0 ); - absA = zSign ? - a : a; - shiftCount = clz32(absA) + 32; - zSig = absA; - return packFloatx80( zSign, 0x403E - shiftCount, zSig<>= 1; - } - q = ( bSig <= aSig ); - if ( q ) aSig -= bSig; - if ( 0 < expDiff ) { - q = ( ( (uint64_t) aSig )<<32 ) / bSig; - q >>= 32 - expDiff; - bSig >>= 2; - aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; - } - else { - aSig >>= 2; - bSig >>= 2; - } - } - else { - if ( bSig <= aSig ) aSig -= bSig; - aSig64 = ( (uint64_t) aSig )<<40; - bSig64 = ( (uint64_t) bSig )<<40; - expDiff -= 64; - while ( 0 < expDiff ) { - q64 = estimateDiv128To64( aSig64, 0, bSig64 ); - q64 = ( 2 < q64 ) ? q64 - 2 : 0; - aSig64 = - ( ( bSig * q64 )<<38 ); - expDiff -= 62; - } - expDiff += 64; - q64 = estimateDiv128To64( aSig64, 0, bSig64 ); - q64 = ( 2 < q64 ) ? q64 - 2 : 0; - q = q64>>( 64 - expDiff ); - bSig <<= 6; - aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; - } - do { - alternateASig = aSig; - ++q; - aSig -= bSig; - } while ( 0 <= (int32_t) aSig ); - sigMean = aSig + alternateASig; - if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { - aSig = alternateASig; - } - zSign = ( (int32_t) aSig < 0 ); - if ( zSign ) aSig = - aSig; - return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status); + return uint64_to_float32_scalbn(a, scale, status); } +float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status) +{ + return uint64_to_float32_scalbn(a, scale, status); +} +float32 uint64_to_float32(uint64_t a, float_status *status) +{ + return uint64_to_float32_scalbn(a, 0, status); +} -/*---------------------------------------------------------------------------- -| Returns the binary exponential of the single-precision floating-point value -| `a'. The operation is performed according to the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. -| -| Uses the following identities: -| -| 1. ------------------------------------------------------------------------- -| x x*ln(2) -| 2 = e -| -| 2. ------------------------------------------------------------------------- -| 2 3 4 5 n -| x x x x x x x -| e = 1 + --- + --- + --- + --- + --- + ... + --- + ... -| 1! 2! 3! 4! 5! n! 
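For readability, the column-aligned identities in the removed float32_exp2() comment are, written out:

    2^x = e^{x \ln 2}, \qquad e^x = \sum_{n=0}^{\infty} \frac{x^n}{n!}

which is why that implementation multiplied the input by float64_ln2 and then accumulated the series in float64 using the fifteen reciprocal-factorial coefficients, i.e. the partial sum through the x^15/15! term.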
-*----------------------------------------------------------------------------*/ - -static const float64 float32_exp2_coefficients[15] = +float32 uint32_to_float32(uint32_t a, float_status *status) { - const_float64( 0x3ff0000000000000ll ), /* 1 */ - const_float64( 0x3fe0000000000000ll ), /* 2 */ - const_float64( 0x3fc5555555555555ll ), /* 3 */ - const_float64( 0x3fa5555555555555ll ), /* 4 */ - const_float64( 0x3f81111111111111ll ), /* 5 */ - const_float64( 0x3f56c16c16c16c17ll ), /* 6 */ - const_float64( 0x3f2a01a01a01a01all ), /* 7 */ - const_float64( 0x3efa01a01a01a01all ), /* 8 */ - const_float64( 0x3ec71de3a556c734ll ), /* 9 */ - const_float64( 0x3e927e4fb7789f5cll ), /* 10 */ - const_float64( 0x3e5ae64567f544e4ll ), /* 11 */ - const_float64( 0x3e21eed8eff8d898ll ), /* 12 */ - const_float64( 0x3de6124613a86d09ll ), /* 13 */ - const_float64( 0x3da93974a8c07c9dll ), /* 14 */ - const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */ -}; + return uint64_to_float32_scalbn(a, 0, status); +} -float32 float32_exp2(float32 a, float_status *status) +float32 uint16_to_float32(uint16_t a, float_status *status) { - bool aSign; - int aExp; - uint32_t aSig; - float64 r, x, xn; - int i; - a = float32_squash_input_denormal(a, status); + return uint64_to_float32_scalbn(a, 0, status); +} - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); +float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status) +{ + FloatParts64 p; - if ( aExp == 0xFF) { - if (aSig) { - return propagateFloat32NaN(a, float32_zero, status); - } - return (aSign) ? float32_zero : a; + /* Without scaling, there are no overflow concerns. */ + if (likely(scale == 0) && can_use_fpu(status)) { + union_float64 ur; + ur.h = a; + return ur.s; } - if (aExp == 0) { - if (aSig == 0) return float32_one; - } - - float_raise(float_flag_inexact, status); - /* ******************************* */ - /* using float64 for approximation */ - /* ******************************* */ - x = float32_to_float64(a, status); - x = float64_mul(x, float64_ln2, status); + parts_uint_to_float(&p, a, scale, status); + return float64_round_pack_canonical(&p, status); +} - xn = x; - r = float64_one; - for (i = 0 ; i < 15 ; i++) { - float64 f; +float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status) +{ + return uint64_to_float64_scalbn(a, scale, status); +} - f = float64_mul(xn, float32_exp2_coefficients[i], status); - r = float64_add(r, f, status); +float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status) +{ + return uint64_to_float64_scalbn(a, scale, status); +} - xn = float64_mul(xn, x, status); - } +float64 uint64_to_float64(uint64_t a, float_status *status) +{ + return uint64_to_float64_scalbn(a, 0, status); +} - return float64_to_float32(r, status); +float64 uint32_to_float64(uint32_t a, float_status *status) +{ + return uint64_to_float64_scalbn(a, 0, status); } -/*---------------------------------------------------------------------------- -| Returns the binary log of the single-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ -float32 float32_log2(float32 a, float_status *status) +float64 uint16_to_float64(uint16_t a, float_status *status) { - bool aSign, zSign; - int aExp; - uint32_t aSig, zSig, i; + return uint64_to_float64_scalbn(a, 0, status); +} - a = float32_squash_input_denormal(a, status); - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); +bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status) +{ + FloatParts64 p; - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - if ( aSign ) { - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - if ( aExp == 0xFF ) { - if (aSig) { - return propagateFloat32NaN(a, float32_zero, status); - } - return a; - } + parts_uint_to_float(&p, a, scale, status); + return bfloat16_round_pack_canonical(&p, status); +} - aExp -= 0x7F; - aSig |= 0x00800000; - zSign = aExp < 0; - zSig = aExp << 23; +bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status) +{ + return uint64_to_bfloat16_scalbn(a, scale, status); +} - for (i = 1 << 22; i > 0; i >>= 1) { - aSig = ( (uint64_t)aSig * aSig ) >> 23; - if ( aSig & 0x01000000 ) { - aSig >>= 1; - zSig |= i; - } - } +bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status) +{ + return uint64_to_bfloat16_scalbn(a, scale, status); +} - if ( zSign ) - zSig = -zSig; +bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status) +{ + return uint64_to_bfloat16_scalbn(a, 0, status); +} - return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status); +bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status) +{ + return uint64_to_bfloat16_scalbn(a, 0, status); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the extended double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ +bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status) +{ + return uint64_to_bfloat16_scalbn(a, 0, status); +} -floatx80 float64_to_floatx80(float64 a, float_status *status) -{ - bool aSign; - int aExp; - uint64_t aSig; - - a = float64_squash_input_denormal(a, status); - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if (aSig) { - floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status), - status); - return floatx80_silence_nan(res, status); - } - return packFloatx80(aSign, - floatx80_infinity_high, - floatx80_infinity_low); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - return - packFloatx80( - aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11); +float128 uint64_to_float128(uint64_t a, float_status *status) +{ + FloatParts128 p; + parts_uint_to_float(&p, a, 0, status); + return float128_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the quadruple-precision floating-point format. 
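The unsigned conversions added here mirror the signed ones: every uintN_to_float64* / uintN_to_bfloat16* wrapper funnels into the corresponding 64-bit _scalbn worker, and uintN_to_FMT(a, s) is shorthand for uintN_to_FMT_scalbn(a, 0, s), so the converted value is a * 2**scale with a single rounding. A short usage sketch, again assuming the API from include/fpu/softfloat.h (pages_to_bytes_f64 is a hypothetical example, not part of the patch):

#include "qemu/osdep.h"
#include "fpu/softfloat.h"

/* Express a 4KiB-page count in bytes as float64: pages * 2**12,
 * rounded once, with exception flags accumulated in *st. */
static float64 pages_to_bytes_f64(uint32_t pages, float_status *st)
{
    return uint32_to_float64_scalbn(pages, 12, st);
}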
The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ +/* + * Minimum and maximum + */ -float128 float64_to_float128(float64 a, float_status *status) +static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags) { - bool aSign; - int aExp; - uint64_t aSig, zSig0, zSig1; + FloatParts64 pa, pb, *pr; - a = float64_squash_input_denormal(a, status); - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if (aSig) { - return commonNaNToFloat128(float64ToCommonNaN(a, status), status); - } - return packFloat128( aSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - --aExp; - } - shift128Right( aSig, 0, 4, &zSig0, &zSig1 ); - return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 ); + float16_unpack_canonical(&pa, a, s); + float16_unpack_canonical(&pb, b, s); + pr = parts_minmax(&pa, &pb, s, flags); + return float16_round_pack_canonical(pr, s); } +static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b, + float_status *s, int flags) +{ + FloatParts64 pa, pb, *pr; -/*---------------------------------------------------------------------------- -| Returns the remainder of the double-precision floating-point value `a' -| with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ + bfloat16_unpack_canonical(&pa, a, s); + bfloat16_unpack_canonical(&pb, b, s); + pr = parts_minmax(&pa, &pb, s, flags); -float64 float64_rem(float64 a, float64 b, float_status *status) + return bfloat16_round_pack_canonical(pr, s); +} + +static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags) { - bool aSign, zSign; - int aExp, bExp, expDiff; - uint64_t aSig, bSig; - uint64_t q, alternateASig; - int64_t sigMean; - - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); - if ( aExp == 0x7FF ) { - if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { - return propagateFloat64NaN(a, b, status); - } - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - if ( bExp == 0x7FF ) { - if (bSig) { - return propagateFloat64NaN(a, b, status); - } - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - normalizeFloat64Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return a; - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - expDiff = aExp - bExp; - aSig = (aSig | UINT64_C(0x0010000000000000)) << 11; - bSig = (bSig | UINT64_C(0x0010000000000000)) << 11; - if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; - aSig >>= 1; - } - q = ( bSig <= aSig ); - if ( q ) aSig -= bSig; - expDiff -= 64; - while ( 0 < expDiff ) { - q = estimateDiv128To64( aSig, 0, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - aSig = - ( ( bSig>>2 ) * q ); - expDiff -= 62; - } - expDiff += 64; - if ( 0 < expDiff ) { - q = estimateDiv128To64( aSig, 0, bSig ); - q = ( 2 < q ) ? 
q - 2 : 0; - q >>= 64 - expDiff; - bSig >>= 2; - aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; - } - else { - aSig >>= 2; - bSig >>= 2; - } - do { - alternateASig = aSig; - ++q; - aSig -= bSig; - } while ( 0 <= (int64_t) aSig ); - sigMean = aSig + alternateASig; - if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { - aSig = alternateASig; - } - zSign = ( (int64_t) aSig < 0 ); - if ( zSign ) aSig = - aSig; - return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status); + FloatParts64 pa, pb, *pr; + float32_unpack_canonical(&pa, a, s); + float32_unpack_canonical(&pb, b, s); + pr = parts_minmax(&pa, &pb, s, flags); + + return float32_round_pack_canonical(pr, s); } -/*---------------------------------------------------------------------------- -| Returns the binary log of the double-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ -float64 float64_log2(float64 a, float_status *status) +static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags) { - bool aSign, zSign; - int aExp; - uint64_t aSig, aSig0, aSig1, zSig, i; - a = float64_squash_input_denormal(a, status); + FloatParts64 pa, pb, *pr; - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); + float64_unpack_canonical(&pa, a, s); + float64_unpack_canonical(&pb, b, s); + pr = parts_minmax(&pa, &pb, s, flags); - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - if ( aSign ) { - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - if ( aExp == 0x7FF ) { - if (aSig) { - return propagateFloat64NaN(a, float64_zero, status); - } - return a; - } + return float64_round_pack_canonical(pr, s); +} - aExp -= 0x3FF; - aSig |= UINT64_C(0x0010000000000000); - zSign = aExp < 0; - zSig = (uint64_t)aExp << 52; - for (i = 1LL << 51; i > 0; i >>= 1) { - mul64To128( aSig, aSig, &aSig0, &aSig1 ); - aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 ); - if ( aSig & UINT64_C(0x0020000000000000) ) { - aSig >>= 1; - zSig |= i; - } - } +static float128 float128_minmax(float128 a, float128 b, + float_status *s, int flags) +{ + FloatParts128 pa, pb, *pr; - if ( zSign ) - zSig = -zSig; - return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status); + float128_unpack_canonical(&pa, a, s); + float128_unpack_canonical(&pb, b, s); + pr = parts_minmax(&pa, &pb, s, flags); + + return float128_round_pack_canonical(pr, s); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 32-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic---which means in particular that the conversion -| is rounded according to the current rounding mode. If `a' is a NaN, the -| largest positive integer is returned. Otherwise, if the conversion -| overflows, the largest integer with the same sign as `a' is returned. 
-*----------------------------------------------------------------------------*/ +#define MINMAX_1(type, name, flags) \ + type type##_##name(type a, type b, float_status *s) \ + { return type##_minmax(a, b, s, flags); } -int32_t floatx80_to_int32(floatx80 a, float_status *status) -{ - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig; +#define MINMAX_2(type) \ + MINMAX_1(type, max, 0) \ + MINMAX_1(type, maxnum, minmax_isnum) \ + MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \ + MINMAX_1(type, maximum_number, minmax_isnumber) \ + MINMAX_1(type, min, minmax_ismin) \ + MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \ + MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \ + MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber) \ - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1 << 31; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; - shiftCount = 0x4037 - aExp; - if ( shiftCount <= 0 ) shiftCount = 1; - shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt32(aSign, aSig, status); +MINMAX_2(float16) +MINMAX_2(bfloat16) +MINMAX_2(float32) +MINMAX_2(float64) +MINMAX_2(float128) -} +#undef MINMAX_1 +#undef MINMAX_2 -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 32-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic, except that the conversion is always rounded -| toward zero. If `a' is a NaN, the largest positive integer is returned. -| Otherwise, if the conversion overflows, the largest integer with the same -| sign as `a' is returned. -*----------------------------------------------------------------------------*/ +/* + * Floating point compare + */ -int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status) +static FloatRelation QEMU_FLATTEN +float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet) { - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig, savedASig; - int32_t z; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1 << 31; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( 0x401E < aExp ) { - if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; - goto invalid; - } - else if ( aExp < 0x3FFF ) { - if (aExp || aSig) { - status->float_exception_flags |= float_flag_inexact; - } - return 0; - } - shiftCount = 0x403E - aExp; - savedASig = aSig; - aSig >>= shiftCount; - z = aSig; - if ( aSign ) z = - z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_raise(float_flag_invalid, status); - return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; - } - if ( ( aSig<float_exception_flags |= float_flag_inexact; - } - return z; + FloatParts64 pa, pb; + float16_unpack_canonical(&pa, a, s); + float16_unpack_canonical(&pb, b, s); + return parts_compare(&pa, &pb, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 64-bit two's complement integer format. 
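The MINMAX_1/MINMAX_2 macros stamp the whole public min/max family out of the per-format FMT_minmax helper. Purely for readability, expanding MINMAX_1(float32, minnum, minmax_ismin | minmax_isnum) gives:

float32 float32_minnum(float32 a, float32 b, float_status *s)
{
    return float32_minmax(a, b, s, minmax_ismin | minmax_isnum);
}

so the behavioural differences between min/max/minnum/minnummag/minimum_number and friends are carried entirely by the flag bits passed down to parts_minmax().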
The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic---which means in particular that the conversion -| is rounded according to the current rounding mode. If `a' is a NaN, -| the largest positive integer is returned. Otherwise, if the conversion -| overflows, the largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ +FloatRelation float16_compare(float16 a, float16 b, float_status *s) +{ + return float16_do_compare(a, b, s, false); +} -int64_t floatx80_to_int64(floatx80 a, float_status *status) +FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s) { - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig, aSigExtra; + return float16_do_compare(a, b, s, true); +} - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1ULL << 63; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - shiftCount = 0x403E - aExp; - if ( shiftCount <= 0 ) { - if ( shiftCount ) { - float_raise(float_flag_invalid, status); - if (!aSign || floatx80_is_any_nan(a)) { - return INT64_MAX; - } - return INT64_MIN; - } - aSigExtra = 0; - } - else { - shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); - } - return roundAndPackInt64(aSign, aSig, aSigExtra, status); +static FloatRelation QEMU_SOFTFLOAT_ATTR +float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet) +{ + FloatParts64 pa, pb; + float32_unpack_canonical(&pa, a, s); + float32_unpack_canonical(&pb, b, s); + return parts_compare(&pa, &pb, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 64-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic, except that the conversion is always rounded -| toward zero. If `a' is a NaN, the largest positive integer is returned. -| Otherwise, if the conversion overflows, the largest integer with the same -| sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status) +static FloatRelation QEMU_FLATTEN +float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet) { - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig; - int64_t z; + union_float32 ua, ub; - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1ULL << 63; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - shiftCount = aExp - 0x403E; - if ( 0 <= shiftCount ) { - aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF); - if ( ( a.high != 0xC03E ) || aSig ) { - float_raise(float_flag_invalid, status); - if ( ! 
aSign || ( ( aExp == 0x7FFF ) && aSig ) ) { - return INT64_MAX; - } - } - return INT64_MIN; + ua.s = xa; + ub.s = xb; + + if (QEMU_NO_HARDFLOAT) { + goto soft; } - else if ( aExp < 0x3FFF ) { - if (aExp | aSig) { - status->float_exception_flags |= float_flag_inexact; + + float32_input_flush2(&ua.s, &ub.s, s); + if (isgreaterequal(ua.h, ub.h)) { + if (isgreater(ua.h, ub.h)) { + return float_relation_greater; } - return 0; + return float_relation_equal; } - z = aSig>>( - shiftCount ); - if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) { - status->float_exception_flags |= float_flag_inexact; + if (likely(isless(ua.h, ub.h))) { + return float_relation_less; } - if ( aSign ) z = - z; - return z; - + /* + * The only condition remaining is unordered. + * Fall through to set flags. + */ + soft: + return float32_do_compare(ua.s, ub.s, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the single-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ +FloatRelation float32_compare(float32 a, float32 b, float_status *s) +{ + return float32_hs_compare(a, b, s, false); +} -float32 floatx80_to_float32(floatx80 a, float_status *status) +FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s) { - bool aSign; - int32_t aExp; - uint64_t aSig; + return float32_hs_compare(a, b, s, true); +} - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) { - float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status), - status); - return float32_silence_nan(res, status); - } - return packFloat32( aSign, 0xFF, 0 ); - } - shift64RightJamming( aSig, 33, &aSig ); - if ( aExp || aSig ) aExp -= 0x3F81; - return roundAndPackFloat32(aSign, aExp, aSig, status); +static FloatRelation QEMU_SOFTFLOAT_ATTR +float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet) +{ + FloatParts64 pa, pb; + float64_unpack_canonical(&pa, a, s); + float64_unpack_canonical(&pb, b, s); + return parts_compare(&pa, &pb, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the double-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
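float32_hs_compare() (and the float64 twin that follows) classifies the relation with the host's ISO C comparison macros, which do not signal on quiet NaNs, and only falls back to the softfloat path for the unordered case so the exception flags still end up correct. A standalone sketch of the same relation logic using only <math.h>; the REL_* values here are chosen to match softfloat's float_relation_* encoding of -1/0/1/2:

#include <math.h>
#include <stdio.h>

enum { REL_LESS = -1, REL_EQUAL = 0, REL_GREATER = 1, REL_UNORDERED = 2 };

static int host_compare(float a, float b)
{
    if (isgreaterequal(a, b)) {
        return isgreater(a, b) ? REL_GREATER : REL_EQUAL;
    }
    if (isless(a, b)) {
        return REL_LESS;
    }
    /* Neither ordered relation holds: at least one operand is a NaN. */
    return REL_UNORDERED;
}

int main(void)
{
    printf("%d %d %d\n",
           host_compare(1.0f, 2.0f),   /* -1: less      */
           host_compare(3.0f, 3.0f),   /*  0: equal     */
           host_compare(NAN, 1.0f));   /*  2: unordered */
    return 0;
}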
-*----------------------------------------------------------------------------*/ - -float64 floatx80_to_float64(floatx80 a, float_status *status) +static FloatRelation QEMU_FLATTEN +float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet) { - bool aSign; - int32_t aExp; - uint64_t aSig, zSig; + union_float64 ua, ub; + + ua.s = xa; + ub.s = xb; - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return float64_default_nan(status); + if (QEMU_NO_HARDFLOAT) { + goto soft; } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) { - float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status), - status); - return float64_silence_nan(res, status); + + float64_input_flush2(&ua.s, &ub.s, s); + if (isgreaterequal(ua.h, ub.h)) { + if (isgreater(ua.h, ub.h)) { + return float_relation_greater; } - return packFloat64( aSign, 0x7FF, 0 ); + return float_relation_equal; } - shift64RightJamming( aSig, 1, &zSig ); - if ( aExp || aSig ) aExp -= 0x3C01; - return roundAndPackFloat64(aSign, aExp, zSig, status); - + if (likely(isless(ua.h, ub.h))) { + return float_relation_less; + } + /* + * The only condition remaining is unordered. + * Fall through to set flags. + */ + soft: + return float64_do_compare(ua.s, ub.s, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the quadruple-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ +FloatRelation float64_compare(float64 a, float64 b, float_status *s) +{ + return float64_hs_compare(a, b, s, false); +} -float128 floatx80_to_float128(floatx80 a, float_status *status) +FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s) { - bool aSign; - int aExp; - uint64_t aSig, zSig0, zSig1; + return float64_hs_compare(a, b, s, true); +} - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return float128_default_nan(status); - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) { - float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status), - status); - return float128_silence_nan(res, status); - } - shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); - return packFloat128( aSign, aExp, zSig0, zSig1 ); +static FloatRelation QEMU_FLATTEN +bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet) +{ + FloatParts64 pa, pb; + bfloat16_unpack_canonical(&pa, a, s); + bfloat16_unpack_canonical(&pb, b, s); + return parts_compare(&pa, &pb, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Rounds the extended double-precision floating-point value `a' -| to the precision provided by floatx80_rounding_precision and returns the -| result as an extended double-precision floating-point value. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
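Each format keeps the same pair of public wrappers: FMT_compare() is the signalling comparison (is_quiet == false, so any NaN operand raises invalid), while FMT_compare_quiet() only signals for signalling NaNs. A usage sketch assuming the softfloat API (f64_lt_quiet is a hypothetical predicate, not part of the patch):

#include <stdbool.h>
#include "fpu/softfloat.h"

/* "a < b" where quiet NaNs simply compare as not-less, without
 * raising the invalid-operation flag. */
static bool f64_lt_quiet(float64 a, float64 b, float_status *st)
{
    return float64_compare_quiet(a, b, st) == float_relation_less;
}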
-*----------------------------------------------------------------------------*/ - -floatx80 floatx80_round(floatx80 a, float_status *status) +FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s) { - return roundAndPackFloatx80(status->floatx80_rounding_precision, - extractFloatx80Sign(a), - extractFloatx80Exp(a), - extractFloatx80Frac(a), 0, status); + return bfloat16_do_compare(a, b, s, false); } -/*---------------------------------------------------------------------------- -| Rounds the extended double-precision floating-point value `a' to an integer, -| and returns the result as an extended quadruple-precision floating-point -| value. The operation is performed according to the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_round_to_int(floatx80 a, float_status *status) +FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s) { - bool aSign; - int32_t aExp; - uint64_t lastBitMask, roundBitsMask; - floatx80 z; + return bfloat16_do_compare(a, b, s, true); +} - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aExp = extractFloatx80Exp( a ); - if ( 0x403E <= aExp ) { - if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) { - return propagateFloatx80NaN(a, a, status); - } - return a; - } - if ( aExp < 0x3FFF ) { - if ( ( aExp == 0 ) - && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) { - return a; - } - status->float_exception_flags |= float_flag_inexact; - aSign = extractFloatx80Sign( a ); - switch (status->float_rounding_mode) { - case float_round_nearest_even: - if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) - ) { - return - packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000)); - } - break; - case float_round_ties_away: - if (aExp == 0x3FFE) { - return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000)); - } - break; - case float_round_down: - return - aSign ? - packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000)) - : packFloatx80( 0, 0, 0 ); - case float_round_up: - return - aSign ? 
packFloatx80( 1, 0, 0 ) - : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000)); - - case float_round_to_zero: - break; - default: - g_assert_not_reached(); - } - return packFloatx80( aSign, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x403E - aExp; - roundBitsMask = lastBitMask - 1; - z = a; - switch (status->float_rounding_mode) { - case float_round_nearest_even: - z.low += lastBitMask>>1; - if ((z.low & roundBitsMask) == 0) { - z.low &= ~lastBitMask; - } - break; - case float_round_ties_away: - z.low += lastBitMask >> 1; - break; - case float_round_to_zero: - break; - case float_round_up: - if (!extractFloatx80Sign(z)) { - z.low += roundBitsMask; - } - break; - case float_round_down: - if (extractFloatx80Sign(z)) { - z.low += roundBitsMask; - } - break; - default: - abort(); - } - z.low &= ~ roundBitsMask; - if ( z.low == 0 ) { - ++z.high; - z.low = UINT64_C(0x8000000000000000); - } - if (z.low != a.low) { - status->float_exception_flags |= float_flag_inexact; - } - return z; +static FloatRelation QEMU_FLATTEN +float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet) +{ + FloatParts128 pa, pb; + float128_unpack_canonical(&pa, a, s); + float128_unpack_canonical(&pb, b, s); + return parts_compare(&pa, &pb, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the extended double- -| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is -| negated before being returned. `zSign' is ignored if the result is a NaN. -| The addition is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ +FloatRelation float128_compare(float128 a, float128 b, float_status *s) +{ + return float128_do_compare(a, b, s, false); +} -static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign, - float_status *status) -{ - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - int32_t expDiff; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) { - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return a; - } - if ( bExp == 0 ) --expDiff; - shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return packFloatx80(zSign, - floatx80_infinity_high, - floatx80_infinity_low); - } - if ( aExp == 0 ) ++expDiff; - shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); - zExp = bExp; - } - else { - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { - return propagateFloatx80NaN(a, b, status); - } - return a; - } - zSig1 = 0; - zSig0 = aSig + bSig; - if ( aExp == 0 ) { - if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) { - /* At least one of the values is a pseudo-denormal, - * and there is a carry out of the result. 
*/ - zExp = 1; - goto shiftRight1; - } - if (zSig0 == 0) { - return packFloatx80(zSign, 0, 0); - } - normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); - goto roundAndPack; - } - zExp = aExp; - goto shiftRight1; - } - zSig0 = aSig + bSig; - if ( (int64_t) zSig0 < 0 ) goto roundAndPack; - shiftRight1: - shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); - zSig0 |= UINT64_C(0x8000000000000000); - ++zExp; - roundAndPack: - return roundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); +FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s) +{ + return float128_do_compare(a, b, s, true); } -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the absolute values of the extended -| double-precision floating-point values `a' and `b'. If `zSign' is 1, the -| difference is negated before being returned. `zSign' is ignored if the -| result is a NaN. The subtraction is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ +static FloatRelation QEMU_FLATTEN +floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet) +{ + FloatParts128 pa, pb; -static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign, - float_status *status) -{ - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - int32_t expDiff; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { - return propagateFloatx80NaN(a, b, status); - } - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; - } - zSig1 = 0; - if ( bSig < aSig ) goto aBigger; - if ( aSig < bSig ) goto bBigger; - return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0); - bExpBigger: - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return packFloatx80(zSign ^ 1, floatx80_infinity_high, - floatx80_infinity_low); - } - if ( aExp == 0 ) ++expDiff; - shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); - bBigger: - sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return a; + if (!floatx80_unpack_canonical(&pa, a, s) || + !floatx80_unpack_canonical(&pb, b, s)) { + return float_relation_unordered; } - if ( bExp == 0 ) --expDiff; - shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); - aBigger: - sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); - zExp = aExp; - normalizeRoundAndPack: - return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); + return parts_compare(&pa, &pb, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Returns the result of adding the extended double-precision floating-point -| values `a' and `b'. The operation is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
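Unlike the other formats, floatx80_do_compare() also has to cope with the invalid 80-bit encodings: when floatx80_unpack_canonical() rejects an operand, the comparison simply reports unordered, so callers can treat bad encodings the same way they treat NaNs. A sketch of such a caller, assuming the softfloat API (fx80_ordered_le is hypothetical):

#include <stdbool.h>
#include "fpu/softfloat.h"

/* Ordered "a <= b" on floatx80; NaNs and invalid encodings yield
 * float_relation_unordered and therefore return false. */
static bool fx80_ordered_le(floatx80 a, floatx80 b, float_status *st)
{
    FloatRelation r = floatx80_compare(a, b, st);
    return r == float_relation_less || r == float_relation_equal;
}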
-*----------------------------------------------------------------------------*/ +FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_do_compare(a, b, s, false); +} -floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) +FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s) { - bool aSign, bSign; + return floatx80_do_compare(a, b, s, true); +} - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign == bSign ) { - return addFloatx80Sigs(a, b, aSign, status); - } - else { - return subFloatx80Sigs(a, b, aSign, status); - } +/* + * Scale by 2**N + */ + +float16 float16_scalbn(float16 a, int n, float_status *status) +{ + FloatParts64 p; + float16_unpack_canonical(&p, a, status); + parts_scalbn(&p, n, status); + return float16_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the extended double-precision floating- -| point values `a' and `b'. The operation is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ +float32 float32_scalbn(float32 a, int n, float_status *status) +{ + FloatParts64 p; -floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) + float32_unpack_canonical(&p, a, status); + parts_scalbn(&p, n, status); + return float32_round_pack_canonical(&p, status); +} + +float64 float64_scalbn(float64 a, int n, float_status *status) { - bool aSign, bSign; + FloatParts64 p; - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign == bSign ) { - return subFloatx80Sigs(a, b, aSign, status); - } - else { - return addFloatx80Sigs(a, b, aSign, status); - } + float64_unpack_canonical(&p, a, status); + parts_scalbn(&p, n, status); + return float64_round_pack_canonical(&p, status); +} + +bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status) +{ + FloatParts64 p; + bfloat16_unpack_canonical(&p, a, status); + parts_scalbn(&p, n, status); + return bfloat16_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the extended double-precision floating- -| point values `a' and `b'. The operation is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
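The scalbn family added here multiplies by 2**n with a single rounding: unpack, let parts_scalbn() adjust the exponent, then repack, so overflow, underflow and inexact are reported just as for any other arithmetic result. A one-line usage sketch against the assumed softfloat API (f32_halve is hypothetical):

#include "fpu/softfloat.h"

/* Compute x * 2**-1 through the softfloat path, so flags in *st
 * (e.g. underflow for tiny x) are updated correctly. */
static float32 f32_halve(float32 x, float_status *st)
{
    return float32_scalbn(x, -1, st);
}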
-*----------------------------------------------------------------------------*/ +float128 float128_scalbn(float128 a, int n, float_status *status) +{ + FloatParts128 p; + + float128_unpack_canonical(&p, a, status); + parts_scalbn(&p, n, status); + return float128_round_pack_canonical(&p, status); +} -floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status) +floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status) { - bool aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; + FloatParts128 p; - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); + if (!floatx80_unpack_canonical(&p, a, status)) { return floatx80_default_nan(status); } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) - || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN(a, b, status); - } - if ( ( bExp | bSig ) == 0 ) goto invalid; - return packFloatx80(zSign, floatx80_infinity_high, - floatx80_infinity_low); - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - return packFloatx80(zSign, floatx80_infinity_high, - floatx80_infinity_low); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - zExp = aExp + bExp - 0x3FFE; - mul64To128( aSig, bSig, &zSig0, &zSig1 ); - if ( 0 < (int64_t) zSig0 ) { - shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); - --zExp; - } - return roundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); + parts_scalbn(&p, n, status); + return floatx80_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Returns the result of dividing the extended double-precision floating-point -| value `a' by the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ +/* + * Square Root + */ -floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) +float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status) { - bool aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - uint64_t rem0, rem1, rem2, term0, term1, term2; + FloatParts64 p; - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - goto invalid; - } - return packFloatx80(zSign, floatx80_infinity_high, - floatx80_infinity_low); - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return packFloatx80( zSign, 0, 0 ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - float_raise(float_flag_divbyzero, status); - return packFloatx80(zSign, floatx80_infinity_high, - floatx80_infinity_low); - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - zExp = aExp - bExp + 0x3FFE; - rem1 = 0; - if ( bSig <= aSig ) { - shift128Right( aSig, 0, 1, &aSig, &rem1 ); - ++zExp; - } - zSig0 = estimateDiv128To64( aSig, rem1, bSig ); - mul64To128( bSig, zSig0, &term0, &term1 ); - sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); - while ( (int64_t) rem0 < 0 ) { - --zSig0; - add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, bSig ); - if ( (uint64_t) ( zSig1<<1 ) <= 8 ) { - mul64To128( bSig, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); - } - zSig1 |= ( ( rem1 | rem2 ) != 0 ); - } - return roundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); + float16_unpack_canonical(&p, a, status); + parts_sqrt(&p, status, &float16_params); + return float16_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Returns the remainder of the extended double-precision floating-point value -| `a' with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic, -| if 'mod' is false; if 'mod' is true, return the remainder based on truncating -| the quotient toward zero instead. '*quotient' is set to the low 64 bits of -| the absolute value of the integer quotient. 
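Square root now follows the same unpack / parts_sqrt() / repack shape for every width, with the per-format parameter block (float16_params above) selecting the precision. A usage sketch that stays inside softfloat so the flags accumulate in one float_status; it assumes the float16 add/mul entry points from the same header, and f16_hypot itself is a hypothetical helper:

#include "fpu/softfloat.h"

/* Naive Euclidean norm of a 2-vector in float16 (overflows for large
 * inputs; shown only to illustrate chaining softfloat operations). */
static float16 f16_hypot(float16 x, float16 y, float_status *st)
{
    float16 xx = float16_mul(x, x, st);
    float16 yy = float16_mul(y, y, st);
    return float16_sqrt(float16_add(xx, yy, st), st);
}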
-*----------------------------------------------------------------------------*/ +static float32 QEMU_SOFTFLOAT_ATTR +soft_f32_sqrt(float32 a, float_status *status) +{ + FloatParts64 p; + + float32_unpack_canonical(&p, a, status); + parts_sqrt(&p, status, &float32_params); + return float32_round_pack_canonical(&p, status); +} -floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient, - float_status *status) +static float64 QEMU_SOFTFLOAT_ATTR +soft_f64_sqrt(float64 a, float_status *status) { - bool aSign, zSign; - int32_t aExp, bExp, expDiff, aExpOrig; - uint64_t aSig0, aSig1, bSig; - uint64_t q, term0, term1, alternateASig0, alternateASig1; + FloatParts64 p; - *quotient = 0; - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSig0 = extractFloatx80Frac( a ); - aExpOrig = aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig0<<1 ) - || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN(a, b, status); - } - goto invalid; - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - if (aExp == 0 && aSig0 >> 63) { - /* - * Pseudo-denormal argument must be returned in normalized - * form. - */ - return packFloatx80(aSign, 1, aSig0); - } - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig0 == 0 ) return a; - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - zSign = aSign; - expDiff = aExp - bExp; - aSig1 = 0; - if ( expDiff < 0 ) { - if ( mod || expDiff < -1 ) { - if (aExp == 1 && aExpOrig == 0) { - /* - * Pseudo-denormal argument must be returned in - * normalized form. - */ - return packFloatx80(aSign, aExp, aSig0); - } - return a; - } - shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); - expDiff = 0; - } - *quotient = q = ( bSig <= aSig0 ); - if ( q ) aSig0 -= bSig; - expDiff -= 64; - while ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - mul64To128( bSig, q, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); - expDiff -= 62; - *quotient <<= 62; - *quotient += q; + float64_unpack_canonical(&p, a, status); + parts_sqrt(&p, status, &float64_params); + return float64_round_pack_canonical(&p, status); +} + +float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s) +{ + union_float32 ua, ur; + + ua.s = xa; + if (unlikely(!can_use_fpu(s))) { + goto soft; } - expDiff += 64; - if ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig ); - q = ( 2 < q ) ? 
q - 2 : 0; - q >>= 64 - expDiff; - mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); - while ( le128( term0, term1, aSig0, aSig1 ) ) { - ++q; - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - } - if (expDiff < 64) { - *quotient <<= expDiff; - } else { - *quotient = 0; + + float32_input_flush1(&ua.s, s); + if (QEMU_HARDFLOAT_1F32_USE_FP) { + if (unlikely(!(fpclassify(ua.h) == FP_NORMAL || + fpclassify(ua.h) == FP_ZERO) || + signbit(ua.h))) { + goto soft; } - *quotient += q; + } else if (unlikely(!float32_is_zero_or_normal(ua.s) || + float32_is_neg(ua.s))) { + goto soft; } - else { - term1 = 0; - term0 = bSig; + ur.h = sqrtf(ua.h); + return ur.s; + + soft: + return soft_f32_sqrt(ua.s, s); +} + +float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s) +{ + union_float64 ua, ur; + + ua.s = xa; + if (unlikely(!can_use_fpu(s))) { + goto soft; } - if (!mod) { - sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); - if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) - || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) - && ( q & 1 ) ) - ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - zSign = ! zSign; - ++*quotient; + + float64_input_flush1(&ua.s, s); + if (QEMU_HARDFLOAT_1F64_USE_FP) { + if (unlikely(!(fpclassify(ua.h) == FP_NORMAL || + fpclassify(ua.h) == FP_ZERO) || + signbit(ua.h))) { + goto soft; } + } else if (unlikely(!float64_is_zero_or_normal(ua.s) || + float64_is_neg(ua.s))) { + goto soft; } - return - normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1, status); + ur.h = sqrt(ua.h); + return ur.s; + soft: + return soft_f64_sqrt(ua.s, s); } -/*---------------------------------------------------------------------------- -| Returns the remainder of the extended double-precision floating-point value -| `a' with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) +bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status) { - uint64_t quotient; - return floatx80_modrem(a, b, false, "ient, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, status); + parts_sqrt(&p, status, &bfloat16_params); + return bfloat16_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Returns the remainder of the extended double-precision floating-point value -| `a' with respect to the corresponding value `b', with the quotient truncated -| toward zero. 
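float32_sqrt()/float64_sqrt() only hand the work to the host FPU when that cannot change softfloat-visible behaviour: can_use_fpu() must hold, the input must classify as zero or normal (no denormals, NaNs or infinities), and its sign bit must be clear; everything else drops to soft_f32_sqrt()/soft_f64_sqrt(). A standalone sketch of that per-operand guard using only <math.h>:

#include <math.h>
#include <stdbool.h>

/* Mirrors the per-operand test in the hardfloat sqrt fast path:
 * true only for a non-negative, zero-or-normal input. */
static bool host_sqrt_ok(double x)
{
    int c = fpclassify(x);

    if (c != FP_NORMAL && c != FP_ZERO) {
        return false;    /* NaN, infinity or denormal: take the soft path */
    }
    return !signbit(x);  /* any set sign bit (including -0.0) also goes soft */
}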
-*----------------------------------------------------------------------------*/ +float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status) +{ + FloatParts128 p; -floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) + float128_unpack_canonical(&p, a, status); + parts_sqrt(&p, status, &float128_params); + return float128_round_pack_canonical(&p, status); +} + +floatx80 floatx80_sqrt(floatx80 a, float_status *s) { - uint64_t quotient; - return floatx80_modrem(a, b, true, "ient, status); + FloatParts128 p; + + if (!floatx80_unpack_canonical(&p, a, s)) { + return floatx80_default_nan(s); + } + parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]); + return floatx80_round_pack_canonical(&p, s); } -/*---------------------------------------------------------------------------- -| Returns the square root of the extended double-precision floating-point -| value `a'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ +/* + * log2 + */ +float32 float32_log2(float32 a, float_status *status) +{ + FloatParts64 p; + + float32_unpack_canonical(&p, a, status); + parts_log2(&p, status, &float32_params); + return float32_round_pack_canonical(&p, status); +} -floatx80 floatx80_sqrt(floatx80 a, float_status *status) +float64 float64_log2(float64 a, float_status *status) { - bool aSign; - int32_t aExp, zExp; - uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0; - uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; + FloatParts64 p; - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig0 << 1)) { - return propagateFloatx80NaN(a, a, status); - } - if ( ! 
aSign ) return a; - goto invalid; - } - if ( aSign ) { - if ( ( aExp | aSig0 ) == 0 ) return a; - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - if ( aExp == 0 ) { - if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; - zSig0 = estimateSqrt32( aExp, aSig0>>32 ); - shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 ); - zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); - doubleZSig0 = zSig0<<1; - mul64To128( zSig0, zSig0, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); - while ( (int64_t) rem0 < 0 ) { - --zSig0; - doubleZSig0 -= 2; - add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); - if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) { - if ( zSig1 == 0 ) zSig1 = 1; - mul64To128( doubleZSig0, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - mul64To128( zSig1, zSig1, &term2, &term3 ); - sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - shortShift128Left( 0, zSig1, 1, &term2, &term3 ); - term3 |= 1; - term2 |= doubleZSig0; - add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); - } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); - } - shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 ); - zSig0 |= doubleZSig0; - return roundAndPackFloatx80(status->floatx80_rounding_precision, - 0, zExp, zSig0, zSig1, status); + float64_unpack_canonical(&p, a, status); + parts_log2(&p, status, &float64_params); + return float64_round_pack_canonical(&p, status); } /*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 32-bit two's complement integer format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. Otherwise, if the conversion overflows, the -| largest integer with the same sign as `a' is returned. +| The pattern for a default generated NaN. *----------------------------------------------------------------------------*/ -int32_t float128_to_int32(float128 a, float_status *status) +float16 float16_default_nan(float_status *status) { - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0; - if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000); - aSig0 |= ( aSig1 != 0 ); - shiftCount = 0x4028 - aExp; - if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 ); - return roundAndPackInt32(aSign, aSig0, status); + FloatParts64 p; + parts_default_nan(&p, status); + p.frac >>= float16_params.frac_shift; + return float16_pack_raw(&p); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 32-bit two's complement integer format. 
The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. If -| `a' is a NaN, the largest positive integer is returned. Otherwise, if the -| conversion overflows, the largest integer with the same sign as `a' is -| returned. -*----------------------------------------------------------------------------*/ +float32 float32_default_nan(float_status *status) +{ + FloatParts64 p; -int32_t float128_to_int32_round_to_zero(float128 a, float_status *status) -{ - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig0, aSig1, savedASig; - int32_t z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - aSig0 |= ( aSig1 != 0 ); - if ( 0x401E < aExp ) { - if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0; - goto invalid; - } - else if ( aExp < 0x3FFF ) { - if (aExp || aSig0) { - status->float_exception_flags |= float_flag_inexact; - } - return 0; - } - aSig0 |= UINT64_C(0x0001000000000000); - shiftCount = 0x402F - aExp; - savedASig = aSig0; - aSig0 >>= shiftCount; - z = aSig0; - if ( aSign ) z = - z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_raise(float_flag_invalid, status); - return aSign ? INT32_MIN : INT32_MAX; - } - if ( ( aSig0<<shiftCount ) != savedASig ) { - status->float_exception_flags |= float_flag_inexact; - } - return z; + parts_default_nan(&p, status); + p.frac >>= float32_params.frac_shift; + return float32_pack_raw(&p); +} + +float64 float64_default_nan(float_status *status) +{ + FloatParts64 p; + parts_default_nan(&p, status); + p.frac >>= float64_params.frac_shift; + return float64_pack_raw(&p); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 64-bit two's complement integer format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. Otherwise, if the conversion overflows, the -| largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ +float128 float128_default_nan(float_status *status) +{ + FloatParts128 p; -int64_t float128_to_int64(float128 a, float_status *status) -{ - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000); - shiftCount = 0x402F - aExp; - if ( shiftCount <= 0 ) { - if ( 0x403E < aExp ) { - float_raise(float_flag_invalid, status); - if ( ! 
aSign - || ( ( aExp == 0x7FFF ) - && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) ) - ) - ) { - return INT64_MAX; - } - return INT64_MIN; - } - shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 ); - } - else { - shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 ); - } - return roundAndPackInt64(aSign, aSig0, aSig1, status); + parts_default_nan(&p, status); + frac_shr(&p, float128_params.frac_shift); + return float128_pack_raw(&p); +} + +bfloat16 bfloat16_default_nan(float_status *status) +{ + FloatParts64 p; + parts_default_nan(&p, status); + p.frac >>= bfloat16_params.frac_shift; + return bfloat16_pack_raw(&p); } /*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 64-bit two's complement integer format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. -| If `a' is a NaN, the largest positive integer is returned. Otherwise, if -| the conversion overflows, the largest integer with the same sign as `a' is -| returned. +| Returns a quiet NaN from a signalling NaN for the floating point value `a'. *----------------------------------------------------------------------------*/ -int64_t float128_to_int64_round_to_zero(float128 a, float_status *status) -{ - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig0, aSig1; - int64_t z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000); - shiftCount = aExp - 0x402F; - if ( 0 < shiftCount ) { - if ( 0x403E <= aExp ) { - aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF); - if ( ( a.high == UINT64_C(0xC03E000000000000) ) - && ( aSig1 < UINT64_C(0x0002000000000000) ) ) { - if (aSig1) { - status->float_exception_flags |= float_flag_inexact; - } - } - else { - float_raise(float_flag_invalid, status); - if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) { - return INT64_MAX; - } - } - return INT64_MIN; - } - z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) ); - if ( (uint64_t) ( aSig1<<( shiftCount & 63 ) ) ) { - status->float_exception_flags |= float_flag_inexact; - } - } - else { - if ( aExp < 0x3FFF ) { - if ( aExp | aSig0 | aSig1 ) { - status->float_exception_flags |= float_flag_inexact; - } - return 0; - } - z = aSig0>>( - shiftCount ); - if ( aSig1 - || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) { - status->float_exception_flags |= float_flag_inexact; - } - } - if ( aSign ) z = - z; - return z; +float16 float16_silence_nan(float16 a, float_status *status) +{ + FloatParts64 p; + float16_unpack_raw(&p, a); + p.frac <<= float16_params.frac_shift; + parts_silence_nan(&p, status); + p.frac >>= float16_params.frac_shift; + return float16_pack_raw(&p); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point value -| `a' to the 64-bit unsigned integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. If the conversion overflows, the -| largest unsigned integer is returned. 
If 'a' is negative, the value is -| rounded and zero is returned; negative values that do not round to zero -| will raise the inexact exception. -*----------------------------------------------------------------------------*/ +float32 float32_silence_nan(float32 a, float_status *status) +{ + FloatParts64 p; -uint64_t float128_to_uint64(float128 a, float_status *status) -{ - bool aSign; - int aExp; - int shiftCount; - uint64_t aSig0, aSig1; - - aSig0 = extractFloat128Frac0(a); - aSig1 = extractFloat128Frac1(a); - aExp = extractFloat128Exp(a); - aSign = extractFloat128Sign(a); - if (aSign && (aExp > 0x3FFE)) { - float_raise(float_flag_invalid, status); - if (float128_is_any_nan(a)) { - return UINT64_MAX; - } else { - return 0; - } - } - if (aExp) { - aSig0 |= UINT64_C(0x0001000000000000); - } - shiftCount = 0x402F - aExp; - if (shiftCount <= 0) { - if (0x403E < aExp) { - float_raise(float_flag_invalid, status); - return UINT64_MAX; - } - shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1); - } else { - shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1); - } - return roundAndPackUint64(aSign, aSig0, aSig1, status); + float32_unpack_raw(&p, a); + p.frac <<= float32_params.frac_shift; + parts_silence_nan(&p, status); + p.frac >>= float32_params.frac_shift; + return float32_pack_raw(&p); +} + +float64 float64_silence_nan(float64 a, float_status *status) +{ + FloatParts64 p; + + float64_unpack_raw(&p, a); + p.frac <<= float64_params.frac_shift; + parts_silence_nan(&p, status); + p.frac >>= float64_params.frac_shift; + return float64_pack_raw(&p); } -uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status) +bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status) { - uint64_t v; - signed char current_rounding_mode = status->float_rounding_mode; + FloatParts64 p; + + bfloat16_unpack_raw(&p, a); + p.frac <<= bfloat16_params.frac_shift; + parts_silence_nan(&p, status); + p.frac >>= bfloat16_params.frac_shift; + return bfloat16_pack_raw(&p); +} - set_float_rounding_mode(float_round_to_zero, status); - v = float128_to_uint64(a, status); - set_float_rounding_mode(current_rounding_mode, status); +float128 float128_silence_nan(float128 a, float_status *status) +{ + FloatParts128 p; - return v; + float128_unpack_raw(&p, a); + frac_shl(&p, float128_params.frac_shift); + parts_silence_nan(&p, status); + frac_shr(&p, float128_params.frac_shift); + return float128_pack_raw(&p); } /*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 32-bit unsigned integer format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic except that the conversion is always rounded toward zero. -| If `a' is a NaN, the largest positive integer is returned. Otherwise, -| if the conversion overflows, the largest unsigned integer is returned. -| If 'a' is negative, the value is rounded and zero is returned; negative -| values that do not round to zero will raise the inexact exception. +| If `a' is denormal and we are in flush-to-zero mode then set the +| input-denormal exception and return zero. Otherwise just return the value. 
*----------------------------------------------------------------------------*/ -uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status) +static bool parts_squash_denormal(FloatParts64 p, float_status *status) { - uint64_t v; - uint32_t res; - int old_exc_flags = get_float_exception_flags(status); - - v = float128_to_uint64_round_to_zero(a, status); - if (v > 0xffffffff) { - res = 0xffffffff; - } else { - return v; + if (p.exp == 0 && p.frac != 0) { + float_raise(float_flag_input_denormal, status); + return true; } - set_float_exception_flags(old_exc_flags, status); - float_raise(float_flag_invalid, status); - return res; -} -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point value -| `a' to the 32-bit unsigned integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. If the conversion overflows, the -| largest unsigned integer is returned. If 'a' is negative, the value is -| rounded and zero is returned; negative values that do not round to zero -| will raise the inexact exception. -*----------------------------------------------------------------------------*/ + return false; +} -uint32_t float128_to_uint32(float128 a, float_status *status) +float16 float16_squash_input_denormal(float16 a, float_status *status) { - uint64_t v; - uint32_t res; - int old_exc_flags = get_float_exception_flags(status); + if (status->flush_inputs_to_zero) { + FloatParts64 p; - v = float128_to_uint64(a, status); - if (v > 0xffffffff) { - res = 0xffffffff; - } else { - return v; + float16_unpack_raw(&p, a); + if (parts_squash_denormal(p, status)) { + return float16_set_sign(float16_zero, p.sign); + } } - set_float_exception_flags(old_exc_flags, status); - float_raise(float_flag_invalid, status); - return res; + return a; } -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the single-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. 
-*----------------------------------------------------------------------------*/ - -float32 float128_to_float32(float128 a, float_status *status) +float32 float32_squash_input_denormal(float32 a, float_status *status) { - bool aSign; - int32_t aExp; - uint64_t aSig0, aSig1; - uint32_t zSig; + if (status->flush_inputs_to_zero) { + FloatParts64 p; - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloat32(float128ToCommonNaN(a, status), status); + float32_unpack_raw(&p, a); + if (parts_squash_denormal(p, status)) { + return float32_set_sign(float32_zero, p.sign); } - return packFloat32( aSign, 0xFF, 0 ); } - aSig0 |= ( aSig1 != 0 ); - shift64RightJamming( aSig0, 18, &aSig0 ); - zSig = aSig0; - if ( aExp || zSig ) { - zSig |= 0x40000000; - aExp -= 0x3F81; - } - return roundAndPackFloat32(aSign, aExp, zSig, status); - + return a; } -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float128_to_float64(float128 a, float_status *status) +float64 float64_squash_input_denormal(float64 a, float_status *status) { - bool aSign; - int32_t aExp; - uint64_t aSig0, aSig1; + if (status->flush_inputs_to_zero) { + FloatParts64 p; - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloat64(float128ToCommonNaN(a, status), status); + float64_unpack_raw(&p, a); + if (parts_squash_denormal(p, status)) { + return float64_set_sign(float64_zero, p.sign); } - return packFloat64( aSign, 0x7FF, 0 ); } - shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); - aSig0 |= ( aSig1 != 0 ); - if ( aExp || aSig0 ) { - aSig0 |= UINT64_C(0x4000000000000000); - aExp -= 0x3C01; - } - return roundAndPackFloat64(aSign, aExp, aSig0, status); + return a; +} + +bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status) +{ + if (status->flush_inputs_to_zero) { + FloatParts64 p; + bfloat16_unpack_raw(&p, a); + if (parts_squash_denormal(p, status)) { + return bfloat16_set_sign(bfloat16_zero, p.sign); + } + } + return a; } /*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the extended double-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. +| Normalizes the subnormal extended double-precision floating-point value +| represented by the denormalized significand `aSig'. The normalized exponent +| and significand are stored at the locations pointed to by `zExpPtr' and +| `zSigPtr', respectively. 
*----------------------------------------------------------------------------*/ -floatx80 float128_to_floatx80(float128 a, float_status *status) -{ - bool aSign; - int32_t aExp; - uint64_t aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status), - status); - return floatx80_silence_nan(res, status); - } - return packFloatx80(aSign, floatx80_infinity_high, - floatx80_infinity_low); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - else { - aSig0 |= UINT64_C(0x0001000000000000); - } - shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); - return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status); +void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, + uint64_t *zSigPtr) +{ + int8_t shiftCount; + shiftCount = clz64(aSig); + *zSigPtr = aSig<float_rounding_mode) { - case float_round_nearest_even: - if ( lastBitMask ) { - add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); - if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; - } - else { - if ( (int64_t) z.low < 0 ) { - ++z.high; - if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1; - } - } - break; - case float_round_ties_away: - if (lastBitMask) { - add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low); - } else { - if ((int64_t) z.low < 0) { - ++z.high; - } - } - break; - case float_round_to_zero: - break; - case float_round_up: - if (!extractFloat128Sign(z)) { - add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); - } - break; - case float_round_down: - if (extractFloat128Sign(z)) { - add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); - } - break; - case float_round_to_odd: - /* - * Note that if lastBitMask == 0, the last bit is the lsb - * of high, and roundBitsMask == -1. - */ - if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) { - add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); - } - break; - default: - abort(); - } - z.low &= ~ roundBitsMask; + roundingMode = status->float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + switch (roundingPrecision) { + case floatx80_precision_x: + goto precision80; + case floatx80_precision_d: + roundIncrement = UINT64_C(0x0000000000000400); + roundMask = UINT64_C(0x00000000000007FF); + break; + case floatx80_precision_s: + roundIncrement = UINT64_C(0x0000008000000000); + roundMask = UINT64_C(0x000000FFFFFFFFFF); + break; + default: + g_assert_not_reached(); } - else { - if ( aExp < 0x3FFF ) { - if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a; - status->float_exception_flags |= float_flag_inexact; - aSign = extractFloat128Sign( a ); - switch (status->float_rounding_mode) { - case float_round_nearest_even: - if ( ( aExp == 0x3FFE ) - && ( extractFloat128Frac0( a ) - | extractFloat128Frac1( a ) ) - ) { - return packFloat128( aSign, 0x3FFF, 0, 0 ); - } - break; - case float_round_ties_away: - if (aExp == 0x3FFE) { - return packFloat128(aSign, 0x3FFF, 0, 0); - } - break; - case float_round_down: - return - aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) - : packFloat128( 0, 0, 0, 0 ); - case float_round_up: - return - aSign ? 
packFloat128( 1, 0, 0, 0 ) - : packFloat128( 0, 0x3FFF, 0, 0 ); - - case float_round_to_odd: - return packFloat128(aSign, 0x3FFF, 0, 0); - - case float_round_to_zero: - break; - } - return packFloat128( aSign, 0, 0, 0 ); + zSig0 |= ( zSig1 != 0 ); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : roundMask; + break; + case float_round_down: + roundIncrement = zSign ? roundMask : 0; + break; + default: + abort(); + } + roundBits = zSig0 & roundMask; + if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) + ) { + goto overflow; } - lastBitMask = 1; - lastBitMask <<= 0x402F - aExp; - roundBitsMask = lastBitMask - 1; - z.low = 0; - z.high = a.high; - switch (status->float_rounding_mode) { - case float_round_nearest_even: - z.high += lastBitMask>>1; - if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { - z.high &= ~ lastBitMask; + if ( zExp <= 0 ) { + if (status->flush_to_zero) { + float_raise(float_flag_output_denormal, status); + return packFloatx80(zSign, 0, 0); } - break; - case float_round_ties_away: - z.high += lastBitMask>>1; - break; - case float_round_to_zero: - break; - case float_round_up: - if (!extractFloat128Sign(z)) { - z.high |= ( a.low != 0 ); - z.high += roundBitsMask; + isTiny = status->tininess_before_rounding + || (zExp < 0 ) + || (zSig0 <= zSig0 + roundIncrement); + shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); + zExp = 0; + roundBits = zSig0 & roundMask; + if (isTiny && roundBits) { + float_raise(float_flag_underflow, status); } - break; - case float_round_down: - if (extractFloat128Sign(z)) { - z.high |= (a.low != 0); - z.high += roundBitsMask; + if (roundBits) { + float_raise(float_flag_inexact, status); } - break; - case float_round_to_odd: - if ((z.high & lastBitMask) == 0) { - z.high |= (a.low != 0); - z.high += roundBitsMask; + zSig0 += roundIncrement; + if ( (int64_t) zSig0 < 0 ) zExp = 1; + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; } - break; - default: - abort(); + zSig0 &= ~ roundMask; + return packFloatx80( zSign, zExp, zSig0 ); } - z.high &= ~ roundBitsMask; } - if ( ( z.low != a.low ) || ( z.high != a.high ) ) { - status->float_exception_flags |= float_flag_inexact; + if (roundBits) { + float_raise(float_flag_inexact, status); } - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the quadruple-precision -| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated -| before being returned. `zSign' is ignored if the result is a NaN. -| The addition is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -static float128 addFloat128Sigs(float128 a, float128 b, bool zSign, - float_status *status) -{ - int32_t aExp, bExp, zExp; - uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; - int32_t expDiff; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) { - if ( aExp == 0x7FFF ) { - if (aSig0 | aSig1) { - return propagateFloat128NaN(a, b, status); + zSig0 += roundIncrement; + if ( zSig0 < roundIncrement ) { + ++zExp; + zSig0 = UINT64_C(0x8000000000000000); + } + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + if ( zSig0 == 0 ) zExp = 0; + return packFloatx80( zSign, zExp, zSig0 ); + precision80: + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig1; + break; + case float_round_down: + increment = zSign && zSig1; + break; + default: + abort(); + } + if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) + && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) ) + && increment + ) + ) { + roundMask = 0; + overflow: + float_raise(float_flag_overflow | float_flag_inexact, status); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return packFloatx80( zSign, 0x7FFE, ~ roundMask ); } - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig0 |= UINT64_C(0x0001000000000000); + return packFloatx80(zSign, + floatx80_infinity_high, + floatx80_infinity_low); } - shift128ExtraRightJamming( - bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); + if ( zExp <= 0 ) { + isTiny = status->tininess_before_rounding + || (zExp < 0) + || !increment + || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF)); + shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); + zExp = 0; + if (isTiny && zSig1) { + float_raise(float_flag_underflow, status); + } + if (zSig1) { + float_raise(float_flag_inexact, status); } - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig0 |= UINT64_C(0x0001000000000000); - } - shift128ExtraRightJamming( - aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); - zExp = bExp; - } - else { - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 | bSig0 | bSig1 ) { - return propagateFloat128NaN(a, b, status); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig1; + break; + case float_round_down: + increment = zSign && zSig1; + break; + default: + abort(); } - return a; - } - add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - if ( aExp == 0 ) { - if (status->flush_to_zero) { - if (zSig0 | zSig1) { - float_raise(float_flag_output_denormal, status); + if ( increment ) { + ++zSig0; + if (!(zSig1 << 
1) && roundNearestEven) { + zSig0 &= ~1; } - return packFloat128(zSign, 0, 0, 0); + if ( (int64_t) zSig0 < 0 ) zExp = 1; } - return packFloat128( zSign, 0, zSig0, zSig1 ); - } - zSig2 = 0; - zSig0 |= UINT64_C(0x0002000000000000); - zExp = aExp; - goto shiftRight1; - } - aSig0 |= UINT64_C(0x0001000000000000); - add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - --zExp; - if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack; - ++zExp; - shiftRight1: - shift128ExtraRightJamming( - zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); - roundAndPack: - return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the absolute values of the quadruple- -| precision floating-point values `a' and `b'. If `zSign' is 1, the -| difference is negated before being returned. `zSign' is ignored if the -| result is a NaN. The subtraction is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float128 subFloat128Sigs(float128 a, float128 b, bool zSign, - float_status *status) -{ - int32_t aExp, bExp, zExp; - uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; - int32_t expDiff; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - expDiff = aExp - bExp; - shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); - shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 ); - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 | bSig0 | bSig1 ) { - return propagateFloat128NaN(a, b, status); + return packFloatx80( zSign, zExp, zSig0 ); } - float_raise(float_flag_invalid, status); - return float128_default_nan(status); } - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; + if (zSig1) { + float_raise(float_flag_inexact, status); } - if ( bSig0 < aSig0 ) goto aBigger; - if ( aSig0 < bSig0 ) goto bBigger; - if ( bSig1 < aSig1 ) goto aBigger; - if ( aSig1 < bSig1 ) goto bBigger; - return packFloat128(status->float_rounding_mode == float_round_down, - 0, 0, 0); - bExpBigger: - if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); + if ( increment ) { + ++zSig0; + if ( zSig0 == 0 ) { + ++zExp; + zSig0 = UINT64_C(0x8000000000000000); } - return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig0 |= UINT64_C(0x4000000000000000); - } - shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); - bSig0 |= UINT64_C(0x4000000000000000); - bBigger: - sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0x7FFF ) { - if (aSig0 | aSig1) { - return propagateFloat128NaN(a, b, status); + else { + if (!(zSig1 << 1) && roundNearestEven) { + zSig0 &= ~1; + } } - return a; - } - if ( bExp == 0 ) { - --expDiff; } else { - bSig0 |= UINT64_C(0x4000000000000000); + if ( zSig0 == 0 ) zExp = 0; } - shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); - aSig0 |= UINT64_C(0x4000000000000000); - aBigger: - sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - zExp = aExp; - normalizeRoundAndPack: - --zExp; - return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, 
zSig1, - status); + return packFloatx80( zSign, zExp, zSig0 ); } /*---------------------------------------------------------------------------- -| Returns the result of adding the quadruple-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. +| Takes an abstract floating-point value having sign `zSign', exponent +| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1', +| and returns the proper extended double-precision floating-point value +| corresponding to the abstract input. This routine is just like +| `roundAndPackFloatx80' except that the input significand does not have to be +| normalized. *----------------------------------------------------------------------------*/ -float128 float128_add(float128 a, float128 b, float_status *status) +floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, + bool zSign, int32_t zExp, + uint64_t zSig0, uint64_t zSig1, + float_status *status) { - bool aSign, bSign; + int8_t shiftCount; - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign == bSign ) { - return addFloat128Sigs(a, b, aSign, status); - } - else { - return subFloat128Sigs(a, b, aSign, status); + if ( zSig0 == 0 ) { + zSig0 = zSig1; + zSig1 = 0; + zExp -= 64; } + shiftCount = clz64(zSig0); + shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); + zExp -= shiftCount; + return roundAndPackFloatx80(roundingPrecision, zSign, zExp, + zSig0, zSig1, status); } /*---------------------------------------------------------------------------- -| Returns the result of subtracting the quadruple-precision floating-point -| values `a' and `b'. The operation is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. +| Returns the binary exponential of the single-precision floating-point value +| `a'. The operation is performed according to the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +| +| Uses the following identities: +| +| 1. ------------------------------------------------------------------------- +| x x*ln(2) +| 2 = e +| +| 2. ------------------------------------------------------------------------- +| 2 3 4 5 n +| x x x x x x x +| e = 1 + --- + --- + --- + --- + --- + ... + --- + ... +| 1! 2! 3! 4! 5! n! 
*----------------------------------------------------------------------------*/ -float128 float128_sub(float128 a, float128 b, float_status *status) +static const float64 float32_exp2_coefficients[15] = { - bool aSign, bSign; - - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign == bSign ) { - return subFloat128Sigs(a, b, aSign, status); - } - else { - return addFloat128Sigs(a, b, aSign, status); - } - -} + const_float64( 0x3ff0000000000000ll ), /* 1 */ + const_float64( 0x3fe0000000000000ll ), /* 2 */ + const_float64( 0x3fc5555555555555ll ), /* 3 */ + const_float64( 0x3fa5555555555555ll ), /* 4 */ + const_float64( 0x3f81111111111111ll ), /* 5 */ + const_float64( 0x3f56c16c16c16c17ll ), /* 6 */ + const_float64( 0x3f2a01a01a01a01all ), /* 7 */ + const_float64( 0x3efa01a01a01a01all ), /* 8 */ + const_float64( 0x3ec71de3a556c734ll ), /* 9 */ + const_float64( 0x3e927e4fb7789f5cll ), /* 10 */ + const_float64( 0x3e5ae64567f544e4ll ), /* 11 */ + const_float64( 0x3e21eed8eff8d898ll ), /* 12 */ + const_float64( 0x3de6124613a86d09ll ), /* 13 */ + const_float64( 0x3da93974a8c07c9dll ), /* 14 */ + const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */ +}; -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the quadruple-precision floating-point -| values `a' and `b'. The operation is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ +float32 float32_exp2(float32 a, float_status *status) +{ + FloatParts64 xp, xnp, tp, rp; + int i; -float128 float128_mul(float128 a, float128 b, float_status *status) -{ - bool aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - bSign = extractFloat128Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( ( aSig0 | aSig1 ) - || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { - return propagateFloat128NaN(a, b, status); - } - if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); - } - if ( ( aExp | aSig0 | aSig1 ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return float128_default_nan(status); + float32_unpack_canonical(&xp, a, status); + if (unlikely(xp.cls != float_class_normal)) { + switch (xp.cls) { + case float_class_snan: + case float_class_qnan: + parts_return_nan(&xp, status); + return float32_round_pack_canonical(&xp, status); + case float_class_inf: + return xp.sign ? 
float32_zero : a; + case float_class_zero: + return float32_one; + default: + break; } - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); - normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); - } - zExp = aExp + bExp - 0x4000; - aSig0 |= UINT64_C(0x0001000000000000); - shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 ); - mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); - add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); - zSig2 |= ( zSig3 != 0 ); - if (UINT64_C( 0x0002000000000000) <= zSig0 ) { - shift128ExtraRightJamming( - zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); - ++zExp; + g_assert_not_reached(); } - return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); -} + float_raise(float_flag_inexact, status); -/*---------------------------------------------------------------------------- -| Returns the result of dividing the quadruple-precision floating-point value -| `a' by the corresponding value `b'. The operation is performed according to -| the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ + float64_unpack_canonical(&tp, float64_ln2, status); + xp = *parts_mul(&xp, &tp, status); + xnp = xp; -float128 float128_div(float128 a, float128 b, float_status *status) -{ - bool aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; - uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - bSign = extractFloat128Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if (aSig0 | aSig1) { - return propagateFloat128NaN(a, b, status); - } - if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); - } - goto invalid; - } - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); - } - return packFloat128( zSign, 0, 0, 0 ); - } - if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) { - if ( ( aExp | aSig0 | aSig1 ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return float128_default_nan(status); - } - float_raise(float_flag_divbyzero, status); - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - zExp = aExp - bExp + 0x3FFD; - shortShift128Left( - aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 ); - shortShift128Left( - bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 ); - if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { - shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); - ++zExp; - } - zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); - mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); - sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); - while ( 
(int64_t) rem0 < 0 ) { - --zSig0; - add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); - } - zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); - if ( ( zSig1 & 0x3FFF ) <= 4 ) { - mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); - sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); - } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + float64_unpack_canonical(&rp, float64_one, status); + for (i = 0 ; i < 15 ; i++) { + float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status); + rp = *parts_muladd(&tp, &xp, &rp, 0, status); + xnp = *parts_mul(&xnp, &xp, status); } - shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); - return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); - -} -/*---------------------------------------------------------------------------- -| Returns the remainder of the quadruple-precision floating-point value `a' -| with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float128 float128_rem(float128 a, float128 b, float_status *status) -{ - bool aSign, zSign; - int32_t aExp, bExp, expDiff; - uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; - uint64_t allZero, alternateASig0, alternateASig1, sigMean1; - int64_t sigMean0; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - if ( aExp == 0x7FFF ) { - if ( ( aSig0 | aSig1 ) - || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { - return propagateFloat128NaN(a, b, status); - } - goto invalid; - } - if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); - } - return a; - } - if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return float128_default_nan(status); - } - normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return a; - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - expDiff = aExp - bExp; - if ( expDiff < -1 ) return a; - shortShift128Left( - aSig0 | UINT64_C(0x0001000000000000), - aSig1, - 15 - ( expDiff < 0 ), - &aSig0, - &aSig1 - ); - shortShift128Left( - bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 ); - q = le128( bSig0, bSig1, aSig0, aSig1 ); - if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); - expDiff -= 64; - while ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig0 ); - q = ( 4 < q ) ? q - 4 : 0; - mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); - shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero ); - shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero ); - sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 ); - expDiff -= 61; - } - if ( -64 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig0 ); - q = ( 4 < q ) ? 
q - 4 : 0; - q >>= - expDiff; - shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); - expDiff += 52; - if ( expDiff < 0 ) { - shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); - } - else { - shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); - } - mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); - sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); - } - else { - shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 ); - shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); - } - do { - alternateASig0 = aSig0; - alternateASig1 = aSig1; - ++q; - sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); - } while ( 0 <= (int64_t) aSig0 ); - add128( - aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 ); - if ( ( sigMean0 < 0 ) - || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - } - zSign = ( (int64_t) aSig0 < 0 ); - if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); - return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1, - status); + return float32_round_pack_canonical(&rp, status); } /*---------------------------------------------------------------------------- -| Returns the square root of the quadruple-precision floating-point value `a'. +| Rounds the extended double-precision floating-point value `a' +| to the precision provided by floatx80_rounding_precision and returns the +| result as an extended double-precision floating-point value. | The operation is performed according to the IEC/IEEE Standard for Binary | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 float128_sqrt(float128 a, float_status *status) -{ - bool aSign; - int32_t aExp, zExp; - uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; - uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if (aSig0 | aSig1) { - return propagateFloat128NaN(a, a, status); - } - if ( ! 
aSign ) return a; - goto invalid; - } - if ( aSign ) { - if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; - invalid: - float_raise(float_flag_invalid, status); - return float128_default_nan(status); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE; - aSig0 |= UINT64_C(0x0001000000000000); - zSig0 = estimateSqrt32( aExp, aSig0>>17 ); - shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 ); - zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); - doubleZSig0 = zSig0<<1; - mul64To128( zSig0, zSig0, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); - while ( (int64_t) rem0 < 0 ) { - --zSig0; - doubleZSig0 -= 2; - add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); - if ( ( zSig1 & 0x1FFF ) <= 5 ) { - if ( zSig1 == 0 ) zSig1 = 1; - mul64To128( doubleZSig0, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - mul64To128( zSig1, zSig1, &term2, &term3 ); - sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - shortShift128Left( 0, zSig1, 1, &term2, &term3 ); - term3 |= 1; - term2 |= doubleZSig0; - add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); - } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); - } - shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 ); - return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status); - -} - -static inline FloatRelation -floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet, - float_status *status) -{ - bool aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return float_relation_unordered; - } - if (( ( extractFloatx80Exp( a ) == 0x7fff ) && - ( extractFloatx80Frac( a )<<1 ) ) || - ( ( extractFloatx80Exp( b ) == 0x7fff ) && - ( extractFloatx80Frac( b )<<1 ) )) { - if (!is_quiet || - floatx80_is_signaling_nan(a, status) || - floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return float_relation_unordered; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - - if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) && - ( ( a.low | b.low ) == 0 ) ) { - /* zero case */ - return float_relation_equal; - } else { - return 1 - (2 * aSign); - } - } else { - /* Normalize pseudo-denormals before comparison. 
*/ - if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) { - ++a.high; - } - if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) { - ++b.high; - } - if (a.low == b.low && a.high == b.high) { - return float_relation_equal; - } else { - return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) )); - } - } -} - -FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status) -{ - return floatx80_compare_internal(a, b, 0, status); -} - -FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, - float_status *status) -{ - return floatx80_compare_internal(a, b, 1, status); -} - -static inline FloatRelation -float128_compare_internal(float128 a, float128 b, bool is_quiet, - float_status *status) -{ - bool aSign, bSign; - - if (( ( extractFloat128Exp( a ) == 0x7fff ) && - ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) || - ( ( extractFloat128Exp( b ) == 0x7fff ) && - ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) { - if (!is_quiet || - float128_is_signaling_nan(a, status) || - float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return float_relation_unordered; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) { - /* zero case */ - return float_relation_equal; - } else { - return 1 - (2 * aSign); - } - } else { - if (a.low == b.low && a.high == b.high) { - return float_relation_equal; - } else { - return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) )); - } - } -} - -FloatRelation float128_compare(float128 a, float128 b, float_status *status) -{ - return float128_compare_internal(a, b, 0, status); -} - -FloatRelation float128_compare_quiet(float128 a, float128 b, - float_status *status) -{ - return float128_compare_internal(a, b, 1, status); -} - -floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status) +floatx80 floatx80_round(floatx80 a, float_status *status) { - bool aSign; - int32_t aExp; - uint64_t aSig; + FloatParts128 p; - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); + if (!floatx80_unpack_canonical(&p, a, status)) { return floatx80_default_nan(status); } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - if ( aExp == 0x7FFF ) { - if ( aSig<<1 ) { - return propagateFloatx80NaN(a, a, status); - } - return a; - } - - if (aExp == 0) { - if (aSig == 0) { - return a; - } - aExp++; - } - - if (n > 0x10000) { - n = 0x10000; - } else if (n < -0x10000) { - n = -0x10000; - } - - aExp += n; - return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, - aSign, aExp, aSig, 0, status); -} - -float128 float128_scalbn(float128 a, int n, float_status *status) -{ - bool aSign; - int32_t aExp; - uint64_t aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return propagateFloat128NaN(a, a, status); - } - return a; - } - if (aExp != 0) { - aSig0 |= UINT64_C(0x0001000000000000); - } else if (aSig0 == 0 && aSig1 == 0) { - return a; - } else { - aExp++; - } - - if (n > 0x10000) { - n = 0x10000; - } else if (n < -0x10000) { - n = -0x10000; - } - - aExp += n - 1; - return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1 - , status); - + return floatx80_round_pack_canonical(&p, status); } static void 
__attribute__((constructor)) softfloat_init(void) diff --git a/fsdev/9p-marshal.c b/fsdev/9p-marshal.c index a01bba6908a..51881fe2201 100644 --- a/fsdev/9p-marshal.c +++ b/fsdev/9p-marshal.c @@ -18,6 +18,8 @@ #include "9p-marshal.h" +P9ARRAY_DEFINE_TYPE(V9fsString, v9fs_string_free); + void v9fs_string_free(V9fsString *str) { g_free(str->data); diff --git a/fsdev/9p-marshal.h b/fsdev/9p-marshal.h index ceaf2f521ec..f1abbe151c3 100644 --- a/fsdev/9p-marshal.h +++ b/fsdev/9p-marshal.h @@ -1,10 +1,13 @@ #ifndef QEMU_9P_MARSHAL_H #define QEMU_9P_MARSHAL_H +#include "p9array.h" + typedef struct V9fsString { uint16_t size; char *data; } V9fsString; +P9ARRAY_DECLARE_TYPE(V9fsString); typedef struct V9fsQID { uint8_t type; diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h index d9d058b756d..16c1a9d9fed 100644 --- a/fsdev/file-op-9p.h +++ b/fsdev/file-op-9p.h @@ -18,6 +18,7 @@ #include #include "qemu/statfs.h" #include "qemu-fsdev-throttle.h" +#include "p9array.h" #define SM_LOCAL_MODE_BITS 0600 #define SM_LOCAL_DIR_MODE_BITS 0700 @@ -105,6 +106,7 @@ struct V9fsPath { uint16_t size; char *data; }; +P9ARRAY_DECLARE_TYPE(V9fsPath); typedef union V9fsFidOpenState V9fsFidOpenState; diff --git a/fsdev/p9array.h b/fsdev/p9array.h new file mode 100644 index 00000000000..6aa25327ca3 --- /dev/null +++ b/fsdev/p9array.h @@ -0,0 +1,160 @@ +/* + * P9Array - deep auto free C-array + * + * Copyright (c) 2021 Crudebyte + * + * Authors: + * Christian Schoenebeck + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef QEMU_P9ARRAY_H +#define QEMU_P9ARRAY_H + +#include "qemu/compiler.h" + +/** + * P9Array provides a mechanism to access arrays in common C-style (e.g. by + * square bracket [] operator) in conjunction with reference variables that + * perform deep auto free of the array when leaving the scope of the auto + * reference variable. That means not only is the array itself automatically + * freed, but also memory dynamically allocated by the individual array + * elements. 
+ * + * Example: + * + * Consider the following user struct @c Foo which shall be used as scalar + * (element) type of an array: + * @code + * typedef struct Foo { + * int i; + * char *s; + * } Foo; + * @endcode + * and assume it has the following function to free memory allocated by @c Foo + * instances: + * @code + * void free_foo(Foo *foo) { + * free(foo->s); + * } + * @endcode + * Add the following to a shared header file: + * @code + * P9ARRAY_DECLARE_TYPE(Foo); + * @endcode + * and the following to a C unit file: + * @code + * P9ARRAY_DEFINE_TYPE(Foo, free_foo); + * @endcode + * Finally the array may then be used like this: + * @code + * void doSomething(size_t n) { + * P9ARRAY_REF(Foo) foos = NULL; + * P9ARRAY_NEW(Foo, foos, n); + * for (size_t i = 0; i < n; ++i) { + * foos[i].i = i; + * foos[i].s = calloc(4096, 1); + * snprintf(foos[i].s, 4096, "foo %d", i); + * if (...) { + * return; // array auto freed here + * } + * } + * // array auto freed here + * } + * @endcode + */ + +/** + * Declares an array type for the passed @a scalar_type. + * + * This is typically used from a shared header file. + * + * @param scalar_type - type of the individual array elements + */ +#define P9ARRAY_DECLARE_TYPE(scalar_type) \ + typedef struct P9Array##scalar_type { \ + size_t len; \ + scalar_type first[]; \ + } P9Array##scalar_type; \ + \ + void p9array_new_##scalar_type(scalar_type **auto_var, size_t len); \ + void p9array_auto_free_##scalar_type(scalar_type **auto_var); \ + +/** + * Defines an array type for the passed @a scalar_type and appropriate + * @a scalar_cleanup_func. + * + * This is typically used from a C unit file. + * + * @param scalar_type - type of the individual array elements + * @param scalar_cleanup_func - appropriate function to free memory dynamically + * allocated by individual array elements before + */ +#define P9ARRAY_DEFINE_TYPE(scalar_type, scalar_cleanup_func) \ + void p9array_new_##scalar_type(scalar_type **auto_var, size_t len) \ + { \ + p9array_auto_free_##scalar_type(auto_var); \ + P9Array##scalar_type *arr = g_malloc0(sizeof(P9Array##scalar_type) + \ + len * sizeof(scalar_type)); \ + arr->len = len; \ + *auto_var = &arr->first[0]; \ + } \ + \ + void p9array_auto_free_##scalar_type(scalar_type **auto_var) \ + { \ + scalar_type *first = (*auto_var); \ + if (!first) { \ + return; \ + } \ + P9Array##scalar_type *arr = (P9Array##scalar_type *) ( \ + ((char *)first) - offsetof(P9Array##scalar_type, first) \ + ); \ + for (size_t i = 0; i < arr->len; ++i) { \ + scalar_cleanup_func(&arr->first[i]); \ + } \ + g_free(arr); \ + } \ + +/** + * Used to declare a reference variable (unique pointer) for an array. After + * leaving the scope of the reference variable, the associated array is + * automatically freed. + * + * @param scalar_type - type of the individual array elements + */ +#define P9ARRAY_REF(scalar_type) \ + __attribute((__cleanup__(p9array_auto_free_##scalar_type))) scalar_type* + +/** + * Allocates a new array of passed @a scalar_type with @a len number of array + * elements and assigns the created array to the reference variable + * @a auto_var. 
+ * + * @param scalar_type - type of the individual array elements + * @param auto_var - destination reference variable + * @param len - amount of array elements to be allocated immediately + */ +#define P9ARRAY_NEW(scalar_type, auto_var, len) \ + QEMU_BUILD_BUG_MSG( \ + !__builtin_types_compatible_p(scalar_type, typeof(*auto_var)), \ + "P9Array scalar type mismatch" \ + ); \ + p9array_new_##scalar_type((&auto_var), len) + +#endif /* QEMU_P9ARRAY_H */ diff --git a/gdb-xml/arm-m-profile-mve.xml b/gdb-xml/arm-m-profile-mve.xml new file mode 100644 index 00000000000..cba664c4c5b --- /dev/null +++ b/gdb-xml/arm-m-profile-mve.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + diff --git a/gdb-xml/arm-neon.xml b/gdb-xml/arm-neon.xml index ce3ee03ec48..9dce0a996fc 100644 --- a/gdb-xml/arm-neon.xml +++ b/gdb-xml/arm-neon.xml @@ -82,7 +82,5 @@ - - diff --git a/gdb-xml/arm-vfp-sysregs.xml b/gdb-xml/arm-vfp-sysregs.xml new file mode 100644 index 00000000000..c4aa2721c8d --- /dev/null +++ b/gdb-xml/arm-vfp-sysregs.xml @@ -0,0 +1,17 @@ + + + + + + + diff --git a/gdb-xml/arm-vfp.xml b/gdb-xml/arm-vfp.xml index b20881e9a99..ebed5b3d573 100644 --- a/gdb-xml/arm-vfp.xml +++ b/gdb-xml/arm-vfp.xml @@ -23,7 +23,5 @@ - - diff --git a/gdb-xml/arm-vfp3.xml b/gdb-xml/arm-vfp3.xml index 227afd8017f..ef391c7144d 100644 --- a/gdb-xml/arm-vfp3.xml +++ b/gdb-xml/arm-vfp3.xml @@ -39,7 +39,5 @@ - - diff --git a/gdbstub.c b/gdbstub.c index 19769539ba1..9e0fd0f6d99 100644 --- a/gdbstub.c +++ b/gdbstub.c @@ -31,14 +31,13 @@ #include "qemu/cutils.h" #include "qemu/module.h" #include "trace/trace-root.h" +#include "exec/gdbstub.h" #ifdef CONFIG_USER_ONLY #include "qemu.h" #else #include "monitor/monitor.h" #include "chardev/char.h" #include "chardev/char-fe.h" -#include "sysemu/sysemu.h" -#include "exec/gdbstub.h" #include "hw/cpu/cluster.h" #include "hw/boards.h" #endif @@ -103,7 +102,7 @@ static inline int cpu_gdb_index(CPUState *cpu) { #if defined(CONFIG_USER_ONLY) TaskState *ts = (TaskState *) cpu->opaque; - return ts->ts_tid; + return ts ? ts->ts_tid : -1; #else return cpu->cpu_index + 1; #endif @@ -474,6 +473,15 @@ int use_gdb_syscalls(void) return gdb_syscall_mode == GDB_SYS_ENABLED; } +static bool stub_can_reverse(void) +{ +#ifdef CONFIG_USER_ONLY + return false; +#else + return replay_mode == REPLAY_MODE_PLAY; +#endif +} + /* Resume execution. 
*/ static inline void gdb_continue(void) { @@ -1347,6 +1355,8 @@ typedef union GdbCmdVariant { } thread_id; } GdbCmdVariant; +#define get_param(p, i) (&g_array_index(p, GdbCmdVariant, i)) + static const char *cmd_next_param(const char *param, const char delimiter) { static const char all_delimiters[] = ",;:="; @@ -1372,55 +1382,52 @@ static const char *cmd_next_param(const char *param, const char delimiter) } static int cmd_parse_params(const char *data, const char *schema, - GdbCmdVariant *params, int *num_params) + GArray *params) { - int curr_param; const char *curr_schema, *curr_data; - *num_params = 0; - - if (!schema) { - return 0; - } + g_assert(schema); + g_assert(params->len == 0); curr_schema = schema; - curr_param = 0; curr_data = data; while (curr_schema[0] && curr_schema[1] && *curr_data) { + GdbCmdVariant this_param; + switch (curr_schema[0]) { case 'l': if (qemu_strtoul(curr_data, &curr_data, 16, - ¶ms[curr_param].val_ul)) { + &this_param.val_ul)) { return -EINVAL; } - curr_param++; curr_data = cmd_next_param(curr_data, curr_schema[1]); + g_array_append_val(params, this_param); break; case 'L': if (qemu_strtou64(curr_data, &curr_data, 16, - (uint64_t *)¶ms[curr_param].val_ull)) { + (uint64_t *)&this_param.val_ull)) { return -EINVAL; } - curr_param++; curr_data = cmd_next_param(curr_data, curr_schema[1]); + g_array_append_val(params, this_param); break; case 's': - params[curr_param].data = curr_data; - curr_param++; + this_param.data = curr_data; curr_data = cmd_next_param(curr_data, curr_schema[1]); + g_array_append_val(params, this_param); break; case 'o': - params[curr_param].opcode = *(uint8_t *)curr_data; - curr_param++; + this_param.opcode = *(uint8_t *)curr_data; curr_data = cmd_next_param(curr_data, curr_schema[1]); + g_array_append_val(params, this_param); break; case 't': - params[curr_param].thread_id.kind = + this_param.thread_id.kind = read_thread_id(curr_data, &curr_data, - ¶ms[curr_param].thread_id.pid, - ¶ms[curr_param].thread_id.tid); - curr_param++; + &this_param.thread_id.pid, + &this_param.thread_id.tid); curr_data = cmd_next_param(curr_data, curr_schema[1]); + g_array_append_val(params, this_param); break; case '?': curr_data = cmd_next_param(curr_data, curr_schema[1]); @@ -1431,16 +1438,10 @@ static int cmd_parse_params(const char *data, const char *schema, curr_schema += 2; } - *num_params = curr_param; return 0; } -typedef struct GdbCmdContext { - GdbCmdVariant *params; - int num_params; -} GdbCmdContext; - -typedef void (*GdbCmdHandler)(GdbCmdContext *gdb_ctx, void *user_ctx); +typedef void (*GdbCmdHandler)(GArray *params, void *user_ctx); /* * cmd_startswith -> cmd is compared using startswith @@ -1480,8 +1481,8 @@ static inline int startswith(const char *string, const char *pattern) static int process_string_cmd(void *user_ctx, const char *data, const GdbCmdParseEntry *cmds, int num_cmds) { - int i, schema_len, max_num_params = 0; - GdbCmdContext gdb_ctx; + int i; + g_autoptr(GArray) params = g_array_new(false, true, sizeof(GdbCmdVariant)); if (!cmds) { return -1; @@ -1497,24 +1498,13 @@ static int process_string_cmd(void *user_ctx, const char *data, } if (cmd->schema) { - schema_len = strlen(cmd->schema); - if (schema_len % 2) { - return -2; + if (cmd_parse_params(&data[strlen(cmd->cmd)], + cmd->schema, params)) { + return -1; } - - max_num_params = schema_len / 2; } - gdb_ctx.params = - (GdbCmdVariant *)alloca(sizeof(*gdb_ctx.params) * max_num_params); - memset(gdb_ctx.params, 0, sizeof(*gdb_ctx.params) * max_num_params); - - if 
(cmd_parse_params(&data[strlen(cmd->cmd)], cmd->schema, - gdb_ctx.params, &gdb_ctx.num_params)) { - return -1; - } - - cmd->handler(&gdb_ctx, user_ctx); + cmd->handler(params, user_ctx); return 0; } @@ -1537,18 +1527,18 @@ static void run_cmd_parser(const char *data, const GdbCmdParseEntry *cmd) } } -static void handle_detach(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_detach(GArray *params, void *user_ctx) { GDBProcess *process; uint32_t pid = 1; if (gdbserver_state.multiprocess) { - if (!gdb_ctx->num_params) { + if (!params->len) { put_packet("E22"); return; } - pid = gdb_ctx->params[0].val_ul; + pid = get_param(params, 0)->val_ul; } process = gdb_get_process(pid); @@ -1571,22 +1561,22 @@ static void handle_detach(GdbCmdContext *gdb_ctx, void *user_ctx) put_packet("OK"); } -static void handle_thread_alive(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_thread_alive(GArray *params, void *user_ctx) { CPUState *cpu; - if (!gdb_ctx->num_params) { + if (!params->len) { put_packet("E22"); return; } - if (gdb_ctx->params[0].thread_id.kind == GDB_READ_THREAD_ERR) { + if (get_param(params, 0)->thread_id.kind == GDB_READ_THREAD_ERR) { put_packet("E22"); return; } - cpu = gdb_get_cpu(gdb_ctx->params[0].thread_id.pid, - gdb_ctx->params[0].thread_id.tid); + cpu = gdb_get_cpu(get_param(params, 0)->thread_id.pid, + get_param(params, 0)->thread_id.tid); if (!cpu) { put_packet("E22"); return; @@ -1595,17 +1585,17 @@ static void handle_thread_alive(GdbCmdContext *gdb_ctx, void *user_ctx) put_packet("OK"); } -static void handle_continue(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_continue(GArray *params, void *user_ctx) { - if (gdb_ctx->num_params) { - gdb_set_cpu_pc(gdb_ctx->params[0].val_ull); + if (params->len) { + gdb_set_cpu_pc(get_param(params, 0)->val_ull); } gdbserver_state.signal = 0; gdb_continue(); } -static void handle_cont_with_sig(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_cont_with_sig(GArray *params, void *user_ctx) { unsigned long signal = 0; @@ -1613,8 +1603,8 @@ static void handle_cont_with_sig(GdbCmdContext *gdb_ctx, void *user_ctx) * Note: C sig;[addr] is currently unsupported and we simply * omit the addr parameter */ - if (gdb_ctx->num_params) { - signal = gdb_ctx->params[0].val_ul; + if (params->len) { + signal = get_param(params, 0)->val_ul; } gdbserver_state.signal = gdb_signal_to_target(signal); @@ -1624,27 +1614,27 @@ static void handle_cont_with_sig(GdbCmdContext *gdb_ctx, void *user_ctx) gdb_continue(); } -static void handle_set_thread(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_set_thread(GArray *params, void *user_ctx) { CPUState *cpu; - if (gdb_ctx->num_params != 2) { + if (params->len != 2) { put_packet("E22"); return; } - if (gdb_ctx->params[1].thread_id.kind == GDB_READ_THREAD_ERR) { + if (get_param(params, 1)->thread_id.kind == GDB_READ_THREAD_ERR) { put_packet("E22"); return; } - if (gdb_ctx->params[1].thread_id.kind != GDB_ONE_THREAD) { + if (get_param(params, 1)->thread_id.kind != GDB_ONE_THREAD) { put_packet("OK"); return; } - cpu = gdb_get_cpu(gdb_ctx->params[1].thread_id.pid, - gdb_ctx->params[1].thread_id.tid); + cpu = gdb_get_cpu(get_param(params, 1)->thread_id.pid, + get_param(params, 1)->thread_id.tid); if (!cpu) { put_packet("E22"); return; @@ -1654,7 +1644,7 @@ static void handle_set_thread(GdbCmdContext *gdb_ctx, void *user_ctx) * Note: This command is deprecated and modern gdb's will be using the * vCont command instead. 
*/ - switch (gdb_ctx->params[0].opcode) { + switch (get_param(params, 0)->opcode) { case 'c': gdbserver_state.c_cpu = cpu; put_packet("OK"); @@ -1669,18 +1659,18 @@ static void handle_set_thread(GdbCmdContext *gdb_ctx, void *user_ctx) } } -static void handle_insert_bp(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_insert_bp(GArray *params, void *user_ctx) { int res; - if (gdb_ctx->num_params != 3) { + if (params->len != 3) { put_packet("E22"); return; } - res = gdb_breakpoint_insert(gdb_ctx->params[0].val_ul, - gdb_ctx->params[1].val_ull, - gdb_ctx->params[2].val_ull); + res = gdb_breakpoint_insert(get_param(params, 0)->val_ul, + get_param(params, 1)->val_ull, + get_param(params, 2)->val_ull); if (res >= 0) { put_packet("OK"); return; @@ -1692,18 +1682,18 @@ static void handle_insert_bp(GdbCmdContext *gdb_ctx, void *user_ctx) put_packet("E22"); } -static void handle_remove_bp(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_remove_bp(GArray *params, void *user_ctx) { int res; - if (gdb_ctx->num_params != 3) { + if (params->len != 3) { put_packet("E22"); return; } - res = gdb_breakpoint_remove(gdb_ctx->params[0].val_ul, - gdb_ctx->params[1].val_ull, - gdb_ctx->params[2].val_ull); + res = gdb_breakpoint_remove(get_param(params, 0)->val_ul, + get_param(params, 1)->val_ull, + get_param(params, 2)->val_ull); if (res >= 0) { put_packet("OK"); return; @@ -1726,7 +1716,7 @@ static void handle_remove_bp(GdbCmdContext *gdb_ctx, void *user_ctx) * the remote gdb to fallback to older methods. */ -static void handle_set_reg(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_set_reg(GArray *params, void *user_ctx) { int reg_size; @@ -1735,19 +1725,19 @@ static void handle_set_reg(GdbCmdContext *gdb_ctx, void *user_ctx) return; } - if (gdb_ctx->num_params != 2) { + if (params->len != 2) { put_packet("E22"); return; } - reg_size = strlen(gdb_ctx->params[1].data) / 2; - hextomem(gdbserver_state.mem_buf, gdb_ctx->params[1].data, reg_size); + reg_size = strlen(get_param(params, 1)->data) / 2; + hextomem(gdbserver_state.mem_buf, get_param(params, 1)->data, reg_size); gdb_write_register(gdbserver_state.g_cpu, gdbserver_state.mem_buf->data, - gdb_ctx->params[0].val_ull); + get_param(params, 0)->val_ull); put_packet("OK"); } -static void handle_get_reg(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_get_reg(GArray *params, void *user_ctx) { int reg_size; @@ -1756,14 +1746,14 @@ static void handle_get_reg(GdbCmdContext *gdb_ctx, void *user_ctx) return; } - if (!gdb_ctx->num_params) { + if (!params->len) { put_packet("E14"); return; } reg_size = gdb_read_register(gdbserver_state.g_cpu, gdbserver_state.mem_buf, - gdb_ctx->params[0].val_ull); + get_param(params, 0)->val_ull); if (!reg_size) { put_packet("E14"); return; @@ -1775,22 +1765,24 @@ static void handle_get_reg(GdbCmdContext *gdb_ctx, void *user_ctx) put_strbuf(); } -static void handle_write_mem(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_write_mem(GArray *params, void *user_ctx) { - if (gdb_ctx->num_params != 3) { + if (params->len != 3) { put_packet("E22"); return; } /* hextomem() reads 2*len bytes */ - if (gdb_ctx->params[1].val_ull > strlen(gdb_ctx->params[2].data) / 2) { + if (get_param(params, 1)->val_ull > + strlen(get_param(params, 2)->data) / 2) { put_packet("E22"); return; } - hextomem(gdbserver_state.mem_buf, gdb_ctx->params[2].data, - gdb_ctx->params[1].val_ull); - if (target_memory_rw_debug(gdbserver_state.g_cpu, gdb_ctx->params[0].val_ull, + hextomem(gdbserver_state.mem_buf, 
get_param(params, 2)->data, + get_param(params, 1)->val_ull); + if (target_memory_rw_debug(gdbserver_state.g_cpu, + get_param(params, 0)->val_ull, gdbserver_state.mem_buf->data, gdbserver_state.mem_buf->len, true)) { put_packet("E14"); @@ -1800,22 +1792,24 @@ static void handle_write_mem(GdbCmdContext *gdb_ctx, void *user_ctx) put_packet("OK"); } -static void handle_read_mem(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_read_mem(GArray *params, void *user_ctx) { - if (gdb_ctx->num_params != 2) { + if (params->len != 2) { put_packet("E22"); return; } /* memtohex() doubles the required space */ - if (gdb_ctx->params[1].val_ull > MAX_PACKET_LENGTH / 2) { + if (get_param(params, 1)->val_ull > MAX_PACKET_LENGTH / 2) { put_packet("E22"); return; } - g_byte_array_set_size(gdbserver_state.mem_buf, gdb_ctx->params[1].val_ull); + g_byte_array_set_size(gdbserver_state.mem_buf, + get_param(params, 1)->val_ull); - if (target_memory_rw_debug(gdbserver_state.g_cpu, gdb_ctx->params[0].val_ull, + if (target_memory_rw_debug(gdbserver_state.g_cpu, + get_param(params, 0)->val_ull, gdbserver_state.mem_buf->data, gdbserver_state.mem_buf->len, false)) { put_packet("E14"); @@ -1827,19 +1821,19 @@ static void handle_read_mem(GdbCmdContext *gdb_ctx, void *user_ctx) put_strbuf(); } -static void handle_write_all_regs(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_write_all_regs(GArray *params, void *user_ctx) { target_ulong addr, len; uint8_t *registers; int reg_size; - if (!gdb_ctx->num_params) { + if (!params->len) { return; } cpu_synchronize_state(gdbserver_state.g_cpu); - len = strlen(gdb_ctx->params[0].data) / 2; - hextomem(gdbserver_state.mem_buf, gdb_ctx->params[0].data, len); + len = strlen(get_param(params, 0)->data) / 2; + hextomem(gdbserver_state.mem_buf, get_param(params, 0)->data, len); registers = gdbserver_state.mem_buf->data; for (addr = 0; addr < gdbserver_state.g_cpu->gdb_num_g_regs && len > 0; addr++) { @@ -1850,7 +1844,7 @@ static void handle_write_all_regs(GdbCmdContext *gdb_ctx, void *user_ctx) put_packet("OK"); } -static void handle_read_all_regs(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_read_all_regs(GArray *params, void *user_ctx) { target_ulong addr, len; @@ -1868,14 +1862,14 @@ static void handle_read_all_regs(GdbCmdContext *gdb_ctx, void *user_ctx) put_strbuf(); } -static void handle_file_io(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_file_io(GArray *params, void *user_ctx) { - if (gdb_ctx->num_params >= 1 && gdbserver_state.current_syscall_cb) { + if (params->len >= 1 && gdbserver_state.current_syscall_cb) { target_ulong ret, err; - ret = (target_ulong)gdb_ctx->params[0].val_ull; - if (gdb_ctx->num_params >= 2) { - err = (target_ulong)gdb_ctx->params[1].val_ull; + ret = (target_ulong)get_param(params, 0)->val_ull; + if (params->len >= 2) { + err = (target_ulong)get_param(params, 1)->val_ull; } else { err = 0; } @@ -1883,7 +1877,7 @@ static void handle_file_io(GdbCmdContext *gdb_ctx, void *user_ctx) gdbserver_state.current_syscall_cb = NULL; } - if (gdb_ctx->num_params >= 3 && gdb_ctx->params[2].opcode == (uint8_t)'C') { + if (params->len >= 3 && get_param(params, 2)->opcode == (uint8_t)'C') { put_packet("T02"); return; } @@ -1891,23 +1885,23 @@ static void handle_file_io(GdbCmdContext *gdb_ctx, void *user_ctx) gdb_continue(); } -static void handle_step(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_step(GArray *params, void *user_ctx) { - if (gdb_ctx->num_params) { - 
gdb_set_cpu_pc((target_ulong)gdb_ctx->params[0].val_ull); + if (params->len) { + gdb_set_cpu_pc((target_ulong)get_param(params, 0)->val_ull); } cpu_single_step(gdbserver_state.c_cpu, get_sstep_flags()); gdb_continue(); } -static void handle_backward(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_backward(GArray *params, void *user_ctx) { - if (replay_mode != REPLAY_MODE_PLAY) { + if (!stub_can_reverse()) { put_packet("E22"); } - if (gdb_ctx->num_params == 1) { - switch (gdb_ctx->params[0].opcode) { + if (params->len == 1) { + switch (get_param(params, 0)->opcode) { case 's': if (replay_reverse_step()) { gdb_continue(); @@ -1929,20 +1923,20 @@ static void handle_backward(GdbCmdContext *gdb_ctx, void *user_ctx) put_packet(""); } -static void handle_v_cont_query(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_v_cont_query(GArray *params, void *user_ctx) { put_packet("vCont;c;C;s;S"); } -static void handle_v_cont(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_v_cont(GArray *params, void *user_ctx) { int res; - if (!gdb_ctx->num_params) { + if (!params->len) { return; } - res = gdb_handle_vcont(gdb_ctx->params[0].data); + res = gdb_handle_vcont(get_param(params, 0)->data); if ((res == -EINVAL) || (res == -ERANGE)) { put_packet("E22"); } else if (res) { @@ -1950,17 +1944,17 @@ static void handle_v_cont(GdbCmdContext *gdb_ctx, void *user_ctx) } } -static void handle_v_attach(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_v_attach(GArray *params, void *user_ctx) { GDBProcess *process; CPUState *cpu; g_string_assign(gdbserver_state.str_buf, "E22"); - if (!gdb_ctx->num_params) { + if (!params->len) { goto cleanup; } - process = gdb_get_process(gdb_ctx->params[0].val_ul); + process = gdb_get_process(get_param(params, 0)->val_ul); if (!process) { goto cleanup; } @@ -1981,7 +1975,7 @@ static void handle_v_attach(GdbCmdContext *gdb_ctx, void *user_ctx) put_strbuf(); } -static void handle_v_kill(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_v_kill(GArray *params, void *user_ctx) { /* Kill the target */ put_packet("OK"); @@ -1990,7 +1984,7 @@ static void handle_v_kill(GdbCmdContext *gdb_ctx, void *user_ctx) exit(0); } -static GdbCmdParseEntry gdb_v_commands_table[] = { +static const GdbCmdParseEntry gdb_v_commands_table[] = { /* Order is important if has same prefix */ { .handler = handle_v_cont_query, @@ -2016,43 +2010,43 @@ static GdbCmdParseEntry gdb_v_commands_table[] = { }, }; -static void handle_v_commands(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_v_commands(GArray *params, void *user_ctx) { - if (!gdb_ctx->num_params) { + if (!params->len) { return; } - if (process_string_cmd(NULL, gdb_ctx->params[0].data, + if (process_string_cmd(NULL, get_param(params, 0)->data, gdb_v_commands_table, ARRAY_SIZE(gdb_v_commands_table))) { put_packet(""); } } -static void handle_query_qemu_sstepbits(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_qemu_sstepbits(GArray *params, void *user_ctx) { g_string_printf(gdbserver_state.str_buf, "ENABLE=%x,NOIRQ=%x,NOTIMER=%x", SSTEP_ENABLE, SSTEP_NOIRQ, SSTEP_NOTIMER); put_strbuf(); } -static void handle_set_qemu_sstep(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_set_qemu_sstep(GArray *params, void *user_ctx) { - if (!gdb_ctx->num_params) { + if (!params->len) { return; } - sstep_flags = gdb_ctx->params[0].val_ul; + sstep_flags = get_param(params, 0)->val_ul; put_packet("OK"); } -static void handle_query_qemu_sstep(GdbCmdContext *gdb_ctx, void *user_ctx) +static 
void handle_query_qemu_sstep(GArray *params, void *user_ctx) { g_string_printf(gdbserver_state.str_buf, "0x%x", sstep_flags); put_strbuf(); } -static void handle_query_curr_tid(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_curr_tid(GArray *params, void *user_ctx) { CPUState *cpu; GDBProcess *process; @@ -2069,7 +2063,7 @@ static void handle_query_curr_tid(GdbCmdContext *gdb_ctx, void *user_ctx) put_strbuf(); } -static void handle_query_threads(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_threads(GArray *params, void *user_ctx) { if (!gdbserver_state.query_cpu) { put_packet("l"); @@ -2082,25 +2076,25 @@ static void handle_query_threads(GdbCmdContext *gdb_ctx, void *user_ctx) gdbserver_state.query_cpu = gdb_next_attached_cpu(gdbserver_state.query_cpu); } -static void handle_query_first_threads(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_first_threads(GArray *params, void *user_ctx) { gdbserver_state.query_cpu = gdb_first_attached_cpu(); - handle_query_threads(gdb_ctx, user_ctx); + handle_query_threads(params, user_ctx); } -static void handle_query_thread_extra(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_thread_extra(GArray *params, void *user_ctx) { g_autoptr(GString) rs = g_string_new(NULL); CPUState *cpu; - if (!gdb_ctx->num_params || - gdb_ctx->params[0].thread_id.kind == GDB_READ_THREAD_ERR) { + if (!params->len || + get_param(params, 0)->thread_id.kind == GDB_READ_THREAD_ERR) { put_packet("E22"); return; } - cpu = gdb_get_cpu(gdb_ctx->params[0].thread_id.pid, - gdb_ctx->params[0].thread_id.tid); + cpu = gdb_get_cpu(get_param(params, 0)->thread_id.pid, + get_param(params, 0)->thread_id.tid); if (!cpu) { return; } @@ -2125,7 +2119,7 @@ static void handle_query_thread_extra(GdbCmdContext *gdb_ctx, void *user_ctx) } #ifdef CONFIG_USER_ONLY -static void handle_query_offsets(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_offsets(GArray *params, void *user_ctx) { TaskState *ts; @@ -2140,17 +2134,17 @@ static void handle_query_offsets(GdbCmdContext *gdb_ctx, void *user_ctx) put_strbuf(); } #else -static void handle_query_rcmd(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_rcmd(GArray *params, void *user_ctx) { const guint8 zero = 0; int len; - if (!gdb_ctx->num_params) { + if (!params->len) { put_packet("E22"); return; } - len = strlen(gdb_ctx->params[0].data); + len = strlen(get_param(params, 0)->data); if (len % 2) { put_packet("E01"); return; @@ -2158,7 +2152,7 @@ static void handle_query_rcmd(GdbCmdContext *gdb_ctx, void *user_ctx) g_assert(gdbserver_state.mem_buf->len == 0); len = len / 2; - hextomem(gdbserver_state.mem_buf, gdb_ctx->params[0].data, len); + hextomem(gdbserver_state.mem_buf, get_param(params, 0)->data, len); g_byte_array_append(gdbserver_state.mem_buf, &zero, 1); qemu_chr_be_write(gdbserver_state.mon_chr, gdbserver_state.mem_buf->data, gdbserver_state.mem_buf->len); @@ -2166,7 +2160,7 @@ static void handle_query_rcmd(GdbCmdContext *gdb_ctx, void *user_ctx) } #endif -static void handle_query_supported(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_supported(GArray *params, void *user_ctx) { CPUClass *cc; @@ -2176,7 +2170,7 @@ static void handle_query_supported(GdbCmdContext *gdb_ctx, void *user_ctx) g_string_append(gdbserver_state.str_buf, ";qXfer:features:read+"); } - if (replay_mode == REPLAY_MODE_PLAY) { + if (stub_can_reverse()) { g_string_append(gdbserver_state.str_buf, ";ReverseStep+;ReverseContinue+"); } @@ -2187,8 +2181,8 @@ 
static void handle_query_supported(GdbCmdContext *gdb_ctx, void *user_ctx) } #endif - if (gdb_ctx->num_params && - strstr(gdb_ctx->params[0].data, "multiprocess+")) { + if (params->len && + strstr(get_param(params, 0)->data, "multiprocess+")) { gdbserver_state.multiprocess = true; } @@ -2200,7 +2194,7 @@ static void handle_query_supported(GdbCmdContext *gdb_ctx, void *user_ctx) put_strbuf(); } -static void handle_query_xfer_features(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_xfer_features(GArray *params, void *user_ctx) { GDBProcess *process; CPUClass *cc; @@ -2208,7 +2202,7 @@ static void handle_query_xfer_features(GdbCmdContext *gdb_ctx, void *user_ctx) const char *xml; const char *p; - if (gdb_ctx->num_params < 3) { + if (params->len < 3) { put_packet("E22"); return; } @@ -2221,15 +2215,15 @@ static void handle_query_xfer_features(GdbCmdContext *gdb_ctx, void *user_ctx) } gdb_has_xml = true; - p = gdb_ctx->params[0].data; + p = get_param(params, 0)->data; xml = get_feature_xml(p, &p, process); if (!xml) { put_packet("E00"); return; } - addr = gdb_ctx->params[1].val_ul; - len = gdb_ctx->params[2].val_ul; + addr = get_param(params, 1)->val_ul; + len = get_param(params, 2)->val_ul; total_len = strlen(xml); if (addr > total_len) { put_packet("E00"); @@ -2253,18 +2247,18 @@ static void handle_query_xfer_features(GdbCmdContext *gdb_ctx, void *user_ctx) } #if defined(CONFIG_USER_ONLY) && defined(CONFIG_LINUX_USER) -static void handle_query_xfer_auxv(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_xfer_auxv(GArray *params, void *user_ctx) { TaskState *ts; unsigned long offset, len, saved_auxv, auxv_len; - if (gdb_ctx->num_params < 2) { + if (params->len < 2) { put_packet("E22"); return; } - offset = gdb_ctx->params[0].val_ul; - len = gdb_ctx->params[1].val_ul; + offset = get_param(params, 0)->val_ul; + len = get_param(params, 1)->val_ul; ts = gdbserver_state.c_cpu->opaque; saved_auxv = ts->info->saved_auxv; auxv_len = ts->info->auxv_len; @@ -2300,24 +2294,24 @@ static void handle_query_xfer_auxv(GdbCmdContext *gdb_ctx, void *user_ctx) #endif #if defined(TARGET_CHERI) && !defined(CONFIG_USER_ONLY) -static void handle_query_xfer_capa_read(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_xfer_capa_read(GArray *params, void *user_ctx) { uint8_t capbuf[CHERI_CAP_SIZE + 1]; uint64_t addr; unsigned long len, offset; - if (gdb_ctx->num_params != 3) { + if (params->len != 3) { put_packet("E22"); return; } - addr = gdb_ctx->params[0].val_ull; + addr = get_param(params, 0)->val_ull; if (addr % CHERI_CAP_SIZE != 0) { put_packet("E22"); return; } - offset = gdb_ctx->params[1].val_ul; + offset = get_param(params, 1)->val_ul; if (offset > sizeof(capbuf)) { put_packet("E22"); return; @@ -2333,7 +2327,7 @@ static void handle_query_xfer_capa_read(GdbCmdContext *gdb_ctx, void *user_ctx) return; } - len = gdb_ctx->params[2].val_ul; + len = get_param(params, 2)->val_ul; if (len > sizeof(capbuf) - offset) { len = sizeof(capbuf) - offset; } @@ -2349,12 +2343,12 @@ static void handle_query_xfer_capa_read(GdbCmdContext *gdb_ctx, void *user_ctx) } #endif -static void handle_query_attached(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_attached(GArray *params, void *user_ctx) { put_packet(GDB_ATTACHED); } -static void handle_query_qemu_supported(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_qemu_supported(GArray *params, void *user_ctx) { g_string_printf(gdbserver_state.str_buf, "sstepbits;sstep"); #ifndef CONFIG_USER_ONLY @@ 
-2364,21 +2358,21 @@ static void handle_query_qemu_supported(GdbCmdContext *gdb_ctx, void *user_ctx) } #ifndef CONFIG_USER_ONLY -static void handle_query_qemu_phy_mem_mode(GdbCmdContext *gdb_ctx, +static void handle_query_qemu_phy_mem_mode(GArray *params, void *user_ctx) { g_string_printf(gdbserver_state.str_buf, "%d", phy_memory_mode); put_strbuf(); } -static void handle_set_qemu_phy_mem_mode(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_set_qemu_phy_mem_mode(GArray *params, void *user_ctx) { - if (!gdb_ctx->num_params) { + if (!params->len) { put_packet("E22"); return; } - if (!gdb_ctx->params[0].val_ul) { + if (!get_param(params, 0)->val_ul) { phy_memory_mode = 0; } else { phy_memory_mode = 1; @@ -2387,7 +2381,7 @@ static void handle_set_qemu_phy_mem_mode(GdbCmdContext *gdb_ctx, void *user_ctx) } #endif -static GdbCmdParseEntry gdb_gen_query_set_common_table[] = { +static const GdbCmdParseEntry gdb_gen_query_set_common_table[] = { /* Order is important if has same prefix */ { .handler = handle_query_qemu_sstepbits, @@ -2405,7 +2399,7 @@ static GdbCmdParseEntry gdb_gen_query_set_common_table[] = { }, }; -static GdbCmdParseEntry gdb_gen_query_table[] = { +static const GdbCmdParseEntry gdb_gen_query_table[] = { { .handler = handle_query_curr_tid, .cmd = "C", @@ -2491,7 +2485,7 @@ static GdbCmdParseEntry gdb_gen_query_table[] = { #endif }; -static GdbCmdParseEntry gdb_gen_set_table[] = { +static const GdbCmdParseEntry gdb_gen_set_table[] = { /* Order is important if has same prefix */ { .handler = handle_set_qemu_sstep, @@ -2509,45 +2503,45 @@ static GdbCmdParseEntry gdb_gen_set_table[] = { #endif }; -static void handle_gen_query(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_gen_query(GArray *params, void *user_ctx) { - if (!gdb_ctx->num_params) { + if (!params->len) { return; } - if (!process_string_cmd(NULL, gdb_ctx->params[0].data, + if (!process_string_cmd(NULL, get_param(params, 0)->data, gdb_gen_query_set_common_table, ARRAY_SIZE(gdb_gen_query_set_common_table))) { return; } - if (process_string_cmd(NULL, gdb_ctx->params[0].data, + if (process_string_cmd(NULL, get_param(params, 0)->data, gdb_gen_query_table, ARRAY_SIZE(gdb_gen_query_table))) { put_packet(""); } } -static void handle_gen_set(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_gen_set(GArray *params, void *user_ctx) { - if (!gdb_ctx->num_params) { + if (!params->len) { return; } - if (!process_string_cmd(NULL, gdb_ctx->params[0].data, + if (!process_string_cmd(NULL, get_param(params, 0)->data, gdb_gen_query_set_common_table, ARRAY_SIZE(gdb_gen_query_set_common_table))) { return; } - if (process_string_cmd(NULL, gdb_ctx->params[0].data, + if (process_string_cmd(NULL, get_param(params, 0)->data, gdb_gen_set_table, ARRAY_SIZE(gdb_gen_set_table))) { put_packet(""); } } -static void handle_target_halt(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_target_halt(GArray *params, void *user_ctx) { g_string_printf(gdbserver_state.str_buf, "T%02xthread:", GDB_SIGNAL_TRAP); gdb_append_thread_id(gdbserver_state.c_cpu, gdbserver_state.str_buf); @@ -3221,8 +3215,12 @@ gdb_handlesig(CPUState *cpu, int sig) tb_flush(cpu); if (sig != 0) { - snprintf(buf, sizeof(buf), "S%02x", target_signal_to_gdb(sig)); - put_packet(buf); + gdb_set_stop_cpu(cpu); + g_string_printf(gdbserver_state.str_buf, + "T%02xthread:", target_signal_to_gdb(sig)); + gdb_append_thread_id(cpu, gdbserver_state.str_buf); + g_string_append_c(gdbserver_state.str_buf, ';'); + put_strbuf(); } /* put_packet() might have detected 
that the peer terminated the connection. */ @@ -3301,7 +3299,7 @@ static bool gdb_accept_socket(int gdb_fd) static int gdbserver_open_socket(const char *path) { - struct sockaddr_un sockaddr; + struct sockaddr_un sockaddr = {}; int fd, ret; fd = socket(AF_UNIX, SOCK_STREAM, 0); @@ -3330,7 +3328,7 @@ static int gdbserver_open_socket(const char *path) static bool gdb_accept_tcp(int gdb_fd) { - struct sockaddr_in sockaddr; + struct sockaddr_in sockaddr = {}; socklen_t len; int fd; diff --git a/gitdm.config b/gitdm.config index c01c2190787..288b100d89d 100644 --- a/gitdm.config +++ b/gitdm.config @@ -28,20 +28,25 @@ EmailMap contrib/gitdm/domain-map # # Use GroupMap to map a file full of addresses to the # same employer. This is used for people that don't post from easily -# identifiable corporate emails. +# identifiable corporate emails. Please keep this list sorted. # -GroupMap contrib/gitdm/group-map-redhat Red Hat -GroupMap contrib/gitdm/group-map-wavecomp Wave Computing GroupMap contrib/gitdm/group-map-cadence Cadence Design Systems GroupMap contrib/gitdm/group-map-codeweavers CodeWeavers GroupMap contrib/gitdm/group-map-ibm IBM GroupMap contrib/gitdm/group-map-janustech Janus Technologies +GroupMap contrib/gitdm/group-map-netflix Netflix +GroupMap contrib/gitdm/group-map-redhat Red Hat +GroupMap contrib/gitdm/group-map-wavecomp Wave Computing # Also group together our prolific individual contributors -# and those working under academic auspices +# and those working under academic or intern auspices GroupMap contrib/gitdm/group-map-individuals (None) GroupMap contrib/gitdm/group-map-academics Academics (various) +GroupMap contrib/gitdm/group-map-interns GSoC/Outreachy Interns + +# Group together robots and other auto-reporters +GroupMap contrib/gitdm/group-map-robots Robots (various) # # diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index ab0c7aa5eea..407a1da800c 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -127,21 +127,6 @@ SRST Show local APIC state ERST -#if defined(TARGET_I386) - { - .name = "ioapic", - .args_type = "", - .params = "", - .help = "show io apic state", - .cmd = hmp_info_io_apic, - }, -#endif - -SRST - ``info ioapic`` - Show io APIC state -ERST - { .name = "cpus", .args_type = "", @@ -174,7 +159,7 @@ ERST .args_type = "", .params = "", .help = "show the interrupts statistics (if available)", - .cmd = hmp_info_irq, + .cmd_info_hrt = qmp_x_query_irq, }, SRST @@ -200,7 +185,7 @@ ERST .args_type = "", .params = "", .help = "show RDMA state", - .cmd = hmp_info_rdma, + .cmd_info_hrt = qmp_x_query_rdma, }, SRST @@ -274,7 +259,6 @@ ERST .args_type = "", .params = "", .help = "show dynamic compiler info", - .cmd = hmp_info_jit, }, #endif @@ -289,7 +273,6 @@ ERST .args_type = "", .params = "", .help = "show dynamic compiler opcode counters", - .cmd = hmp_info_opcount, }, #endif @@ -342,7 +325,7 @@ ERST .args_type = "", .params = "", .help = "show NUMA information", - .cmd = hmp_info_numa, + .cmd_info_hrt = qmp_x_query_numa, }, SRST @@ -355,7 +338,7 @@ ERST .args_type = "", .params = "", .help = "show guest USB devices", - .cmd = hmp_info_usb, + .cmd_info_hrt = qmp_x_query_usb, }, SRST @@ -368,7 +351,6 @@ ERST .args_type = "", .params = "", .help = "show host USB devices", - .cmd = hmp_info_usbhost, }, SRST @@ -381,7 +363,7 @@ ERST .args_type = "", .params = "", .help = "show profiling information", - .cmd = hmp_info_profile, + .cmd_info_hrt = qmp_x_query_profile, }, SRST @@ -500,19 +482,6 @@ SRST Show the current VM UUID. 
ERST - { - .name = "cpustats", - .args_type = "", - .params = "", - .help = "show CPU statistics", - .cmd = hmp_info_cpustats, - }, - -SRST - ``info cpustats`` - Show CPU statistics. -ERST - #if defined(CONFIG_SLIRP) { .name = "usernet", @@ -625,7 +594,7 @@ ERST .args_type = "", .params = "", .help = "show roms", - .cmd = hmp_info_roms, + .cmd_info_hrt = qmp_x_query_roms, }, SRST @@ -803,7 +772,7 @@ ERST .args_type = "", .params = "", .help = "Display system ramblock information", - .cmd = hmp_info_ramblock, + .cmd_info_hrt = qmp_x_query_ramblock, }, SRST @@ -880,3 +849,31 @@ SRST ``info replay`` Display the record/replay information: mode and the current icount. ERST + + { + .name = "dirty_rate", + .args_type = "", + .params = "", + .help = "show dirty rate information", + .cmd = hmp_info_dirty_rate, + }, + +SRST + ``info dirty_rate`` + Display the vcpu dirty rate information. +ERST + +#if defined(TARGET_I386) + { + .name = "sgx", + .args_type = "", + .params = "", + .help = "show intel SGX information", + .cmd = hmp_info_sgx, + }, +#endif + +SRST + ``info sgx`` + Show intel SGX information. +ERST diff --git a/hmp-commands.hx b/hmp-commands.hx index 18b7b783c57..a999f22df82 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -382,7 +382,7 @@ SRST ERST { - .name = "stop", + .name = "stop|s", .args_type = "", .params = "", .help = "stop emulation", @@ -390,7 +390,7 @@ ERST }, SRST -``stop`` +``stop`` or ``s`` Stop emulation. ERST @@ -1269,6 +1269,7 @@ ERST .help = "add host network device", .cmd = hmp_netdev_add, .command_completion = netdev_add_completion, + .flags = "p", }, SRST @@ -1283,6 +1284,7 @@ ERST .help = "remove host network device", .cmd = hmp_netdev_del, .command_completion = netdev_del_completion, + .flags = "p", }, SRST @@ -1297,6 +1299,7 @@ ERST .help = "create QOM object", .cmd = hmp_object_add, .command_completion = object_add_completion, + .flags = "p", }, SRST @@ -1311,6 +1314,7 @@ ERST .help = "destroy QOM object", .cmd = hmp_object_del, .command_completion = object_del_completion, + .flags = "p", }, SRST @@ -1518,12 +1522,11 @@ ERST SRST ``set_password [ vnc | spice ] password [ action-if-connected ]`` - Change spice/vnc password. Use zero to make the password stay valid - forever. *action-if-connected* specifies what should happen in - case a connection is established: *fail* makes the password change - fail. *disconnect* changes the password and disconnects the - client. *keep* changes the password and keeps the connection up. - *keep* is the default. + Change spice/vnc password. *action-if-connected* specifies what + should happen in case a connection is established: *fail* makes the + password change fail. *disconnect* changes the password and + disconnects the client. *keep* changes the password and keeps the + connection up. *keep* is the default. ERST { @@ -1667,7 +1670,7 @@ ERST { .name = "replay_break", - .args_type = "icount:i", + .args_type = "icount:l", .params = "icount", .help = "set breakpoint at the specified instruction count", .cmd = hmp_replay_break, @@ -1699,7 +1702,7 @@ ERST { .name = "replay_seek", - .args_type = "icount:i", + .args_type = "icount:l", .params = "icount", .help = "replay execution to the specified instruction count", .cmd = hmp_replay_seek, @@ -1725,6 +1728,23 @@ ERST .flags = "p", }, +SRST +``calc_dirty_rate`` *second* + Start a round of dirty rate measurement with the period specified in *second*. + The result of the dirty rate measurement may be observed with ``info + dirty_rate`` command. 
+ERST + + { + .name = "calc_dirty_rate", + .args_type = "dirty_ring:-r,dirty_bitmap:-b,second:l,sample_pages_per_GB:l?", + .params = "[-r] [-b] second [sample_pages_per_GB]", + .help = "start a round of guest dirty rate measurement (using -r to" + "\n\t\t\t specify dirty ring as the method of calculation and" + "\n\t\t\t -b to specify dirty bitmap as method of calculation)", + .cmd = hmp_calc_dirty_rate, + }, + { .name = "cheri_trace_buffer_size", .args_type = "buffer_size:i", @@ -1736,4 +1756,4 @@ ERST SRST ``cheri_trace_buffer_size`` *buffer_size* Set the instruction trace buffer size to the given number of entries.. -ERST +ERST \ No newline at end of file diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 890e25a6834..42b65e143b3 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -10,6 +10,11 @@ * the COPYING file in the top-level directory. */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "9p.h" #include "9p-local.h" diff --git a/hw/9pfs/9p-posix-acl.c b/hw/9pfs/9p-posix-acl.c index bbf89064f7a..eadae270dde 100644 --- a/hw/9pfs/9p-posix-acl.c +++ b/hw/9pfs/9p-posix-acl.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "qemu/xattr.h" #include "9p.h" diff --git a/hw/9pfs/9p-proxy.c b/hw/9pfs/9p-proxy.c index 9db07dfa845..be1546c1be5 100644 --- a/hw/9pfs/9p-proxy.c +++ b/hw/9pfs/9p-proxy.c @@ -10,6 +10,11 @@ * the COPYING file in the top-level directory. */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include #include diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c index d538fd4f305..09b9c252885 100644 --- a/hw/9pfs/9p-synth.c +++ b/hw/9pfs/9p-synth.c @@ -12,6 +12,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "9p.h" #include "fsdev/qemu-fsdev.h" diff --git a/hw/9pfs/9p-util-linux.c b/hw/9pfs/9p-util-linux.c index 06399c59199..4f57d8c0471 100644 --- a/hw/9pfs/9p-util-linux.c +++ b/hw/9pfs/9p-util-linux.c @@ -10,6 +10,11 @@ * See the COPYING file in the top-level directory. */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "qemu/xattr.h" #include "9p-util.h" diff --git a/hw/9pfs/9p-xattr-user.c b/hw/9pfs/9p-xattr-user.c index 2c90817b75a..f2ae9582e61 100644 --- a/hw/9pfs/9p-xattr-user.c +++ b/hw/9pfs/9p-xattr-user.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "9p.h" #include "fsdev/file-op-9p.h" diff --git a/hw/9pfs/9p-xattr.c b/hw/9pfs/9p-xattr.c index c696d8f8460..9ae69dd8dbc 100644 --- a/hw/9pfs/9p-xattr.c +++ b/hw/9pfs/9p-xattr.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "9p.h" #include "fsdev/file-op-9p.h" diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 9f20338ac5c..71984bb7f84 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! 
You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include #include "hw/virtio/virtio.h" @@ -48,6 +53,8 @@ enum { Oappend = 0x80, }; +P9ARRAY_DEFINE_TYPE(V9fsPath, v9fs_path_free); + static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...) { ssize_t ret; @@ -978,23 +985,6 @@ static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp) return 0; } -static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, - V9fsQID *qidp) -{ - struct stat stbuf; - int err; - - err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); - if (err < 0) { - return err; - } - err = stat_to_qid(pdu, &stbuf, qidp); - if (err < 0) { - return err; - } - return 0; -} - V9fsPDU *pdu_alloc(V9fsState *s) { V9fsPDU *pdu = NULL; @@ -1286,6 +1276,37 @@ static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path, #define P9_STATS_ALL 0x00003fffULL /* Mask for All fields above */ +/** + * Convert host filesystem's block size into an appropriate block size for + * 9p client (guest OS side). The value returned suggests an "optimum" block + * size for 9p I/O, i.e. to maximize performance. + * + * @pdu: 9p client request + * @blksize: host filesystem's block size + */ +static int32_t blksize_to_iounit(const V9fsPDU *pdu, int32_t blksize) +{ + int32_t iounit = 0; + V9fsState *s = pdu->s; + + /* + * iounit should be multiples of blksize (host filesystem block size) + * as well as less than (client msize - P9_IOHDRSZ) + */ + if (blksize) { + iounit = QEMU_ALIGN_DOWN(s->msize - P9_IOHDRSZ, blksize); + } + if (!iounit) { + iounit = s->msize - P9_IOHDRSZ; + } + return iounit; +} + +static int32_t stat_to_iounit(const V9fsPDU *pdu, const struct stat *stbuf) +{ + return blksize_to_iounit(pdu, stbuf->st_blksize); +} + static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf, V9fsStatDotl *v9lstat) { @@ -1297,7 +1318,7 @@ static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf, v9lstat->st_gid = stbuf->st_gid; v9lstat->st_rdev = stbuf->st_rdev; v9lstat->st_size = stbuf->st_size; - v9lstat->st_blksize = stbuf->st_blksize; + v9lstat->st_blksize = stat_to_iounit(pdu, stbuf); v9lstat->st_blocks = stbuf->st_blocks; v9lstat->st_atime_sec = stbuf->st_atime; v9lstat->st_mtime_sec = stbuf->st_mtime; @@ -1413,6 +1434,7 @@ static void coroutine_fn v9fs_attach(void *opaque) size_t offset = 7; V9fsQID qid; ssize_t err; + struct stat stbuf; v9fs_string_init(&uname); v9fs_string_init(&aname); @@ -1435,7 +1457,13 @@ static void coroutine_fn v9fs_attach(void *opaque) clunk_fid(s, fid); goto out; } - err = fid_to_qid(pdu, fidp, &qid); + err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); + if (err < 0) { + err = -EINVAL; + clunk_fid(s, fid); + goto out; + } + err = stat_to_qid(pdu, &stbuf, &qid); if (err < 0) { err = -EINVAL; clunk_fid(s, fid); @@ -1467,7 +1495,7 @@ static void coroutine_fn v9fs_attach(void *opaque) } err += offset; - memcpy(&s->root_qid, &qid, sizeof(qid)); + memcpy(&s->root_st, &stbuf, sizeof(stbuf)); trace_v9fs_attach_return(pdu->tag, pdu->id, qid.type, qid.version, qid.path); out: @@ -1718,25 +1746,24 @@ static bool name_is_illegal(const char *name) return !*name || strchr(name, '/') != NULL; } -static bool not_same_qid(const V9fsQID *qid1, const V9fsQID *qid2) +static bool same_stat_id(const struct stat *a, const struct stat *b) { - return - qid1->type != qid2->type || - qid1->version != qid2->version || - qid1->path != qid2->path; + return a->st_dev == b->st_dev && a->st_ino == b->st_ino; } 
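As an illustrative aside (not part of the patch): the blksize_to_iounit() hunk above computes the largest multiple of the host filesystem block size that still fits into the client's msize minus the 9p I/O header, falling back to msize - P9_IOHDRSZ when the block size is zero or too large. A minimal standalone sketch of that arithmetic follows; the concrete values (128 KiB msize, a 24-byte 9p I/O header, 4 KiB host blocks) and the local ALIGN_DOWN macro standing in for QEMU_ALIGN_DOWN are assumptions for the example only.

    /* Sketch only -- illustrates the iounit calculation, not patch code. */
    #include <stdint.h>
    #include <stdio.h>

    /* stands in for QEMU_ALIGN_DOWN(n, m): round n down to a multiple of m */
    #define ALIGN_DOWN(n, m) ((n) / (m) * (m))

    static int32_t iounit_for(int32_t msize, int32_t iohdrsz, int32_t blksize)
    {
        int32_t iounit = 0;

        if (blksize) {
            /* largest multiple of the host block size that still fits */
            iounit = ALIGN_DOWN(msize - iohdrsz, blksize);
        }
        if (!iounit) {
            /* blksize was 0, or larger than msize - iohdrsz */
            iounit = msize - iohdrsz;
        }
        return iounit;
    }

    int main(void)
    {
        /* assumed: 128 KiB msize, 24-byte header, 4 KiB host blocks */
        printf("%d\n", iounit_for(128 * 1024, 24, 4096)); /* prints 126976 */
        return 0;
    }

With these assumed values, 131072 - 24 = 131048 is rounded down to the nearest 4096 multiple, 126976, which is what the guest is advertised as the optimum I/O unit.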
static void coroutine_fn v9fs_walk(void *opaque) { int name_idx; - V9fsQID *qids = NULL; + g_autofree V9fsQID *qids = NULL; int i, err = 0; V9fsPath dpath, path; + P9ARRAY_REF(V9fsPath) pathes = NULL; uint16_t nwnames; - struct stat stbuf; + struct stat stbuf, fidst; + g_autofree struct stat *stbufs = NULL; size_t offset = 7; int32_t fid, newfid; - V9fsString *wnames = NULL; + P9ARRAY_REF(V9fsString) wnames = NULL; V9fsFidState *fidp; V9fsFidState *newfidp = NULL; V9fsPDU *pdu = opaque; @@ -1752,9 +1779,15 @@ static void coroutine_fn v9fs_walk(void *opaque) trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames); - if (nwnames && nwnames <= P9_MAXWELEM) { - wnames = g_new0(V9fsString, nwnames); + if (nwnames > P9_MAXWELEM) { + err = -EINVAL; + goto out_nofid; + } + if (nwnames) { + P9ARRAY_NEW(V9fsString, wnames, nwnames); qids = g_new0(V9fsQID, nwnames); + stbufs = g_new0(struct stat, nwnames); + P9ARRAY_NEW(V9fsPath, pathes, nwnames); for (i = 0; i < nwnames; i++) { err = pdu_unmarshal(pdu, offset, "s", &wnames[i]); if (err < 0) { @@ -1766,9 +1799,6 @@ static void coroutine_fn v9fs_walk(void *opaque) } offset += err; } - } else if (nwnames > P9_MAXWELEM) { - err = -EINVAL; - goto out_nofid; } fidp = get_fid(pdu, fid); if (fidp == NULL) { @@ -1778,35 +1808,85 @@ static void coroutine_fn v9fs_walk(void *opaque) v9fs_path_init(&dpath); v9fs_path_init(&path); + /* + * Both dpath and path initially point to fidp. + * Needed to handle request with nwnames == 0 + */ + v9fs_path_copy(&dpath, &fidp->path); + v9fs_path_copy(&path, &fidp->path); - err = fid_to_qid(pdu, fidp, &qid); + /* + * To keep latency (i.e. overall execution time for processing this + * Twalk client request) as small as possible, run all the required fs + * driver code altogether inside the following block. + */ + v9fs_co_run_in_worker({ + if (v9fs_request_cancelled(pdu)) { + err = -EINTR; + break; + } + err = s->ops->lstat(&s->ctx, &dpath, &fidst); + if (err < 0) { + err = -errno; + break; + } + stbuf = fidst; + for (name_idx = 0; name_idx < nwnames; name_idx++) { + if (v9fs_request_cancelled(pdu)) { + err = -EINTR; + break; + } + if (!same_stat_id(&pdu->s->root_st, &stbuf) || + strcmp("..", wnames[name_idx].data)) + { + err = s->ops->name_to_path(&s->ctx, &dpath, + wnames[name_idx].data, + &pathes[name_idx]); + if (err < 0) { + err = -errno; + break; + } + if (v9fs_request_cancelled(pdu)) { + err = -EINTR; + break; + } + err = s->ops->lstat(&s->ctx, &pathes[name_idx], &stbuf); + if (err < 0) { + err = -errno; + break; + } + stbufs[name_idx] = stbuf; + v9fs_path_copy(&dpath, &pathes[name_idx]); + } + } + }); + /* + * Handle all the rest of this Twalk request on main thread ... + */ if (err < 0) { goto out; } - /* - * Both dpath and path initially poin to fidp. 
- * Needed to handle request with nwnames == 0 - */ + err = stat_to_qid(pdu, &fidst, &qid); + if (err < 0) { + goto out; + } + stbuf = fidst; + + /* reset dpath and path */ v9fs_path_copy(&dpath, &fidp->path); v9fs_path_copy(&path, &fidp->path); - for (name_idx = 0; name_idx < nwnames; name_idx++) { - if (not_same_qid(&pdu->s->root_qid, &qid) || - strcmp("..", wnames[name_idx].data)) { - err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data, - &path); - if (err < 0) { - goto out; - } - err = v9fs_co_lstat(pdu, &path, &stbuf); - if (err < 0) { - goto out; - } + for (name_idx = 0; name_idx < nwnames; name_idx++) { + if (!same_stat_id(&pdu->s->root_st, &stbuf) || + strcmp("..", wnames[name_idx].data)) + { + stbuf = stbufs[name_idx]; err = stat_to_qid(pdu, &stbuf, &qid); if (err < 0) { goto out; } + v9fs_path_copy(&path, &pathes[name_idx]); v9fs_path_copy(&dpath, &path); } memcpy(&qids[name_idx], &qid, sizeof(qid)); @@ -1839,35 +1919,14 @@ static void coroutine_fn v9fs_walk(void *opaque) v9fs_path_free(&path); out_nofid: pdu_complete(pdu, err); - if (nwnames && nwnames <= P9_MAXWELEM) { - for (name_idx = 0; name_idx < nwnames; name_idx++) { - v9fs_string_free(&wnames[name_idx]); - } - g_free(wnames); - g_free(qids); - } } static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path) { struct statfs stbuf; - int32_t iounit = 0; - V9fsState *s = pdu->s; + int err = v9fs_co_statfs(pdu, path, &stbuf); - /* - * iounit should be multiples of f_bsize (host filesystem block size - * and as well as less than (client msize - P9_IOHDRSZ)) - */ - if (!v9fs_co_statfs(pdu, path, &stbuf)) { - if (stbuf.f_bsize) { - iounit = stbuf.f_bsize; - iounit *= (s->msize - P9_IOHDRSZ) / stbuf.f_bsize; - } - } - if (!iounit) { - iounit = s->msize - P9_IOHDRSZ; - } - return iounit; + return blksize_to_iounit(pdu, (err >= 0) ? stbuf.f_bsize : 0); } static void coroutine_fn v9fs_open(void *opaque) diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h index 00381591ffa..1567b678411 100644 --- a/hw/9pfs/9p.h +++ b/hw/9pfs/9p.h @@ -355,7 +355,7 @@ struct V9fsState { int32_t root_fid; Error *migration_blocker; V9fsConf fsconf; - V9fsQID root_qid; + struct stat root_st; dev_t dev_id; struct qht qpd_table; struct qht qpp_table; diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c index 77a82ca4356..b078b294112 100644 --- a/hw/9pfs/codir.c +++ b/hw/9pfs/codir.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "fsdev/qemu-fsdev.h" #include "qemu/thread.h" diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c index 83bb6c14e02..20f93a90e75 100644 --- a/hw/9pfs/cofile.c +++ b/hw/9pfs/cofile.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "fsdev/qemu-fsdev.h" #include "qemu/thread.h" diff --git a/hw/9pfs/cofs.c b/hw/9pfs/cofs.c index 0b321b456e3..9d0adc2e786 100644 --- a/hw/9pfs/cofs.c +++ b/hw/9pfs/cofs.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "fsdev/qemu-fsdev.h" #include "qemu/thread.h" diff --git a/hw/9pfs/coth.c b/hw/9pfs/coth.c index 9778f24b000..2802d41cce2 100644 --- a/hw/9pfs/coth.c +++ b/hw/9pfs/coth.c @@ -12,6 +12,11 @@ * */ +/* + * Not so fast! 
You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "block/thread-pool.h" #include "qemu/coroutine.h" diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h index c9b2a8d4148..cb134ad7054 100644 --- a/hw/9pfs/coth.h +++ b/hw/9pfs/coth.h @@ -51,7 +51,9 @@ */ \ qemu_coroutine_yield(); \ qemu_bh_delete(co_bh); \ - code_block; \ + do { \ + code_block; \ + } while (0); \ /* re-enter back to qemu thread */ \ qemu_coroutine_yield(); \ } while (0) diff --git a/hw/9pfs/coxattr.c b/hw/9pfs/coxattr.c index 0e00ffaa0d4..dbcd09e0fd5 100644 --- a/hw/9pfs/coxattr.c +++ b/hw/9pfs/coxattr.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "fsdev/qemu-fsdev.h" #include "qemu/thread.h" diff --git a/hw/9pfs/trace-events b/hw/9pfs/trace-events index 10188daf7fa..6c77966c0b2 100644 --- a/hw/9pfs/trace-events +++ b/hw/9pfs/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # 9p.c v9fs_rcancel(uint16_t tag, uint8_t id) "tag %d id %d" diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index 14371a78efd..54ee93b71fc 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "hw/virtio/virtio.h" #include "qemu/sockets.h" diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index a969fcc54c3..65c4979c3c5 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -8,6 +8,11 @@ * */ +/* + * Not so fast! 
You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "hw/9pfs/9p.h" diff --git a/hw/Kconfig b/hw/Kconfig index ff40bd3f7bb..ad20cce0a95 100644 --- a/hw/Kconfig +++ b/hw/Kconfig @@ -21,6 +21,7 @@ source mem/Kconfig source misc/Kconfig source net/Kconfig source nubus/Kconfig +source nvme/Kconfig source nvram/Kconfig source pci-bridge/Kconfig source pci-host/Kconfig @@ -31,6 +32,7 @@ source remote/Kconfig source rtc/Kconfig source scsi/Kconfig source sd/Kconfig +source sensor/Kconfig source smbios/Kconfig source ssi/Kconfig source timer/Kconfig @@ -47,11 +49,9 @@ source avr/Kconfig source cris/Kconfig source hppa/Kconfig source i386/Kconfig -source lm32/Kconfig source m68k/Kconfig source microblaze/Kconfig source mips/Kconfig -source moxie/Kconfig source nios2/Kconfig source openrisc/Kconfig source ppc/Kconfig @@ -62,7 +62,6 @@ source sh4/Kconfig source sparc/Kconfig source sparc64/Kconfig source tricore/Kconfig -source unicore32/Kconfig source xtensa/Kconfig # Symbols used by multiple targets @@ -82,3 +81,5 @@ config XLNX_ZYNQMP select REGISTER select CAN_BUS select PTIMER + select XLNX_BBRAM + select XLNX_EFUSE_ZYNQMP diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig index 1932f66af8d..622b0b50b75 100644 --- a/hw/acpi/Kconfig +++ b/hw/acpi/Kconfig @@ -8,6 +8,8 @@ config ACPI_X86 select ACPI_CPU_HOTPLUG select ACPI_MEMORY_HOTPLUG select ACPI_HMAT + select ACPI_PIIX4 + select ACPI_PCIHP config ACPI_X86_ICH bool @@ -24,6 +26,14 @@ config ACPI_NVDIMM bool depends on ACPI +config ACPI_PIIX4 + bool + depends on ACPI + +config ACPI_PCIHP + bool + depends on ACPI + config ACPI_HMAT bool depends on ACPI @@ -41,4 +51,12 @@ config ACPI_VMGENID default y depends on PC +config ACPI_VIOT + bool + depends on ACPI + config ACPI_HW_REDUCED + bool + select ACPI + select ACPI_MEMORY_HOTPLUG + select ACPI_NVDIMM diff --git a/hw/acpi/acpi-cpu-hotplug-stub.c b/hw/acpi/acpi-cpu-hotplug-stub.c new file mode 100644 index 00000000000..3fc4b14c260 --- /dev/null +++ b/hw/acpi/acpi-cpu-hotplug-stub.c @@ -0,0 +1,50 @@ +#include "qemu/osdep.h" +#include "hw/acpi/cpu_hotplug.h" +#include "migration/vmstate.h" + + +/* Following stubs are all related to ACPI cpu hotplug */ +const VMStateDescription vmstate_cpu_hotplug; + +void acpi_switch_to_modern_cphp(AcpiCpuHotplug *gpe_cpu, + CPUHotplugState *cpuhp_state, + uint16_t io_port) +{ + return; +} + +void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, + AcpiCpuHotplug *gpe_cpu, uint16_t base) +{ + return; +} + +void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list) +{ + return; +} + +void acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, + CPUHotplugState *cpu_st, DeviceState *dev, Error **errp) +{ + return; +} + +void legacy_acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, + AcpiCpuHotplug *g, DeviceState *dev, Error **errp) +{ + return; +} + +void acpi_cpu_unplug_cb(CPUHotplugState *cpu_st, + DeviceState *dev, Error **errp) +{ + return; +} + +void acpi_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + CPUHotplugState *cpu_st, + DeviceState *dev, Error **errp) +{ + return; +} diff --git a/hw/acpi/acpi-mem-hotplug-stub.c b/hw/acpi/acpi-mem-hotplug-stub.c new file mode 100644 index 00000000000..73a076a2657 --- /dev/null +++ b/hw/acpi/acpi-mem-hotplug-stub.c @@ -0,0 +1,35 @@ +#include "qemu/osdep.h" +#include "hw/acpi/memory_hotplug.h" +#include "migration/vmstate.h" + +const VMStateDescription vmstate_memory_hotplug; + +void acpi_memory_hotplug_init(MemoryRegion 
*as, Object *owner, + MemHotplugState *state, hwaddr io_base) +{ + return; +} + +void acpi_memory_ospm_status(MemHotplugState *mem_st, ACPIOSTInfoList ***list) +{ + return; +} + +void acpi_memory_plug_cb(HotplugHandler *hotplug_dev, MemHotplugState *mem_st, + DeviceState *dev, Error **errp) +{ + return; +} + +void acpi_memory_unplug_cb(MemHotplugState *mem_st, + DeviceState *dev, Error **errp) +{ + return; +} + +void acpi_memory_unplug_request_cb(HotplugHandler *hotplug_dev, + MemHotplugState *mem_st, + DeviceState *dev, Error **errp) +{ + return; +} diff --git a/hw/acpi/acpi-nvdimm-stub.c b/hw/acpi/acpi-nvdimm-stub.c new file mode 100644 index 00000000000..8baff9be6f4 --- /dev/null +++ b/hw/acpi/acpi-nvdimm-stub.c @@ -0,0 +1,8 @@ +#include "qemu/osdep.h" +#include "hw/mem/nvdimm.h" +#include "hw/hotplug.h" + +void nvdimm_acpi_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + return; +} diff --git a/hw/acpi/acpi-pci-hotplug-stub.c b/hw/acpi/acpi-pci-hotplug-stub.c new file mode 100644 index 00000000000..734e4c59868 --- /dev/null +++ b/hw/acpi/acpi-pci-hotplug-stub.c @@ -0,0 +1,47 @@ +#include "qemu/osdep.h" +#include "hw/acpi/pcihp.h" +#include "migration/vmstate.h" + +const VMStateDescription vmstate_acpi_pcihp_pci_status; + +void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus, + MemoryRegion *address_space_io, bool bridges_enabled, + uint16_t io_base) +{ + return; +} + +void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, + DeviceState *dev, Error **errp) +{ + return; +} + +void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + return; +} + +void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, + DeviceState *dev, Error **errp) +{ + return; +} + +void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, + AcpiPciHpState *s, DeviceState *dev, + Error **errp) +{ + return; +} + +void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off) +{ + return; +} + +bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id) +{ + return false; +} diff --git a/hw/acpi/acpi-x86-stub.c b/hw/acpi/acpi-x86-stub.c index f88d6a090b3..3df1e090f46 100644 --- a/hw/acpi/acpi-x86-stub.c +++ b/hw/acpi/acpi-x86-stub.c @@ -1,7 +1,14 @@ #include "qemu/osdep.h" #include "hw/i386/pc.h" +#include "hw/i386/acpi-build.h" void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, - const CPUArchIdList *apic_ids, GArray *entry) + const CPUArchIdList *apic_ids, GArray *entry, + bool force_enabled) { } + +Object *acpi_get_i386_pci_host(void) +{ + return NULL; +} diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index d33ce8954aa..b3b3310df32 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -52,6 +52,19 @@ static void build_append_byte(GArray *array, uint8_t val) g_array_append_val(array, val); } +static void build_append_padded_str(GArray *array, const char *str, + size_t maxlen, char pad) +{ + size_t i; + size_t len = strlen(str); + + g_assert(len <= maxlen); + g_array_append_vals(array, str, len); + for (i = maxlen - len; i > 0; i--) { + g_array_append_val(array, pad); + } +} + static void build_append_array(GArray *array, GArray *val) { g_array_append_vals(array, val->data, val->len); @@ -1692,27 +1705,53 @@ Aml *aml_object_type(Aml *object) return var; } -void -build_header(BIOSLinker *linker, GArray *table_data, - AcpiTableHeader *h, const char *sig, int len, uint8_t rev, - const char *oem_id, const char *oem_table_id) -{ - unsigned tbl_offset = (char *)h - 
table_data->data; - unsigned checksum_offset = (char *)&h->checksum - table_data->data; - memcpy(&h->signature, sig, 4); - h->length = cpu_to_le32(len); - h->revision = rev; - - strpadcpy((char *)h->oem_id, sizeof h->oem_id, oem_id, ' '); - strpadcpy((char *)h->oem_table_id, sizeof h->oem_table_id, - oem_table_id, ' '); - - h->oem_revision = cpu_to_le32(1); - memcpy(h->asl_compiler_id, ACPI_BUILD_APPNAME8, 4); - h->asl_compiler_revision = cpu_to_le32(1); - /* Checksum to be filled in by Guest linker */ +void acpi_table_begin(AcpiTable *desc, GArray *array) +{ + + desc->array = array; + desc->table_offset = array->len; + + /* + * ACPI spec 1.0b + * 5.2.3 System Description Table Header + */ + g_assert(strlen(desc->sig) == 4); + g_array_append_vals(array, desc->sig, 4); /* Signature */ + /* + * reserve space for Length field, which will be patched by + * acpi_table_end() when the table creation is finished. + */ + build_append_int_noprefix(array, 0, 4); /* Length */ + build_append_int_noprefix(array, desc->rev, 1); /* Revision */ + build_append_int_noprefix(array, 0, 1); /* Checksum */ + build_append_padded_str(array, desc->oem_id, 6, ' '); /* OEMID */ + /* OEM Table ID */ + build_append_padded_str(array, desc->oem_table_id, 8, ' '); + build_append_int_noprefix(array, 1, 4); /* OEM Revision */ + g_array_append_vals(array, ACPI_BUILD_APPNAME8, 4); /* Creator ID */ + build_append_int_noprefix(array, 1, 4); /* Creator Revision */ +} + +void acpi_table_end(BIOSLinker *linker, AcpiTable *desc) +{ + /* + * ACPI spec 1.0b + * 5.2.3 System Description Table Header + * Table 5-2 DESCRIPTION_HEADER Fields + */ + const unsigned checksum_offset = 9; + uint32_t table_len = desc->array->len - desc->table_offset; + uint32_t table_len_le = cpu_to_le32(table_len); + gchar *len_ptr = &desc->array->data[desc->table_offset + 4]; + + /* patch "Length" field that has been reserved by acpi_table_begin() + * to the actual length, i.e. 
accumulated table length from + * acpi_table_begin() till acpi_table_end() + */ + memcpy(len_ptr, &table_len_le, sizeof table_len_le); + bios_linker_loader_add_checksum(linker, ACPI_BUILD_TABLE_FILE, - tbl_offset, len, checksum_offset); + desc->table_offset, table_len, desc->table_offset + checksum_offset); } void *acpi_data_push(GArray *table_data, unsigned size) @@ -1822,69 +1861,81 @@ build_rsdp(GArray *tbl, BIOSLinker *linker, AcpiRsdpData *rsdp_data) 32); } -/* Build rsdt table */ +/* + * ACPI 1.0 Root System Description Table (RSDT) + */ void build_rsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets, const char *oem_id, const char *oem_table_id) { int i; - unsigned rsdt_entries_offset; - AcpiRsdtDescriptorRev1 *rsdt; - const unsigned table_data_len = (sizeof(uint32_t) * table_offsets->len); - const unsigned rsdt_entry_size = sizeof(rsdt->table_offset_entry[0]); - const size_t rsdt_len = sizeof(*rsdt) + table_data_len; - - rsdt = acpi_data_push(table_data, rsdt_len); - rsdt_entries_offset = (char *)rsdt->table_offset_entry - table_data->data; + AcpiTable table = { .sig = "RSDT", .rev = 1, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + + acpi_table_begin(&table, table_data); for (i = 0; i < table_offsets->len; ++i) { uint32_t ref_tbl_offset = g_array_index(table_offsets, uint32_t, i); - uint32_t rsdt_entry_offset = rsdt_entries_offset + rsdt_entry_size * i; + uint32_t rsdt_entry_offset = table.array->len; - /* rsdt->table_offset_entry to be filled by Guest linker */ + /* reserve space for entry */ + build_append_int_noprefix(table.array, 0, 4); + + /* mark position of RSDT entry to be filled by Guest linker */ bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, rsdt_entry_offset, rsdt_entry_size, + ACPI_BUILD_TABLE_FILE, rsdt_entry_offset, 4, ACPI_BUILD_TABLE_FILE, ref_tbl_offset); + } - build_header(linker, table_data, - (void *)rsdt, "RSDT", rsdt_len, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } -/* Build xsdt table */ +/* + * ACPI 2.0 eXtended System Description Table (XSDT) + */ void build_xsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets, const char *oem_id, const char *oem_table_id) { int i; - unsigned xsdt_entries_offset; - AcpiXsdtDescriptorRev2 *xsdt; - const unsigned table_data_len = (sizeof(uint64_t) * table_offsets->len); - const unsigned xsdt_entry_size = sizeof(xsdt->table_offset_entry[0]); - const size_t xsdt_len = sizeof(*xsdt) + table_data_len; - - xsdt = acpi_data_push(table_data, xsdt_len); - xsdt_entries_offset = (char *)xsdt->table_offset_entry - table_data->data; + AcpiTable table = { .sig = "XSDT", .rev = 1, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + + acpi_table_begin(&table, table_data); + for (i = 0; i < table_offsets->len; ++i) { uint64_t ref_tbl_offset = g_array_index(table_offsets, uint32_t, i); - uint64_t xsdt_entry_offset = xsdt_entries_offset + xsdt_entry_size * i; + uint64_t xsdt_entry_offset = table.array->len; + + /* reserve space for entry */ + build_append_int_noprefix(table.array, 0, 8); - /* xsdt->table_offset_entry to be filled by Guest linker */ + /* mark position of RSDT entry to be filled by Guest linker */ bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, xsdt_entry_offset, xsdt_entry_size, + ACPI_BUILD_TABLE_FILE, xsdt_entry_offset, 8, ACPI_BUILD_TABLE_FILE, ref_tbl_offset); } - build_header(linker, table_data, - (void *)xsdt, "XSDT", xsdt_len, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } -void 
build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, +/* + * ACPI spec, Revision 4.0 + * 5.2.16.2 Memory Affinity Structure + */ +void build_srat_memory(GArray *table_data, uint64_t base, uint64_t len, int node, MemoryAffinityFlags flags) { - numamem->type = ACPI_SRAT_MEMORY; - numamem->length = sizeof(*numamem); - numamem->proximity = cpu_to_le32(node); - numamem->flags = cpu_to_le32(flags); - numamem->base_addr = cpu_to_le64(base); - numamem->range_length = cpu_to_le64(len); + build_append_int_noprefix(table_data, 1, 1); /* Type */ + build_append_int_noprefix(table_data, 40, 1); /* Length */ + build_append_int_noprefix(table_data, node, 4); /* Proximity Domain */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + build_append_int_noprefix(table_data, base, 4); /* Base Address Low */ + /* Base Address High */ + build_append_int_noprefix(table_data, base >> 32, 4); + build_append_int_noprefix(table_data, len, 4); /* Length Low */ + build_append_int_noprefix(table_data, len >> 32, 4); /* Length High */ + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ + build_append_int_noprefix(table_data, flags, 4); /* Flags */ + build_append_int_noprefix(table_data, 0, 8); /* Reserved */ } /* @@ -1894,11 +1945,12 @@ void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms, const char *oem_id, const char *oem_table_id) { - int slit_start, i, j; - slit_start = table_data->len; + int i, j; int nb_numa_nodes = ms->numa_state->num_nodes; + AcpiTable table = { .sig = "SLIT", .rev = 1, + .oem_id = oem_id, .oem_table_id = oem_table_id }; - acpi_data_push(table_data, sizeof(AcpiTableHeader)); + acpi_table_begin(&table, table_data); build_append_int_noprefix(table_data, nb_numa_nodes, 8); for (i = 0; i < nb_numa_nodes; i++) { @@ -1909,11 +1961,96 @@ void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms, 1); } } + acpi_table_end(linker, &table); +} + +/* + * ACPI spec, Revision 6.3 + * 5.2.29.1 Processor hierarchy node structure (Type 0) + */ +static void build_processor_hierarchy_node(GArray *tbl, uint32_t flags, + uint32_t parent, uint32_t id, + uint32_t *priv_rsrc, + uint32_t priv_num) +{ + int i; + + build_append_byte(tbl, 0); /* Type 0 - processor */ + build_append_byte(tbl, 20 + priv_num * 4); /* Length */ + build_append_int_noprefix(tbl, 0, 2); /* Reserved */ + build_append_int_noprefix(tbl, flags, 4); /* Flags */ + build_append_int_noprefix(tbl, parent, 4); /* Parent */ + build_append_int_noprefix(tbl, id, 4); /* ACPI Processor ID */ + + /* Number of private resources */ + build_append_int_noprefix(tbl, priv_num, 4); + + /* Private resources[N] */ + if (priv_num > 0) { + assert(priv_rsrc); + for (i = 0; i < priv_num; i++) { + build_append_int_noprefix(tbl, priv_rsrc[i], 4); + } + } +} + +/* + * ACPI spec, Revision 6.3 + * 5.2.29 Processor Properties Topology Table (PPTT) + */ +void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + const char *oem_id, const char *oem_table_id) +{ + int pptt_start = table_data->len; + int uid = 0; + int socket; + AcpiTable table = { .sig = "PPTT", .rev = 2, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + + acpi_table_begin(&table, table_data); + + for (socket = 0; socket < ms->smp.sockets; socket++) { + uint32_t socket_offset = table_data->len - pptt_start; + int core; + + build_processor_hierarchy_node( + table_data, + /* + * Physical package - represents the boundary + * of a physical package + */ + (1 << 
0), + 0, socket, NULL, 0); + + for (core = 0; core < ms->smp.cores; core++) { + uint32_t core_offset = table_data->len - pptt_start; + int thread; + + if (ms->smp.threads > 1) { + build_processor_hierarchy_node( + table_data, + (0 << 0), /* not a physical package */ + socket_offset, core, NULL, 0); + + for (thread = 0; thread < ms->smp.threads; thread++) { + build_processor_hierarchy_node( + table_data, + (1 << 1) | /* ACPI Processor ID valid */ + (1 << 2) | /* Processor is a Thread */ + (1 << 3), /* Node is a Leaf */ + core_offset, uid++, NULL, 0); + } + } else { + build_processor_hierarchy_node( + table_data, + (1 << 1) | /* ACPI Processor ID valid */ + (1 << 3), /* Node is a Leaf */ + socket_offset, uid++, NULL, 0); + } + } + } - build_header(linker, table_data, - (void *)(table_data->data + slit_start), - "SLIT", - table_data->len - slit_start, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } /* build rev1/rev3/rev5.1 FADT */ @@ -1921,9 +2058,10 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, const char *oem_id, const char *oem_table_id) { int off; - int fadt_start = tbl->len; + AcpiTable table = { .sig = "FACP", .rev = f->rev, + .oem_id = oem_id, .oem_table_id = oem_table_id }; - acpi_data_push(tbl, sizeof(AcpiTableHeader)); + acpi_table_begin(&table, tbl); /* FACS address to be filled by Guest linker at runtime */ off = tbl->len; @@ -1987,7 +2125,7 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, build_append_int_noprefix(tbl, f->flags, 4); /* Flags */ if (f->rev == 1) { - goto build_hdr; + goto done; } build_append_gas_from_struct(tbl, &f->reset_reg); /* RESET_REG */ @@ -2024,7 +2162,7 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); /* X_GPE1_BLK */ if (f->rev <= 4) { - goto build_hdr; + goto done; } /* SLEEP_CONTROL_REG */ @@ -2035,11 +2173,11 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, /* TODO: extra fields need to be added to support revisions above rev5 */ assert(f->rev == 5); -build_hdr: - build_header(linker, tbl, (void *)(tbl->data + fadt_start), - "FACP", tbl->len - fadt_start, f->rev, oem_id, oem_table_id); +done: + acpi_table_end(linker, &table); } +#ifdef CONFIG_TPM /* * build_tpm2 - Build the TPM2 table as specified in * table 7: TCG Hardware Interface Description Table Format for TPM 2.0 @@ -2049,14 +2187,14 @@ void build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog, const char *oem_id, const char *oem_table_id) { uint8_t start_method_params[12] = {}; - unsigned log_addr_offset, tpm2_start; + unsigned log_addr_offset; uint64_t control_area_start_address; TPMIf *tpmif = tpm_find(); uint32_t start_method; - void *tpm2_ptr; + AcpiTable table = { .sig = "TPM2", .rev = 4, + .oem_id = oem_id, .oem_table_id = oem_table_id }; - tpm2_start = table_data->len; - tpm2_ptr = acpi_data_push(table_data, sizeof(AcpiTableHeader)); + acpi_table_begin(&table, table_data); /* Platform Class */ build_append_int_noprefix(table_data, TPM2_ACPI_CLASS_CLIENT, 2); @@ -2094,10 +2232,9 @@ void build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog, bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, log_addr_offset, 8, ACPI_BUILD_TPMLOG_FILE, 0); - build_header(linker, table_data, - tpm2_ptr, "TPM2", table_data->len - tpm2_start, 4, oem_id, - oem_table_id); + acpi_table_end(linker, &table); } +#endif Aml *build_crs(PCIHostState *host, CrsRangeSet *range_set, uint32_t io_offset, 
uint32_t mmio32_offset, uint64_t mmio64_offset, diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index e2317be546b..b20903ea303 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -1,5 +1,4 @@ #include "qemu/osdep.h" -#include "hw/boards.h" #include "migration/vmstate.h" #include "hw/acpi/cpu.h" #include "qapi/error.h" @@ -670,21 +669,8 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, /* build _MAT object */ assert(adevc && adevc->madt_cpu); - adevc->madt_cpu(adev, i, arch_ids, madt_buf); - switch (madt_buf->data[0]) { - case ACPI_APIC_PROCESSOR: { - AcpiMadtProcessorApic *apic = (void *)madt_buf->data; - apic->flags = cpu_to_le32(1); - break; - } - case ACPI_APIC_LOCAL_X2APIC: { - AcpiMadtProcessorX2Apic *apic = (void *)madt_buf->data; - apic->flags = cpu_to_le32(1); - break; - } - default: - assert(0); - } + adevc->madt_cpu(adev, i, arch_ids, madt_buf, + true); /* set enabled flag */ aml_append(dev, aml_name_decl("_MAT", aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data))); g_array_free(madt_buf, true); diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index 5454be67d5f..e28457a7d10 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -11,7 +11,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "exec/address-spaces.h" #include "hw/acpi/acpi.h" #include "hw/acpi/generic_event_device.h" #include "hw/irq.h" @@ -208,7 +207,7 @@ static void ged_regs_write(void *opaque, hwaddr addr, uint64_t data, return; case ACPI_GED_REG_RESET: if (data == ACPI_GED_RESET_VALUE) { - qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } return; } diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c new file mode 100644 index 00000000000..c315de1802d --- /dev/null +++ b/hw/acpi/ghes-stub.c @@ -0,0 +1,22 @@ +/* + * Support for generating APEI tables and recording CPER for Guests: + * stub functions. + * + * Copyright (c) 2021 Linaro, Ltd + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "hw/acpi/ghes.h" + +int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) +{ + return -1; +} + +bool acpi_ghes_present(void) +{ + return false; +} diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index a4dac6bf15e..45d9a809cc9 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -362,18 +362,16 @@ static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker) void acpi_build_hest(GArray *table_data, BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { - uint64_t hest_start = table_data->len; + AcpiTable table = { .sig = "HEST", .rev = 1, + .oem_id = oem_id, .oem_table_id = oem_table_id }; - /* Hardware Error Source Table header*/ - acpi_data_push(table_data, sizeof(AcpiTableHeader)); + acpi_table_begin(&table, table_data); /* Error Source Count */ build_append_int_noprefix(table_data, ACPI_GHES_ERROR_SOURCE_COUNT, 4); - build_ghes_v2(table_data, ACPI_HEST_SRC_ID_SEA, linker); - build_header(linker, table_data, (void *)(table_data->data + hest_start), - "HEST", table_data->len - hest_start, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s, @@ -386,6 +384,8 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s, /* Create a read-write fw_cfg file for Address */ fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL, NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false); + + ags->present = true; } int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) @@ -443,3 +443,18 @@ int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) return ret; } + +bool acpi_ghes_present(void) +{ + AcpiGedState *acpi_ged_state; + AcpiGhesState *ags; + + acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED, + NULL)); + + if (!acpi_ged_state) { + return false; + } + ags = &acpi_ged_state->ghes_state; + return ags->present; +} diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c index edb3fd91b21..6913ebf7308 100644 --- a/hw/acpi/hmat.c +++ b/hw/acpi/hmat.c @@ -200,6 +200,8 @@ static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state) HMAT_LB_Info *hmat_lb; NumaHmatCacheOptions *hmat_cache; + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ + for (i = 0; i < numa_state->num_nodes; i++) { flags = 0; @@ -256,14 +258,10 @@ static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state) void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state, const char *oem_id, const char *oem_table_id) { - int hmat_start = table_data->len; - - /* reserve space for HMAT header */ - acpi_data_push(table_data, 40); + AcpiTable table = { .sig = "HMAT", .rev = 2, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + acpi_table_begin(&table, table_data); hmat_build_table_structs(table_data, numa_state); - - build_header(linker, table_data, - (void *)(table_data->data + hmat_start), - "HMAT", table_data->len - hmat_start, 2, oem_id, oem_table_id); + acpi_table_end(linker, &table); } diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 853447cf9d2..ebe08ed831f 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -35,7 +35,6 @@ #include "sysemu/runstate.h" #include "hw/acpi/acpi.h" #include "hw/acpi/tco.h" -#include "exec/address-spaces.h" #include "hw/i386/ich9.h" #include "hw/mem/pc-dimm.h" @@ -218,6 +217,26 @@ static const VMStateDescription vmstate_cpuhp_state = { } }; +static bool vmstate_test_use_pcihp(void *opaque) +{ + ICH9LPCPMRegs 
*s = opaque; + + return s->use_acpi_hotplug_bridge; +} + +static const VMStateDescription vmstate_pcihp_state = { + .name = "ich9_pm/pcihp", + .version_id = 1, + .minimum_version_id = 1, + .needed = vmstate_test_use_pcihp, + .fields = (VMStateField[]) { + VMSTATE_PCI_HOTPLUG(acpi_pci_hotplug, + ICH9LPCPMRegs, + NULL, NULL), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_ich9_pm = { .name = "ich9_pm", .version_id = 1, @@ -239,6 +258,7 @@ const VMStateDescription vmstate_ich9_pm = { &vmstate_memhp_state, &vmstate_tco_io_state, &vmstate_cpuhp_state, + &vmstate_pcihp_state, NULL } }; @@ -260,6 +280,10 @@ static void pm_reset(void *opaque) } pm->smi_en_wmask = ~0; + if (pm->use_acpi_hotplug_bridge) { + acpi_pcihp_reset(&pm->acpi_pci_hotplug, true); + } + acpi_update_sci(&pm->acpi_regs, pm->irq); } @@ -298,6 +322,18 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, pm->enable_tco = true; acpi_pm_tco_init(&pm->tco_regs, &pm->io); + if (pm->use_acpi_hotplug_bridge) { + acpi_pcihp_init(OBJECT(lpc_pci), + &pm->acpi_pci_hotplug, + pci_get_bus(lpc_pci), + pci_address_space_io(lpc_pci), + true, + ACPI_PCIHP_ADDR_ICH9); + + qbus_set_hotplug_handler(BUS(pci_get_bus(lpc_pci)), + OBJECT(lpc_pci)); + } + pm->irq = sci_irq; qemu_register_reset(pm_reset, pm); pm->powerdown_notifier.notify = pm_powerdown_req; @@ -369,6 +405,34 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) s->pm.enable_tco = value; } +static bool ich9_pm_get_acpi_pci_hotplug(Object *obj, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + return s->pm.use_acpi_hotplug_bridge; +} + +static void ich9_pm_set_acpi_pci_hotplug(Object *obj, bool value, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + s->pm.use_acpi_hotplug_bridge = value; +} + +static bool ich9_pm_get_keep_pci_slot_hpc(Object *obj, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + return s->pm.keep_pci_slot_hpc; +} + +static void ich9_pm_set_keep_pci_slot_hpc(Object *obj, bool value, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + s->pm.keep_pci_slot_hpc = value; +} + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; @@ -377,6 +441,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) pm->disable_s3 = 0; pm->disable_s4 = 0; pm->s4_val = 2; + pm->use_acpi_hotplug_bridge = true; + pm->keep_pci_slot_hpc = true; object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, &pm->pm_io_base, OBJ_PROP_FLAG_READ); @@ -400,6 +466,12 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) object_property_add_bool(obj, ACPI_PM_PROP_TCO_ENABLED, ich9_pm_get_enable_tco, ich9_pm_set_enable_tco); + object_property_add_bool(obj, ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, + ich9_pm_get_acpi_pci_hotplug, + ich9_pm_set_acpi_pci_hotplug); + object_property_add_bool(obj, "x-keep-pci-slot-hpc", + ich9_pm_get_keep_pci_slot_hpc, + ich9_pm_set_keep_pci_slot_hpc); } void ich9_pm_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -407,6 +479,11 @@ void ich9_pm_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, { ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev); + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_pre_plug_cb(hotplug_dev, dev, errp); + return; + } + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) && !lpc->pm.acpi_memory_hotplug.is_enabled) { error_setg(errp, @@ -442,6 +519,9 @@ void ich9_pm_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, 
} else { acpi_cpu_plug_cb(hotplug_dev, &lpc->pm.cpuhp_state, dev, errp); } + } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_plug_cb(hotplug_dev, &lpc->pm.acpi_pci_hotplug, + dev, errp); } else { error_setg(errp, "acpi: device plug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -474,6 +554,10 @@ void ich9_pm_device_unplug_request_cb(HotplugHandler *hotplug_dev, acpi_cpu_unplug_request_cb(hotplug_dev, &lpc->pm.cpuhp_state, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_unplug_request_cb(hotplug_dev, + &lpc->pm.acpi_pci_hotplug, + dev, errp); } else { error_setg(errp, "acpi: device unplug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -491,6 +575,9 @@ void ich9_pm_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) && !lpc->pm.cpu_hotplug_legacy) { acpi_cpu_unplug_cb(&lpc->pm.cpuhp_state, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_unplug_cb(hotplug_dev, &lpc->pm.acpi_pci_hotplug, + dev, errp); } else { error_setg(errp, "acpi: device unplug for not supported device" " type: %s", object_get_typename(OBJECT(dev))); diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c index 0bdcf15528f..d0fffcf7870 100644 --- a/hw/acpi/memory_hotplug.c +++ b/hw/acpi/memory_hotplug.c @@ -2,13 +2,13 @@ #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/pc-hotplug.h" #include "hw/mem/pc-dimm.h" -#include "hw/boards.h" #include "hw/qdev-core.h" #include "migration/vmstate.h" #include "trace.h" #include "qapi/error.h" #include "qapi/qapi-events-acpi.h" #include "qapi/qapi-events-machine.h" +#include "qapi/qapi-events-qdev.h" #define MEMORY_SLOTS_NUMBER "MDNR" #define MEMORY_HOTPLUG_IO_REGION "HPMR" @@ -179,8 +179,16 @@ static void acpi_memory_hotplug_write(void *opaque, hwaddr addr, uint64_t data, hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); if (local_err) { trace_mhp_acpi_pc_dimm_delete_failed(mem_st->selector); - qapi_event_send_mem_unplug_error(dev->id, + + /* + * Send both MEM_UNPLUG_ERROR and DEVICE_UNPLUG_GUEST_ERROR + * while the deprecation of MEM_UNPLUG_ERROR is + * pending. + */ + qapi_event_send_mem_unplug_error(dev->id ? 
: "", error_get_pretty(local_err)); + qapi_event_send_device_unplug_guest_error(!!dev->id, dev->id, + dev->canonical_path); error_free(local_err); break; } diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build index dd69577212a..adf6347bc42 100644 --- a/hw/acpi/meson.build +++ b/hw/acpi/meson.build @@ -3,23 +3,31 @@ acpi_ss.add(files( 'acpi_interface.c', 'aml-build.c', 'bios-linker-loader.c', + 'core.c', 'utils.c', )) -acpi_ss.add(when: 'CONFIG_ACPI_CPU_HOTPLUG', if_true: files('cpu.c')) -acpi_ss.add(when: 'CONFIG_ACPI_CPU_HOTPLUG', if_true: files('cpu_hotplug.c')) +acpi_ss.add(when: 'CONFIG_ACPI_CPU_HOTPLUG', if_true: files('cpu.c', 'cpu_hotplug.c')) +acpi_ss.add(when: 'CONFIG_ACPI_CPU_HOTPLUG', if_false: files('acpi-cpu-hotplug-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_MEMORY_HOTPLUG', if_true: files('memory_hotplug.c')) +acpi_ss.add(when: 'CONFIG_ACPI_MEMORY_HOTPLUG', if_false: files('acpi-mem-hotplug-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_NVDIMM', if_true: files('nvdimm.c')) +acpi_ss.add(when: 'CONFIG_ACPI_NVDIMM', if_false: files('acpi-nvdimm-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_PCI', if_true: files('pci.c')) acpi_ss.add(when: 'CONFIG_ACPI_VMGENID', if_true: files('vmgenid.c')) acpi_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device.c')) acpi_ss.add(when: 'CONFIG_ACPI_HMAT', if_true: files('hmat.c')) -acpi_ss.add(when: 'CONFIG_ACPI_APEI', if_true: files('ghes.c')) -acpi_ss.add(when: 'CONFIG_ACPI_X86', if_true: files('core.c', 'piix4.c', 'pcihp.c'), if_false: files('acpi-stub.c')) +acpi_ss.add(when: 'CONFIG_ACPI_APEI', if_true: files('ghes.c'), if_false: files('ghes-stub.c')) +acpi_ss.add(when: 'CONFIG_ACPI_PIIX4', if_true: files('piix4.c')) +acpi_ss.add(when: 'CONFIG_ACPI_PCIHP', if_true: files('pcihp.c')) +acpi_ss.add(when: 'CONFIG_ACPI_PCIHP', if_false: files('acpi-pci-hotplug-stub.c')) +acpi_ss.add(when: 'CONFIG_ACPI_VIOT', if_true: files('viot.c')) acpi_ss.add(when: 'CONFIG_ACPI_X86_ICH', if_true: files('ich9.c', 'tco.c')) acpi_ss.add(when: 'CONFIG_IPMI', if_true: files('ipmi.c'), if_false: files('ipmi-stub.c')) acpi_ss.add(when: 'CONFIG_PC', if_false: files('acpi-x86-stub.c')) acpi_ss.add(when: 'CONFIG_TPM', if_true: files('tpm.c')) -softmmu_ss.add(when: 'CONFIG_ACPI', if_false: files('acpi-stub.c', 'aml-build-stub.c')) +softmmu_ss.add(when: 'CONFIG_ACPI', if_false: files('acpi-stub.c', 'aml-build-stub.c', 'ghes-stub.c')) softmmu_ss.add_all(when: 'CONFIG_ACPI', if_true: acpi_ss) softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('acpi-stub.c', 'aml-build-stub.c', - 'acpi-x86-stub.c', 'ipmi-stub.c')) + 'acpi-x86-stub.c', 'ipmi-stub.c', 'ghes-stub.c', + 'acpi-mem-hotplug-stub.c', 'acpi-cpu-hotplug-stub.c', + 'acpi-pci-hotplug-stub.c', 'acpi-nvdimm-stub.c')) diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index e3d5fe19392..0d43da19ea4 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -44,22 +44,6 @@ static const uint8_t nvdimm_nfit_spa_uuid[] = UUID_LE(0x66f0d379, 0xb4f3, 0x4074, 0xac, 0x43, 0x0d, 0x33, 0x18, 0xb7, 0x8c, 0xdb); -/* - * NVDIMM Firmware Interface Table - * @signature: "NFIT" - * - * It provides information that allows OSPM to enumerate NVDIMM present in - * the platform and associate system physical address ranges created by the - * NVDIMMs. 
- * - * It is defined in ACPI 6.0: 5.2.25 NVDIMM Firmware Interface Table (NFIT) - */ -struct NvdimmNfitHeader { - ACPI_TABLE_HEADER_DEF - uint32_t reserved; -} QEMU_PACKED; -typedef struct NvdimmNfitHeader NvdimmNfitHeader; - /* * define NFIT structures according to ACPI 6.0: 5.2.25 NVDIMM Firmware * Interface Table (NFIT). @@ -355,10 +339,10 @@ nvdimm_build_structure_caps(GArray *structures, uint32_t capabilities) static GArray *nvdimm_build_device_structure(NVDIMMState *state) { - GSList *device_list = nvdimm_get_device_list(); + GSList *device_list, *list = nvdimm_get_device_list(); GArray *structures = g_array_new(false, true /* clear */, 1); - for (; device_list; device_list = device_list->next) { + for (device_list = list; device_list; device_list = device_list->next) { DeviceState *dev = device_list->data; /* build System Physical Address Range Structure. */ @@ -373,7 +357,7 @@ static GArray *nvdimm_build_device_structure(NVDIMMState *state) /* build NVDIMM Control Region Structure. */ nvdimm_build_structure_dcr(structures, dev); } - g_slist_free(device_list); + g_slist_free(list); if (state->persistence) { nvdimm_build_structure_caps(structures, state->persistence); @@ -401,25 +385,33 @@ void nvdimm_plug(NVDIMMState *state) nvdimm_build_fit_buffer(state); } +/* + * NVDIMM Firmware Interface Table + * @signature: "NFIT" + * + * It provides information that allows OSPM to enumerate NVDIMM present in + * the platform and associate system physical address ranges created by the + * NVDIMMs. + * + * It is defined in ACPI 6.0: 5.2.25 NVDIMM Firmware Interface Table (NFIT) + */ + static void nvdimm_build_nfit(NVDIMMState *state, GArray *table_offsets, GArray *table_data, BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { NvdimmFitBuffer *fit_buf = &state->fit_buf; - unsigned int header; + AcpiTable table = { .sig = "NFIT", .rev = 1, + .oem_id = oem_id, .oem_table_id = oem_table_id }; acpi_add_table(table_offsets, table_data); - /* NFIT header. */ - header = table_data->len; - acpi_data_push(table_data, sizeof(NvdimmNfitHeader)); + acpi_table_begin(&table, table_data); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); /* NVDIMM device structures. 
*/ g_array_append_vals(table_data, fit_buf->fit->data, fit_buf->fit->len); - - build_header(linker, table_data, - (void *)(table_data->data + header), "NFIT", - sizeof(NvdimmNfitHeader) + fit_buf->fit->len, 1, oem_id, - oem_table_id); + acpi_table_end(linker, &table); } #define NVDIMM_DSM_MEMORY_SIZE 4096 @@ -1282,14 +1274,15 @@ static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data, NVDIMMState *nvdimm_state, uint32_t ram_slots, const char *oem_id) { + int mem_addr_offset; Aml *ssdt, *sb_scope, *dev; - int mem_addr_offset, nvdimm_ssdt; + AcpiTable table = { .sig = "SSDT", .rev = 1, + .oem_id = oem_id, .oem_table_id = "NVDIMM" }; acpi_add_table(table_offsets, table_data); + acpi_table_begin(&table, table_data); ssdt = init_aml_allocator(); - acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader)); - sb_scope = aml_scope("\\_SB"); dev = aml_device("NVDR"); @@ -1318,8 +1311,6 @@ static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data, aml_append(sb_scope, dev); aml_append(ssdt, sb_scope); - nvdimm_ssdt = table_data->len; - /* copy AML table into ACPI tables blob and patch header there */ g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len); mem_addr_offset = build_append_named_dword(table_data, @@ -1331,18 +1322,20 @@ static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data, bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, mem_addr_offset, sizeof(uint32_t), NVDIMM_DSM_MEM_FILE, 0); - build_header(linker, table_data, - (void *)(table_data->data + nvdimm_ssdt), - "SSDT", table_data->len - nvdimm_ssdt, 1, oem_id, "NVDIMM"); free_aml_allocator(); + /* + * must be executed as the last so that pointer patching command above + * would be executed by guest before it recalculated checksum which were + * scheduled by acpi_table_end() + */ + acpi_table_end(linker, &table); } void nvdimm_build_srat(GArray *table_data) { - GSList *device_list = nvdimm_get_device_list(); + GSList *device_list, *list = nvdimm_get_device_list(); - for (; device_list; device_list = device_list->next) { - AcpiSratMemoryAffinity *numamem = NULL; + for (device_list = list; device_list; device_list = device_list->next) { DeviceState *dev = device_list->data; Object *obj = OBJECT(dev); uint64_t addr, size; @@ -1352,11 +1345,10 @@ void nvdimm_build_srat(GArray *table_data) addr = object_property_get_uint(obj, PC_DIMM_ADDR_PROP, &error_abort); size = object_property_get_uint(obj, PC_DIMM_SIZE_PROP, &error_abort); - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, addr, size, node, + build_srat_memory(table_data, addr, size, node, MEM_AFFINITY_ENABLED | MEM_AFFINITY_NON_VOLATILE); } - g_slist_free(device_list); + g_slist_free(list); } void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, diff --git a/hw/acpi/pci.c b/hw/acpi/pci.c index 75b1103ec45..20b70dcd815 100644 --- a/hw/acpi/pci.c +++ b/hw/acpi/pci.c @@ -28,19 +28,20 @@ #include "hw/acpi/pci.h" #include "hw/pci/pcie_host.h" +/* + * PCI Firmware Specification, Revision 3.0 + * 4.1.2 MCFG Table Description. + */ void build_mcfg(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info, const char *oem_id, const char *oem_table_id) { - int mcfg_start = table_data->len; + AcpiTable table = { .sig = "MCFG", .rev = 1, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + + acpi_table_begin(&table, table_data); - /* - * PCI Firmware Specification, Revision 3.0 - * 4.1.2 MCFG Table Description. 
- */ - acpi_data_push(table_data, sizeof(AcpiTableHeader)); /* Reserved */ build_append_int_noprefix(table_data, 0, 8); - /* * Memory Mapped Enhanced Configuration Space Base Address Allocation * Structure @@ -56,6 +57,5 @@ void build_mcfg(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info, /* Reserved */ build_append_int_noprefix(table_data, 0, 4); - build_header(linker, table_data, (void *)(table_data->data + mcfg_start), - "MCFG", table_data->len - mcfg_start, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index f4cb3c979d0..30405b5113d 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -30,15 +30,16 @@ #include "hw/pci-host/i440fx.h" #include "hw/pci/pci.h" #include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_host.h" +#include "hw/pci/pcie_port.h" +#include "hw/i386/acpi-build.h" #include "hw/acpi/acpi.h" -#include "exec/address-spaces.h" #include "hw/pci/pci_bus.h" #include "migration/vmstate.h" #include "qapi/error.h" #include "qom/qom-qobject.h" #include "trace.h" -#define ACPI_PCIHP_ADDR 0xae00 #define ACPI_PCIHP_SIZE 0x0018 #define PCI_UP_BASE 0x0000 #define PCI_DOWN_BASE 0x0004 @@ -105,6 +106,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque) static void acpi_set_pci_info(void) { static bool bsel_is_set; + Object *host = acpi_get_i386_pci_host(); PCIBus *bus; unsigned bsel_alloc = ACPI_PCIHP_BSEL_DEFAULT; @@ -113,7 +115,11 @@ static void acpi_set_pci_info(void) } bsel_is_set = true; - bus = find_i440fx(); /* TODO: Q35 support */ + if (!host) { + return; + } + + bus = PCI_HOST_BRIDGE(host)->bus; if (bus) { /* Scan all PCI buses. Set property to enable acpi based hotplug. */ pci_for_each_bus_depth_first(bus, acpi_set_bsel, NULL, &bsel_alloc); @@ -123,13 +129,14 @@ static void acpi_set_pci_info(void) static void acpi_pcihp_disable_root_bus(void) { static bool root_hp_disabled; + Object *host = acpi_get_i386_pci_host(); PCIBus *bus; if (root_hp_disabled) { return; } - bus = find_i440fx(); + bus = PCI_HOST_BRIDGE(host)->bus; if (bus) { /* setting the hotplug handler to NULL makes the bus non-hotpluggable */ qbus_set_hotplug_handler(BUS(bus), NULL); @@ -215,9 +222,27 @@ static void acpi_pcihp_eject_slot(AcpiPciHpState *s, unsigned bsel, unsigned slo PCIDevice *dev = PCI_DEVICE(qdev); if (PCI_SLOT(dev->devfn) == slot) { if (!acpi_pcihp_pc_no_hotplug(s, dev)) { - hotplug_ctrl = qdev_get_hotplug_handler(qdev); - hotplug_handler_unplug(hotplug_ctrl, qdev, &error_abort); - object_unparent(OBJECT(qdev)); + /* + * partially_hotplugged is used by virtio-net failover: + * failover has asked the guest OS to unplug the device + * but we need to keep some references to the device + * to be able to plug it back in case of failure so + * we don't execute hotplug_handler_unplug(). + */ + if (dev->partially_hotplugged) { + /* + * pending_deleted_event is set to true when + * virtio-net failover asks to unplug the device, + * and set to false here when the operation is done + * This is used by the migration loop to detect the + * end of the operation and really start the migration. + */ + qdev->pending_deleted_event = false; + } else { + hotplug_ctrl = qdev_get_hotplug_handler(qdev); + hotplug_handler_unplug(hotplug_ctrl, qdev, &error_abort); + object_unparent(OBJECT(qdev)); + } } } } @@ -276,7 +301,7 @@ void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, /* Only hotplugged devices need the hotplug capability. 
*/ if (dev->hotplugged && - acpi_pcihp_get_bsel(pci_get_bus(PCI_DEVICE(dev))) < 0) { + acpi_pcihp_get_bsel(pci_get_bus(pdev)) < 0) { error_setg(errp, "Unsupported bus. Bus doesn't have property '" ACPI_PCIHP_PROP_BSEL "' set"); return; @@ -330,6 +355,13 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { PCIBus *sec = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + /* Remove all hot-plug handlers if hot-plug is disabled on slot */ + if (object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT) && + !PCIE_SLOT(pdev)->hotplug) { + qbus_set_hotplug_handler(BUS(sec), NULL); + return; + } + qbus_set_hotplug_handler(BUS(sec), OBJECT(hotplug_dev)); /* We don't have to overwrite any other hotplug handler yet */ assert(QLIST_EMPTY(&sec->child)); @@ -349,8 +381,8 @@ void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, { PCIDevice *pdev = PCI_DEVICE(dev); - trace_acpi_pci_unplug(PCI_SLOT(PCI_DEVICE(dev)->devfn), - acpi_pcihp_get_bsel(pci_get_bus(PCI_DEVICE(dev)))); + trace_acpi_pci_unplug(PCI_SLOT(pdev->devfn), + acpi_pcihp_get_bsel(pci_get_bus(pdev))); /* * clean up acpi-index so it could reused by another device @@ -382,6 +414,12 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + /* + * pending_deleted_event is used by virtio-net failover to detect the + * end of the unplug operation, the flag is set to false in + * acpi_pcihp_eject_slot() when the operation is completed. + */ + pdev->qdev.pending_deleted_event = true; s->acpi_pcihp_pci_status[bsel].down |= (1U << slot); acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); } @@ -489,10 +527,11 @@ static const MemoryRegionOps acpi_pcihp_io_ops = { }; void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus, - MemoryRegion *address_space_io, bool bridges_enabled) + MemoryRegion *address_space_io, bool bridges_enabled, + uint16_t io_base) { s->io_len = ACPI_PCIHP_SIZE; - s->io_base = ACPI_PCIHP_ADDR; + s->io_base = io_base; s->root = root_bus; s->legacy_piix = !bridges_enabled; diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 8f8b0e95e52..f0b5fac44a1 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -33,7 +33,6 @@ #include "sysemu/xen.h" #include "qapi/error.h" #include "qemu/range.h" -#include "exec/address-spaces.h" #include "hw/acpi/pcihp.h" #include "hw/acpi/cpu_hotplug.h" #include "hw/acpi/cpu.h" @@ -50,6 +49,8 @@ #define GPE_BASE 0xafe0 #define GPE_LEN 4 +#define ACPI_PCIHP_ADDR_PIIX4 0xae00 + struct pci_status { uint32_t up; /* deprecated, maintained for migration compatibility */ uint32_t down; @@ -608,7 +609,7 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent, if (s->use_acpi_hotplug_bridge || s->use_acpi_root_pci_hotplug) { acpi_pcihp_init(OBJECT(s), &s->acpi_pci_hotplug, bus, parent, - s->use_acpi_hotplug_bridge); + s->use_acpi_hotplug_bridge, ACPI_PCIHP_ADDR_PIIX4); } s->cpu_hotplug_legacy = true; @@ -646,9 +647,9 @@ static Property piix4_pm_properties[] = { DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), - DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, + DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, use_acpi_hotplug_bridge, true), - DEFINE_PROP_BOOL("acpi-root-pci-hotplug", PIIX4PMState, + DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCI_ROOTHP, PIIX4PMState, 
use_acpi_root_pci_hotplug, true), DEFINE_PROP_BOOL("memory-hotplug-support", PIIX4PMState, acpi_memory_hotplug.is_enabled, true), diff --git a/hw/acpi/tpm.c b/hw/acpi/tpm.c index b96459e45be..cdc02275365 100644 --- a/hw/acpi/tpm.c +++ b/hw/acpi/tpm.c @@ -57,7 +57,7 @@ void tpm_build_ppi_acpi(TPMIf *tpm, Aml *dev) aml_operation_region( "TPP3", AML_SYSTEM_MEMORY, aml_int(TPM_PPI_ADDR_BASE + - 0x15a /* movv, docs/specs/tpm.txt */), + 0x15a /* movv, docs/specs/tpm.rst */), 0x1)); field = aml_field("TPP3", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); aml_append(field, aml_named_field("MOVV", 8)); diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events index dcc1438f3a0..974d770e8b4 100644 --- a/hw/acpi/trace-events +++ b/hw/acpi/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # memory_hotplug.c mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32 diff --git a/hw/acpi/viot.c b/hw/acpi/viot.c new file mode 100644 index 00000000000..c1af75206e5 --- /dev/null +++ b/hw/acpi/viot.c @@ -0,0 +1,114 @@ +/* + * ACPI Virtual I/O Translation table implementation + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/viot.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" + +struct viot_pci_ranges { + GArray *blob; + size_t count; + uint16_t output_node; +}; + +/* Build PCI range for a given PCI host bridge */ +static int build_pci_range_node(Object *obj, void *opaque) +{ + struct viot_pci_ranges *pci_ranges = opaque; + GArray *blob = pci_ranges->blob; + + if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { + PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; + + if (bus && !pci_bus_bypass_iommu(bus)) { + int min_bus, max_bus; + + pci_bus_range(bus, &min_bus, &max_bus); + + /* Type */ + build_append_int_noprefix(blob, 1 /* PCI range */, 1); + /* Reserved */ + build_append_int_noprefix(blob, 0, 1); + /* Length */ + build_append_int_noprefix(blob, 24, 2); + /* Endpoint start */ + build_append_int_noprefix(blob, PCI_BUILD_BDF(min_bus, 0), 4); + /* PCI Segment start */ + build_append_int_noprefix(blob, 0, 2); + /* PCI Segment end */ + build_append_int_noprefix(blob, 0, 2); + /* PCI BDF start */ + build_append_int_noprefix(blob, PCI_BUILD_BDF(min_bus, 0), 2); + /* PCI BDF end */ + build_append_int_noprefix(blob, PCI_BUILD_BDF(max_bus, 0xff), 2); + /* Output node */ + build_append_int_noprefix(blob, pci_ranges->output_node, 2); + /* Reserved */ + build_append_int_noprefix(blob, 0, 6); + + pci_ranges->count++; + } + } + + return 0; +} + +/* + * Generate a VIOT table with one PCI-based virtio-iommu that manages PCI + * endpoints. 
+ * + * Defined in the ACPI Specification (Version TBD) + */ +void build_viot(MachineState *ms, GArray *table_data, BIOSLinker *linker, + uint16_t virtio_iommu_bdf, const char *oem_id, + const char *oem_table_id) +{ + /* The virtio-iommu node follows the 48-bytes header */ + int viommu_off = 48; + AcpiTable table = { .sig = "VIOT", .rev = 0, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + struct viot_pci_ranges pci_ranges = { + .output_node = viommu_off, + .blob = g_array_new(false, true /* clear */, 1), + }; + + /* Build the list of PCI ranges that this viommu manages */ + object_child_foreach_recursive(OBJECT(ms), build_pci_range_node, + &pci_ranges); + + /* ACPI table header */ + acpi_table_begin(&table, table_data); + /* Node count */ + build_append_int_noprefix(table_data, pci_ranges.count + 1, 2); + /* Node offset */ + build_append_int_noprefix(table_data, viommu_off, 2); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 8); + + /* Virtio-iommu node */ + /* Type */ + build_append_int_noprefix(table_data, 3 /* virtio-pci IOMMU */, 1); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 1); + /* Length */ + build_append_int_noprefix(table_data, 16, 2); + /* PCI Segment */ + build_append_int_noprefix(table_data, 0, 2); + /* PCI BDF number */ + build_append_int_noprefix(table_data, virtio_iommu_bdf, 2); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 8); + + /* PCI ranges found above */ + g_array_append_vals(table_data, pci_ranges.blob->data, + pci_ranges.blob->len); + g_array_free(pci_ranges.blob, true); + + acpi_table_end(linker, &table); +} + diff --git a/hw/acpi/viot.h b/hw/acpi/viot.h new file mode 100644 index 00000000000..9fe565bb878 --- /dev/null +++ b/hw/acpi/viot.h @@ -0,0 +1,13 @@ +/* + * ACPI Virtual I/O Translation Table implementation + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef VIOT_H +#define VIOT_H + +void build_viot(MachineState *ms, GArray *table_data, BIOSLinker *linker, + uint16_t virtio_iommu_bdf, const char *oem_id, + const char *oem_table_id); + +#endif /* VIOT_H */ diff --git a/hw/acpi/vmgenid.c b/hw/acpi/vmgenid.c index 4f41a13ea05..0c9f158ac9e 100644 --- a/hw/acpi/vmgenid.c +++ b/hw/acpi/vmgenid.c @@ -29,6 +29,8 @@ void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid, Aml *ssdt, *dev, *scope, *method, *addr, *if_ctx; uint32_t vgia_offset; QemuUUID guid_le; + AcpiTable table = { .sig = "SSDT", .rev = 1, + .oem_id = oem_id, .oem_table_id = "VMGENID" }; /* Fill in the GUID values. 
These need to be converted to little-endian * first, since that's what the guest expects @@ -42,12 +44,10 @@ void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid, g_array_insert_vals(guid, VMGENID_GUID_OFFSET, guid_le.data, ARRAY_SIZE(guid_le.data)); - /* Put this in a separate SSDT table */ + /* Put VMGNEID into a separate SSDT table */ + acpi_table_begin(&table, table_data); ssdt = init_aml_allocator(); - /* Reserve space for header */ - acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader)); - /* Storage for the GUID address */ vgia_offset = table_data->len + build_append_named_dword(ssdt->buf, "VGIA"); @@ -116,9 +116,8 @@ void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid, ACPI_BUILD_TABLE_FILE, vgia_offset, sizeof(uint32_t), VMGENID_GUID_FW_CFG_FILE, 0); - build_header(linker, table_data, - (void *)(table_data->data + table_data->len - ssdt->buf->len), - "SSDT", ssdt->buf->len, 1, oem_id, "VMGENID"); + /* must be called after above command to ensure correct table checksum */ + acpi_table_end(linker, &table); free_aml_allocator(); } diff --git a/hw/adc/Kconfig b/hw/adc/Kconfig index 25d2229fb83..a825bd3d343 100644 --- a/hw/adc/Kconfig +++ b/hw/adc/Kconfig @@ -1,2 +1,5 @@ config STM32F2XX_ADC bool + +config MAX111X + bool diff --git a/hw/adc/aspeed_adc.c b/hw/adc/aspeed_adc.c new file mode 100644 index 00000000000..c5fcae29f63 --- /dev/null +++ b/hw/adc/aspeed_adc.c @@ -0,0 +1,427 @@ +/* + * Aspeed ADC + * + * Copyright 2017-2021 IBM Corp. + * + * Andrew Jeffery + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "hw/irq.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" +#include "hw/adc/aspeed_adc.h" +#include "trace.h" + +#define ASPEED_ADC_MEMORY_REGION_SIZE 0x1000 +#define ASPEED_ADC_ENGINE_MEMORY_REGION_SIZE 0x100 +#define ASPEED_ADC_ENGINE_CH_EN_MASK 0xffff0000 +#define ASPEED_ADC_ENGINE_CH_EN(x) ((BIT(x)) << 16) +#define ASPEED_ADC_ENGINE_INIT BIT(8) +#define ASPEED_ADC_ENGINE_AUTO_COMP BIT(5) +#define ASPEED_ADC_ENGINE_COMP BIT(4) +#define ASPEED_ADC_ENGINE_MODE_MASK 0x0000000e +#define ASPEED_ADC_ENGINE_MODE_OFF (0b000 << 1) +#define ASPEED_ADC_ENGINE_MODE_STANDBY (0b001 << 1) +#define ASPEED_ADC_ENGINE_MODE_NORMAL (0b111 << 1) +#define ASPEED_ADC_ENGINE_EN BIT(0) +#define ASPEED_ADC_HYST_EN BIT(31) + +#define ASPEED_ADC_L_MASK ((1 << 10) - 1) +#define ASPEED_ADC_L(x) ((x) & ASPEED_ADC_L_MASK) +#define ASPEED_ADC_H(x) (((x) >> 16) & ASPEED_ADC_L_MASK) +#define ASPEED_ADC_LH_MASK (ASPEED_ADC_L_MASK << 16 | ASPEED_ADC_L_MASK) +#define LOWER_CHANNEL_MASK ((1 << 10) - 1) +#define LOWER_CHANNEL_DATA(x) ((x) & LOWER_CHANNEL_MASK) +#define UPPER_CHANNEL_DATA(x) (((x) >> 16) & LOWER_CHANNEL_MASK) + +#define TO_REG(addr) (addr >> 2) + +#define ENGINE_CONTROL TO_REG(0x00) +#define INTERRUPT_CONTROL TO_REG(0x04) +#define VGA_DETECT_CONTROL TO_REG(0x08) +#define CLOCK_CONTROL TO_REG(0x0C) +#define DATA_CHANNEL_1_AND_0 TO_REG(0x10) +#define DATA_CHANNEL_7_AND_6 TO_REG(0x1C) +#define DATA_CHANNEL_9_AND_8 TO_REG(0x20) +#define DATA_CHANNEL_15_AND_14 TO_REG(0x2C) +#define BOUNDS_CHANNEL_0 TO_REG(0x30) +#define BOUNDS_CHANNEL_7 TO_REG(0x4C) +#define BOUNDS_CHANNEL_8 TO_REG(0x50) +#define BOUNDS_CHANNEL_15 TO_REG(0x6C) +#define HYSTERESIS_CHANNEL_0 TO_REG(0x70) +#define HYSTERESIS_CHANNEL_7 TO_REG(0x8C) +#define HYSTERESIS_CHANNEL_8 TO_REG(0x90) +#define HYSTERESIS_CHANNEL_15 TO_REG(0xAC) +#define INTERRUPT_SOURCE TO_REG(0xC0) +#define 
COMPENSATING_AND_TRIMMING TO_REG(0xC4) + +static inline uint32_t update_channels(uint32_t current) +{ + return ((((current >> 16) & ASPEED_ADC_L_MASK) + 7) << 16) | + ((current + 5) & ASPEED_ADC_L_MASK); +} + +static bool breaks_threshold(AspeedADCEngineState *s, int reg) +{ + assert(reg >= DATA_CHANNEL_1_AND_0 && + reg < DATA_CHANNEL_1_AND_0 + s->nr_channels / 2); + + int a_bounds_reg = BOUNDS_CHANNEL_0 + (reg - DATA_CHANNEL_1_AND_0) * 2; + int b_bounds_reg = a_bounds_reg + 1; + uint32_t a_and_b = s->regs[reg]; + uint32_t a_bounds = s->regs[a_bounds_reg]; + uint32_t b_bounds = s->regs[b_bounds_reg]; + uint32_t a = ASPEED_ADC_L(a_and_b); + uint32_t b = ASPEED_ADC_H(a_and_b); + uint32_t a_lower = ASPEED_ADC_L(a_bounds); + uint32_t a_upper = ASPEED_ADC_H(a_bounds); + uint32_t b_lower = ASPEED_ADC_L(b_bounds); + uint32_t b_upper = ASPEED_ADC_H(b_bounds); + + return (a < a_lower || a > a_upper) || + (b < b_lower || b > b_upper); +} + +static uint32_t read_channel_sample(AspeedADCEngineState *s, int reg) +{ + assert(reg >= DATA_CHANNEL_1_AND_0 && + reg < DATA_CHANNEL_1_AND_0 + s->nr_channels / 2); + + /* Poor man's sampling */ + uint32_t value = s->regs[reg]; + s->regs[reg] = update_channels(s->regs[reg]); + + if (breaks_threshold(s, reg)) { + s->regs[INTERRUPT_CONTROL] |= BIT(reg - DATA_CHANNEL_1_AND_0); + qemu_irq_raise(s->irq); + } + + return value; +} + +static uint64_t aspeed_adc_engine_read(void *opaque, hwaddr addr, + unsigned int size) +{ + AspeedADCEngineState *s = ASPEED_ADC_ENGINE(opaque); + int reg = TO_REG(addr); + uint32_t value = 0; + + switch (reg) { + case BOUNDS_CHANNEL_8 ... BOUNDS_CHANNEL_15: + if (s->nr_channels <= 8) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: engine[%u]: " + "bounds register %u invalid, only 0...7 valid\n", + __func__, s->engine_id, reg - BOUNDS_CHANNEL_0); + break; + } + /* fallthrough */ + case HYSTERESIS_CHANNEL_8 ... HYSTERESIS_CHANNEL_15: + if (s->nr_channels <= 8) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: engine[%u]: " + "hysteresis register %u invalid, only 0...7 valid\n", + __func__, s->engine_id, reg - HYSTERESIS_CHANNEL_0); + break; + } + /* fallthrough */ + case BOUNDS_CHANNEL_0 ... BOUNDS_CHANNEL_7: + case HYSTERESIS_CHANNEL_0 ... HYSTERESIS_CHANNEL_7: + case ENGINE_CONTROL: + case INTERRUPT_CONTROL: + case VGA_DETECT_CONTROL: + case CLOCK_CONTROL: + case INTERRUPT_SOURCE: + case COMPENSATING_AND_TRIMMING: + value = s->regs[reg]; + break; + case DATA_CHANNEL_9_AND_8 ... DATA_CHANNEL_15_AND_14: + if (s->nr_channels <= 8) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: engine[%u]: " + "data register %u invalid, only 0...3 valid\n", + __func__, s->engine_id, reg - DATA_CHANNEL_1_AND_0); + break; + } + /* fallthrough */ + case DATA_CHANNEL_1_AND_0 ... 
DATA_CHANNEL_7_AND_6: + value = read_channel_sample(s, reg); + /* Allow 16-bit reads of the data registers */ + if (addr & 0x2) { + assert(size == 2); + value >>= 16; + } + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: engine[%u]: 0x%" HWADDR_PRIx "\n", + __func__, s->engine_id, addr); + break; + } + + trace_aspeed_adc_engine_read(s->engine_id, addr, value); + return value; +} + +static void aspeed_adc_engine_write(void *opaque, hwaddr addr, uint64_t value, + unsigned int size) +{ + AspeedADCEngineState *s = ASPEED_ADC_ENGINE(opaque); + int reg = TO_REG(addr); + uint32_t init = 0; + + trace_aspeed_adc_engine_write(s->engine_id, addr, value); + + switch (reg) { + case ENGINE_CONTROL: + init = !!(value & ASPEED_ADC_ENGINE_EN); + init *= ASPEED_ADC_ENGINE_INIT; + + value &= ~ASPEED_ADC_ENGINE_INIT; + value |= init; + + value &= ~ASPEED_ADC_ENGINE_AUTO_COMP; + break; + case INTERRUPT_CONTROL: + case VGA_DETECT_CONTROL: + case CLOCK_CONTROL: + break; + case DATA_CHANNEL_9_AND_8 ... DATA_CHANNEL_15_AND_14: + if (s->nr_channels <= 8) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: engine[%u]: " + "data register %u invalid, only 0...3 valid\n", + __func__, s->engine_id, reg - DATA_CHANNEL_1_AND_0); + return; + } + /* fallthrough */ + case BOUNDS_CHANNEL_8 ... BOUNDS_CHANNEL_15: + if (s->nr_channels <= 8) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: engine[%u]: " + "bounds register %u invalid, only 0...7 valid\n", + __func__, s->engine_id, reg - BOUNDS_CHANNEL_0); + return; + } + /* fallthrough */ + case DATA_CHANNEL_1_AND_0 ... DATA_CHANNEL_7_AND_6: + case BOUNDS_CHANNEL_0 ... BOUNDS_CHANNEL_7: + value &= ASPEED_ADC_LH_MASK; + break; + case HYSTERESIS_CHANNEL_8 ... HYSTERESIS_CHANNEL_15: + if (s->nr_channels <= 8) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: engine[%u]: " + "hysteresis register %u invalid, only 0...7 valid\n", + __func__, s->engine_id, reg - HYSTERESIS_CHANNEL_0); + return; + } + /* fallthrough */ + case HYSTERESIS_CHANNEL_0 ... 
HYSTERESIS_CHANNEL_7: + value &= (ASPEED_ADC_HYST_EN | ASPEED_ADC_LH_MASK); + break; + case INTERRUPT_SOURCE: + value &= 0xffff; + break; + case COMPENSATING_AND_TRIMMING: + value &= 0xf; + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: engine[%u]: " + "0x%" HWADDR_PRIx " 0x%" PRIx64 "\n", + __func__, s->engine_id, addr, value); + break; + } + + s->regs[reg] = value; +} + +static const MemoryRegionOps aspeed_adc_engine_ops = { + .read = aspeed_adc_engine_read, + .write = aspeed_adc_engine_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 2, + .max_access_size = 4, + .unaligned = false, + }, +}; + +static const uint32_t aspeed_adc_resets[ASPEED_ADC_NR_REGS] = { + [ENGINE_CONTROL] = 0x00000000, + [INTERRUPT_CONTROL] = 0x00000000, + [VGA_DETECT_CONTROL] = 0x0000000f, + [CLOCK_CONTROL] = 0x0000000f, +}; + +static void aspeed_adc_engine_reset(DeviceState *dev) +{ + AspeedADCEngineState *s = ASPEED_ADC_ENGINE(dev); + + memcpy(s->regs, aspeed_adc_resets, sizeof(aspeed_adc_resets)); +} + +static void aspeed_adc_engine_realize(DeviceState *dev, Error **errp) +{ + AspeedADCEngineState *s = ASPEED_ADC_ENGINE(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + g_autofree char *name = g_strdup_printf(TYPE_ASPEED_ADC_ENGINE ".%d", + s->engine_id); + + assert(s->engine_id < 2); + + sysbus_init_irq(sbd, &s->irq); + + memory_region_init_io(&s->mmio, OBJECT(s), &aspeed_adc_engine_ops, s, name, + ASPEED_ADC_ENGINE_MEMORY_REGION_SIZE); + + sysbus_init_mmio(sbd, &s->mmio); +} + +static const VMStateDescription vmstate_aspeed_adc_engine = { + .name = TYPE_ASPEED_ADC, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(regs, AspeedADCEngineState, ASPEED_ADC_NR_REGS), + VMSTATE_END_OF_LIST(), + } +}; + +static Property aspeed_adc_engine_properties[] = { + DEFINE_PROP_UINT32("engine-id", AspeedADCEngineState, engine_id, 0), + DEFINE_PROP_UINT32("nr-channels", AspeedADCEngineState, nr_channels, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void aspeed_adc_engine_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = aspeed_adc_engine_realize; + dc->reset = aspeed_adc_engine_reset; + device_class_set_props(dc, aspeed_adc_engine_properties); + dc->desc = "Aspeed Analog-to-Digital Engine"; + dc->vmsd = &vmstate_aspeed_adc_engine; +} + +static const TypeInfo aspeed_adc_engine_info = { + .name = TYPE_ASPEED_ADC_ENGINE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(AspeedADCEngineState), + .class_init = aspeed_adc_engine_class_init, +}; + +static void aspeed_adc_instance_init(Object *obj) +{ + AspeedADCState *s = ASPEED_ADC(obj); + AspeedADCClass *aac = ASPEED_ADC_GET_CLASS(obj); + uint32_t nr_channels = ASPEED_ADC_NR_CHANNELS / aac->nr_engines; + + for (int i = 0; i < aac->nr_engines; i++) { + AspeedADCEngineState *engine = &s->engines[i]; + object_initialize_child(obj, "engine[*]", engine, + TYPE_ASPEED_ADC_ENGINE); + qdev_prop_set_uint32(DEVICE(engine), "engine-id", i); + qdev_prop_set_uint32(DEVICE(engine), "nr-channels", nr_channels); + } +} + +static void aspeed_adc_set_irq(void *opaque, int n, int level) +{ + AspeedADCState *s = opaque; + AspeedADCClass *aac = ASPEED_ADC_GET_CLASS(s); + uint32_t pending = 0; + + /* TODO: update Global IRQ status register on AST2600 (Need specs) */ + for (int i = 0; i < aac->nr_engines; i++) { + uint32_t irq_status = s->engines[i].regs[INTERRUPT_CONTROL] & 0xFF; + pending |= irq_status << (i * 8); + } + + qemu_set_irq(s->irq, !!pending); +} 
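A minimal wiring sketch for the new controller, assuming a SoC model that supplies the parent object, the MMIO base and the upstream interrupt line (the helper and its parameters below are placeholders, not part of this device model):

    #include "qemu/osdep.h"
    #include "hw/adc/aspeed_adc.h"
    #include "hw/sysbus.h"
    #include "hw/irq.h"

    /* Illustrative helper; "parent", "base" and "irq" come from the SoC. */
    static void wire_ast2600_adc(Object *parent, AspeedADCState *adc,
                                 hwaddr base, qemu_irq irq, Error **errp)
    {
        object_initialize_child(parent, "adc", adc, TYPE_ASPEED_2600_ADC);
        if (!sysbus_realize(SYS_BUS_DEVICE(adc), errp)) {
            return;
        }
        /* One 0x1000 window; the AST2600's second engine sits at +0x100. */
        sysbus_mmio_map(SYS_BUS_DEVICE(adc), 0, base);
        /* Single upstream IRQ, aggregated from the per-engine lines above. */
        sysbus_connect_irq(SYS_BUS_DEVICE(adc), 0, irq);
    }

The realize hook that follows is what actually creates the per-engine sub-regions and the GPIO inputs this sketch relies on.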
+ +static void aspeed_adc_realize(DeviceState *dev, Error **errp) +{ + AspeedADCState *s = ASPEED_ADC(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + AspeedADCClass *aac = ASPEED_ADC_GET_CLASS(dev); + + qdev_init_gpio_in_named_with_opaque(DEVICE(sbd), aspeed_adc_set_irq, + s, NULL, aac->nr_engines); + + sysbus_init_irq(sbd, &s->irq); + + memory_region_init(&s->mmio, OBJECT(s), TYPE_ASPEED_ADC, + ASPEED_ADC_MEMORY_REGION_SIZE); + + sysbus_init_mmio(sbd, &s->mmio); + + for (int i = 0; i < aac->nr_engines; i++) { + Object *eng = OBJECT(&s->engines[i]); + + if (!sysbus_realize(SYS_BUS_DEVICE(eng), errp)) { + return; + } + sysbus_connect_irq(SYS_BUS_DEVICE(eng), 0, + qdev_get_gpio_in(DEVICE(sbd), i)); + memory_region_add_subregion(&s->mmio, + i * ASPEED_ADC_ENGINE_MEMORY_REGION_SIZE, + &s->engines[i].mmio); + } +} + +static void aspeed_adc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedADCClass *aac = ASPEED_ADC_CLASS(klass); + + dc->realize = aspeed_adc_realize; + dc->desc = "Aspeed Analog-to-Digital Converter"; + aac->nr_engines = 1; +} + +static void aspeed_2600_adc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedADCClass *aac = ASPEED_ADC_CLASS(klass); + + dc->desc = "ASPEED 2600 ADC Controller"; + aac->nr_engines = 2; +} + +static const TypeInfo aspeed_adc_info = { + .name = TYPE_ASPEED_ADC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_init = aspeed_adc_instance_init, + .instance_size = sizeof(AspeedADCState), + .class_init = aspeed_adc_class_init, + .class_size = sizeof(AspeedADCClass), + .abstract = true, +}; + +static const TypeInfo aspeed_2400_adc_info = { + .name = TYPE_ASPEED_2400_ADC, + .parent = TYPE_ASPEED_ADC, +}; + +static const TypeInfo aspeed_2500_adc_info = { + .name = TYPE_ASPEED_2500_ADC, + .parent = TYPE_ASPEED_ADC, +}; + +static const TypeInfo aspeed_2600_adc_info = { + .name = TYPE_ASPEED_2600_ADC, + .parent = TYPE_ASPEED_ADC, + .class_init = aspeed_2600_adc_class_init, +}; + +static void aspeed_adc_register_types(void) +{ + type_register_static(&aspeed_adc_engine_info); + type_register_static(&aspeed_adc_info); + type_register_static(&aspeed_2400_adc_info); + type_register_static(&aspeed_2500_adc_info); + type_register_static(&aspeed_2600_adc_info); +} + +type_init(aspeed_adc_register_types); diff --git a/hw/misc/max111x.c b/hw/adc/max111x.c similarity index 99% rename from hw/misc/max111x.c rename to hw/adc/max111x.c index 1b3234a5196..e8bf4cccd44 100644 --- a/hw/misc/max111x.c +++ b/hw/adc/max111x.c @@ -11,7 +11,7 @@ */ #include "qemu/osdep.h" -#include "hw/misc/max111x.h" +#include "hw/adc/max111x.h" #include "hw/irq.h" #include "migration/vmstate.h" #include "qemu/module.h" diff --git a/hw/adc/meson.build b/hw/adc/meson.build index 6ddee238139..b29ac7ccdf5 100644 --- a/hw/adc/meson.build +++ b/hw/adc/meson.build @@ -1,2 +1,5 @@ softmmu_ss.add(when: 'CONFIG_STM32F2XX_ADC', if_true: files('stm32f2xx_adc.c')) +softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('aspeed_adc.c')) softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_adc.c')) +softmmu_ss.add(when: 'CONFIG_ZYNQ', if_true: files('zynq-xadc.c')) +softmmu_ss.add(when: 'CONFIG_MAX111X', if_true: files('max111x.c')) diff --git a/hw/adc/trace-events b/hw/adc/trace-events index 4c3279ece2c..5a4c444d773 100644 --- a/hw/adc/trace-events +++ b/hw/adc/trace-events @@ -1,5 +1,8 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# npcm7xx_adc.c npcm7xx_adc_read(const char *id, uint64_t offset, uint32_t value) " %s offset: 0x%04" PRIx64 " value 0x%04" PRIx32 npcm7xx_adc_write(const char *id, uint64_t offset, uint32_t value) "%s offset: 0x%04" PRIx64 " value 0x%04" PRIx32 + +aspeed_adc_engine_read(uint32_t engine_id, uint64_t addr, uint64_t value) "engine[%u] 0x%" PRIx64 " 0x%" PRIx64 +aspeed_adc_engine_write(uint32_t engine_id, uint64_t addr, uint64_t value) "engine[%u] 0x%" PRIx64 " 0x%" PRIx64 diff --git a/hw/misc/zynq-xadc.c b/hw/adc/zynq-xadc.c similarity index 99% rename from hw/misc/zynq-xadc.c rename to hw/adc/zynq-xadc.c index 7b1972ce063..cfc7bab0651 100644 --- a/hw/misc/zynq-xadc.c +++ b/hw/adc/zynq-xadc.c @@ -15,7 +15,7 @@ #include "qemu/osdep.h" #include "hw/irq.h" -#include "hw/misc/zynq-xadc.h" +#include "hw/adc/zynq-xadc.h" #include "migration/vmstate.h" #include "qemu/timer.h" #include "qemu/log.h" diff --git a/hw/alpha/Kconfig b/hw/alpha/Kconfig index 15c59ff2645..9af650c94ec 100644 --- a/hw/alpha/Kconfig +++ b/hw/alpha/Kconfig @@ -3,9 +3,7 @@ config DP264 imply PCI_DEVICES imply TEST_DEVICES imply E1000_PCI - select I82374 - select I8254 - select I8259 + select I82378 select IDE_CMD646 select MC146818RTC select PCI diff --git a/hw/alpha/alpha_sys.h b/hw/alpha/alpha_sys.h index e2c02e2bbe1..2263e821da5 100644 --- a/hw/alpha/alpha_sys.h +++ b/hw/alpha/alpha_sys.h @@ -10,8 +10,8 @@ #include "hw/intc/i8259.h" -PCIBus *typhoon_init(MemoryRegion *, ISABus **, qemu_irq *, AlphaCPU *[4], - pci_map_irq_fn); +PCIBus *typhoon_init(MemoryRegion *, qemu_irq *, qemu_irq *, AlphaCPU *[4], + pci_map_irq_fn, uint8_t devfn_min); /* alpha_pci.c. */ extern const MemoryRegionOps alpha_pci_ignore_ops; diff --git a/hw/alpha/dp264.c b/hw/alpha/dp264.c index c8e300d93f6..c78ed96d0ec 100644 --- a/hw/alpha/dp264.c +++ b/hw/alpha/dp264.c @@ -13,12 +13,9 @@ #include "hw/loader.h" #include "alpha_sys.h" #include "qemu/error-report.h" -#include "sysemu/sysemu.h" #include "hw/rtc/mc146818rtc.h" #include "hw/ide/pci.h" -#include "hw/timer/i8254.h" #include "hw/isa/superio.h" -#include "hw/dma/i8257.h" #include "net/net.h" #include "qemu/cutils.h" #include "qemu/datadir.h" @@ -59,8 +56,10 @@ static void clipper_init(MachineState *machine) AlphaCPU *cpus[4]; PCIBus *pci_bus; PCIDevice *pci_dev; + DeviceState *i82378_dev; ISABus *isa_bus; qemu_irq rtc_irq; + qemu_irq isa_irq; long size, i; char *palcode_filename; uint64_t palcode_entry; @@ -73,19 +72,57 @@ static void clipper_init(MachineState *machine) cpus[i] = ALPHA_CPU(cpu_create(machine->cpu_type)); } + /* + * arg0 -> memory size + * arg1 -> kernel entry point + * arg2 -> config word + * + * Config word: bits 0-5 -> ncpus + * bit 6 -> nographics option (for HWRPB CTB) + * + * See init_hwrpb() in the PALcode. + */ cpus[0]->env.trap_arg0 = ram_size; cpus[0]->env.trap_arg1 = 0; - cpus[0]->env.trap_arg2 = smp_cpus; - - /* Init the chipset. */ - pci_bus = typhoon_init(machine->ram, &isa_bus, &rtc_irq, cpus, - clipper_pci_map_irq); + cpus[0]->env.trap_arg2 = smp_cpus | (!machine->enable_graphics << 6); + + /* + * Init the chipset. Because we're using CLIPPER IRQ mappings, + * the minimum PCI device IdSel is 1. + */ + pci_bus = typhoon_init(machine->ram, &isa_irq, &rtc_irq, cpus, + clipper_pci_map_irq, PCI_DEVFN(1, 0)); + + /* + * Init the PCI -> ISA bridge. 
+ * + * Technically, PCI-based Alphas shipped with one of three different + * PCI-ISA bridges: + * + * - Intel i82378 SIO + * - Cypress CY82c693UB + * - ALI M1533 + * + * (An Intel i82375 PCI-EISA bridge was also used on some models.) + * + * For simplicity, we model an i82378 here, even though it wouldn't + * have been on any Tsunami/Typhoon systems; it's close enough, and + * we don't want to deal with modelling the CY82c693UB (which has + * incompatible edge/level control registers, plus other peripherals + * like IDE and USB) or the M1533 (which also has IDE and USB). + * + * Importantly, we need to provide a PCI device node for it, otherwise + * some operating systems won't notice there's an ISA bus to configure. + */ + i82378_dev = DEVICE(pci_create_simple(pci_bus, PCI_DEVFN(7, 0), "i82378")); + isa_bus = ISA_BUS(qdev_get_child_bus(i82378_dev, "isa.0")); + + /* Connect the ISA PIC to the Typhoon IRQ used for ISA interrupts. */ + qdev_connect_gpio_out(i82378_dev, 0, isa_irq); /* Since we have an SRM-compatible PALcode, use the SRM epoch. */ mc146818_rtc_init(isa_bus, 1900, rtc_irq); - i8254_pit_init(isa_bus, 0x40, 0, NULL); - /* VGA setup. Don't bother loading the bios. */ pci_vga_init(pci_bus); @@ -94,9 +131,6 @@ static void clipper_init(MachineState *machine) pci_nic_init_nofail(&nd_table[i], pci_bus, "e1000", NULL); } - /* 2 82C37 (dma) */ - isa_create_simple(isa_bus, "i82374"); - /* Super I/O */ isa_create_simple(isa_bus, TYPE_SMC37C669_SUPERIO); diff --git a/hw/alpha/trace-events b/hw/alpha/trace-events index 5b8315f27fc..952a8164075 100644 --- a/hw/alpha/trace-events +++ b/hw/alpha/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # pci.c alpha_pci_iack_write(void) "" diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c index a42b3198121..bd39c8ca862 100644 --- a/hw/alpha/typhoon.c +++ b/hw/alpha/typhoon.c @@ -11,10 +11,8 @@ #include "qemu/units.h" #include "qapi/error.h" #include "cpu.h" -#include "hw/boards.h" #include "hw/irq.h" #include "alpha_sys.h" -#include "exec/address-spaces.h" #include "qom/object.h" @@ -816,8 +814,9 @@ static void typhoon_alarm_timer(void *opaque) cpu_interrupt(CPU(s->cchip.cpu[cpu]), CPU_INTERRUPT_TIMER); } -PCIBus *typhoon_init(MemoryRegion *ram, ISABus **isa_bus, qemu_irq *p_rtc_irq, - AlphaCPU *cpus[4], pci_map_irq_fn sys_map_irq) +PCIBus *typhoon_init(MemoryRegion *ram, qemu_irq *p_isa_irq, + qemu_irq *p_rtc_irq, AlphaCPU *cpus[4], + pci_map_irq_fn sys_map_irq, uint8_t devfn_min) { MemoryRegion *addr_space = get_system_memory(); DeviceState *dev; @@ -845,6 +844,7 @@ PCIBus *typhoon_init(MemoryRegion *ram, ISABus **isa_bus, qemu_irq *p_rtc_irq, } } + *p_isa_irq = qemu_allocate_irq(typhoon_set_isa_irq, s, 0); *p_rtc_irq = qemu_allocate_irq(typhoon_set_timer_irq, s, 0); /* Main memory region, 0x00.0000.0000. Real hardware supports 32GB, @@ -887,7 +887,7 @@ PCIBus *typhoon_init(MemoryRegion *ram, ISABus **isa_bus, qemu_irq *p_rtc_irq, b = pci_register_root_bus(dev, "pci", typhoon_set_irq, sys_map_irq, s, &s->pchip.reg_mem, &s->pchip.reg_io, - 0, 64, TYPE_PCI_BUS); + devfn_min, 64, TYPE_PCI_BUS); phb->bus = b; sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); @@ -920,18 +920,6 @@ PCIBus *typhoon_init(MemoryRegion *ram, ISABus **isa_bus, qemu_irq *p_rtc_irq, /* Pchip1 PCI I/O, 0x802.FC00.0000, 32MB. */ /* Pchip1 PCI configuration, 0x802.FE00.0000, 16MB. */ - /* Init the ISA bus. */ - /* ??? Technically there should be a cy82c693ub pci-isa bridge. 
*/ - { - qemu_irq *isa_irqs; - - *isa_bus = isa_bus_new(NULL, get_system_memory(), &s->pchip.reg_io, - &error_abort); - isa_irqs = i8259_init(*isa_bus, - qemu_allocate_irq(typhoon_set_isa_irq, s, 0)); - isa_bus_irqs(*isa_bus, isa_irqs); - } - return b; } diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index 0cc28e3ea55..5a06777cff6 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -6,6 +6,7 @@ config ARM_VIRT imply VFIO_PLATFORM imply VFIO_XGMAC imply TPM_TIS_SYSBUS + imply NVDIMM select ARM_GIC select ACPI select ARM_SMMUV3 @@ -24,9 +25,7 @@ config ARM_VIRT select ACPI_PCI select MEM_DEVICE select DIMM - select ACPI_MEMORY_HOTPLUG select ACPI_HW_REDUCED - select ACPI_NVDIMM select ACPI_APEI config CHEETAH @@ -142,6 +141,7 @@ config PXA2XX select SD select SSI select USB_OHCI + select PCMCIA config GUMSTIX bool @@ -235,8 +235,13 @@ config STELLARIS select SSI_SD select STELLARIS_INPUT select STELLARIS_ENET # ethernet + select STELLARIS_GPTM # general purpose timer module select UNIMP +config STM32VLDISCOVERY + bool + select STM32F100_SOC + config STRONGARM bool select PXA2XX @@ -294,7 +299,10 @@ config ZYNQ config ARM_V7M bool + # currently v7M must be included in a TCG build due to translate.c + default y if TCG && (ARM || AARCH64) select PTIMER + select ARM_COMPATIBLE_SEMIHOSTING config ALLWINNER_A10 bool @@ -324,6 +332,12 @@ config RASPI select SDHCI select USB_DWC2 +config STM32F100_SOC + bool + select ARM_V7M + select STM32F2XX_USART + select STM32F2XX_SPI + config STM32F205_SOC bool select ARM_V7M @@ -366,16 +380,24 @@ config XLNX_VERSAL select UNIMP select XLNX_ZDMA select XLNX_ZYNQMP + select OR_IRQ + select XLNX_BBRAM + select XLNX_EFUSE_VERSAL config NPCM7XX bool select A9MPCORE + select ADM1272 select ARM_GIC + select SMBUS select AT24C # EEPROM + select MAX34451 select PL310 # cache controller + select PMBUS select SERIAL select SSI select UNIMP + select PCA954X config FSL_IMX25 bool @@ -383,7 +405,7 @@ config FSL_IMX25 select IMX_FEC select IMX_I2C select WDT_IMX2 - select DS1338 + select SDHCI config FSL_IMX31 bool @@ -408,9 +430,11 @@ config ASPEED_SOC select DS1338 select FTGMAC100 select I2C + select DPS310 select PCA9552 select SERIAL select SMBUS_EEPROM + select PCA954X select SSI select SSI_M25P80 select TMP105 diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c index d404f31e02f..05e84728cb3 100644 --- a/hw/arm/allwinner-a10.c +++ b/hw/arm/allwinner-a10.c @@ -16,10 +16,8 @@ */ #include "qemu/osdep.h" -#include "exec/address-spaces.h" #include "qapi/error.h" #include "qemu/module.h" -#include "cpu.h" #include "hw/sysbus.h" #include "hw/arm/allwinner-a10.h" #include "hw/misc/unimp.h" diff --git a/hw/arm/allwinner-h3.c b/hw/arm/allwinner-h3.c index 88259a9c0d1..f9b7ed18711 100644 --- a/hw/arm/allwinner-h3.c +++ b/hw/arm/allwinner-h3.c @@ -18,13 +18,11 @@ */ #include "qemu/osdep.h" -#include "exec/address-spaces.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/module.h" #include "qemu/units.h" #include "hw/qdev-core.h" -#include "cpu.h" #include "hw/sysbus.h" #include "hw/char/serial.h" #include "hw/misc/unimp.h" @@ -239,7 +237,7 @@ static void allwinner_h3_realize(DeviceState *dev, Error **errp) /* Provide Power State Coordination Interface */ qdev_prop_set_int32(DEVICE(&s->cpus[i]), "psci-conduit", - QEMU_PSCI_CONDUIT_HVC); + QEMU_PSCI_CONDUIT_SMC); /* Disable secondary CPUs */ qdev_prop_set_bit(DEVICE(&s->cpus[i]), "start-powered-off", diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c index 2e5d0679e7b..aecdeb9815a 100644 --- 
a/hw/arm/armsse.c +++ b/hw/arm/armsse.c @@ -13,6 +13,7 @@ #include "qemu/log.h" #include "qemu/module.h" #include "qemu/bitops.h" +#include "qemu/units.h" #include "qapi/error.h" #include "trace.h" #include "hw/sysbus.h" @@ -59,6 +60,7 @@ struct ARMSSEInfo { const char *cpu_type; uint32_t sse_version; int sram_banks; + uint32_t sram_bank_base; int num_cpus; uint32_t sys_version; uint32_t iidr; @@ -69,6 +71,7 @@ struct ARMSSEInfo { bool has_cpuid; bool has_cpu_pwrctrl; bool has_sse_counter; + bool has_tcms; Property *props; const ARMSSEDeviceInfo *devinfo; const bool *irq_is_common; @@ -102,7 +105,7 @@ static Property sse300_properties[] = { DEFINE_PROP_LINK("memory", ARMSSE, board_memory, TYPE_MEMORY_REGION, MemoryRegion *), DEFINE_PROP_UINT32("EXP_NUMIRQ", ARMSSE, exp_numirq, 64), - DEFINE_PROP_UINT32("SRAM_ADDR_WIDTH", ARMSSE, sram_addr_width, 15), + DEFINE_PROP_UINT32("SRAM_ADDR_WIDTH", ARMSSE, sram_addr_width, 18), DEFINE_PROP_UINT32("init-svtor", ARMSSE, init_svtor, 0x10000000), DEFINE_PROP_BOOL("CPU0_FPU", ARMSSE, cpu_fpu[0], true), DEFINE_PROP_BOOL("CPU0_DSP", ARMSSE, cpu_dsp[0], true), @@ -504,6 +507,7 @@ static const ARMSSEInfo armsse_variants[] = { .sse_version = ARMSSE_IOTKIT, .cpu_type = ARM_CPU_TYPE_NAME("cortex-m33"), .sram_banks = 1, + .sram_bank_base = 0x20000000, .num_cpus = 1, .sys_version = 0x41743, .iidr = 0, @@ -514,6 +518,7 @@ static const ARMSSEInfo armsse_variants[] = { .has_cpuid = false, .has_cpu_pwrctrl = false, .has_sse_counter = false, + .has_tcms = false, .props = iotkit_properties, .devinfo = iotkit_devices, .irq_is_common = sse200_irq_is_common, @@ -523,6 +528,7 @@ static const ARMSSEInfo armsse_variants[] = { .sse_version = ARMSSE_SSE200, .cpu_type = ARM_CPU_TYPE_NAME("cortex-m33"), .sram_banks = 4, + .sram_bank_base = 0x20000000, .num_cpus = 2, .sys_version = 0x22041743, .iidr = 0, @@ -533,6 +539,7 @@ static const ARMSSEInfo armsse_variants[] = { .has_cpuid = true, .has_cpu_pwrctrl = false, .has_sse_counter = false, + .has_tcms = false, .props = sse200_properties, .devinfo = sse200_devices, .irq_is_common = sse200_irq_is_common, @@ -542,6 +549,7 @@ static const ARMSSEInfo armsse_variants[] = { .sse_version = ARMSSE_SSE300, .cpu_type = ARM_CPU_TYPE_NAME("cortex-m55"), .sram_banks = 2, + .sram_bank_base = 0x21000000, .num_cpus = 1, .sys_version = 0x7e00043b, .iidr = 0x74a0043b, @@ -552,6 +560,7 @@ static const ARMSSEInfo armsse_variants[] = { .has_cpuid = true, .has_cpu_pwrctrl = true, .has_sse_counter = true, + .has_tcms = true, .props = sse300_properties, .devinfo = sse300_devices, .irq_is_common = sse300_irq_is_common, @@ -680,17 +689,6 @@ static void armsse_forward_sec_resp_cfg(ARMSSE *s) qdev_connect_gpio_out(dev_splitter, 2, s->sec_resp_cfg_in); } -static void armsse_mainclk_update(void *opaque, ClockEvent event) -{ - ARMSSE *s = ARM_SSE(opaque); - - /* - * Set system_clock_scale from our Clock input; this is what - * controls the tick rate of the CPU SysTick timer. 
- */ - system_clock_scale = clock_ticks_to_ns(s->mainclk, 1); -} - static void armsse_init(Object *obj) { ARMSSE *s = ARM_SSE(obj); @@ -702,8 +700,7 @@ static void armsse_init(Object *obj) assert(info->sram_banks <= MAX_SRAM_BANKS); assert(info->num_cpus <= SSE_MAX_CPUS); - s->mainclk = qdev_init_clock_in(DEVICE(s), "MAINCLK", - armsse_mainclk_update, s, ClockUpdate); + s->mainclk = qdev_init_clock_in(DEVICE(s), "MAINCLK", NULL, NULL, 0); s->s32kclk = qdev_init_clock_in(DEVICE(s), "S32KCLK", NULL, NULL, 0); memory_region_init(&s->container, obj, "armsse-container", UINT64_MAX); @@ -909,7 +906,6 @@ static void armsse_realize(DeviceState *dev, Error **errp) const ARMSSEDeviceInfo *devinfo; int i; MemoryRegion *mr; - Error *err = NULL; SysBusDevice *sbd_apb_ppc0; SysBusDevice *sbd_secctl; DeviceState *dev_apb_ppc0; @@ -918,6 +914,8 @@ static void armsse_realize(DeviceState *dev, Error **errp) DeviceState *dev_splitter; uint32_t addr_width_max; + ERRP_GUARD(); + if (!s->board_memory) { error_setg(errp, "memory property was not set"); return; @@ -985,6 +983,9 @@ static void armsse_realize(DeviceState *dev, Error **errp) int j; char *gpioname; + qdev_connect_clock_in(cpudev, "cpuclk", s->mainclk); + /* The SSE subsystems do not wire up a systick refclk */ + qdev_prop_set_uint32(cpudev, "num-irq", s->exp_numirq + NUM_SSE_IRQS); /* * In real hardware the initial Secure VTOR is set from the INITSVTOR* @@ -1147,10 +1148,9 @@ static void armsse_realize(DeviceState *dev, Error **errp) uint32_t sram_bank_size = 1 << s->sram_addr_width; memory_region_init_ram(&s->sram[i], NULL, ramname, - sram_bank_size, &err); + sram_bank_size, errp); g_free(ramname); - if (err) { - error_propagate(errp, err); + if (*errp) { return; } object_property_set_link(OBJECT(&s->mpc[i]), "downstream", @@ -1161,7 +1161,7 @@ static void armsse_realize(DeviceState *dev, Error **errp) /* Map the upstream end of the MPC into the right place... */ sbd_mpc = SYS_BUS_DEVICE(&s->mpc[i]); memory_region_add_subregion(&s->container, - 0x20000000 + i * sram_bank_size, + info->sram_bank_base + i * sram_bank_size, sysbus_mmio_get_region(sbd_mpc, 1)); /* ...and its register interface */ memory_region_add_subregion(&s->container, 0x50083000 + i * 0x1000, @@ -1210,6 +1210,20 @@ static void armsse_realize(DeviceState *dev, Error **errp) sysbus_mmio_get_region(sbd, 1)); } + if (info->has_tcms) { + /* The SSE-300 has an ITCM at 0x0000_0000 and a DTCM at 0x2000_0000 */ + memory_region_init_ram(&s->itcm, NULL, "sse300-itcm", 512 * KiB, errp); + if (*errp) { + return; + } + memory_region_init_ram(&s->dtcm, NULL, "sse300-dtcm", 512 * KiB, errp); + if (*errp) { + return; + } + memory_region_add_subregion(&s->container, 0x00000000, &s->itcm); + memory_region_add_subregion(&s->container, 0x20000000, &s->dtcm); + } + /* Devices behind APB PPC0: * 0x40000000: timer0 * 0x40001000: timer1 @@ -1628,9 +1642,6 @@ static void armsse_realize(DeviceState *dev, Error **errp) * devices in the ARMSSE. 
*/ sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->container); - - /* Set initial system_clock_scale from MAINCLK */ - armsse_mainclk_update(s, ClockUpdate); } static void armsse_idau_check(IDAUInterface *ii, uint32_t address, diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c index 6dd10d84703..8d08db80be8 100644 --- a/hw/arm/armv7m.c +++ b/hw/arm/armv7m.c @@ -10,17 +10,18 @@ #include "qemu/osdep.h" #include "hw/arm/armv7m.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/sysbus.h" #include "hw/arm/boot.h" #include "hw/loader.h" #include "hw/qdev-properties.h" +#include "hw/qdev-clock.h" #include "elf.h" #include "sysemu/reset.h" #include "qemu/error-report.h" #include "qemu/module.h" -#include "exec/address-spaces.h" +#include "qemu/log.h" #include "target/arm/idau.h" +#include "migration/vmstate.h" /* Bitbanded IO. Each word corresponds to a single bit. */ @@ -126,6 +127,122 @@ static const hwaddr bitband_output_addr[ARMV7M_NUM_BITBANDS] = { 0x22000000, 0x42000000 }; +static MemTxResult v7m_sysreg_ns_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size, + MemTxAttrs attrs) +{ + MemoryRegion *mr = opaque; + + if (attrs.secure) { + /* S accesses to the alias act like NS accesses to the real region */ + attrs.secure = 0; + return memory_region_dispatch_write(mr, addr, value, + size_memop(size) | MO_TE, attrs); + } else { + /* NS attrs are RAZ/WI for privileged, and BusFault for user */ + if (attrs.user) { + return MEMTX_ERROR; + } + return MEMTX_OK; + } +} + +static MemTxResult v7m_sysreg_ns_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + MemoryRegion *mr = opaque; + + if (attrs.secure) { + /* S accesses to the alias act like NS accesses to the real region */ + attrs.secure = 0; + return memory_region_dispatch_read(mr, addr, data, + size_memop(size) | MO_TE, attrs); + } else { + /* NS attrs are RAZ/WI for privileged, and BusFault for user */ + if (attrs.user) { + return MEMTX_ERROR; + } + *data = 0; + return MEMTX_OK; + } +} + +static const MemoryRegionOps v7m_sysreg_ns_ops = { + .read_with_attrs = v7m_sysreg_ns_read, + .write_with_attrs = v7m_sysreg_ns_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static MemTxResult v7m_systick_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size, + MemTxAttrs attrs) +{ + ARMv7MState *s = opaque; + MemoryRegion *mr; + + /* Direct the access to the correct systick */ + mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->systick[attrs.secure]), 0); + return memory_region_dispatch_write(mr, addr, value, + size_memop(size) | MO_TE, attrs); +} + +static MemTxResult v7m_systick_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + ARMv7MState *s = opaque; + MemoryRegion *mr; + + /* Direct the access to the correct systick */ + mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->systick[attrs.secure]), 0); + return memory_region_dispatch_read(mr, addr, data, size_memop(size) | MO_TE, + attrs); +} + +static const MemoryRegionOps v7m_systick_ops = { + .read_with_attrs = v7m_systick_read, + .write_with_attrs = v7m_systick_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +/* + * Unassigned portions of the PPB space are RAZ/WI for privileged + * accesses, and fault for non-privileged accesses. 
+ */ +static MemTxResult ppb_default_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + qemu_log_mask(LOG_UNIMP, "Read of unassigned area of PPB: offset 0x%x\n", + (uint32_t)addr); + if (attrs.user) { + return MEMTX_ERROR; + } + *data = 0; + return MEMTX_OK; +} + +static MemTxResult ppb_default_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size, + MemTxAttrs attrs) +{ + qemu_log_mask(LOG_UNIMP, "Write of unassigned area of PPB: offset 0x%x\n", + (uint32_t)addr); + if (attrs.user) { + return MEMTX_ERROR; + } + return MEMTX_OK; +} + +static const MemoryRegionOps ppb_default_ops = { + .read_with_attrs = ppb_default_read, + .write_with_attrs = ppb_default_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 1, + .valid.max_access_size = 8, +}; + static void armv7m_instance_init(Object *obj) { ARMv7MState *s = ARMV7M(obj); @@ -139,10 +256,20 @@ static void armv7m_instance_init(Object *obj) object_property_add_alias(obj, "num-irq", OBJECT(&s->nvic), "num-irq"); + object_initialize_child(obj, "systick-reg-ns", &s->systick[M_REG_NS], + TYPE_SYSTICK); + /* + * We can't initialize the secure systick here, as we don't know + * yet if we need it. + */ + for (i = 0; i < ARRAY_SIZE(s->bitband); i++) { object_initialize_child(obj, "bitband[*]", &s->bitband[i], TYPE_BITBAND); } + + s->refclk = qdev_init_clock_in(DEVICE(obj), "refclk", NULL, NULL, 0); + s->cpuclk = qdev_init_clock_in(DEVICE(obj), "cpuclk", NULL, NULL, 0); } static void armv7m_realize(DeviceState *dev, Error **errp) @@ -178,6 +305,12 @@ static void armv7m_realize(DeviceState *dev, Error **errp) return; } } + if (object_property_find(OBJECT(s->cpu), "init-nsvtor")) { + if (!object_property_set_uint(OBJECT(s->cpu), "init-nsvtor", + s->init_nsvtor, errp)) { + return; + } + } if (object_property_find(OBJECT(s->cpu), "start-powered-off")) { if (!object_property_set_bool(OBJECT(s->cpu), "start-powered-off", s->start_powered_off, errp)) { @@ -219,13 +352,130 @@ static void armv7m_realize(DeviceState *dev, Error **errp) qdev_pass_gpios(DEVICE(&s->nvic), dev, "SYSRESETREQ"); qdev_pass_gpios(DEVICE(&s->nvic), dev, "NMI"); + /* + * We map various devices into the container MR at their architected + * addresses. In particular, we map everything corresponding to the + * "System PPB" space. This is the range from 0xe0000000 to 0xe00fffff + * and includes the NVIC, the System Control Space (system registers), + * the systick timer, and for CPUs with the Security extension an NS + * banked version of all of these. + * + * The default behaviour for unimplemented registers/ranges + * (for instance the Data Watchpoint and Trace unit at 0xe0001000) + * is to RAZ/WI for privileged access and BusFault for non-privileged + * access. + * + * The NVIC and System Control Space (SCS) starts at 0xe000e000 + * and looks like this: + * 0x004 - ICTR + * 0x010 - 0xff - systick + * 0x100..0x7ec - NVIC + * 0x7f0..0xcff - Reserved + * 0xd00..0xd3c - SCS registers + * 0xd40..0xeff - Reserved or Not implemented + * 0xf00 - STIR + * + * Some registers within this space are banked between security states. + * In v8M there is a second range 0xe002e000..0xe002efff which is the + * NonSecure alias SCS; secure accesses to this behave like NS accesses + * to the main SCS range, and non-secure accesses (including when + * the security extension is not implemented) are RAZ/WI. 
+ * Note that both the main SCS range and the alias range are defined + * to be exempt from memory attribution (R_BLJT) and so the memory + * transaction attribute always matches the current CPU security + * state (attrs.secure == env->v7m.secure). In the v7m_sysreg_ns_ops + * wrappers we change attrs.secure to indicate the NS access; so + * generally code determining which banked register to use should + * use attrs.secure; code determining actual behaviour of the system + * should use env->v7m.secure. + * + * Within the PPB space, some MRs overlap, and the priority + * of overlapping regions is: + * - default region (for RAZ/WI and BusFault) : -1 + * - system register regions (provided by the NVIC) : 0 + * - systick : 1 + * This is because the systick device is a small block of registers + * in the middle of the other system control registers. + */ + + memory_region_init_io(&s->defaultmem, OBJECT(s), &ppb_default_ops, s, + "nvic-default", 0x100000); + memory_region_add_subregion_overlap(&s->container, 0xe0000000, + &s->defaultmem, -1); + /* Wire the NVIC up to the CPU */ sbd = SYS_BUS_DEVICE(&s->nvic); sysbus_connect_irq(sbd, 0, qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ)); - memory_region_add_subregion(&s->container, 0xe0000000, + memory_region_add_subregion(&s->container, 0xe000e000, sysbus_mmio_get_region(sbd, 0)); + if (arm_feature(&s->cpu->env, ARM_FEATURE_V8)) { + /* Create the NS alias region for the NVIC sysregs */ + memory_region_init_io(&s->sysreg_ns_mem, OBJECT(s), + &v7m_sysreg_ns_ops, + sysbus_mmio_get_region(sbd, 0), + "nvic_sysregs_ns", 0x1000); + memory_region_add_subregion(&s->container, 0xe002e000, + &s->sysreg_ns_mem); + } + + /* Create and map the systick devices */ + qdev_connect_clock_in(DEVICE(&s->systick[M_REG_NS]), "refclk", s->refclk); + qdev_connect_clock_in(DEVICE(&s->systick[M_REG_NS]), "cpuclk", s->cpuclk); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->systick[M_REG_NS]), errp)) { + return; + } + sysbus_connect_irq(SYS_BUS_DEVICE(&s->systick[M_REG_NS]), 0, + qdev_get_gpio_in_named(DEVICE(&s->nvic), + "systick-trigger", M_REG_NS)); + + if (arm_feature(&s->cpu->env, ARM_FEATURE_M_SECURITY)) { + /* + * We couldn't init the secure systick device in instance_init + * as we didn't know then if the CPU had the security extensions; + * so we have to do it here. 
+ */ + object_initialize_child(OBJECT(dev), "systick-reg-s", + &s->systick[M_REG_S], TYPE_SYSTICK); + qdev_connect_clock_in(DEVICE(&s->systick[M_REG_S]), "refclk", + s->refclk); + qdev_connect_clock_in(DEVICE(&s->systick[M_REG_S]), "cpuclk", + s->cpuclk); + + if (!sysbus_realize(SYS_BUS_DEVICE(&s->systick[M_REG_S]), errp)) { + return; + } + sysbus_connect_irq(SYS_BUS_DEVICE(&s->systick[M_REG_S]), 0, + qdev_get_gpio_in_named(DEVICE(&s->nvic), + "systick-trigger", M_REG_S)); + } + + memory_region_init_io(&s->systickmem, OBJECT(s), + &v7m_systick_ops, s, + "v7m_systick", 0xe0); + + memory_region_add_subregion_overlap(&s->container, 0xe000e010, + &s->systickmem, 1); + if (arm_feature(&s->cpu->env, ARM_FEATURE_V8)) { + memory_region_init_io(&s->systick_ns_mem, OBJECT(s), + &v7m_sysreg_ns_ops, &s->systickmem, + "v7m_systick_ns", 0xe0); + memory_region_add_subregion_overlap(&s->container, 0xe002e010, + &s->systick_ns_mem, 1); + } + + /* If the CPU has RAS support, create the RAS register block */ + if (cpu_isar_feature(aa32_ras, s->cpu)) { + object_initialize_child(OBJECT(dev), "armv7m-ras", + &s->ras, TYPE_ARMV7M_RAS); + sbd = SYS_BUS_DEVICE(&s->ras); + if (!sysbus_realize(sbd, errp)) { + return; + } + memory_region_add_subregion_overlap(&s->container, 0xe0005000, + sysbus_mmio_get_region(sbd, 0), 1); + } for (i = 0; i < ARRAY_SIZE(s->bitband); i++) { if (s->enable_bitband) { @@ -256,6 +506,7 @@ static Property armv7m_properties[] = { MemoryRegion *), DEFINE_PROP_LINK("idau", ARMv7MState, idau, TYPE_IDAU_INTERFACE, Object *), DEFINE_PROP_UINT32("init-svtor", ARMv7MState, init_svtor, 0), + DEFINE_PROP_UINT32("init-nsvtor", ARMv7MState, init_nsvtor, 0), DEFINE_PROP_BOOL("enable-bitband", ARMv7MState, enable_bitband, false), DEFINE_PROP_BOOL("start-powered-off", ARMv7MState, start_powered_off, false), @@ -264,11 +515,23 @@ static Property armv7m_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +static const VMStateDescription vmstate_armv7m = { + .name = "armv7m", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_CLOCK(refclk, SysTickState), + VMSTATE_CLOCK(cpuclk, SysTickState), + VMSTATE_END_OF_LIST() + } +}; + static void armv7m_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = armv7m_realize; + dc->vmsd = &vmstate_armv7m; device_class_set_props(dc, armv7m_properties); } diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index a17b75f4940..a77f46b3adb 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -11,20 +11,16 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" -#include "exec/address-spaces.h" #include "hw/arm/boot.h" #include "hw/arm/aspeed.h" #include "hw/arm/aspeed_soc.h" -#include "hw/boards.h" +#include "hw/i2c/i2c_mux_pca954x.h" #include "hw/i2c/smbus_eeprom.h" #include "hw/misc/pca9552.h" -#include "hw/misc/tmp105.h" +#include "hw/sensor/tmp105.h" #include "hw/misc/led.h" #include "hw/qdev-properties.h" -#include "qemu/log.h" #include "sysemu/block-backend.h" -#include "sysemu/sysemu.h" #include "hw/loader.h" #include "qemu/error-report.h" #include "qemu/units.h" @@ -135,9 +131,37 @@ struct AspeedMachineState { SCU_HW_STRAP_VGA_SIZE_SET(VGA_64M_DRAM) | \ SCU_AST2500_HW_STRAP_RESERVED1) +/* FP5280G2 hardware value: 0XF100D286 */ +#define FP5280G2_BMC_HW_STRAP1 ( \ + SCU_AST2500_HW_STRAP_SPI_AUTOFETCH_ENABLE | \ + SCU_AST2500_HW_STRAP_GPIO_STRAP_ENABLE | \ + SCU_AST2500_HW_STRAP_UART_DEBUG | \ + SCU_AST2500_HW_STRAP_RESERVED28 | \ + SCU_AST2500_HW_STRAP_DDR4_ENABLE | \ + 
SCU_HW_STRAP_VGA_CLASS_CODE | \ + SCU_HW_STRAP_LPC_RESET_PIN | \ + SCU_HW_STRAP_SPI_MODE(SCU_HW_STRAP_SPI_MASTER) | \ + SCU_AST2500_HW_STRAP_SET_AXI_AHB_RATIO(AXI_AHB_RATIO_2_1) | \ + SCU_HW_STRAP_MAC1_RGMII | \ + SCU_HW_STRAP_VGA_SIZE_SET(VGA_16M_DRAM) | \ + SCU_AST2500_HW_STRAP_RESERVED1) + /* Witherspoon hardware value: 0xF10AD216 (but use romulus definition) */ #define WITHERSPOON_BMC_HW_STRAP1 ROMULUS_BMC_HW_STRAP1 +/* Quanta-Q71l hardware value */ +#define QUANTA_Q71L_BMC_HW_STRAP1 ( \ + SCU_AST2400_HW_STRAP_DRAM_SIZE(DRAM_SIZE_128MB) | \ + SCU_AST2400_HW_STRAP_DRAM_CONFIG(2/* DDR3 with CL=6, CWL=5 */) | \ + SCU_AST2400_HW_STRAP_ACPI_DIS | \ + SCU_AST2400_HW_STRAP_SET_CLK_SOURCE(AST2400_CLK_24M_IN) | \ + SCU_HW_STRAP_VGA_CLASS_CODE | \ + SCU_HW_STRAP_SPI_MODE(SCU_HW_STRAP_SPI_PASS_THROUGH) | \ + SCU_AST2400_HW_STRAP_SET_CPU_AHB_RATIO(AST2400_CPU_AHB_RATIO_2_1) | \ + SCU_HW_STRAP_SPI_WIDTH | \ + SCU_HW_STRAP_VGA_SIZE_SET(VGA_8M_DRAM) | \ + SCU_AST2400_HW_STRAP_BOOT_MODE(AST2400_SPI_BOOT)) + /* AST2600 evb hardware value */ #define AST2600_EVB_HW_STRAP1 0x000000C0 #define AST2600_EVB_HW_STRAP2 0x00000003 @@ -146,6 +170,14 @@ struct AspeedMachineState { #define TACOMA_BMC_HW_STRAP1 0x00000000 #define TACOMA_BMC_HW_STRAP2 0x00000040 +/* Rainier hardware value: (QEMU prototype) */ +#define RAINIER_BMC_HW_STRAP1 0x00000000 +#define RAINIER_BMC_HW_STRAP2 0x00000000 + +/* Fuji hardware value */ +#define FUJI_BMC_HW_STRAP1 0x00000000 +#define FUJI_BMC_HW_STRAP2 0x00000000 + /* * The max ram region is for firmwares that scan the address space * with load/store to guess how much RAM the SoC has. @@ -257,18 +289,17 @@ static void aspeed_board_init_flashes(AspeedSMCState *s, int i ; for (i = 0; i < s->num_cs; ++i) { - AspeedSMCFlash *fl = &s->flashes[i]; DriveInfo *dinfo = drive_get_next(IF_MTD); qemu_irq cs_line; + DeviceState *dev; - fl->flash = qdev_new(flashtype); + dev = qdev_new(flashtype); if (dinfo) { - qdev_prop_set_drive(fl->flash, "drive", - blk_by_legacy_dinfo(dinfo)); + qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(dinfo)); } - qdev_realize_and_unref(fl->flash, BUS(s->spi), &error_fatal); + qdev_realize_and_unref(dev, BUS(s->spi), &error_fatal); - cs_line = qdev_get_gpio_in_named(fl->flash, SSI_GPIO_CS, 0); + cs_line = qdev_get_gpio_in_named(dev, SSI_GPIO_CS, 0); sysbus_connect_irq(SYS_BUS_DEVICE(s), i + 1, cs_line); } } @@ -327,7 +358,7 @@ static void aspeed_machine_init(MachineState *machine) object_property_set_int(OBJECT(&bmc->soc), "num-cs", amc->num_cs, &error_abort); object_property_set_link(OBJECT(&bmc->soc), "dram", - OBJECT(&bmc->ram_container), &error_abort); + OBJECT(machine->ram), &error_abort); if (machine->kernel_filename) { /* * When booting with a -kernel command line there is no u-boot @@ -337,6 +368,8 @@ static void aspeed_machine_init(MachineState *machine) object_property_set_int(OBJECT(&bmc->soc), "hw-prot-key", ASPEED_SCU_PROT_KEY, &error_abort); } + qdev_prop_set_uint32(DEVICE(&bmc->soc), "uart-default", + amc->uart_default); qdev_realize(DEVICE(&bmc->soc), NULL, &error_abort); memory_region_add_subregion(get_system_memory(), @@ -358,6 +391,7 @@ static void aspeed_machine_init(MachineState *machine) if (drive0) { AspeedSMCFlash *fl = &bmc->soc.fmc.flashes[0]; MemoryRegion *boot_rom = g_new(MemoryRegion, 1); + uint64_t size = memory_region_size(&fl->mmio); /* * create a ROM region using the default mapping window size of @@ -367,15 +401,15 @@ static void aspeed_machine_init(MachineState *machine) */ if (ASPEED_MACHINE(machine)->mmio_exec) { 
memory_region_init_alias(boot_rom, NULL, "aspeed.boot_rom", - &fl->mmio, 0, fl->size); + &fl->mmio, 0, size); memory_region_add_subregion(get_system_memory(), FIRMWARE_ADDR, boot_rom); } else { memory_region_init_rom(boot_rom, NULL, "aspeed.boot_rom", - fl->size, &error_abort); + size, &error_abort); memory_region_add_subregion(get_system_memory(), FIRMWARE_ADDR, boot_rom); - write_boot_rom(drive0, FIRMWARE_ADDR, fl->size, &error_abort); + write_boot_rom(drive0, FIRMWARE_ADDR, size, &error_abort); } } @@ -411,6 +445,15 @@ static void aspeed_machine_init(MachineState *machine) arm_load_kernel(ARM_CPU(first_cpu), machine, &aspeed_board_binfo); } +static void at24c_eeprom_init(I2CBus *bus, uint8_t addr, uint32_t rsize) +{ + I2CSlave *i2c_dev = i2c_slave_new("at24c-eeprom", addr); + DeviceState *dev = DEVICE(i2c_dev); + + qdev_prop_set_uint32(dev, "rom-size", rsize); + i2c_slave_realize_and_unref(i2c_dev, bus, &error_abort); +} + static void palmetto_bmc_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = &bmc->soc; @@ -433,6 +476,38 @@ static void palmetto_bmc_i2c_init(AspeedMachineState *bmc) object_property_set_int(OBJECT(dev), "temperature3", 110000, &error_abort); } +static void quanta_q71l_bmc_i2c_init(AspeedMachineState *bmc) +{ + AspeedSoCState *soc = &bmc->soc; + + /* + * The quanta-q71l platform expects tmp75s which are compatible with + * tmp105s. + */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 1), "tmp105", 0x4c); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 1), "tmp105", 0x4e); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 1), "tmp105", 0x4f); + + /* TODO: i2c-1: Add baseboard FRU eeprom@54 24c64 */ + /* TODO: i2c-1: Add Frontpanel FRU eeprom@57 24c64 */ + /* TODO: Add Memory Riser i2c mux and eeproms. 
*/ + + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 2), "pca9546", 0x74); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 2), "pca9548", 0x77); + + /* TODO: i2c-3: Add BIOS FRU eeprom@56 24c64 */ + + /* i2c-7 */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 7), "pca9546", 0x70); + /* - i2c@0: pmbus@59 */ + /* - i2c@1: pmbus@58 */ + /* - i2c@2: pmbus@58 */ + /* - i2c@3: pmbus@59 */ + + /* TODO: i2c-7: Add PDB FRU eeprom@52 */ + /* TODO: i2c-8: Add BMC FRU eeprom@50 */ +} + static void ast2500_evb_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = &bmc->soc; @@ -557,7 +632,6 @@ static void witherspoon_bmc_i2c_init(AspeedMachineState *bmc) /* Bus 3: TODO bmp280@77 */ /* Bus 3: TODO max31785@52 */ - /* Bus 3: TODO dps310@76 */ dev = DEVICE(i2c_slave_new(TYPE_PCA9552, 0x60)); qdev_prop_set_string(dev, "description", "pca1"); i2c_slave_realize_and_unref(I2C_SLAVE(dev), @@ -572,6 +646,7 @@ static void witherspoon_bmc_i2c_init(AspeedMachineState *bmc) qdev_connect_gpio_out(dev, pca1_leds[i].gpio_id, qdev_get_gpio_in(DEVICE(led), 0)); } + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 3), "dps310", 0x76); i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 4), "tmp423", 0x4c); i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 5), "tmp423", 0x4c); @@ -629,6 +704,215 @@ static void g220a_bmc_i2c_init(AspeedMachineState *bmc) eeprom_buf); } +static void aspeed_eeprom_init(I2CBus *bus, uint8_t addr, uint32_t rsize) +{ + I2CSlave *i2c_dev = i2c_slave_new("at24c-eeprom", addr); + DeviceState *dev = DEVICE(i2c_dev); + + qdev_prop_set_uint32(dev, "rom-size", rsize); + i2c_slave_realize_and_unref(i2c_dev, bus, &error_abort); +} + +static void fp5280g2_bmc_i2c_init(AspeedMachineState *bmc) +{ + AspeedSoCState *soc = &bmc->soc; + I2CSlave *i2c_mux; + + /* The at24c256 */ + at24c_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 1), 0x50, 32768); + + /* The fp5280g2 expects a TMP112 but a TMP105 is compatible */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 2), TYPE_TMP105, + 0x48); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 2), TYPE_TMP105, + 0x49); + + i2c_mux = i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 2), + "pca9546", 0x70); + /* It expects a TMP112 but a TMP105 is compatible */ + i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 0), TYPE_TMP105, + 0x4a); + + /* It expects a ds3232 but a ds1338 is good enough */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 4), "ds1338", 0x68); + + /* It expects a pca9555 but a pca9552 is compatible */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 8), TYPE_PCA9552, + 0x20); +} + +static void rainier_bmc_i2c_init(AspeedMachineState *bmc) +{ + AspeedSoCState *soc = &bmc->soc; + I2CSlave *i2c_mux; + + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 0), 0x51, 32 * KiB); + + /* The rainier expects a TMP275 but a TMP105 is compatible */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 4), TYPE_TMP105, + 0x48); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 4), TYPE_TMP105, + 0x49); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 4), TYPE_TMP105, + 0x4a); + i2c_mux = i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 4), + "pca9546", 0x70); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 0), 0x50, 64 * KiB); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x51, 64 * KiB); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 2), 0x52, 64 * KiB); + + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 5), TYPE_TMP105, + 0x48); + 
i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 5), TYPE_TMP105, + 0x49); + i2c_mux = i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 5), + "pca9546", 0x70); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 0), 0x50, 64 * KiB); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x51, 64 * KiB); + + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 6), TYPE_TMP105, + 0x48); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 6), TYPE_TMP105, + 0x4a); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 6), TYPE_TMP105, + 0x4b); + i2c_mux = i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 6), + "pca9546", 0x70); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 0), 0x50, 64 * KiB); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x51, 64 * KiB); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 2), 0x50, 64 * KiB); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 3), 0x51, 64 * KiB); + + /* Bus 7: TODO max31785@52 */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 7), "pca9552", 0x61); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 7), "dps310", 0x76); + /* Bus 7: TODO si7021-a20@20 */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 7), TYPE_TMP105, + 0x48); + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 7), 0x50, 64 * KiB); + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 7), 0x51, 64 * KiB); + + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 8), TYPE_TMP105, + 0x48); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 8), TYPE_TMP105, + 0x4a); + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 8), 0x50, 64 * KiB); + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 8), 0x51, 64 * KiB); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 8), "pca9552", 0x61); + /* Bus 8: ucd90320@11 */ + /* Bus 8: ucd90320@b */ + /* Bus 8: ucd90320@c */ + + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 9), "tmp423", 0x4c); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 9), "tmp423", 0x4d); + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 9), 0x50, 128 * KiB); + + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 10), "tmp423", 0x4c); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 10), "tmp423", 0x4d); + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 10), 0x50, 128 * KiB); + + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 11), TYPE_TMP105, + 0x48); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 11), TYPE_TMP105, + 0x49); + i2c_mux = i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 11), + "pca9546", 0x70); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 0), 0x50, 64 * KiB); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x51, 64 * KiB); + + + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 13), 0x50, 64 * KiB); + + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 14), 0x50, 64 * KiB); + + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 15), 0x50, 64 * KiB); +} + +static void get_pca9548_channels(I2CBus *bus, uint8_t mux_addr, + I2CBus **channels) +{ + I2CSlave *mux = i2c_slave_create_simple(bus, "pca9548", mux_addr); + for (int i = 0; i < 8; i++) { + channels[i] = pca954x_i2c_get_bus(mux, i); + } +} + +#define TYPE_LM75 TYPE_TMP105 +#define TYPE_TMP75 TYPE_TMP105 +#define TYPE_TMP422 "tmp422" + +static void fuji_bmc_i2c_init(AspeedMachineState *bmc) +{ + AspeedSoCState *soc = &bmc->soc; + I2CBus *i2c[144] = {}; + + for (int i = 0; i < 16; i++) { + i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i); + } + I2CBus *i2c180 = i2c[2]; + I2CBus *i2c480 = i2c[8]; + I2CBus 
*i2c600 = i2c[11]; + + get_pca9548_channels(i2c180, 0x70, &i2c[16]); + get_pca9548_channels(i2c480, 0x70, &i2c[24]); + /* NOTE: The device tree skips [32, 40) in the alias numbering */ + get_pca9548_channels(i2c600, 0x77, &i2c[40]); + get_pca9548_channels(i2c[24], 0x71, &i2c[48]); + get_pca9548_channels(i2c[25], 0x72, &i2c[56]); + get_pca9548_channels(i2c[26], 0x76, &i2c[64]); + get_pca9548_channels(i2c[27], 0x76, &i2c[72]); + for (int i = 0; i < 8; i++) { + get_pca9548_channels(i2c[40 + i], 0x76, &i2c[80 + i * 8]); + } + + i2c_slave_create_simple(i2c[17], TYPE_LM75, 0x4c); + i2c_slave_create_simple(i2c[17], TYPE_LM75, 0x4d); + + aspeed_eeprom_init(i2c[19], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[20], 0x50, 2 * KiB); + aspeed_eeprom_init(i2c[22], 0x52, 2 * KiB); + + i2c_slave_create_simple(i2c[3], TYPE_LM75, 0x48); + i2c_slave_create_simple(i2c[3], TYPE_LM75, 0x49); + i2c_slave_create_simple(i2c[3], TYPE_LM75, 0x4a); + i2c_slave_create_simple(i2c[3], TYPE_TMP422, 0x4c); + + aspeed_eeprom_init(i2c[8], 0x51, 64 * KiB); + i2c_slave_create_simple(i2c[8], TYPE_LM75, 0x4a); + + i2c_slave_create_simple(i2c[50], TYPE_LM75, 0x4c); + aspeed_eeprom_init(i2c[50], 0x52, 64 * KiB); + i2c_slave_create_simple(i2c[51], TYPE_TMP75, 0x48); + i2c_slave_create_simple(i2c[52], TYPE_TMP75, 0x49); + + i2c_slave_create_simple(i2c[59], TYPE_TMP75, 0x48); + i2c_slave_create_simple(i2c[60], TYPE_TMP75, 0x49); + + aspeed_eeprom_init(i2c[65], 0x53, 64 * KiB); + i2c_slave_create_simple(i2c[66], TYPE_TMP75, 0x49); + i2c_slave_create_simple(i2c[66], TYPE_TMP75, 0x48); + aspeed_eeprom_init(i2c[68], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[69], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[70], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[71], 0x52, 64 * KiB); + + aspeed_eeprom_init(i2c[73], 0x53, 64 * KiB); + i2c_slave_create_simple(i2c[74], TYPE_TMP75, 0x49); + i2c_slave_create_simple(i2c[74], TYPE_TMP75, 0x48); + aspeed_eeprom_init(i2c[76], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[77], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[78], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[79], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[28], 0x50, 2 * KiB); + + for (int i = 0; i < 8; i++) { + aspeed_eeprom_init(i2c[81 + i * 8], 0x56, 64 * KiB); + i2c_slave_create_simple(i2c[82 + i * 8], TYPE_TMP75, 0x48); + i2c_slave_create_simple(i2c[83 + i * 8], TYPE_TMP75, 0x4b); + i2c_slave_create_simple(i2c[84 + i * 8], TYPE_TMP75, 0x4a); + } +} + static bool aspeed_get_mmio_exec(Object *obj, Error **errp) { return ASPEED_MACHINE(obj)->mmio_exec; @@ -707,6 +991,7 @@ static void aspeed_machine_class_init(ObjectClass *oc, void *data) mc->no_parallel = 1; mc->default_ram_id = "ram"; amc->macs_mask = ASPEED_MAC0_ON; + amc->uart_default = ASPEED_DEV_UART5; aspeed_machine_class_props_init(oc); } @@ -728,6 +1013,23 @@ static void aspeed_machine_palmetto_class_init(ObjectClass *oc, void *data) aspeed_soc_num_cpus(amc->soc_name); }; +static void aspeed_machine_quanta_q71l_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); + + mc->desc = "Quanta-Q71l BMC (ARM926EJ-S)"; + amc->soc_name = "ast2400-a1"; + amc->hw_strap1 = QUANTA_Q71L_BMC_HW_STRAP1; + amc->fmc_model = "n25q256a"; + amc->spi_model = "mx25l25635e"; + amc->num_cs = 1; + amc->i2c_init = quanta_q71l_bmc_i2c_init; + mc->default_ram_size = 128 * MiB; + mc->default_cpus = mc->min_cpus = mc->max_cpus = + aspeed_soc_num_cpus(amc->soc_name); +} + static void aspeed_machine_supermicrox11_bmc_class_init(ObjectClass *oc, void 
*data) { @@ -811,6 +1113,9 @@ static void aspeed_machine_swift_class_init(ObjectClass *oc, void *data) mc->default_ram_size = 512 * MiB; mc->default_cpus = mc->min_cpus = mc->max_cpus = aspeed_soc_num_cpus(amc->soc_name); + + mc->deprecation_reason = "redundant system. Please use a similar " + "OpenPOWER BMC, Witherspoon or Romulus."; }; static void aspeed_machine_witherspoon_class_init(ObjectClass *oc, void *data) @@ -835,14 +1140,15 @@ static void aspeed_machine_ast2600_evb_class_init(ObjectClass *oc, void *data) MachineClass *mc = MACHINE_CLASS(oc); AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); - mc->desc = "Aspeed AST2600 EVB (Cortex A7)"; - amc->soc_name = "ast2600-a1"; + mc->desc = "Aspeed AST2600 EVB (Cortex-A7)"; + amc->soc_name = "ast2600-a3"; amc->hw_strap1 = AST2600_EVB_HW_STRAP1; amc->hw_strap2 = AST2600_EVB_HW_STRAP2; amc->fmc_model = "w25q512jv"; amc->spi_model = "mx66u51235f"; amc->num_cs = 1; - amc->macs_mask = ASPEED_MAC1_ON | ASPEED_MAC2_ON | ASPEED_MAC3_ON; + amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON | ASPEED_MAC2_ON | + ASPEED_MAC3_ON; amc->i2c_init = ast2600_evb_i2c_init; mc->default_ram_size = 1 * GiB; mc->default_cpus = mc->min_cpus = mc->max_cpus = @@ -854,8 +1160,8 @@ static void aspeed_machine_tacoma_class_init(ObjectClass *oc, void *data) MachineClass *mc = MACHINE_CLASS(oc); AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); - mc->desc = "OpenPOWER Tacoma BMC (Cortex A7)"; - amc->soc_name = "ast2600-a1"; + mc->desc = "OpenPOWER Tacoma BMC (Cortex-A7)"; + amc->soc_name = "ast2600-a3"; amc->hw_strap1 = TACOMA_BMC_HW_STRAP1; amc->hw_strap2 = TACOMA_BMC_HW_STRAP2; amc->fmc_model = "mx66l1g45g"; @@ -879,13 +1185,77 @@ static void aspeed_machine_g220a_class_init(ObjectClass *oc, void *data) amc->fmc_model = "n25q512a"; amc->spi_model = "mx25l25635e"; amc->num_cs = 2; - amc->macs_mask = ASPEED_MAC1_ON | ASPEED_MAC2_ON; + amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON; amc->i2c_init = g220a_bmc_i2c_init; mc->default_ram_size = 1024 * MiB; mc->default_cpus = mc->min_cpus = mc->max_cpus = aspeed_soc_num_cpus(amc->soc_name); }; +static void aspeed_machine_fp5280g2_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); + + mc->desc = "Inspur FP5280G2 BMC (ARM1176)"; + amc->soc_name = "ast2500-a1"; + amc->hw_strap1 = FP5280G2_BMC_HW_STRAP1; + amc->fmc_model = "n25q512a"; + amc->spi_model = "mx25l25635e"; + amc->num_cs = 2; + amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON; + amc->i2c_init = fp5280g2_bmc_i2c_init; + mc->default_ram_size = 512 * MiB; + mc->default_cpus = mc->min_cpus = mc->max_cpus = + aspeed_soc_num_cpus(amc->soc_name); +}; + +static void aspeed_machine_rainier_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); + + mc->desc = "IBM Rainier BMC (Cortex-A7)"; + amc->soc_name = "ast2600-a3"; + amc->hw_strap1 = RAINIER_BMC_HW_STRAP1; + amc->hw_strap2 = RAINIER_BMC_HW_STRAP2; + amc->fmc_model = "mx66l1g45g"; + amc->spi_model = "mx66l1g45g"; + amc->num_cs = 2; + amc->macs_mask = ASPEED_MAC2_ON | ASPEED_MAC3_ON; + amc->i2c_init = rainier_bmc_i2c_init; + mc->default_ram_size = 1 * GiB; + mc->default_cpus = mc->min_cpus = mc->max_cpus = + aspeed_soc_num_cpus(amc->soc_name); +}; + +/* On 32-bit hosts, lower RAM to 1G because of the 2047 MB limit */ +#if HOST_LONG_BITS == 32 +#define FUJI_BMC_RAM_SIZE (1 * GiB) +#else +#define FUJI_BMC_RAM_SIZE (2 * GiB) +#endif + +static void 
aspeed_machine_fuji_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); + + mc->desc = "Facebook Fuji BMC (Cortex-A7)"; + amc->soc_name = "ast2600-a3"; + amc->hw_strap1 = FUJI_BMC_HW_STRAP1; + amc->hw_strap2 = FUJI_BMC_HW_STRAP2; + amc->fmc_model = "mx66l1g45g"; + amc->spi_model = "mx66l1g45g"; + amc->num_cs = 2; + amc->macs_mask = ASPEED_MAC3_ON; + amc->i2c_init = fuji_bmc_i2c_init; + amc->uart_default = ASPEED_DEV_UART1; + mc->default_ram_size = FUJI_BMC_RAM_SIZE; + mc->default_cpus = mc->min_cpus = mc->max_cpus = + aspeed_soc_num_cpus(amc->soc_name); +}; + static const TypeInfo aspeed_machine_types[] = { { .name = MACHINE_TYPE_NAME("palmetto-bmc"), @@ -927,6 +1297,22 @@ static const TypeInfo aspeed_machine_types[] = { .name = MACHINE_TYPE_NAME("g220a-bmc"), .parent = TYPE_ASPEED_MACHINE, .class_init = aspeed_machine_g220a_class_init, + }, { + .name = MACHINE_TYPE_NAME("fp5280g2-bmc"), + .parent = TYPE_ASPEED_MACHINE, + .class_init = aspeed_machine_fp5280g2_class_init, + }, { + .name = MACHINE_TYPE_NAME("quanta-q71l-bmc"), + .parent = TYPE_ASPEED_MACHINE, + .class_init = aspeed_machine_quanta_q71l_class_init, + }, { + .name = MACHINE_TYPE_NAME("rainier-bmc"), + .parent = TYPE_ASPEED_MACHINE, + .class_init = aspeed_machine_rainier_class_init, + }, { + .name = MACHINE_TYPE_NAME("fuji-bmc"), + .parent = TYPE_ASPEED_MACHINE, + .class_init = aspeed_machine_fuji_class_init, }, { .name = TYPE_ASPEED_MACHINE, .parent = TYPE_MACHINE, diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c index bc87e754a3c..0384357a951 100644 --- a/hw/arm/aspeed_ast2600.c +++ b/hw/arm/aspeed_ast2600.c @@ -9,12 +9,9 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" -#include "exec/address-spaces.h" #include "hw/misc/unimp.h" #include "hw/arm/aspeed_soc.h" #include "hw/char/serial.h" -#include "qemu/log.h" #include "qemu/module.h" #include "qemu/error-report.h" #include "hw/i2c/aspeed_i2c.h" @@ -42,6 +39,7 @@ static const hwaddr aspeed_soc_ast2600_memmap[] = { [ASPEED_DEV_ETH2] = 0x1E680000, [ASPEED_DEV_ETH4] = 0x1E690000, [ASPEED_DEV_VIC] = 0x1E6C0000, + [ASPEED_DEV_HACE] = 0x1E6D0000, [ASPEED_DEV_SDMC] = 0x1E6E0000, [ASPEED_DEV_SCU] = 0x1E6E2000, [ASPEED_DEV_XDMA] = 0x1E6E7000, @@ -102,6 +100,7 @@ static const int aspeed_soc_ast2600_irqmap[] = { [ASPEED_DEV_I2C] = 110, /* 110 -> 125 */ [ASPEED_DEV_ETH1] = 2, [ASPEED_DEV_ETH2] = 3, + [ASPEED_DEV_HACE] = 4, [ASPEED_DEV_ETH3] = 32, [ASPEED_DEV_ETH4] = 33, [ASPEED_DEV_KCS] = 138, /* 138 -> 142 */ @@ -149,6 +148,9 @@ static void aspeed_soc_ast2600_init(Object *obj) snprintf(typename, sizeof(typename), "aspeed.timer-%s", socname); object_initialize_child(obj, "timerctrl", &s->timerctrl, typename); + snprintf(typename, sizeof(typename), "aspeed.adc-%s", socname); + object_initialize_child(obj, "adc", &s->adc, typename); + snprintf(typename, sizeof(typename), "aspeed.i2c-%s", socname); object_initialize_child(obj, "i2c", &s->i2c, typename); @@ -185,7 +187,8 @@ static void aspeed_soc_ast2600_init(Object *obj) object_initialize_child(obj, "mii[*]", &s->mii[i], TYPE_ASPEED_MII); } - object_initialize_child(obj, "xdma", &s->xdma, TYPE_ASPEED_XDMA); + snprintf(typename, sizeof(typename), TYPE_ASPEED_XDMA "-%s", socname); + object_initialize_child(obj, "xdma", &s->xdma, typename); snprintf(typename, sizeof(typename), "aspeed.gpio-%s", socname); object_initialize_child(obj, "gpio", &s->gpio, typename); @@ -213,6 +216,9 @@ static void aspeed_soc_ast2600_init(Object 
*obj) TYPE_SYSBUS_SDHCI); object_initialize_child(obj, "lpc", &s->lpc, TYPE_ASPEED_LPC); + + snprintf(typename, sizeof(typename), "aspeed.hace-%s", socname); + object_initialize_child(obj, "hace", &s->hace, typename); } /* @@ -319,10 +325,18 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->timerctrl), i, irq); } - /* UART - attach an 8250 to the IO space as our UART5 */ - serial_mm_init(get_system_memory(), sc->memmap[ASPEED_DEV_UART5], 2, - aspeed_soc_get_irq(s, ASPEED_DEV_UART5), - 38400, serial_hd(0), DEVICE_LITTLE_ENDIAN); + /* ADC */ + if (!sysbus_realize(SYS_BUS_DEVICE(&s->adc), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->adc), 0, sc->memmap[ASPEED_DEV_ADC]); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->adc), 0, + aspeed_soc_get_irq(s, ASPEED_DEV_ADC)); + + /* UART - attach an 8250 to the IO space as our UART */ + serial_mm_init(get_system_memory(), sc->memmap[s->uart_default], 2, + aspeed_soc_get_irq(s, s->uart_default), 38400, + serial_hd(0), DEVICE_LITTLE_ENDIAN); /* I2C */ object_property_set_link(OBJECT(&s->i2c), "dram", OBJECT(s->dram_mr), @@ -334,26 +348,19 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp) for (i = 0; i < ASPEED_I2C_GET_CLASS(&s->i2c)->num_busses; i++) { qemu_irq irq = qdev_get_gpio_in(DEVICE(&s->a7mpcore), sc->irqmap[ASPEED_DEV_I2C] + i); - /* - * The AST2600 SoC has one IRQ per I2C bus. Skip the common - * IRQ (AST2400 and AST2500) and connect all bussses. - */ - sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c), i + 1, irq); + /* The AST2600 I2C controller has one IRQ per bus. */ + sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c.busses[i]), 0, irq); } /* FMC, The number of CS is set at the board level */ object_property_set_link(OBJECT(&s->fmc), "dram", OBJECT(s->dram_mr), &error_abort); - if (!object_property_set_int(OBJECT(&s->fmc), "sdram-base", - sc->memmap[ASPEED_DEV_SDRAM], errp)) { - return; - } if (!sysbus_realize(SYS_BUS_DEVICE(&s->fmc), errp)) { return; } sysbus_mmio_map(SYS_BUS_DEVICE(&s->fmc), 0, sc->memmap[ASPEED_DEV_FMC]); sysbus_mmio_map(SYS_BUS_DEVICE(&s->fmc), 1, - s->fmc.ctrl->flash_window_base); + ASPEED_SMC_GET_CLASS(&s->fmc)->flash_window_base); sysbus_connect_irq(SYS_BUS_DEVICE(&s->fmc), 0, aspeed_soc_get_irq(s, ASPEED_DEV_FMC)); @@ -368,7 +375,7 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp) sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi[i]), 0, sc->memmap[ASPEED_DEV_SPI1 + i]); sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi[i]), 1, - s->spi[i].ctrl->flash_window_base); + ASPEED_SMC_GET_CLASS(&s->spi[i])->flash_window_base); } /* EHCI */ @@ -498,6 +505,16 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->lpc), 1 + aspeed_lpc_kcs_4, qdev_get_gpio_in(DEVICE(&s->a7mpcore), sc->irqmap[ASPEED_DEV_KCS] + aspeed_lpc_kcs_4)); + + /* HACE */ + object_property_set_link(OBJECT(&s->hace), "dram", OBJECT(s->dram_mr), + &error_abort); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->hace), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->hace), 0, sc->memmap[ASPEED_DEV_HACE]); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->hace), 0, + aspeed_soc_get_irq(s, ASPEED_DEV_HACE)); } static void aspeed_soc_ast2600_class_init(ObjectClass *oc, void *data) @@ -507,9 +524,9 @@ static void aspeed_soc_ast2600_class_init(ObjectClass *oc, void *data) dc->realize = aspeed_soc_ast2600_realize; - sc->name = "ast2600-a1"; + sc->name = "ast2600-a3"; sc->cpu_type = ARM_CPU_TYPE_NAME("cortex-a7"); - sc->silicon_rev = 
AST2600_A1_SILICON_REV; + sc->silicon_rev = AST2600_A3_SILICON_REV; sc->sram_size = 0x16400; sc->spis_num = 2; sc->ehcis_num = 2; @@ -521,7 +538,7 @@ static void aspeed_soc_ast2600_class_init(ObjectClass *oc, void *data) } static const TypeInfo aspeed_soc_ast2600_type_info = { - .name = "ast2600-a1", + .name = "ast2600-a3", .parent = TYPE_ASPEED_SOC, .instance_size = sizeof(AspeedSoCState), .instance_init = aspeed_soc_ast2600_init, diff --git a/hw/arm/aspeed_soc.c b/hw/arm/aspeed_soc.c index 057d053c847..7d53cf2f513 100644 --- a/hw/arm/aspeed_soc.c +++ b/hw/arm/aspeed_soc.c @@ -12,12 +12,9 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" -#include "exec/address-spaces.h" #include "hw/misc/unimp.h" #include "hw/arm/aspeed_soc.h" #include "hw/char/serial.h" -#include "qemu/log.h" #include "qemu/module.h" #include "qemu/error-report.h" #include "hw/i2c/aspeed_i2c.h" @@ -34,6 +31,7 @@ static const hwaddr aspeed_soc_ast2400_memmap[] = { [ASPEED_DEV_VIC] = 0x1E6C0000, [ASPEED_DEV_SDMC] = 0x1E6E0000, [ASPEED_DEV_SCU] = 0x1E6E2000, + [ASPEED_DEV_HACE] = 0x1E6E3000, [ASPEED_DEV_XDMA] = 0x1E6E7000, [ASPEED_DEV_VIDEO] = 0x1E700000, [ASPEED_DEV_ADC] = 0x1E6E9000, @@ -65,6 +63,7 @@ static const hwaddr aspeed_soc_ast2500_memmap[] = { [ASPEED_DEV_VIC] = 0x1E6C0000, [ASPEED_DEV_SDMC] = 0x1E6E0000, [ASPEED_DEV_SCU] = 0x1E6E2000, + [ASPEED_DEV_HACE] = 0x1E6E3000, [ASPEED_DEV_XDMA] = 0x1E6E7000, [ASPEED_DEV_ADC] = 0x1E6E9000, [ASPEED_DEV_VIDEO] = 0x1E700000, @@ -117,6 +116,7 @@ static const int aspeed_soc_ast2400_irqmap[] = { [ASPEED_DEV_ETH2] = 3, [ASPEED_DEV_XDMA] = 6, [ASPEED_DEV_SDHCI] = 26, + [ASPEED_DEV_HACE] = 4, }; #define aspeed_soc_ast2500_irqmap aspeed_soc_ast2400_irqmap @@ -162,6 +162,9 @@ static void aspeed_soc_init(Object *obj) snprintf(typename, sizeof(typename), "aspeed.timer-%s", socname); object_initialize_child(obj, "timerctrl", &s->timerctrl, typename); + snprintf(typename, sizeof(typename), "aspeed.adc-%s", socname); + object_initialize_child(obj, "adc", &s->adc, typename); + snprintf(typename, sizeof(typename), "aspeed.i2c-%s", socname); object_initialize_child(obj, "i2c", &s->i2c, typename); @@ -196,7 +199,8 @@ static void aspeed_soc_init(Object *obj) TYPE_FTGMAC100); } - object_initialize_child(obj, "xdma", &s->xdma, TYPE_ASPEED_XDMA); + snprintf(typename, sizeof(typename), TYPE_ASPEED_XDMA "-%s", socname); + object_initialize_child(obj, "xdma", &s->xdma, typename); snprintf(typename, sizeof(typename), "aspeed.gpio-%s", socname); object_initialize_child(obj, "gpio", &s->gpio, typename); @@ -212,6 +216,9 @@ static void aspeed_soc_init(Object *obj) } object_initialize_child(obj, "lpc", &s->lpc, TYPE_ASPEED_LPC); + + snprintf(typename, sizeof(typename), "aspeed.hace-%s", socname); + object_initialize_child(obj, "hace", &s->hace, typename); } static void aspeed_soc_realize(DeviceState *dev, Error **errp) @@ -283,9 +290,17 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->timerctrl), i, irq); } - /* UART - attach an 8250 to the IO space as our UART5 */ - serial_mm_init(get_system_memory(), sc->memmap[ASPEED_DEV_UART5], 2, - aspeed_soc_get_irq(s, ASPEED_DEV_UART5), 38400, + /* ADC */ + if (!sysbus_realize(SYS_BUS_DEVICE(&s->adc), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->adc), 0, sc->memmap[ASPEED_DEV_ADC]); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->adc), 0, + aspeed_soc_get_irq(s, ASPEED_DEV_ADC)); + + /* UART - attach an 8250 to the IO space as our UART */ + serial_mm_init(get_system_memory(), 
sc->memmap[s->uart_default], 2, + aspeed_soc_get_irq(s, s->uart_default), 38400, serial_hd(0), DEVICE_LITTLE_ENDIAN); /* I2C */ @@ -301,16 +316,12 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp) /* FMC, The number of CS is set at the board level */ object_property_set_link(OBJECT(&s->fmc), "dram", OBJECT(s->dram_mr), &error_abort); - if (!object_property_set_int(OBJECT(&s->fmc), "sdram-base", - sc->memmap[ASPEED_DEV_SDRAM], errp)) { - return; - } if (!sysbus_realize(SYS_BUS_DEVICE(&s->fmc), errp)) { return; } sysbus_mmio_map(SYS_BUS_DEVICE(&s->fmc), 0, sc->memmap[ASPEED_DEV_FMC]); sysbus_mmio_map(SYS_BUS_DEVICE(&s->fmc), 1, - s->fmc.ctrl->flash_window_base); + ASPEED_SMC_GET_CLASS(&s->fmc)->flash_window_base); sysbus_connect_irq(SYS_BUS_DEVICE(&s->fmc), 0, aspeed_soc_get_irq(s, ASPEED_DEV_FMC)); @@ -323,7 +334,7 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp) sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi[i]), 0, sc->memmap[ASPEED_DEV_SPI1 + i]); sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi[i]), 1, - s->spi[i].ctrl->flash_window_base); + ASPEED_SMC_GET_CLASS(&s->spi[i])->flash_window_base); } /* EHCI */ @@ -425,10 +436,22 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->lpc), 1 + aspeed_lpc_kcs_4, qdev_get_gpio_in(DEVICE(&s->lpc), aspeed_lpc_kcs_4)); + + /* HACE */ + object_property_set_link(OBJECT(&s->hace), "dram", OBJECT(s->dram_mr), + &error_abort); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->hace), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->hace), 0, sc->memmap[ASPEED_DEV_HACE]); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->hace), 0, + aspeed_soc_get_irq(s, ASPEED_DEV_HACE)); } static Property aspeed_soc_properties[] = { DEFINE_PROP_LINK("dram", AspeedSoCState, dram_mr, TYPE_MEMORY_REGION, MemoryRegion *), + DEFINE_PROP_UINT32("uart-default", AspeedSoCState, uart_default, + ASPEED_DEV_UART5), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c index dcff13433e5..48538c9360c 100644 --- a/hw/arm/bcm2835_peripherals.c +++ b/hw/arm/bcm2835_peripherals.c @@ -126,6 +126,10 @@ static void bcm2835_peripherals_init(Object *obj) object_property_add_const_link(OBJECT(&s->dwc2), "dma-mr", OBJECT(&s->gpu_bus_mr)); + + /* Power Management */ + object_initialize_child(obj, "powermgt", &s->powermgt, + TYPE_BCM2835_POWERMGT); } static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) @@ -364,9 +368,16 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, INTERRUPT_USB)); + /* Power Management */ + if (!sysbus_realize(SYS_BUS_DEVICE(&s->powermgt), errp)) { + return; + } + + memory_region_add_subregion(&s->peri_mr, PM_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->powermgt), 0)); + create_unimp(s, &s->txp, "bcm2835-txp", TXP_OFFSET, 0x1000); create_unimp(s, &s->armtmr, "bcm2835-sp804", ARMCTRL_TIMER0_1_OFFSET, 0x40); - create_unimp(s, &s->powermgt, "bcm2835-powermgt", PM_OFFSET, 0x114); create_unimp(s, &s->i2s, "bcm2835-i2s", I2S_OFFSET, 0x100); create_unimp(s, &s->smi, "bcm2835-smi", SMI_OFFSET, 0x100); create_unimp(s, &s->spi[0], "bcm2835-spi0", SPI0_OFFSET, 0x20); diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c index de7ade2878e..24354338cad 100644 --- a/hw/arm/bcm2836.c +++ b/hw/arm/bcm2836.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/module.h" -#include "cpu.h" #include "hw/arm/bcm2836.h" #include "hw/arm/raspi_platform.h" 
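The new "uart-default" SoC property defined above lets a board route the serial console to a UART other than the traditional UART5; the Fuji machine at the start of this section selects UART1 through amc->uart_default. The corresponding hook-up in hw/arm/aspeed.c is not shown in this excerpt; a minimal sketch of that board glue, assuming the usual AspeedMachineState 'bmc' with an embedded 'soc' field, could look like:

    /* Sketch only: forward the machine-class default UART to the SoC. */
    if (amc->uart_default != ASPEED_DEV_UART5) {
        qdev_prop_set_uint32(DEVICE(&bmc->soc), "uart-default",
                             amc->uart_default);
    }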
#include "hw/sysbus.h" diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 42e73248c0f..61ae93fa013 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -25,7 +25,6 @@ #include "sysemu/device_tree.h" #include "qemu/config-file.h" #include "qemu/option.h" -#include "exec/address-spaces.h" #include "qemu/units.h" /* Kernel boot protocol is specified in the kernel docs @@ -600,10 +599,23 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, } g_strfreev(node_path); + /* + * We drop all the memory nodes which correspond to empty NUMA nodes + * from the device tree, because the Linux NUMA binding document + * states they should not be generated. Linux will get the NUMA node + * IDs of the empty NUMA nodes from the distance map if they are needed. + * This means QEMU users may be obliged to provide command lines which + * configure distance maps when the empty NUMA node IDs are needed and + * Linux's default distance map isn't sufficient. + */ if (ms->numa_state != NULL && ms->numa_state->num_nodes > 0) { mem_base = binfo->loader_start; for (i = 0; i < ms->numa_state->num_nodes; i++) { mem_len = ms->numa_state->nodes[i].node_mem; + if (!mem_len) { + continue; + } + rc = fdt_add_memory_node(fdt, acells, mem_base, scells, mem_len, i); if (rc < 0) { @@ -1248,6 +1260,15 @@ static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info) bool try_decompressing_kernel; fw_cfg = fw_cfg_find(); + + if (!fw_cfg) { + error_report("This machine type does not support loading both " + "a guest firmware/BIOS image and a guest kernel at " + "the same time. You should change your QEMU command " + "line to specify one or the other, but not both."); + exit(1); + } + try_decompressing_kernel = arm_feature(&cpu->env, ARM_FEATURE_AARCH64); diff --git a/hw/arm/cubieboard.c b/hw/arm/cubieboard.c index 9d0d728180b..294ba5de6ec 100644 --- a/hw/arm/cubieboard.c +++ b/hw/arm/cubieboard.c @@ -16,11 +16,7 @@ */ #include "qemu/osdep.h" -#include "exec/address-spaces.h" #include "qapi/error.h" -#include "cpu.h" -#include "sysemu/sysemu.h" -#include "hw/sysbus.h" #include "hw/boards.h" #include "hw/qdev-properties.h" #include "hw/arm/allwinner-a10.h" diff --git a/hw/arm/digic_boards.c b/hw/arm/digic_boards.c index 6cdc1d83fca..b771a3d8b74 100644 --- a/hw/arm/digic_boards.c +++ b/hw/arm/digic_boards.c @@ -27,14 +27,11 @@ #include "qapi/error.h" #include "qemu-common.h" #include "qemu/datadir.h" -#include "cpu.h" #include "hw/boards.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "hw/arm/digic.h" #include "hw/block/flash.h" #include "hw/loader.h" -#include "sysemu/sysemu.h" #include "sysemu/qtest.h" #include "qemu/units.h" #include "qemu/cutils.h" diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c index ced2769b102..0299e81f853 100644 --- a/hw/arm/exynos4210.c +++ b/hw/arm/exynos4210.c @@ -23,7 +23,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qemu/log.h" #include "cpu.h" #include "hw/cpu/a9mpcore.h" #include "hw/irq.h" @@ -174,6 +173,9 @@ static DeviceState *pl330_create(uint32_t base, qemu_or_irq *orgate, int i; dev = qdev_new("pl330"); + object_property_set_link(OBJECT(dev), "memory", + OBJECT(get_system_memory()), + &error_fatal); qdev_prop_set_uint8(dev, "num_events", nevents); qdev_prop_set_uint8(dev, "num_chnls", 8); qdev_prop_set_uint8(dev, "num_periph_req", nreq); diff --git a/hw/arm/exynos4_boards.c b/hw/arm/exynos4_boards.c index 56b729141b5..35dd9875da1 100644 --- a/hw/arm/exynos4_boards.c +++ b/hw/arm/exynos4_boards.c @@ -25,8 +25,6 @@ #include 
"qemu/units.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "cpu.h" -#include "sysemu/sysemu.h" #include "hw/sysbus.h" #include "net/net.h" #include "hw/arm/boot.h" diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c index 08a98f828fc..24c43745903 100644 --- a/hw/arm/fsl-imx25.c +++ b/hw/arm/fsl-imx25.c @@ -24,10 +24,8 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/arm/fsl-imx25.h" #include "sysemu/sysemu.h" -#include "exec/address-spaces.h" #include "hw/qdev-properties.h" #include "chardev/char.h" diff --git a/hw/arm/fsl-imx31.c b/hw/arm/fsl-imx31.c index 0983998bb4b..def27bb9136 100644 --- a/hw/arm/fsl-imx31.c +++ b/hw/arm/fsl-imx31.c @@ -21,7 +21,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/arm/fsl-imx31.h" #include "sysemu/sysemu.h" #include "exec/address-spaces.h" diff --git a/hw/arm/fsl-imx6ul.c b/hw/arm/fsl-imx6ul.c index e0128d73161..1d1a708dd97 100644 --- a/hw/arm/fsl-imx6ul.c +++ b/hw/arm/fsl-imx6ul.c @@ -534,6 +534,13 @@ static void fsl_imx6ul_realize(DeviceState *dev, Error **errp) */ create_unimplemented_device("sdma", FSL_IMX6UL_SDMA_ADDR, 0x4000); + /* + * SAI (Audio SSI (Synchronous Serial Interface)) + */ + create_unimplemented_device("sai1", FSL_IMX6UL_SAI1_ADDR, 0x4000); + create_unimplemented_device("sai2", FSL_IMX6UL_SAI2_ADDR, 0x4000); + create_unimplemented_device("sai3", FSL_IMX6UL_SAI3_ADDR, 0x4000); + /* * PWM */ @@ -542,6 +549,11 @@ static void fsl_imx6ul_realize(DeviceState *dev, Error **errp) create_unimplemented_device("pwm3", FSL_IMX6UL_PWM3_ADDR, 0x4000); create_unimplemented_device("pwm4", FSL_IMX6UL_PWM4_ADDR, 0x4000); + /* + * Audio ASRC (asynchronous sample rate converter) + */ + create_unimplemented_device("asrc", FSL_IMX6UL_ASRC_ADDR, 0x4000); + /* * CAN */ diff --git a/hw/arm/fsl-imx7.c b/hw/arm/fsl-imx7.c index 2ff2cab9246..149885f2b80 100644 --- a/hw/arm/fsl-imx7.c +++ b/hw/arm/fsl-imx7.c @@ -467,6 +467,13 @@ static void fsl_imx7_realize(DeviceState *dev, Error **errp) create_unimplemented_device("can1", FSL_IMX7_CAN1_ADDR, FSL_IMX7_CANn_SIZE); create_unimplemented_device("can2", FSL_IMX7_CAN2_ADDR, FSL_IMX7_CANn_SIZE); + /* + * SAI (Audio SSI (Synchronous Serial Interface)) + */ + create_unimplemented_device("sai1", FSL_IMX7_SAI1_ADDR, FSL_IMX7_SAIn_SIZE); + create_unimplemented_device("sai2", FSL_IMX7_SAI2_ADDR, FSL_IMX7_SAIn_SIZE); + create_unimplemented_device("sai2", FSL_IMX7_SAI3_ADDR, FSL_IMX7_SAIn_SIZE); + /* * OCOTP */ diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c index bf886268c57..c3cb315dbc6 100644 --- a/hw/arm/highbank.c +++ b/hw/arm/highbank.c @@ -29,7 +29,6 @@ #include "sysemu/runstate.h" #include "sysemu/sysemu.h" #include "hw/boards.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "hw/char/pl011.h" #include "hw/ide/ahci.h" @@ -170,7 +169,7 @@ struct HighbankRegsState { uint32_t regs[NUM_REGS]; }; -static VMStateDescription vmstate_highbank_regs = { +static const VMStateDescription vmstate_highbank_regs = { .name = "highbank-regs", .version_id = 0, .minimum_version_id = 0, diff --git a/hw/arm/imx25_pdk.c b/hw/arm/imx25_pdk.c index 1c201d0d8ed..bd16acd4d9f 100644 --- a/hw/arm/imx25_pdk.c +++ b/hw/arm/imx25_pdk.c @@ -25,12 +25,10 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/qdev-properties.h" #include "hw/arm/fsl-imx25.h" #include "hw/boards.h" #include "qemu/error-report.h" -#include "exec/address-spaces.h" #include "sysemu/qtest.h" #include "hw/i2c/i2c.h" #include 
"qemu/cutils.h" @@ -67,7 +65,6 @@ static struct arm_boot_info imx25_pdk_binfo; static void imx25_pdk_init(MachineState *machine) { - MachineClass *mc = MACHINE_GET_CLASS(machine); IMX25PDK *s = g_new0(IMX25PDK, 1); unsigned int ram_size; unsigned int alias_offset; @@ -79,8 +76,8 @@ static void imx25_pdk_init(MachineState *machine) /* We need to initialize our memory */ if (machine->ram_size > (FSL_IMX25_SDRAM0_SIZE + FSL_IMX25_SDRAM1_SIZE)) { - char *sz = size_to_str(mc->default_ram_size); - error_report("Invalid RAM size, should be %s", sz); + char *sz = size_to_str(FSL_IMX25_SDRAM0_SIZE + FSL_IMX25_SDRAM1_SIZE); + error_report("RAM size more than %s is not supported", sz); g_free(sz); exit(EXIT_FAILURE); } diff --git a/hw/arm/kzm.c b/hw/arm/kzm.c index e3f7d4ead23..39559c44c29 100644 --- a/hw/arm/kzm.c +++ b/hw/arm/kzm.c @@ -15,7 +15,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/arm/fsl-imx31.h" #include "hw/boards.h" #include "qemu/error-report.h" diff --git a/hw/arm/mcimx6ul-evk.c b/hw/arm/mcimx6ul-evk.c index ed69a7b037a..77fae874b16 100644 --- a/hw/arm/mcimx6ul-evk.c +++ b/hw/arm/mcimx6ul-evk.c @@ -15,7 +15,6 @@ #include "hw/arm/fsl-imx6ul.h" #include "hw/boards.h" #include "hw/qdev-properties.h" -#include "sysemu/sysemu.h" #include "qemu/error-report.h" #include "sysemu/qtest.h" @@ -68,7 +67,7 @@ static void mcimx6ul_evk_init(MachineState *machine) static void mcimx6ul_evk_machine_init(MachineClass *mc) { - mc->desc = "Freescale i.MX6UL Evaluation Kit (Cortex A7)"; + mc->desc = "Freescale i.MX6UL Evaluation Kit (Cortex-A7)"; mc->init = mcimx6ul_evk_init; mc->max_cpus = FSL_IMX6UL_NUM_CPUS; mc->default_ram_id = "mcimx6ul-evk.ram"; diff --git a/hw/arm/mcimx7d-sabre.c b/hw/arm/mcimx7d-sabre.c index e57d52b3441..935d4b0f1cd 100644 --- a/hw/arm/mcimx7d-sabre.c +++ b/hw/arm/mcimx7d-sabre.c @@ -17,7 +17,6 @@ #include "hw/arm/fsl-imx7.h" #include "hw/boards.h" #include "hw/qdev-properties.h" -#include "sysemu/sysemu.h" #include "qemu/error-report.h" #include "sysemu/qtest.h" @@ -68,7 +67,7 @@ static void mcimx7d_sabre_init(MachineState *machine) static void mcimx7d_sabre_machine_init(MachineClass *mc) { - mc->desc = "Freescale i.MX7 DUAL SABRE (Cortex A7)"; + mc->desc = "Freescale i.MX7 DUAL SABRE (Cortex-A7)"; mc->init = mcimx7d_sabre_init; mc->max_cpus = FSL_IMX7_NUM_CPUS; mc->default_ram_id = "mcimx7d-sabre.ram"; diff --git a/hw/arm/meson.build b/hw/arm/meson.build index 79a9acd0be8..01ecc20324f 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build @@ -24,6 +24,7 @@ arm_ss.add(when: 'CONFIG_Z2', if_true: files('z2.c')) arm_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview.c')) arm_ss.add(when: 'CONFIG_SBSA_REF', if_true: files('sbsa-ref.c')) arm_ss.add(when: 'CONFIG_STELLARIS', if_true: files('stellaris.c')) +arm_ss.add(when: 'CONFIG_STM32VLDISCOVERY', if_true: files('stm32vldiscovery.c')) arm_ss.add(when: 'CONFIG_COLLIE', if_true: files('collie.c')) arm_ss.add(when: 'CONFIG_VERSATILE', if_true: files('versatilepb.c')) arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) @@ -44,6 +45,7 @@ arm_ss.add(when: 'CONFIG_STRONGARM', if_true: files('strongarm.c')) arm_ss.add(when: 'CONFIG_ALLWINNER_A10', if_true: files('allwinner-a10.c', 'cubieboard.c')) arm_ss.add(when: 'CONFIG_ALLWINNER_H3', if_true: files('allwinner-h3.c', 'orangepi.c')) arm_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_peripherals.c', 'bcm2836.c', 'raspi.c')) +arm_ss.add(when: 'CONFIG_STM32F100_SOC', if_true: files('stm32f100_soc.c')) arm_ss.add(when: 
'CONFIG_STM32F205_SOC', if_true: files('stm32f205_soc.c')) arm_ss.add(when: 'CONFIG_STM32F405_SOC', if_true: files('stm32f405_soc.c')) arm_ss.add(when: 'CONFIG_XLNX_ZYNQMP_ARM', if_true: files('xlnx-zynqmp.c', 'xlnx-zcu102.c')) diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c index 25016e464d9..f40e854dec7 100644 --- a/hw/arm/mps2-tz.c +++ b/hw/arm/mps2-tz.c @@ -55,6 +55,7 @@ #include "hw/boards.h" #include "exec/address-spaces.h" #include "sysemu/sysemu.h" +#include "sysemu/reset.h" #include "hw/misc/unimp.h" #include "hw/char/cmsdk-apb-uart.h" #include "hw/timer/cmsdk-apb-timer.h" @@ -72,6 +73,7 @@ #include "hw/core/split-irq.h" #include "hw/qdev-clock.h" #include "qom/object.h" +#include "hw/irq.h" #define MPS2TZ_NUMIRQ_MAX 96 #define MPS2TZ_RAM_MAX 5 @@ -121,8 +123,10 @@ struct MPS2TZMachineClass { int numirq; /* Number of external interrupts */ int uart_overflow_irq; /* number of the combined UART overflow IRQ */ uint32_t init_svtor; /* init-svtor setting for SSE */ + uint32_t sram_addr_width; /* SRAM_ADDR_WIDTH setting for SSE */ const RAMInfo *raminfo; const char *armsse_type; + uint32_t boot_ram_size; /* size of ram at address 0; 0 == find in raminfo */ }; struct MPS2TZMachineState { @@ -153,6 +157,9 @@ struct MPS2TZMachineState { SplitIRQ cpu_irq_splitter[MPS2TZ_NUMIRQ_MAX]; Clock *sysclk; Clock *s32kclk; + + bool remap; + qemu_irq remap_irq; }; #define TYPE_MPS2TZ_MACHINE "mps2tz" @@ -228,25 +235,23 @@ static const RAMInfo an505_raminfo[] = { { }, }; +/* + * Note that the addresses and MPC numbering here should match up + * with those used in remap_memory(), which can swap the BRAM and QSPI. + */ static const RAMInfo an524_raminfo[] = { { .name = "bram", .base = 0x00000000, .size = 512 * KiB, .mpc = 0, .mrindex = 0, - }, { - .name = "sram", - .base = 0x20000000, - .size = 32 * 4 * KiB, - .mpc = -1, - .mrindex = 1, }, { /* We don't model QSPI flash yet; for now expose it as simple ROM */ .name = "QSPI", .base = 0x28000000, .size = 8 * MiB, .mpc = 1, - .mrindex = 2, + .mrindex = 1, .flags = IS_ROM, }, { .name = "DDR", @@ -260,23 +265,11 @@ static const RAMInfo an524_raminfo[] = { { }; static const RAMInfo an547_raminfo[] = { { - .name = "itcm", - .base = 0x00000000, - .size = 512 * KiB, - .mpc = -1, - .mrindex = 0, - }, { .name = "sram", .base = 0x01000000, .size = 2 * MiB, .mpc = 0, .mrindex = 1, - }, { - .name = "dtcm", - .base = 0x20000000, - .size = 4 * 128 * KiB, - .mpc = -1, - .mrindex = 2, }, { .name = "sram 2", .base = 0x21000000, @@ -380,6 +373,11 @@ static qemu_irq get_sse_irq_in(MPS2TZMachineState *mms, int irqno) } } +/* Union describing the device-specific extra data we pass to the devfn. */ +typedef union PPCExtraData { + bool i2c_internal; +} PPCExtraData; + /* Most of the devices in the AN505 FPGA image sit behind * Peripheral Protection Controllers. These data structures * define the layout of which devices sit behind which PPCs. 
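The PPCExtraData union introduced just above is threaded through every one of the mps2-tz device-creation functions so that a port table entry can hand device-specific configuration to its devfn. A condensed sketch of how the two ends meet (it simply mirrors the make_i2c()/PPCPortInfo changes that appear further down in this patch; nothing here is new API):

    /*
     * Sketch: a devfn receiving the new extradata argument and honouring
     * the i2c_internal flag, plus the matching port-table entry.
     */
    static MemoryRegion *make_sketch_i2c(MPS2TZMachineState *mms, void *opaque,
                                         const char *name, hwaddr size,
                                         const int *irqs,
                                         const PPCExtraData *extradata)
    {
        ArmSbconI2CState *i2c = opaque;
        SysBusDevice *s;

        object_initialize_child(OBJECT(mms), name, i2c, TYPE_ARM_SBCON_I2C);
        s = SYS_BUS_DEVICE(i2c);
        sysbus_realize(s, &error_fatal);
        if (extradata->i2c_internal) {
            /* board-internal bus: keep user-created i2c devices off it */
            qbus_mark_full(qdev_get_child_bus(DEVICE(i2c), "i2c"));
        }
        return sysbus_mmio_get_region(s, 0);
    }

    /* In the PPCPortInfo table, the extradata initializer comes last: */
    /* { "i2c0", make_sketch_i2c, &mms->i2c[0], 0x40207000, 0x1000, {},
         { .i2c_internal = true } }, */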
@@ -389,7 +387,8 @@ static qemu_irq get_sse_irq_in(MPS2TZMachineState *mms, int irqno) */ typedef MemoryRegion *MakeDevFn(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs); + const int *irqs, + const PPCExtraData *extradata); typedef struct PPCPortInfo { const char *name; @@ -398,6 +397,7 @@ typedef struct PPCPortInfo { hwaddr addr; hwaddr size; int irqs[3]; /* currently no device needs more IRQ lines than this */ + PPCExtraData extradata; /* to pass device-specific info to the devfn */ } PPCPortInfo; typedef struct PPCInfo { @@ -408,7 +408,8 @@ typedef struct PPCInfo { static MemoryRegion *make_unimp_dev(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, + const PPCExtraData *extradata) { /* Initialize, configure and realize a TYPE_UNIMPLEMENTED_DEVICE, * and return a pointer to its MemoryRegion. @@ -424,7 +425,7 @@ static MemoryRegion *make_unimp_dev(MPS2TZMachineState *mms, static MemoryRegion *make_uart(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { /* The irq[] array is tx, rx, combined, in that order */ MPS2TZMachineClass *mmc = MPS2TZ_MACHINE_GET_CLASS(mms); @@ -448,7 +449,7 @@ static MemoryRegion *make_uart(MPS2TZMachineState *mms, void *opaque, static MemoryRegion *make_scc(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { MPS2SCC *scc = opaque; DeviceState *sccdev; @@ -457,6 +458,7 @@ static MemoryRegion *make_scc(MPS2TZMachineState *mms, void *opaque, object_initialize_child(OBJECT(mms), "scc", scc, TYPE_MPS2_SCC); sccdev = DEVICE(scc); + qdev_prop_set_uint32(sccdev, "scc-cfg0", mms->remap ? 1 : 0); qdev_prop_set_uint32(sccdev, "scc-cfg4", 0x2); qdev_prop_set_uint32(sccdev, "scc-aid", 0x00200008); qdev_prop_set_uint32(sccdev, "scc-id", mmc->scc_id); @@ -471,7 +473,7 @@ static MemoryRegion *make_scc(MPS2TZMachineState *mms, void *opaque, static MemoryRegion *make_fpgaio(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { MPS2FPGAIO *fpgaio = opaque; MPS2TZMachineClass *mmc = MPS2TZ_MACHINE_GET_CLASS(mms); @@ -486,7 +488,8 @@ static MemoryRegion *make_fpgaio(MPS2TZMachineState *mms, void *opaque, static MemoryRegion *make_eth_dev(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, + const PPCExtraData *extradata) { SysBusDevice *s; NICInfo *nd = &nd_table[0]; @@ -506,7 +509,8 @@ static MemoryRegion *make_eth_dev(MPS2TZMachineState *mms, void *opaque, static MemoryRegion *make_eth_usb(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, + const PPCExtraData *extradata) { /* * The AN524 makes the ethernet and USB share a PPC port. 
@@ -549,7 +553,7 @@ static MemoryRegion *make_eth_usb(MPS2TZMachineState *mms, void *opaque, static MemoryRegion *make_mpc(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { TZMPC *mpc = opaque; int i = mpc - &mms->mpc[0]; @@ -573,9 +577,55 @@ static MemoryRegion *make_mpc(MPS2TZMachineState *mms, void *opaque, return sysbus_mmio_get_region(SYS_BUS_DEVICE(mpc), 0); } +static hwaddr boot_mem_base(MPS2TZMachineState *mms) +{ + /* + * Return the canonical address of the block which will be mapped + * at address 0x0 (i.e. where the vector table is). + * This is usually 0, but if the AN524 alternate memory map is + * enabled it will be the base address of the QSPI block. + */ + return mms->remap ? 0x28000000 : 0; +} + +static void remap_memory(MPS2TZMachineState *mms, int map) +{ + /* + * Remap the memory for the AN524. 'map' is the value of + * SCC CFG_REG0 bit 0, i.e. 0 for the default map and 1 + * for the "option 1" mapping where QSPI is at address 0. + * + * Effectively we need to swap around the "upstream" ends of + * MPC 0 and MPC 1. + */ + MPS2TZMachineClass *mmc = MPS2TZ_MACHINE_GET_CLASS(mms); + int i; + + if (mmc->fpga_type != FPGA_AN524) { + return; + } + + memory_region_transaction_begin(); + for (i = 0; i < 2; i++) { + TZMPC *mpc = &mms->mpc[i]; + MemoryRegion *upstream = sysbus_mmio_get_region(SYS_BUS_DEVICE(mpc), 1); + hwaddr addr = (i ^ map) ? 0x28000000 : 0; + + memory_region_set_address(upstream, addr); + } + memory_region_transaction_commit(); +} + +static void remap_irq_fn(void *opaque, int n, int level) +{ + MPS2TZMachineState *mms = opaque; + + remap_memory(mms, level); +} + static MemoryRegion *make_dma(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { /* The irq[] array is DMACINTR, DMACINTERR, DMACINTTC, in that order */ PL080State *dma = opaque; @@ -632,7 +682,7 @@ static MemoryRegion *make_dma(MPS2TZMachineState *mms, void *opaque, static MemoryRegion *make_spi(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { /* * The AN505 has five PL022 SPI controllers. @@ -654,7 +704,7 @@ static MemoryRegion *make_spi(MPS2TZMachineState *mms, void *opaque, static MemoryRegion *make_i2c(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { ArmSbconI2CState *i2c = opaque; SysBusDevice *s; @@ -662,12 +712,26 @@ static MemoryRegion *make_i2c(MPS2TZMachineState *mms, void *opaque, object_initialize_child(OBJECT(mms), name, i2c, TYPE_ARM_SBCON_I2C); s = SYS_BUS_DEVICE(i2c); sysbus_realize(s, &error_fatal); + + /* + * If this is an internal-use-only i2c bus, mark it full + * so that user-created i2c devices are not plugged into it. + * If we implement models of any on-board i2c devices that + * plug in to one of the internal-use-only buses, then we will + * need to create and plugging those in here before we mark the + * bus as full. 
+ */ + if (extradata->i2c_internal) { + BusState *qbus = qdev_get_child_bus(DEVICE(i2c), "i2c"); + qbus_mark_full(qbus); + } + return sysbus_mmio_get_region(s, 0); } static MemoryRegion *make_rtc(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { PL031State *pl031 = opaque; SysBusDevice *s; @@ -710,8 +774,16 @@ static uint32_t boot_ram_size(MPS2TZMachineState *mms) const RAMInfo *p; MPS2TZMachineClass *mmc = MPS2TZ_MACHINE_GET_CLASS(mms); + /* + * Use a per-board specification (for when the boot RAM is in + * the SSE and so doesn't have a RAMInfo list entry) + */ + if (mmc->boot_ram_size) { + return mmc->boot_ram_size; + } + for (p = mmc->raminfo; p->name; p++) { - if (p->base == 0) { + if (p->base == boot_mem_base(mms)) { return p->size; } } @@ -756,6 +828,7 @@ static void mps2tz_common_init(MachineState *machine) OBJECT(system_memory), &error_abort); qdev_prop_set_uint32(iotkitdev, "EXP_NUMIRQ", mmc->numirq); qdev_prop_set_uint32(iotkitdev, "init-svtor", mmc->init_svtor); + qdev_prop_set_uint32(iotkitdev, "SRAM_ADDR_WIDTH", mmc->sram_addr_width); qdev_connect_clock_in(iotkitdev, "MAINCLK", mms->sysclk); qdev_connect_clock_in(iotkitdev, "S32KCLK", mms->s32kclk); sysbus_realize(SYS_BUS_DEVICE(&mms->iotkit), &error_fatal); @@ -863,10 +936,14 @@ static void mps2tz_common_init(MachineState *machine) { "uart2", make_uart, &mms->uart[2], 0x40202000, 0x1000, { 36, 37, 44 } }, { "uart3", make_uart, &mms->uart[3], 0x40203000, 0x1000, { 38, 39, 45 } }, { "uart4", make_uart, &mms->uart[4], 0x40204000, 0x1000, { 40, 41, 46 } }, - { "i2c0", make_i2c, &mms->i2c[0], 0x40207000, 0x1000 }, - { "i2c1", make_i2c, &mms->i2c[1], 0x40208000, 0x1000 }, - { "i2c2", make_i2c, &mms->i2c[2], 0x4020c000, 0x1000 }, - { "i2c3", make_i2c, &mms->i2c[3], 0x4020d000, 0x1000 }, + { "i2c0", make_i2c, &mms->i2c[0], 0x40207000, 0x1000, {}, + { .i2c_internal = true /* touchscreen */ } }, + { "i2c1", make_i2c, &mms->i2c[1], 0x40208000, 0x1000, {}, + { .i2c_internal = true /* audio conf */ } }, + { "i2c2", make_i2c, &mms->i2c[2], 0x4020c000, 0x1000, {}, + { .i2c_internal = false /* shield 0 */ } }, + { "i2c3", make_i2c, &mms->i2c[3], 0x4020d000, 0x1000, {}, + { .i2c_internal = false /* shield 1 */ } }, }, }, { .name = "apb_ppcexp2", @@ -907,15 +984,20 @@ static void mps2tz_common_init(MachineState *machine) }, { .name = "apb_ppcexp1", .ports = { - { "i2c0", make_i2c, &mms->i2c[0], 0x41200000, 0x1000 }, - { "i2c1", make_i2c, &mms->i2c[1], 0x41201000, 0x1000 }, + { "i2c0", make_i2c, &mms->i2c[0], 0x41200000, 0x1000, {}, + { .i2c_internal = true /* touchscreen */ } }, + { "i2c1", make_i2c, &mms->i2c[1], 0x41201000, 0x1000, {}, + { .i2c_internal = true /* audio conf */ } }, { "spi0", make_spi, &mms->spi[0], 0x41202000, 0x1000, { 52 } }, { "spi1", make_spi, &mms->spi[1], 0x41203000, 0x1000, { 53 } }, { "spi2", make_spi, &mms->spi[2], 0x41204000, 0x1000, { 54 } }, - { "i2c2", make_i2c, &mms->i2c[2], 0x41205000, 0x1000 }, - { "i2c3", make_i2c, &mms->i2c[3], 0x41206000, 0x1000 }, + { "i2c2", make_i2c, &mms->i2c[2], 0x41205000, 0x1000, {}, + { .i2c_internal = false /* shield 0 */ } }, + { "i2c3", make_i2c, &mms->i2c[3], 0x41206000, 0x1000, {}, + { .i2c_internal = false /* shield 1 */ } }, { /* port 7 reserved */ }, - { "i2c4", make_i2c, &mms->i2c[4], 0x41208000, 0x1000 }, + { "i2c4", make_i2c, &mms->i2c[4], 0x41208000, 0x1000, {}, + { .i2c_internal = true /* DDR4 EEPROM */ } }, }, }, { .name = "apb_ppcexp2", @@ -957,15 +1039,20 @@ static void 
mps2tz_common_init(MachineState *machine) }, { .name = "apb_ppcexp1", .ports = { - { "i2c0", make_i2c, &mms->i2c[0], 0x49200000, 0x1000 }, - { "i2c1", make_i2c, &mms->i2c[1], 0x49201000, 0x1000 }, + { "i2c0", make_i2c, &mms->i2c[0], 0x49200000, 0x1000, {}, + { .i2c_internal = true /* touchscreen */ } }, + { "i2c1", make_i2c, &mms->i2c[1], 0x49201000, 0x1000, {}, + { .i2c_internal = true /* audio conf */ } }, { "spi0", make_spi, &mms->spi[0], 0x49202000, 0x1000, { 53 } }, { "spi1", make_spi, &mms->spi[1], 0x49203000, 0x1000, { 54 } }, { "spi2", make_spi, &mms->spi[2], 0x49204000, 0x1000, { 55 } }, - { "i2c2", make_i2c, &mms->i2c[2], 0x49205000, 0x1000 }, - { "i2c3", make_i2c, &mms->i2c[3], 0x49206000, 0x1000 }, + { "i2c2", make_i2c, &mms->i2c[2], 0x49205000, 0x1000, {}, + { .i2c_internal = false /* shield 0 */ } }, + { "i2c3", make_i2c, &mms->i2c[3], 0x49206000, 0x1000, {}, + { .i2c_internal = false /* shield 1 */ } }, { /* port 7 reserved */ }, - { "i2c4", make_i2c, &mms->i2c[4], 0x49208000, 0x1000 }, + { "i2c4", make_i2c, &mms->i2c[4], 0x49208000, 0x1000, {}, + { .i2c_internal = true /* DDR4 EEPROM */ } }, }, }, { .name = "apb_ppcexp2", @@ -1035,7 +1122,7 @@ static void mps2tz_common_init(MachineState *machine) } mr = pinfo->devfn(mms, pinfo->opaque, pinfo->name, pinfo->size, - pinfo->irqs); + pinfo->irqs, &pinfo->extradata); portname = g_strdup_printf("port[%d]", port); object_property_set_link(OBJECT(ppc), portname, OBJECT(mr), &error_fatal); @@ -1095,6 +1182,16 @@ static void mps2tz_common_init(MachineState *machine) create_non_mpc_ram(mms); + if (mmc->fpga_type == FPGA_AN524) { + /* + * Connect the line from the SCC so that we can remap when the + * guest updates that register. + */ + mms->remap_irq = qemu_allocate_irq(remap_irq_fn, mms, 0); + qdev_connect_gpio_out_named(DEVICE(&mms->scc), "remap", 0, + mms->remap_irq); + } + armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, boot_ram_size(mms)); } @@ -1117,12 +1214,47 @@ static void mps2_tz_idau_check(IDAUInterface *ii, uint32_t address, *iregion = region; } +static char *mps2_get_remap(Object *obj, Error **errp) +{ + MPS2TZMachineState *mms = MPS2TZ_MACHINE(obj); + const char *val = mms->remap ? "QSPI" : "BRAM"; + return g_strdup(val); +} + +static void mps2_set_remap(Object *obj, const char *value, Error **errp) +{ + MPS2TZMachineState *mms = MPS2TZ_MACHINE(obj); + + if (!strcmp(value, "BRAM")) { + mms->remap = false; + } else if (!strcmp(value, "QSPI")) { + mms->remap = true; + } else { + error_setg(errp, "Invalid remap value"); + error_append_hint(errp, "Valid values are BRAM and QSPI.\n"); + } +} + +static void mps2_machine_reset(MachineState *machine) +{ + MPS2TZMachineState *mms = MPS2TZ_MACHINE(machine); + + /* + * Set the initial memory mapping before triggering the reset of + * the rest of the system, so that the guest image loader and CPU + * reset see the correct mapping. 
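Spelling out the effect of remap_memory() above (derived from its "addr = (i ^ map) ? 0x28000000 : 0" swap, with MPC 0 fronting the BRAM and MPC 1 the QSPI flash per an524_raminfo):

    /*
     * map = 0 ("BRAM", default): MPC0/BRAM -> 0x00000000, MPC1/QSPI -> 0x28000000
     * map = 1 ("QSPI"):          MPC0/BRAM -> 0x28000000, MPC1/QSPI -> 0x00000000
     */

The same bit is what the guest writes to SCC CFG_REG0, wired through the SCC "remap" GPIO line and remap_irq_fn(), while the new machine-level "remap" property only selects which of the two layouts is applied at reset, before the guest image is loaded.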
+ */ + remap_memory(mms, mms->remap); + qemu_devices_reset(); +} + static void mps2tz_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); IDAUInterfaceClass *iic = IDAU_INTERFACE_CLASS(oc); mc->init = mps2tz_common_init; + mc->reset = mps2_machine_reset; iic->check = mps2_tz_idau_check; } @@ -1168,8 +1300,10 @@ static void mps2tz_an505_class_init(ObjectClass *oc, void *data) mmc->numirq = 92; mmc->uart_overflow_irq = 47; mmc->init_svtor = 0x10000000; + mmc->sram_addr_width = 15; mmc->raminfo = an505_raminfo; mmc->armsse_type = TYPE_IOTKIT; + mmc->boot_ram_size = 0; mps2tz_set_default_ram_info(mmc); } @@ -1195,8 +1329,10 @@ static void mps2tz_an521_class_init(ObjectClass *oc, void *data) mmc->numirq = 92; mmc->uart_overflow_irq = 47; mmc->init_svtor = 0x10000000; + mmc->sram_addr_width = 15; mmc->raminfo = an505_raminfo; /* AN521 is the same as AN505 here */ mmc->armsse_type = TYPE_SSE200; + mmc->boot_ram_size = 0; mps2tz_set_default_ram_info(mmc); } @@ -1222,9 +1358,16 @@ static void mps3tz_an524_class_init(ObjectClass *oc, void *data) mmc->numirq = 95; mmc->uart_overflow_irq = 47; mmc->init_svtor = 0x10000000; + mmc->sram_addr_width = 15; mmc->raminfo = an524_raminfo; mmc->armsse_type = TYPE_SSE200; + mmc->boot_ram_size = 0; mps2tz_set_default_ram_info(mmc); + + object_class_property_add_str(oc, "remap", mps2_get_remap, mps2_set_remap); + object_class_property_set_description(oc, "remap", + "Set memory mapping. Valid values " + "are BRAM (default) and QSPI."); } static void mps3tz_an547_class_init(ObjectClass *oc, void *data) @@ -1249,8 +1392,10 @@ static void mps3tz_an547_class_init(ObjectClass *oc, void *data) mmc->numirq = 96; mmc->uart_overflow_irq = 48; mmc->init_svtor = 0x00000000; + mmc->sram_addr_width = 21; mmc->raminfo = an547_raminfo; mmc->armsse_type = TYPE_SSE300; + mmc->boot_ram_size = 512 * KiB; mps2tz_set_default_ram_info(mmc); } diff --git a/hw/arm/mps2.c b/hw/arm/mps2.c index 81413b7133e..bb76fa68890 100644 --- a/hw/arm/mps2.c +++ b/hw/arm/mps2.c @@ -86,6 +86,7 @@ struct MPS2MachineState { CMSDKAPBWatchdog watchdog; CMSDKAPBTimer timer[2]; Clock *sysclk; + Clock *refclk; }; #define TYPE_MPS2_MACHINE "mps2" @@ -99,6 +100,15 @@ OBJECT_DECLARE_TYPE(MPS2MachineState, MPS2MachineClass, MPS2_MACHINE) /* Main SYSCLK frequency in Hz */ #define SYSCLK_FRQ 25000000 +/* + * The Application Notes don't say anything about how the + * systick reference clock is configured. (Quite possibly + * they don't have one at all.) This 1MHz clock matches the + * pre-existing behaviour that used to be hardcoded in the + * armv7m_systick implementation. + */ +#define REFCLK_FRQ (1 * 1000 * 1000) + /* Initialize the auxiliary RAM region @mr and map it into * the memory map at @base. 
*/ @@ -146,6 +156,9 @@ static void mps2_common_init(MachineState *machine) mms->sysclk = clock_new(OBJECT(machine), "SYSCLK"); clock_set_hz(mms->sysclk, SYSCLK_FRQ); + mms->refclk = clock_new(OBJECT(machine), "REFCLK"); + clock_set_hz(mms->refclk, REFCLK_FRQ); + /* The FPGA images have an odd combination of different RAMs, * because in hardware they are different implementations and * connected to different buses, giving varying performance/size @@ -223,6 +236,8 @@ static void mps2_common_init(MachineState *machine) default: g_assert_not_reached(); } + qdev_connect_clock_in(armv7m, "cpuclk", mms->sysclk); + qdev_connect_clock_in(armv7m, "refclk", mms->refclk); qdev_prop_set_string(armv7m, "cpu-type", machine->cpu_type); qdev_prop_set_bit(armv7m, "enable-bitband", true); object_property_set_link(OBJECT(&mms->armv7m), "memory", @@ -413,7 +428,17 @@ static void mps2_common_init(MachineState *machine) 0x40023000, /* Audio */ 0x40029000, /* Shield0 */ 0x4002a000}; /* Shield1 */ - sysbus_create_simple(TYPE_ARM_SBCON_I2C, i2cbase[i], NULL); + DeviceState *dev; + + dev = sysbus_create_simple(TYPE_ARM_SBCON_I2C, i2cbase[i], NULL); + if (i < 2) { + /* + * internal-only bus: mark it full to avoid user-created + * i2c devices being plugged into it. + */ + BusState *qbus = qdev_get_child_bus(dev, "i2c"); + qbus_mark_full(qbus); + } } create_unimplemented_device("i2s", 0x40024000, 0x400); @@ -424,8 +449,6 @@ static void mps2_common_init(MachineState *machine) qdev_get_gpio_in(armv7m, mmc->fpga_type == FPGA_AN511 ? 47 : 13)); - system_clock_scale = NANOSECONDS_PER_SECOND / SYSCLK_FRQ; - armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, 0x400000); } diff --git a/hw/arm/msf2-soc.c b/hw/arm/msf2-soc.c index d2c29e82d13..b5fe9f364d5 100644 --- a/hw/arm/msf2-soc.c +++ b/hw/arm/msf2-soc.c @@ -27,9 +27,9 @@ #include "qapi/error.h" #include "exec/address-spaces.h" #include "hw/char/serial.h" -#include "hw/irq.h" #include "hw/arm/msf2-soc.h" #include "hw/misc/unimp.h" +#include "hw/qdev-clock.h" #include "sysemu/sysemu.h" #define MSF2_TIMER_BASE 0x40004000 @@ -74,6 +74,9 @@ static void m2sxxx_soc_initfn(Object *obj) } object_initialize_child(obj, "emac", &s->emac, TYPE_MSS_EMAC); + + s->m3clk = qdev_init_clock_in(DEVICE(obj), "m3clk", NULL, NULL, 0); + s->refclk = qdev_init_clock_in(DEVICE(obj), "refclk", NULL, NULL, 0); } static void m2sxxx_soc_realize(DeviceState *dev_soc, Error **errp) @@ -84,11 +87,34 @@ static void m2sxxx_soc_realize(DeviceState *dev_soc, Error **errp) int i; MemoryRegion *system_memory = get_system_memory(); - MemoryRegion *nvm = g_new(MemoryRegion, 1); - MemoryRegion *nvm_alias = g_new(MemoryRegion, 1); - MemoryRegion *sram = g_new(MemoryRegion, 1); - memory_region_init_rom(nvm, OBJECT(dev_soc), "MSF2.eNVM", s->envm_size, + if (!clock_has_source(s->m3clk)) { + error_setg(errp, "m3clk must be wired up by the board code"); + return; + } + + /* + * We use s->refclk internally and only define it with qdev_init_clock_in() + * so it is correctly parented and not leaked on an init/deinit; it is not + * intended as an externally exposed clock. + */ + if (clock_has_source(s->refclk)) { + error_setg(errp, "refclk must not be wired up by the board code"); + return; + } + + /* + * TODO: ideally we should model the SoC SYSTICK_CR register at 0xe0042038, + * which allows the guest to program the divisor between the m3clk and + * the systick refclk to either /4, /8, /16 or /32, as well as setting + * the value the guest can read in the STCALIB register. 
Currently we + * implement the divisor as a fixed /32, which matches the reset value + * of SYSTICK_CR. + */ + clock_set_mul_div(s->refclk, 32, 1); + clock_set_source(s->refclk, s->m3clk); + + memory_region_init_rom(&s->nvm, OBJECT(dev_soc), "MSF2.eNVM", s->envm_size, &error_fatal); /* * On power-on, the eNVM region 0x60000000 is automatically @@ -96,34 +122,28 @@ static void m2sxxx_soc_realize(DeviceState *dev_soc, Error **errp) * start address (0x0). We do not support remapping other eNVM, * eSRAM and DDR regions by guest(via Sysreg) currently. */ - memory_region_init_alias(nvm_alias, OBJECT(dev_soc), "MSF2.eNVM", nvm, 0, - s->envm_size); + memory_region_init_alias(&s->nvm_alias, OBJECT(dev_soc), "MSF2.eNVM", + &s->nvm, 0, s->envm_size); - memory_region_add_subregion(system_memory, ENVM_BASE_ADDRESS, nvm); - memory_region_add_subregion(system_memory, 0, nvm_alias); + memory_region_add_subregion(system_memory, ENVM_BASE_ADDRESS, &s->nvm); + memory_region_add_subregion(system_memory, 0, &s->nvm_alias); - memory_region_init_ram(sram, NULL, "MSF2.eSRAM", s->esram_size, + memory_region_init_ram(&s->sram, NULL, "MSF2.eSRAM", s->esram_size, &error_fatal); - memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, sram); + memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, &s->sram); armv7m = DEVICE(&s->armv7m); qdev_prop_set_uint32(armv7m, "num-irq", 81); qdev_prop_set_string(armv7m, "cpu-type", s->cpu_type); qdev_prop_set_bit(armv7m, "enable-bitband", true); + qdev_connect_clock_in(armv7m, "cpuclk", s->m3clk); + qdev_connect_clock_in(armv7m, "refclk", s->refclk); object_property_set_link(OBJECT(&s->armv7m), "memory", OBJECT(get_system_memory()), &error_abort); if (!sysbus_realize(SYS_BUS_DEVICE(&s->armv7m), errp)) { return; } - if (!s->m3clk) { - error_setg(errp, "Invalid m3clk value"); - error_append_hint(errp, "m3clk can not be zero\n"); - return; - } - - system_clock_scale = NANOSECONDS_PER_SECOND / s->m3clk; - for (i = 0; i < MSF2_NUM_UARTS; i++) { if (serial_hd(i)) { serial_mm_init(get_system_memory(), uart_addr[i], 2, @@ -133,8 +153,13 @@ static void m2sxxx_soc_realize(DeviceState *dev_soc, Error **errp) } dev = DEVICE(&s->timer); - /* APB0 clock is the timer input clock */ - qdev_prop_set_uint32(dev, "clock-frequency", s->m3clk / s->apb0div); + /* + * APB0 clock is the timer input clock. + * TODO: ideally the MSF2 timer device should use a Clock rather than a + * clock-frequency integer property. 
+ */ + qdev_prop_set_uint32(dev, "clock-frequency", + clock_get_hz(s->m3clk) / s->apb0div); if (!sysbus_realize(SYS_BUS_DEVICE(&s->timer), errp)) { return; } @@ -211,8 +236,6 @@ static Property m2sxxx_soc_properties[] = { DEFINE_PROP_UINT64("eNVM-size", MSF2State, envm_size, MSF2_ENVM_MAX_SIZE), DEFINE_PROP_UINT64("eSRAM-size", MSF2State, esram_size, MSF2_ESRAM_MAX_SIZE), - /* Libero GUI shows 100Mhz as default for clocks */ - DEFINE_PROP_UINT32("m3clk", MSF2State, m3clk, 100 * 1000000), /* default divisors in Libero GUI */ DEFINE_PROP_UINT8("apb0div", MSF2State, apb0div, 2), DEFINE_PROP_UINT8("apb1div", MSF2State, apb1div, 2), diff --git a/hw/arm/msf2-som.c b/hw/arm/msf2-som.c index f9b61c36ddb..396e8b99138 100644 --- a/hw/arm/msf2-som.c +++ b/hw/arm/msf2-som.c @@ -29,9 +29,9 @@ #include "hw/boards.h" #include "hw/qdev-properties.h" #include "hw/arm/boot.h" +#include "hw/qdev-clock.h" #include "exec/address-spaces.h" #include "hw/arm/msf2-soc.h" -#include "cpu.h" #define DDR_BASE_ADDRESS 0xA0000000 #define DDR_SIZE (64 * MiB) @@ -50,6 +50,7 @@ static void emcraft_sf2_s2s010_init(MachineState *machine) BusState *spi_bus; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *ddr = g_new(MemoryRegion, 1); + Clock *m3clk; if (strcmp(machine->cpu_type, mc->default_cpu_type) != 0) { error_report("This board can only be used with CPU %s", @@ -73,7 +74,10 @@ static void emcraft_sf2_s2s010_init(MachineState *machine) * in Libero. CPU clock is divided by APB0 and APB1 divisors for * peripherals. Emcraft's SoM kit comes with these settings by default. */ - qdev_prop_set_uint32(dev, "m3clk", 142 * 1000000); + /* This clock doesn't need migration because it is fixed-frequency */ + m3clk = clock_new(OBJECT(machine), "m3clk"); + clock_set_hz(m3clk, 142 * 1000000); + qdev_connect_clock_in(dev, "m3clk", m3clk); qdev_prop_set_uint32(dev, "apb0div", 2); qdev_prop_set_uint32(dev, "apb1div", 2); diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c index 9cebece2de0..2d612cc0c9b 100644 --- a/hw/arm/musicpal.c +++ b/hw/arm/musicpal.c @@ -19,7 +19,6 @@ #include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/char/serial.h" -#include "hw/hw.h" #include "qemu/timer.h" #include "hw/ptimer.h" #include "hw/qdev-properties.h" @@ -32,7 +31,6 @@ #include "sysemu/block-backend.h" #include "sysemu/runstate.h" #include "sysemu/dma.h" -#include "exec/address-spaces.h" #include "ui/pixel_ops.h" #include "qemu/cutils.h" #include "qom/object.h" diff --git a/hw/arm/netduino2.c b/hw/arm/netduino2.c index 1733b71507c..3365da11bf7 100644 --- a/hw/arm/netduino2.c +++ b/hw/arm/netduino2.c @@ -26,6 +26,7 @@ #include "qapi/error.h" #include "hw/boards.h" #include "hw/qdev-properties.h" +#include "hw/qdev-clock.h" #include "qemu/error-report.h" #include "hw/arm/stm32f205_soc.h" #include "hw/arm/boot.h" @@ -36,16 +37,15 @@ static void netduino2_init(MachineState *machine) { DeviceState *dev; + Clock *sysclk; - /* - * TODO: ideally we would model the SoC RCC and let it handle - * system_clock_scale, including its ability to define different - * possible SYSCLK sources. 
- */ - system_clock_scale = NANOSECONDS_PER_SECOND / SYSCLK_FRQ; + /* This clock doesn't need migration because it is fixed-frequency */ + sysclk = clock_new(OBJECT(machine), "SYSCLK"); + clock_set_hz(sysclk, SYSCLK_FRQ); dev = qdev_new(TYPE_STM32F205_SOC); qdev_prop_set_string(dev, "cpu-type", ARM_CPU_TYPE_NAME("cortex-m3")); + qdev_connect_clock_in(dev, "sysclk", sysclk); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, diff --git a/hw/arm/netduinoplus2.c b/hw/arm/netduinoplus2.c index d3ad7a2b675..76cea8e4891 100644 --- a/hw/arm/netduinoplus2.c +++ b/hw/arm/netduinoplus2.c @@ -26,6 +26,7 @@ #include "qapi/error.h" #include "hw/boards.h" #include "hw/qdev-properties.h" +#include "hw/qdev-clock.h" #include "qemu/error-report.h" #include "hw/arm/stm32f405_soc.h" #include "hw/arm/boot.h" @@ -36,16 +37,15 @@ static void netduinoplus2_init(MachineState *machine) { DeviceState *dev; + Clock *sysclk; - /* - * TODO: ideally we would model the SoC RCC and let it handle - * system_clock_scale, including its ability to define different - * possible SYSCLK sources. - */ - system_clock_scale = NANOSECONDS_PER_SECOND / SYSCLK_FRQ; + /* This clock doesn't need migration because it is fixed-frequency */ + sysclk = clock_new(OBJECT(machine), "SYSCLK"); + clock_set_hz(sysclk, SYSCLK_FRQ); dev = qdev_new(TYPE_STM32F405_SOC); qdev_prop_set_string(dev, "cpu-type", ARM_CPU_TYPE_NAME("cortex-m4")); + qdev_connect_clock_in(dev, "sysclk", sysclk); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); armv7m_load_kernel(ARM_CPU(first_cpu), diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c index 495b0f8e91d..878c2208e07 100644 --- a/hw/arm/npcm7xx.c +++ b/hw/arm/npcm7xx.c @@ -16,7 +16,6 @@ #include "qemu/osdep.h" -#include "exec/address-spaces.h" #include "hw/arm/boot.h" #include "hw/arm/npcm7xx.h" #include "hw/char/serial.h" @@ -64,6 +63,8 @@ #define NPCM7XX_ROM_BA (0xffff0000) #define NPCM7XX_ROM_SZ (64 * KiB) +/* SDHCI Modules */ +#define NPCM7XX_MMC_BA (0xf0842000) /* Clock configuration values to be fixed up when bypassing bootloader */ @@ -84,6 +85,7 @@ enum NPCM7xxInterrupt { NPCM7XX_UART3_IRQ, NPCM7XX_EMC1RX_IRQ = 15, NPCM7XX_EMC1TX_IRQ, + NPCM7XX_MMC_IRQ = 26, NPCM7XX_TIMER0_IRQ = 32, /* Timer Module 0 */ NPCM7XX_TIMER1_IRQ, NPCM7XX_TIMER2_IRQ, @@ -444,6 +446,8 @@ static void npcm7xx_init(Object *obj) for (i = 0; i < ARRAY_SIZE(s->emc); i++) { object_initialize_child(obj, "emc[*]", &s->emc[i], TYPE_NPCM7XX_EMC); } + + object_initialize_child(obj, "mmc", &s->mmc, TYPE_NPCM7XX_SDHCI); } static void npcm7xx_realize(DeviceState *dev, Error **errp) @@ -708,6 +712,12 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp) &error_abort); memory_region_add_subregion(get_system_memory(), NPCM7XX_ROM_BA, &s->irom); + /* SDHCI */ + sysbus_realize(SYS_BUS_DEVICE(&s->mmc), &error_abort); + sysbus_mmio_map(SYS_BUS_DEVICE(&s->mmc), 0, NPCM7XX_MMC_BA); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->mmc), 0, + npcm7xx_irq(s, NPCM7XX_MMC_IRQ)); + create_unimplemented_device("npcm7xx.shm", 0xc0001000, 4 * KiB); create_unimplemented_device("npcm7xx.vdmx", 0xe0800000, 4 * KiB); create_unimplemented_device("npcm7xx.pcierc", 0xe1000000, 64 * KiB); @@ -737,7 +747,6 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp) create_unimplemented_device("npcm7xx.usbd[8]", 0xf0838000, 4 * KiB); create_unimplemented_device("npcm7xx.usbd[9]", 0xf0839000, 4 * KiB); create_unimplemented_device("npcm7xx.sd", 0xf0840000, 8 * KiB); - 
create_unimplemented_device("npcm7xx.mmc", 0xf0842000, 8 * KiB); create_unimplemented_device("npcm7xx.pcimbx", 0xf0848000, 512 * KiB); create_unimplemented_device("npcm7xx.aes", 0xf0858000, 4 * KiB); create_unimplemented_device("npcm7xx.des", 0xf0859000, 4 * KiB); diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c index e22fe4bf8f0..dec7d16ae51 100644 --- a/hw/arm/npcm7xx_boards.c +++ b/hw/arm/npcm7xx_boards.c @@ -16,9 +16,9 @@ #include "qemu/osdep.h" -#include "exec/address-spaces.h" #include "hw/arm/npcm7xx.h" #include "hw/core/cpu.h" +#include "hw/i2c/i2c_mux_pca954x.h" #include "hw/i2c/smbus_eeprom.h" #include "hw/loader.h" #include "hw/qdev-core.h" @@ -27,10 +27,14 @@ #include "qemu-common.h" #include "qemu/datadir.h" #include "qemu/units.h" +#include "sysemu/blockdev.h" #include "sysemu/sysemu.h" +#include "sysemu/block-backend.h" #define NPCM750_EVB_POWER_ON_STRAPS 0x00001ff7 #define QUANTA_GSJ_POWER_ON_STRAPS 0x00001fff +#define QUANTA_GBS_POWER_ON_STRAPS 0x000017ff +#define KUDO_BMC_POWER_ON_STRAPS 0x00001fff static const char npcm7xx_default_bootrom[] = "npcm7xx_bootrom.bin"; @@ -80,6 +84,22 @@ static void npcm7xx_connect_dram(NPCM7xxState *soc, MemoryRegion *dram) &error_abort); } +static void sdhci_attach_drive(SDHCIState *sdhci) +{ + DriveInfo *di = drive_get_next(IF_SD); + BlockBackend *blk = di ? blk_by_legacy_dinfo(di) : NULL; + + BusState *bus = qdev_get_child_bus(DEVICE(sdhci), "sd-bus"); + if (bus == NULL) { + error_report("No SD bus found in SOC object"); + exit(1); + } + + DeviceState *carddev = qdev_new(TYPE_SD_CARD); + qdev_prop_set_drive_err(carddev, "drive", blk, &error_fatal); + qdev_realize_and_unref(carddev, bus, &error_fatal); +} + static NPCM7xxState *npcm7xx_create_soc(MachineState *machine, uint32_t hw_straps) { @@ -222,7 +242,18 @@ static void quanta_gsj_i2c_init(NPCM7xxState *soc) at24c_eeprom_init(soc, 9, 0x55, 8192); at24c_eeprom_init(soc, 10, 0x55, 8192); - /* TODO: Add additional i2c devices. 
*/ + /* + * i2c-11: + * - power-brick@36: delta,dps800 + * - hotswap@15: ti,lm5066i + */ + + /* + * i2c-12: + * - ucd90160@6b + */ + + i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 15), "pca9548", 0x75); } static void quanta_gsj_fan_init(NPCM7xxMachine *machine, NPCM7xxState *soc) @@ -239,6 +270,65 @@ static void quanta_gsj_fan_init(NPCM7xxMachine *machine, NPCM7xxState *soc) npcm7xx_connect_pwm_fan(soc, &splitter[2], 0x05, 1); } +static void quanta_gbs_i2c_init(NPCM7xxState *soc) +{ + /* + * i2c-0: + * pca9546@71 + * + * i2c-1: + * pca9535@24 + * pca9535@20 + * pca9535@21 + * pca9535@22 + * pca9535@23 + * pca9535@25 + * pca9535@26 + * + * i2c-2: + * sbtsi@4c + * + * i2c-5: + * atmel,24c64@50 mb_fru + * pca9546@71 + * - channel 0: max31725@54 + * - channel 1: max31725@55 + * - channel 2: max31725@5d + * atmel,24c64@51 fan_fru + * - channel 3: atmel,24c64@52 hsbp_fru + * + * i2c-6: + * pca9545@73 + * + * i2c-7: + * pca9545@72 + * + * i2c-8: + * adi,adm1272@10 + * + * i2c-9: + * pca9546@71 + * - channel 0: isil,isl68137@60 + * - channel 1: isil,isl68137@61 + * - channel 2: isil,isl68137@63 + * - channel 3: isil,isl68137@45 + * + * i2c-10: + * pca9545@71 + * + * i2c-11: + * pca9545@76 + * + * i2c-12: + * maxim,max34451@4e + * isil,isl68137@5d + * isil,isl68137@5e + * + * i2c-14: + * pca9545@70 + */ +} + static void npcm750_evb_init(MachineState *machine) { NPCM7xxState *soc; @@ -270,6 +360,41 @@ static void quanta_gsj_init(MachineState *machine) npcm7xx_load_kernel(machine, soc); } +static void quanta_gbs_init(MachineState *machine) +{ + NPCM7xxState *soc; + + soc = npcm7xx_create_soc(machine, QUANTA_GBS_POWER_ON_STRAPS); + npcm7xx_connect_dram(soc, machine->ram); + qdev_realize(DEVICE(soc), NULL, &error_fatal); + + npcm7xx_load_bootrom(machine, soc); + + npcm7xx_connect_flash(&soc->fiu[0], 0, "mx66u51235f", + drive_get(IF_MTD, 0, 0)); + + quanta_gbs_i2c_init(soc); + sdhci_attach_drive(&soc->mmc.sdhci); + npcm7xx_load_kernel(machine, soc); +} + +static void kudo_bmc_init(MachineState *machine) +{ + NPCM7xxState *soc; + + soc = npcm7xx_create_soc(machine, KUDO_BMC_POWER_ON_STRAPS); + npcm7xx_connect_dram(soc, machine->ram); + qdev_realize(DEVICE(soc), NULL, &error_fatal); + + npcm7xx_load_bootrom(machine, soc); + npcm7xx_connect_flash(&soc->fiu[0], 0, "mx66u51235f", + drive_get(IF_MTD, 0, 0)); + npcm7xx_connect_flash(&soc->fiu[1], 0, "mx66u51235f", + drive_get(IF_MTD, 3, 0)); + + npcm7xx_load_kernel(machine, soc); +} + static void npcm7xx_set_soc_type(NPCM7xxMachineClass *nmc, const char *type) { NPCM7xxClass *sc = NPCM7XX_CLASS(object_class_by_name(type)); @@ -301,7 +426,7 @@ static void npcm750_evb_machine_class_init(ObjectClass *oc, void *data) npcm7xx_set_soc_type(nmc, TYPE_NPCM750); - mc->desc = "Nuvoton NPCM750 Evaluation Board (Cortex A9)"; + mc->desc = "Nuvoton NPCM750 Evaluation Board (Cortex-A9)"; mc->init = npcm750_evb_init; mc->default_ram_size = 512 * MiB; }; @@ -313,11 +438,35 @@ static void gsj_machine_class_init(ObjectClass *oc, void *data) npcm7xx_set_soc_type(nmc, TYPE_NPCM730); - mc->desc = "Quanta GSJ (Cortex A9)"; + mc->desc = "Quanta GSJ (Cortex-A9)"; mc->init = quanta_gsj_init; mc->default_ram_size = 512 * MiB; }; +static void gbs_bmc_machine_class_init(ObjectClass *oc, void *data) +{ + NPCM7xxMachineClass *nmc = NPCM7XX_MACHINE_CLASS(oc); + MachineClass *mc = MACHINE_CLASS(oc); + + npcm7xx_set_soc_type(nmc, TYPE_NPCM730); + + mc->desc = "Quanta GBS (Cortex-A9)"; + mc->init = quanta_gbs_init; + mc->default_ram_size = 1 * GiB; +} + +static void 
kudo_bmc_machine_class_init(ObjectClass *oc, void *data) +{ + NPCM7xxMachineClass *nmc = NPCM7XX_MACHINE_CLASS(oc); + MachineClass *mc = MACHINE_CLASS(oc); + + npcm7xx_set_soc_type(nmc, TYPE_NPCM730); + + mc->desc = "Kudo BMC (Cortex-A9)"; + mc->init = kudo_bmc_init; + mc->default_ram_size = 1 * GiB; +}; + static const TypeInfo npcm7xx_machine_types[] = { { .name = TYPE_NPCM7XX_MACHINE, @@ -334,6 +483,14 @@ static const TypeInfo npcm7xx_machine_types[] = { .name = MACHINE_TYPE_NAME("quanta-gsj"), .parent = TYPE_NPCM7XX_MACHINE, .class_init = gsj_machine_class_init, + }, { + .name = MACHINE_TYPE_NAME("quanta-gbs-bmc"), + .parent = TYPE_NPCM7XX_MACHINE, + .class_init = gbs_bmc_machine_class_init, + }, { + .name = MACHINE_TYPE_NAME("kudo-bmc"), + .parent = TYPE_NPCM7XX_MACHINE, + .class_init = kudo_bmc_machine_class_init, }, }; diff --git a/hw/arm/nrf51_soc.c b/hw/arm/nrf51_soc.c index e15981e019f..34da0d62f00 100644 --- a/hw/arm/nrf51_soc.c +++ b/hw/arm/nrf51_soc.c @@ -12,10 +12,9 @@ #include "qapi/error.h" #include "hw/arm/boot.h" #include "hw/sysbus.h" +#include "hw/qdev-clock.h" #include "hw/misc/unimp.h" -#include "exec/address-spaces.h" #include "qemu/log.h" -#include "cpu.h" #include "hw/arm/nrf51.h" #include "hw/arm/nrf51_soc.h" @@ -68,7 +67,22 @@ static void nrf51_soc_realize(DeviceState *dev_soc, Error **errp) return; } - system_clock_scale = NANOSECONDS_PER_SECOND / HCLK_FRQ; + /* + * HCLK on this SoC is fixed, so we set up sysclk ourselves and + * the board shouldn't connect it. + */ + if (clock_has_source(s->sysclk)) { + error_setg(errp, "sysclk clock must not be wired up by the board code"); + return; + } + /* This clock doesn't need migration because it is fixed-frequency */ + clock_set_hz(s->sysclk, HCLK_FRQ); + qdev_connect_clock_in(DEVICE(&s->cpu), "cpuclk", s->sysclk); + /* + * This SoC has no systick device, so don't connect refclk. + * TODO: model the lack of systick (currently the armv7m object + * will always provide one). + */ object_property_set_link(OBJECT(&s->cpu), "memory", OBJECT(&s->container), &error_abort); @@ -193,6 +207,8 @@ static void nrf51_soc_init(Object *obj) TYPE_NRF51_TIMER); } + + s->sysclk = qdev_init_clock_in(DEVICE(s), "sysclk", NULL, NULL, 0); } static Property nrf51_soc_properties[] = { diff --git a/hw/arm/nseries.c b/hw/arm/nseries.c index 387eea4d44c..af3164c5519 100644 --- a/hw/arm/nseries.c +++ b/hw/arm/nseries.c @@ -34,16 +34,16 @@ #include "hw/boards.h" #include "hw/i2c/i2c.h" #include "hw/display/blizzard.h" +#include "hw/input/lm832x.h" #include "hw/input/tsc2xxx.h" #include "hw/misc/cbus.h" -#include "hw/misc/tmp105.h" +#include "hw/sensor/tmp105.h" #include "hw/qdev-properties.h" #include "hw/block/flash.h" #include "hw/hw.h" #include "hw/loader.h" #include "hw/sysbus.h" #include "qemu/log.h" -#include "exec/address-spaces.h" /* Nokia N8x0 support */ struct n800_s { @@ -417,7 +417,7 @@ static void n810_kbd_setup(struct n800_s *s) /* Attach the LM8322 keyboard to the I2C bus, * should happen in n8x0_i2c_setup and s->kbd be initialised here. 
*/ s->kbd = DEVICE(i2c_slave_create_simple(omap_i2c_bus(s->mpu->i2c[0]), - "lm8323", N810_LM8323_ADDR)); + TYPE_LM8323, N810_LM8323_ADDR)); qdev_connect_gpio_out(s->kbd, 0, kbd_irq); } @@ -692,7 +692,7 @@ static uint32_t mipid_txrx(void *opaque, uint32_t cmd, int len) default: bad_cmd: qemu_log_mask(LOG_GUEST_ERROR, - "%s: unknown command %02x\n", __func__, s->cmd); + "%s: unknown command 0x%02x\n", __func__, s->cmd); break; } diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c index 02c0f66431b..180d3788f89 100644 --- a/hw/arm/omap1.c +++ b/hw/arm/omap1.c @@ -24,7 +24,6 @@ #include "qemu-common.h" #include "cpu.h" #include "exec/address-spaces.h" -#include "hw/boards.h" #include "hw/hw.h" #include "hw/irq.h" #include "hw/qdev-properties.h" diff --git a/hw/arm/omap2.c b/hw/arm/omap2.c index 16d388fc79d..02b1aa8c974 100644 --- a/hw/arm/omap2.c +++ b/hw/arm/omap2.c @@ -27,7 +27,6 @@ #include "sysemu/qtest.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" -#include "hw/boards.h" #include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/arm/boot.h" diff --git a/hw/arm/orangepi.c b/hw/arm/orangepi.c index 40cdb5c6d2c..0cf9895ce79 100644 --- a/hw/arm/orangepi.c +++ b/hw/arm/orangepi.c @@ -21,12 +21,9 @@ #include "qemu/units.h" #include "exec/address-spaces.h" #include "qapi/error.h" -#include "cpu.h" -#include "hw/sysbus.h" #include "hw/boards.h" #include "hw/qdev-properties.h" #include "hw/arm/allwinner-h3.h" -#include "sysemu/sysemu.h" static struct arm_boot_info orangepi_binfo = { .nb_cpus = AW_H3_NUM_CPUS, diff --git a/hw/arm/palm.c b/hw/arm/palm.c index 4e3dc5fbbf2..68e11dd1ecc 100644 --- a/hw/arm/palm.c +++ b/hw/arm/palm.c @@ -29,7 +29,6 @@ #include "hw/input/tsc2xxx.h" #include "hw/irq.h" #include "hw/loader.h" -#include "exec/address-spaces.h" #include "cpu.h" #include "qemu/cutils.h" #include "qom/object.h" diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c index fdc4955e95b..15a247efae2 100644 --- a/hw/arm/pxa2xx.c +++ b/hw/arm/pxa2xx.c @@ -1437,7 +1437,7 @@ static void pxa2xx_i2c_write(void *opaque, hwaddr addr, break; case ISAR: - i2c_set_slave_address(I2C_SLAVE(s->slave), value & 0x7f); + i2c_slave_set_address(I2C_SLAVE(s->slave), value & 0x7f); break; case IDBR: diff --git a/hw/arm/pxa2xx_pic.c b/hw/arm/pxa2xx_pic.c index cf6cb2a373a..ed032fed548 100644 --- a/hw/arm/pxa2xx_pic.c +++ b/hw/arm/pxa2xx_pic.c @@ -301,7 +301,7 @@ DeviceState *pxa2xx_pic_init(hwaddr base, ARMCPU *cpu) return dev; } -static VMStateDescription vmstate_pxa2xx_pic_regs = { +static const VMStateDescription vmstate_pxa2xx_pic_regs = { .name = "pxa2xx_pic", .version_id = 0, .minimum_version_id = 0, diff --git a/hw/arm/raspi.c b/hw/arm/raspi.c index 990509d3852..146d35382bf 100644 --- a/hw/arm/raspi.c +++ b/hw/arm/raspi.c @@ -16,14 +16,12 @@ #include "qemu/units.h" #include "qemu/cutils.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/arm/bcm2836.h" #include "hw/registerfields.h" #include "qemu/error-report.h" #include "hw/boards.h" #include "hw/loader.h" #include "hw/arm/boot.h" -#include "sysemu/sysemu.h" #include "qom/object.h" #define SMPBOOT_ADDR 0x300 /* this should leave enough space for ATAGS */ @@ -283,7 +281,7 @@ static void raspi_machine_init(MachineState *machine) object_property_add_const_link(OBJECT(&s->soc), "ram", OBJECT(machine->ram)); object_property_set_int(OBJECT(&s->soc), "board-rev", board_rev, &error_abort); - qdev_realize(DEVICE(&s->soc), NULL, &error_abort); + qdev_realize(DEVICE(&s->soc), NULL, &error_fatal); /* Create and plug in the SD cards */ di = drive_get_next(IF_SD); 
@@ -342,7 +340,6 @@ static void raspi2b_machine_class_init(ObjectClass *oc, void *data) MachineClass *mc = MACHINE_CLASS(oc); RaspiMachineClass *rmc = RASPI_MACHINE_CLASS(oc); - mc->alias = "raspi2"; rmc->board_rev = 0xa21041; raspi_machine_class_common_init(mc, rmc->board_rev); }; @@ -362,7 +359,6 @@ static void raspi3b_machine_class_init(ObjectClass *oc, void *data) MachineClass *mc = MACHINE_CLASS(oc); RaspiMachineClass *rmc = RASPI_MACHINE_CLASS(oc); - mc->alias = "raspi3"; rmc->board_rev = 0xa02082; raspi_machine_class_common_init(mc, rmc->board_rev); }; diff --git a/hw/arm/realview.c b/hw/arm/realview.c index 0831159d158..1c54316ba38 100644 --- a/hw/arm/realview.c +++ b/hw/arm/realview.c @@ -20,7 +20,6 @@ #include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/i2c/i2c.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "hw/char/pl011.h" #include "hw/cpu/a9mpcore.h" diff --git a/hw/arm/sabrelite.c b/hw/arm/sabrelite.c index a3dbf85e0ed..553608e5835 100644 --- a/hw/arm/sabrelite.c +++ b/hw/arm/sabrelite.c @@ -15,7 +15,6 @@ #include "hw/arm/fsl-imx6.h" #include "hw/boards.h" #include "hw/qdev-properties.h" -#include "sysemu/sysemu.h" #include "qemu/error-report.h" #include "sysemu/qtest.h" @@ -88,7 +87,7 @@ static void sabrelite_init(MachineState *machine) qdev_realize_and_unref(flash_dev, BUS(spi_bus), &error_fatal); cs_line = qdev_get_gpio_in_named(flash_dev, SSI_GPIO_CS, 0); - sysbus_connect_irq(SYS_BUS_DEVICE(spi_dev), 1, cs_line); + qdev_connect_gpio_out(DEVICE(&s->gpio[2]), 19, cs_line); } } } @@ -106,7 +105,7 @@ static void sabrelite_init(MachineState *machine) static void sabrelite_machine_init(MachineClass *mc) { - mc->desc = "Freescale i.MX6 Quad SABRE Lite Board (Cortex A9)"; + mc->desc = "Freescale i.MX6 Quad SABRE Lite Board (Cortex-A9)"; mc->init = sabrelite_init; mc->max_cpus = FSL_IMX6_NUM_CPUS; mc->ignore_memory_transaction_failures = true; diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index 88dfb2284c1..358714bd3e8 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -27,7 +27,6 @@ #include "sysemu/numa.h" #include "sysemu/runstate.h" #include "sysemu/sysemu.h" -#include "exec/address-spaces.h" #include "exec/hwaddr.h" #include "kvm_arm.h" #include "hw/arm/boot.h" @@ -66,7 +65,7 @@ enum { SBSA_GIC_DIST, SBSA_GIC_REDIST, SBSA_SECURE_EC, - SBSA_GWDT, + SBSA_GWDT_WS0, SBSA_GWDT_REFRESH, SBSA_GWDT_CONTROL, SBSA_SMMU, @@ -141,7 +140,7 @@ static const int sbsa_ref_irqmap[] = { [SBSA_AHCI] = 10, [SBSA_EHCI] = 11, [SBSA_SMMU] = 12, /* ... 
to 15 */ - [SBSA_GWDT] = 16, + [SBSA_GWDT_WS0] = 16, }; static const char * const valid_cpus[] = { @@ -482,7 +481,7 @@ static void create_wdt(const SBSAMachineState *sms) hwaddr cbase = sbsa_ref_memmap[SBSA_GWDT_CONTROL].base; DeviceState *dev = qdev_new(TYPE_WDT_SBSA); SysBusDevice *s = SYS_BUS_DEVICE(dev); - int irq = sbsa_ref_irqmap[SBSA_GWDT]; + int irq = sbsa_ref_irqmap[SBSA_GWDT_WS0]; sysbus_realize_and_unref(s, &error_fatal); sysbus_mmio_map(s, 0, rbase); @@ -671,7 +670,7 @@ static void sbsa_ref_init(MachineState *machine) int n, sbsa_max_cpus; if (!cpu_type_valid(machine->cpu_type)) { - error_report("mach-virt: CPU type %s not supported", machine->cpu_type); + error_report("sbsa-ref: CPU type %s not supported", machine->cpu_type); exit(1); } @@ -692,13 +691,6 @@ static void sbsa_ref_init(MachineState *machine) firmware_loaded = sbsa_firmware_init(sms, sysmem, secure_sysmem); - if (machine->kernel_filename && firmware_loaded) { - error_report("sbsa-ref: No fw_cfg device on this machine, " - "so -kernel option is not supported when firmware loaded, " - "please load OS from hard disk instead"); - exit(1); - } - /* * This machine has EL3 enabled, external firmware should supply PSCI * implementation, so the QEMU's internal PSCI is disabled. diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c index 84d2c62c26f..0459850a93d 100644 --- a/hw/arm/smmu-common.c +++ b/hw/arm/smmu-common.c @@ -17,7 +17,6 @@ */ #include "qemu/osdep.h" -#include "exec/address-spaces.h" #include "trace.h" #include "exec/target_page.h" #include "hw/core/cpu.h" diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h index 3dac5766ca3..d1885ae3f25 100644 --- a/hw/arm/smmuv3-internal.h +++ b/hw/arm/smmuv3-internal.h @@ -570,7 +570,7 @@ static inline int pa_range(STE *ste) /* CD fields */ -#define CD_VALID(x) extract32((x)->word[0], 30, 1) +#define CD_VALID(x) extract32((x)->word[0], 31, 1) #define CD_ASID(x) extract32((x)->word[1], 16, 16) #define CD_TTB(x, sel) \ ({ \ diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c index 87056125357..01b60bee495 100644 --- a/hw/arm/smmuv3.c +++ b/hw/arm/smmuv3.c @@ -23,7 +23,6 @@ #include "migration/vmstate.h" #include "hw/qdev-core.h" #include "hw/pci/pci.h" -#include "exec/address-spaces.h" #include "cpu.h" #include "trace.h" #include "qemu/log.h" @@ -259,8 +258,9 @@ static void smmuv3_init_regs(SMMUv3State *s) s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1); s->idr[3] = FIELD_DP32(s->idr[3], IDR3, HAD, 1); - /* 4K and 64K granule support */ + /* 4K, 16K and 64K granule support */ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN4K, 1); + s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, 1); s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN64K, 1); s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, SMMU_IDR5_OAS); /* 44 bits */ @@ -503,7 +503,8 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event) tg = CD_TG(cd, i); tt->granule_sz = tg2granule(tg, i); - if ((tt->granule_sz != 12 && tt->granule_sz != 16) || CD_ENDI(cd)) { + if ((tt->granule_sz != 12 && tt->granule_sz != 14 && + tt->granule_sz != 16) || CD_ENDI(cd)) { goto bad_cd; } @@ -856,43 +857,45 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova, static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd) { - uint8_t scale = 0, num = 0, ttl = 0; - dma_addr_t addr = CMD_ADDR(cmd); + dma_addr_t end, addr = CMD_ADDR(cmd); uint8_t type = CMD_TYPE(cmd); uint16_t vmid = CMD_VMID(cmd); + uint8_t scale = CMD_SCALE(cmd); + uint8_t num = CMD_NUM(cmd); + uint8_t ttl = CMD_TTL(cmd); bool leaf = 
CMD_LEAF(cmd); uint8_t tg = CMD_TG(cmd); - uint64_t first_page = 0, last_page; - uint64_t num_pages = 1; + uint64_t num_pages; + uint8_t granule; int asid = -1; - if (tg) { - scale = CMD_SCALE(cmd); - num = CMD_NUM(cmd); - ttl = CMD_TTL(cmd); - num_pages = (num + 1) * BIT_ULL(scale); - } - if (type == SMMU_CMD_TLBI_NH_VA) { asid = CMD_ASID(cmd); } - /* Split invalidations into ^2 range invalidations */ - last_page = num_pages - 1; - while (num_pages) { - uint8_t granule = tg * 2 + 10; - uint64_t mask, count; + if (!tg) { + trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, 1, ttl, leaf); + smmuv3_inv_notifiers_iova(s, asid, addr, tg, 1); + smmu_iotlb_inv_iova(s, asid, addr, tg, 1, ttl); + return; + } + + /* RIL in use */ - mask = dma_aligned_pow2_mask(first_page, last_page, 64 - granule); - count = mask + 1; + num_pages = (num + 1) * BIT_ULL(scale); + granule = tg * 2 + 10; + + /* Split invalidations into ^2 range invalidations */ + end = addr + (num_pages << granule) - 1; - trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, count, ttl, leaf); - smmuv3_inv_notifiers_iova(s, asid, addr, tg, count); - smmu_iotlb_inv_iova(s, asid, addr, tg, count, ttl); + while (addr != end + 1) { + uint64_t mask = dma_aligned_pow2_mask(addr, end, 64); - num_pages -= count; - first_page += count; - addr += count * BIT_ULL(granule); + num_pages = (mask + 1) >> granule; + trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf); + smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages); + smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl); + addr += mask + 1; } } diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c index 6b3bf9828bc..5aab0b85657 100644 --- a/hw/arm/spitz.c +++ b/hw/arm/spitz.c @@ -30,7 +30,7 @@ #include "audio/audio.h" #include "hw/boards.h" #include "hw/sysbus.h" -#include "hw/misc/max111x.h" +#include "hw/adc/max111x.h" #include "migration/vmstate.h" #include "exec/address-spaces.h" #include "cpu.h" @@ -769,9 +769,9 @@ static void spitz_wm8750_addr(void *opaque, int line, int level) { I2CSlave *wm = (I2CSlave *) opaque; if (level) - i2c_set_slave_address(wm, SPITZ_WM_ADDRH); + i2c_slave_set_address(wm, SPITZ_WM_ADDRH); else - i2c_set_slave_address(wm, SPITZ_WM_ADDRL); + i2c_slave_set_address(wm, SPITZ_WM_ADDRL); } static void spitz_i2c_setup(PXA2xxState *cpu) @@ -1134,7 +1134,7 @@ static bool is_version_0(void *opaque, int version_id) return version_id == 0; } -static VMStateDescription vmstate_sl_nand_info = { +static const VMStateDescription vmstate_sl_nand_info = { .name = "sl-nand", .version_id = 0, .minimum_version_id = 0, @@ -1170,7 +1170,7 @@ static const TypeInfo sl_nand_info = { .class_init = sl_nand_class_init, }; -static VMStateDescription vmstate_spitz_kbd = { +static const VMStateDescription vmstate_spitz_kbd = { .name = "spitz-keyboard", .version_id = 1, .minimum_version_id = 0, diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c index 27292ec4113..78827ace6b8 100644 --- a/hw/arm/stellaris.c +++ b/hw/arm/stellaris.c @@ -26,8 +26,8 @@ #include "hw/watchdog/cmsdk-apb-watchdog.h" #include "migration/vmstate.h" #include "hw/misc/unimp.h" +#include "hw/timer/stellaris-gptm.h" #include "hw/qdev-clock.h" -#include "cpu.h" #include "qom/object.h" #define GPIO_A 0 @@ -56,306 +56,6 @@ typedef const struct { uint32_t peripherals; } stellaris_board_info; -/* General purpose timer module. 
*/ - -#define TYPE_STELLARIS_GPTM "stellaris-gptm" -OBJECT_DECLARE_SIMPLE_TYPE(gptm_state, STELLARIS_GPTM) - -struct gptm_state { - SysBusDevice parent_obj; - - MemoryRegion iomem; - uint32_t config; - uint32_t mode[2]; - uint32_t control; - uint32_t state; - uint32_t mask; - uint32_t load[2]; - uint32_t match[2]; - uint32_t prescale[2]; - uint32_t match_prescale[2]; - uint32_t rtc; - int64_t tick[2]; - struct gptm_state *opaque[2]; - QEMUTimer *timer[2]; - /* The timers have an alternate output used to trigger the ADC. */ - qemu_irq trigger; - qemu_irq irq; -}; - -static void gptm_update_irq(gptm_state *s) -{ - int level; - level = (s->state & s->mask) != 0; - qemu_set_irq(s->irq, level); -} - -static void gptm_stop(gptm_state *s, int n) -{ - timer_del(s->timer[n]); -} - -static void gptm_reload(gptm_state *s, int n, int reset) -{ - int64_t tick; - if (reset) - tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - else - tick = s->tick[n]; - - if (s->config == 0) { - /* 32-bit CountDown. */ - uint32_t count; - count = s->load[0] | (s->load[1] << 16); - tick += (int64_t)count * system_clock_scale; - } else if (s->config == 1) { - /* 32-bit RTC. 1Hz tick. */ - tick += NANOSECONDS_PER_SECOND; - } else if (s->mode[n] == 0xa) { - /* PWM mode. Not implemented. */ - } else { - qemu_log_mask(LOG_UNIMP, - "GPTM: 16-bit timer mode unimplemented: 0x%x\n", - s->mode[n]); - return; - } - s->tick[n] = tick; - timer_mod(s->timer[n], tick); -} - -static void gptm_tick(void *opaque) -{ - gptm_state **p = (gptm_state **)opaque; - gptm_state *s; - int n; - - s = *p; - n = p - s->opaque; - if (s->config == 0) { - s->state |= 1; - if ((s->control & 0x20)) { - /* Output trigger. */ - qemu_irq_pulse(s->trigger); - } - if (s->mode[0] & 1) { - /* One-shot. */ - s->control &= ~1; - } else { - /* Periodic. */ - gptm_reload(s, 0, 0); - } - } else if (s->config == 1) { - /* RTC. */ - uint32_t match; - s->rtc++; - match = s->match[0] | (s->match[1] << 16); - if (s->rtc > match) - s->rtc = 0; - if (s->rtc == 0) { - s->state |= 8; - } - gptm_reload(s, 0, 0); - } else if (s->mode[n] == 0xa) { - /* PWM mode. Not implemented. */ - } else { - qemu_log_mask(LOG_UNIMP, - "GPTM: 16-bit timer mode unimplemented: 0x%x\n", - s->mode[n]); - } - gptm_update_irq(s); -} - -static uint64_t gptm_read(void *opaque, hwaddr offset, - unsigned size) -{ - gptm_state *s = (gptm_state *)opaque; - - switch (offset) { - case 0x00: /* CFG */ - return s->config; - case 0x04: /* TAMR */ - return s->mode[0]; - case 0x08: /* TBMR */ - return s->mode[1]; - case 0x0c: /* CTL */ - return s->control; - case 0x18: /* IMR */ - return s->mask; - case 0x1c: /* RIS */ - return s->state; - case 0x20: /* MIS */ - return s->state & s->mask; - case 0x24: /* CR */ - return 0; - case 0x28: /* TAILR */ - return s->load[0] | ((s->config < 4) ? (s->load[1] << 16) : 0); - case 0x2c: /* TBILR */ - return s->load[1]; - case 0x30: /* TAMARCHR */ - return s->match[0] | ((s->config < 4) ? 
(s->match[1] << 16) : 0); - case 0x34: /* TBMATCHR */ - return s->match[1]; - case 0x38: /* TAPR */ - return s->prescale[0]; - case 0x3c: /* TBPR */ - return s->prescale[1]; - case 0x40: /* TAPMR */ - return s->match_prescale[0]; - case 0x44: /* TBPMR */ - return s->match_prescale[1]; - case 0x48: /* TAR */ - if (s->config == 1) { - return s->rtc; - } - qemu_log_mask(LOG_UNIMP, - "GPTM: read of TAR but timer read not supported\n"); - return 0; - case 0x4c: /* TBR */ - qemu_log_mask(LOG_UNIMP, - "GPTM: read of TBR but timer read not supported\n"); - return 0; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "GPTM: read at bad offset 0x02%" HWADDR_PRIx "\n", - offset); - return 0; - } -} - -static void gptm_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) -{ - gptm_state *s = (gptm_state *)opaque; - uint32_t oldval; - - /* The timers should be disabled before changing the configuration. - We take advantage of this and defer everything until the timer - is enabled. */ - switch (offset) { - case 0x00: /* CFG */ - s->config = value; - break; - case 0x04: /* TAMR */ - s->mode[0] = value; - break; - case 0x08: /* TBMR */ - s->mode[1] = value; - break; - case 0x0c: /* CTL */ - oldval = s->control; - s->control = value; - /* TODO: Implement pause. */ - if ((oldval ^ value) & 1) { - if (value & 1) { - gptm_reload(s, 0, 1); - } else { - gptm_stop(s, 0); - } - } - if (((oldval ^ value) & 0x100) && s->config >= 4) { - if (value & 0x100) { - gptm_reload(s, 1, 1); - } else { - gptm_stop(s, 1); - } - } - break; - case 0x18: /* IMR */ - s->mask = value & 0x77; - gptm_update_irq(s); - break; - case 0x24: /* CR */ - s->state &= ~value; - break; - case 0x28: /* TAILR */ - s->load[0] = value & 0xffff; - if (s->config < 4) { - s->load[1] = value >> 16; - } - break; - case 0x2c: /* TBILR */ - s->load[1] = value & 0xffff; - break; - case 0x30: /* TAMARCHR */ - s->match[0] = value & 0xffff; - if (s->config < 4) { - s->match[1] = value >> 16; - } - break; - case 0x34: /* TBMATCHR */ - s->match[1] = value >> 16; - break; - case 0x38: /* TAPR */ - s->prescale[0] = value; - break; - case 0x3c: /* TBPR */ - s->prescale[1] = value; - break; - case 0x40: /* TAPMR */ - s->match_prescale[0] = value; - break; - case 0x44: /* TBPMR */ - s->match_prescale[0] = value; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "GPTM: write at bad offset 0x02%" HWADDR_PRIx "\n", - offset); - } - gptm_update_irq(s); -} - -static const MemoryRegionOps gptm_ops = { - .read = gptm_read, - .write = gptm_write, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static const VMStateDescription vmstate_stellaris_gptm = { - .name = "stellaris_gptm", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32(config, gptm_state), - VMSTATE_UINT32_ARRAY(mode, gptm_state, 2), - VMSTATE_UINT32(control, gptm_state), - VMSTATE_UINT32(state, gptm_state), - VMSTATE_UINT32(mask, gptm_state), - VMSTATE_UNUSED(8), - VMSTATE_UINT32_ARRAY(load, gptm_state, 2), - VMSTATE_UINT32_ARRAY(match, gptm_state, 2), - VMSTATE_UINT32_ARRAY(prescale, gptm_state, 2), - VMSTATE_UINT32_ARRAY(match_prescale, gptm_state, 2), - VMSTATE_UINT32(rtc, gptm_state), - VMSTATE_INT64_ARRAY(tick, gptm_state, 2), - VMSTATE_TIMER_PTR_ARRAY(timer, gptm_state, 2), - VMSTATE_END_OF_LIST() - } -}; - -static void stellaris_gptm_init(Object *obj) -{ - DeviceState *dev = DEVICE(obj); - gptm_state *s = STELLARIS_GPTM(obj); - SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - - sysbus_init_irq(sbd, &s->irq); - qdev_init_gpio_out(dev, &s->trigger, 1); - - 
memory_region_init_io(&s->iomem, obj, &gptm_ops, s, - "gptm", 0x1000); - sysbus_init_mmio(sbd, &s->iomem); - - s->opaque[0] = s->opaque[1] = s; -} - -static void stellaris_gptm_realize(DeviceState *dev, Error **errp) -{ - gptm_state *s = STELLARIS_GPTM(dev); - s->timer[0] = timer_new_ns(QEMU_CLOCK_VIRTUAL, gptm_tick, &s->opaque[0]); - s->timer[1] = timer_new_ns(QEMU_CLOCK_VIRTUAL, gptm_tick, &s->opaque[1]); -} - /* System controller. */ #define TYPE_STELLARIS_SYS "stellaris-sys" @@ -563,17 +263,18 @@ static bool ssys_use_rcc2(ssys_state *s) */ static void ssys_calculate_system_clock(ssys_state *s, bool propagate_clock) { + int period_ns; /* * SYSDIV field specifies divisor: 0 == /1, 1 == /2, etc. Input * clock is 200MHz, which is a period of 5 ns. Dividing the clock * frequency by X is the same as multiplying the period by X. */ if (ssys_use_rcc2(s)) { - system_clock_scale = 5 * (((s->rcc2 >> 23) & 0x3f) + 1); + period_ns = 5 * (((s->rcc2 >> 23) & 0x3f) + 1); } else { - system_clock_scale = 5 * (((s->rcc >> 23) & 0xf) + 1); + period_ns = 5 * (((s->rcc >> 23) & 0xf) + 1); } - clock_set_ns(s->sysclk, system_clock_scale); + clock_set_ns(s->sysclk, period_ns); if (propagate_clock) { clock_propagate(s->sysclk); } @@ -756,33 +457,6 @@ static void stellaris_sys_instance_init(Object *obj) s->sysclk = qdev_init_clock_out(DEVICE(s), "SYSCLK"); } -static DeviceState *stellaris_sys_init(uint32_t base, qemu_irq irq, - stellaris_board_info *board, - uint8_t *macaddr) -{ - DeviceState *dev = qdev_new(TYPE_STELLARIS_SYS); - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - - /* Most devices come preprogrammed with a MAC address in the user data. */ - qdev_prop_set_uint32(dev, "user0", - macaddr[0] | (macaddr[1] << 8) | (macaddr[2] << 16)); - qdev_prop_set_uint32(dev, "user1", - macaddr[3] | (macaddr[4] << 8) | (macaddr[5] << 16)); - qdev_prop_set_uint32(dev, "did0", board->did0); - qdev_prop_set_uint32(dev, "did1", board->did1); - qdev_prop_set_uint32(dev, "dc0", board->dc0); - qdev_prop_set_uint32(dev, "dc1", board->dc1); - qdev_prop_set_uint32(dev, "dc2", board->dc2); - qdev_prop_set_uint32(dev, "dc3", board->dc3); - qdev_prop_set_uint32(dev, "dc4", board->dc4); - - sysbus_realize_and_unref(sbd, &error_fatal); - sysbus_mmio_map(sbd, 0, base); - sysbus_connect_irq(sbd, 0, irq); - - return dev; -} - /* I2C controller. */ #define TYPE_STELLARIS_I2C "stellaris-i2c" @@ -1350,6 +1024,7 @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board) DeviceState *ssys_dev; int i; int j; + const uint8_t *macaddr; MemoryRegion *sram = g_new(MemoryRegion, 1); MemoryRegion *flash = g_new(MemoryRegion, 1); @@ -1367,15 +1042,42 @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board) &error_fatal); memory_region_add_subregion(system_memory, 0x20000000, sram); + /* + * Create the system-registers object early, because we will + * need its sysclk output. + */ + ssys_dev = qdev_new(TYPE_STELLARIS_SYS); + /* Most devices come preprogrammed with a MAC address in the user data. 
*/ + macaddr = nd_table[0].macaddr.a; + qdev_prop_set_uint32(ssys_dev, "user0", + macaddr[0] | (macaddr[1] << 8) | (macaddr[2] << 16)); + qdev_prop_set_uint32(ssys_dev, "user1", + macaddr[3] | (macaddr[4] << 8) | (macaddr[5] << 16)); + qdev_prop_set_uint32(ssys_dev, "did0", board->did0); + qdev_prop_set_uint32(ssys_dev, "did1", board->did1); + qdev_prop_set_uint32(ssys_dev, "dc0", board->dc0); + qdev_prop_set_uint32(ssys_dev, "dc1", board->dc1); + qdev_prop_set_uint32(ssys_dev, "dc2", board->dc2); + qdev_prop_set_uint32(ssys_dev, "dc3", board->dc3); + qdev_prop_set_uint32(ssys_dev, "dc4", board->dc4); + sysbus_realize_and_unref(SYS_BUS_DEVICE(ssys_dev), &error_fatal); + nvic = qdev_new(TYPE_ARMV7M); qdev_prop_set_uint32(nvic, "num-irq", NUM_IRQ_LINES); qdev_prop_set_string(nvic, "cpu-type", ms->cpu_type); qdev_prop_set_bit(nvic, "enable-bitband", true); + qdev_connect_clock_in(nvic, "cpuclk", + qdev_get_clock_out(ssys_dev, "SYSCLK")); + /* This SoC does not connect the systick reference clock */ object_property_set_link(OBJECT(nvic), "memory", OBJECT(get_system_memory()), &error_abort); /* This will exit with an error if the user passed us a bad cpu_type */ sysbus_realize_and_unref(SYS_BUS_DEVICE(nvic), &error_fatal); + /* Now we can wire up the IRQ and MMIO of the system registers */ + sysbus_mmio_map(SYS_BUS_DEVICE(ssys_dev), 0, 0x400fe000); + sysbus_connect_irq(SYS_BUS_DEVICE(ssys_dev), 0, qdev_get_gpio_in(nvic, 28)); + if (board->dc1 & (1 << 16)) { dev = sysbus_create_varargs(TYPE_STELLARIS_ADC, 0x40038000, qdev_get_gpio_in(nvic, 14), @@ -1389,19 +1091,21 @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board) } for (i = 0; i < 4; i++) { if (board->dc2 & (0x10000 << i)) { - dev = sysbus_create_simple(TYPE_STELLARIS_GPTM, - 0x40030000 + i * 0x1000, - qdev_get_gpio_in(nvic, timer_irq[i])); + SysBusDevice *sbd; + + dev = qdev_new(TYPE_STELLARIS_GPTM); + sbd = SYS_BUS_DEVICE(dev); + qdev_connect_clock_in(dev, "clk", + qdev_get_clock_out(ssys_dev, "SYSCLK")); + sysbus_realize_and_unref(sbd, &error_fatal); + sysbus_mmio_map(sbd, 0, 0x40030000 + i * 0x1000); + sysbus_connect_irq(sbd, 0, qdev_get_gpio_in(nvic, timer_irq[i])); /* TODO: This is incorrect, but we get away with it because the ADC output is only ever pulsed. */ qdev_connect_gpio_out(dev, 0, adc); } } - ssys_dev = stellaris_sys_init(0x400fe000, qdev_get_gpio_in(nvic, 28), - board, nd_table[0].macaddr.a); - - if (board->dc1 & (1 << 3)) { /* watchdog present */ dev = qdev_new(TYPE_LUMINARY_WATCHDOG); @@ -1454,13 +1158,67 @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board) DeviceState *sddev; DeviceState *ssddev; - /* Some boards have both an OLED controller and SD card connected to + /* + * Some boards have both an OLED controller and SD card connected to * the same SSI port, with the SD card chip select connected to a * GPIO pin. Technically the OLED chip select is connected to the * SSI Fss pin. We do not bother emulating that as both devices * should never be selected simultaneously, and our OLED controller * ignores stray 0xff commands that occur when deselecting the SD * card. + * + * The h/w wiring is: + * - GPIO pin D0 is wired to the active-low SD card chip select + * - GPIO pin A3 is wired to the active-low OLED chip select + * - The SoC wiring of the PL061 "auxiliary function" for A3 is + * SSI0Fss ("frame signal"), which is an output from the SoC's + * SSI controller. The SSI controller takes SSI0Fss low when it + * transmits a frame, so it can work as a chip-select signal. 
+ * - GPIO A4 is aux-function SSI0Rx, and wired to the SD card Tx + * (the OLED never sends data to the CPU, so no wiring needed) + * - GPIO A5 is aux-function SSI0Tx, and wired to the SD card Rx + * and the OLED display-data-in + * - GPIO A2 is aux-function SSI0Clk, wired to SD card and OLED + * serial-clock input + * So a guest that wants to use the OLED can configure the PL061 + * to make pins A2, A3, A5 aux-function, so they are connected + * directly to the SSI controller. When the SSI controller sends + * data it asserts SSI0Fss which selects the OLED. + * A guest that wants to use the SD card configures A2, A4 and A5 + * as aux-function, but leaves A3 as a software-controlled GPIO + * line. It asserts the SD card chip-select by using the PL061 + * to control pin D0, and lets the SSI controller handle Clk, Tx + * and Rx. (The SSI controller asserts Fss during tx cycles as + * usual, but because A3 is not set to aux-function this is not + * forwarded to the OLED, and so the OLED stays unselected.) + * + * The QEMU implementation instead is: + * - GPIO pin D0 is wired to the active-low SD card chip select, + * and also to the OLED chip-select which is implemented + * as *active-high* + * - SSI controller signals go to the devices regardless of + * whether the guest programs A2, A4, A5 as aux-function or not + * + * The problem with this implementation is if the guest doesn't + * care about the SD card and only uses the OLED. In that case it + * may choose never to do anything with D0 (leaving it in its + * default floating state, which reliably leaves the card disabled + * because an SD card has a pullup on CS within the card itself), + * and only set up A2, A3, A5. This for us would mean the OLED + * never gets the chip-select assert it needs. We work around + * this with a manual raise of D0 here (despite board creation + * code being the wrong place to raise IRQ lines) to put the OLED + * into an initially selected state. 
+ * + * In theory the right way to model this would be: + * - Implement aux-function support in the PL061, with an + * extra set of AFIN and AFOUT GPIO lines (set up so that + * if a GPIO line is in auxfn mode the main GPIO in and out + * track the AFIN and AFOUT lines) + * - Wire the AFOUT for D0 up to either a line from the + * SSI controller that's pulled low around every transmit, + * or at least to an always-0 line here on the board + * - Make the ssd0323 OLED controller chipselect active-low */ bus = qdev_get_child_bus(dev, "ssi"); @@ -1589,22 +1347,6 @@ static const TypeInfo stellaris_i2c_info = { .class_init = stellaris_i2c_class_init, }; -static void stellaris_gptm_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->vmsd = &vmstate_stellaris_gptm; - dc->realize = stellaris_gptm_realize; -} - -static const TypeInfo stellaris_gptm_info = { - .name = TYPE_STELLARIS_GPTM, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(gptm_state), - .instance_init = stellaris_gptm_init, - .class_init = stellaris_gptm_class_init, -}; - static void stellaris_adc_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -1643,7 +1385,6 @@ static const TypeInfo stellaris_sys_info = { static void stellaris_register_types(void) { type_register_static(&stellaris_i2c_info); - type_register_static(&stellaris_gptm_info); type_register_static(&stellaris_adc_info); type_register_static(&stellaris_sys_info); } diff --git a/hw/arm/stm32f100_soc.c b/hw/arm/stm32f100_soc.c new file mode 100644 index 00000000000..f7b344ba9fb --- /dev/null +++ b/hw/arm/stm32f100_soc.c @@ -0,0 +1,209 @@ +/* + * STM32F100 SoC + * + * Copyright (c) 2021 Alexandre Iooss + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "hw/arm/boot.h" +#include "exec/address-spaces.h" +#include "hw/arm/stm32f100_soc.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-clock.h" +#include "hw/misc/unimp.h" +#include "sysemu/sysemu.h" + +/* stm32f100_soc implementation is derived from stm32f205_soc */ + +static const uint32_t usart_addr[STM_NUM_USARTS] = { 0x40013800, 0x40004400, + 0x40004800 }; +static const uint32_t spi_addr[STM_NUM_SPIS] = { 0x40013000, 0x40003800 }; + +static const int usart_irq[STM_NUM_USARTS] = {37, 38, 39}; +static const int spi_irq[STM_NUM_SPIS] = {35, 36}; + +static void stm32f100_soc_initfn(Object *obj) +{ + STM32F100State *s = STM32F100_SOC(obj); + int i; + + object_initialize_child(obj, "armv7m", &s->armv7m, TYPE_ARMV7M); + + for (i = 0; i < STM_NUM_USARTS; i++) { + object_initialize_child(obj, "usart[*]", &s->usart[i], + TYPE_STM32F2XX_USART); + } + + for (i = 0; i < STM_NUM_SPIS; i++) { + object_initialize_child(obj, "spi[*]", &s->spi[i], TYPE_STM32F2XX_SPI); + } + + s->sysclk = qdev_init_clock_in(DEVICE(s), "sysclk", NULL, NULL, 0); + s->refclk = qdev_init_clock_in(DEVICE(s), "refclk", NULL, NULL, 0); +} + +static void stm32f100_soc_realize(DeviceState *dev_soc, Error **errp) +{ + STM32F100State *s = STM32F100_SOC(dev_soc); + DeviceState *dev, *armv7m; + SysBusDevice *busdev; + int i; + + MemoryRegion *system_memory = get_system_memory(); + + /* + * We use s->refclk internally and only define it with qdev_init_clock_in() + * so it is correctly parented and not leaked on an init/deinit; it is not + * intended as an externally exposed clock. + */ + if (clock_has_source(s->refclk)) { + error_setg(errp, "refclk clock must not be wired up by the board code"); + return; + } + + if (!clock_has_source(s->sysclk)) { + error_setg(errp, "sysclk clock must be wired up by the board code"); + return; + } + + /* + * TODO: ideally we should model the SoC RCC and its ability to + * change the sysclk frequency and define different sysclk sources. 
+ */ + + /* The refclk always runs at frequency HCLK / 8 */ + clock_set_mul_div(s->refclk, 8, 1); + clock_set_source(s->refclk, s->sysclk); + + /* + * Init flash region + * Flash starts at 0x08000000 and then is aliased to boot memory at 0x0 + */ + memory_region_init_rom(&s->flash, OBJECT(dev_soc), "STM32F100.flash", + FLASH_SIZE, &error_fatal); + memory_region_init_alias(&s->flash_alias, OBJECT(dev_soc), + "STM32F100.flash.alias", &s->flash, 0, FLASH_SIZE); + memory_region_add_subregion(system_memory, FLASH_BASE_ADDRESS, &s->flash); + memory_region_add_subregion(system_memory, 0, &s->flash_alias); + + /* Init SRAM region */ + memory_region_init_ram(&s->sram, NULL, "STM32F100.sram", SRAM_SIZE, + &error_fatal); + memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, &s->sram); + + /* Init ARMv7m */ + armv7m = DEVICE(&s->armv7m); + qdev_prop_set_uint32(armv7m, "num-irq", 61); + qdev_prop_set_string(armv7m, "cpu-type", s->cpu_type); + qdev_prop_set_bit(armv7m, "enable-bitband", true); + qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk); + qdev_connect_clock_in(armv7m, "refclk", s->refclk); + object_property_set_link(OBJECT(&s->armv7m), "memory", + OBJECT(get_system_memory()), &error_abort); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->armv7m), errp)) { + return; + } + + /* Attach UART (uses USART registers) and USART controllers */ + for (i = 0; i < STM_NUM_USARTS; i++) { + dev = DEVICE(&(s->usart[i])); + qdev_prop_set_chr(dev, "chardev", serial_hd(i)); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->usart[i]), errp)) { + return; + } + busdev = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(busdev, 0, usart_addr[i]); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, usart_irq[i])); + } + + /* SPI 1 and 2 */ + for (i = 0; i < STM_NUM_SPIS; i++) { + dev = DEVICE(&(s->spi[i])); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->spi[i]), errp)) { + return; + } + busdev = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(busdev, 0, spi_addr[i]); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, spi_irq[i])); + } + + create_unimplemented_device("timer[2]", 0x40000000, 0x400); + create_unimplemented_device("timer[3]", 0x40000400, 0x400); + create_unimplemented_device("timer[4]", 0x40000800, 0x400); + create_unimplemented_device("timer[6]", 0x40001000, 0x400); + create_unimplemented_device("timer[7]", 0x40001400, 0x400); + create_unimplemented_device("RTC", 0x40002800, 0x400); + create_unimplemented_device("WWDG", 0x40002C00, 0x400); + create_unimplemented_device("IWDG", 0x40003000, 0x400); + create_unimplemented_device("I2C1", 0x40005400, 0x400); + create_unimplemented_device("I2C2", 0x40005800, 0x400); + create_unimplemented_device("BKP", 0x40006C00, 0x400); + create_unimplemented_device("PWR", 0x40007000, 0x400); + create_unimplemented_device("DAC", 0x40007400, 0x400); + create_unimplemented_device("CEC", 0x40007800, 0x400); + create_unimplemented_device("AFIO", 0x40010000, 0x400); + create_unimplemented_device("EXTI", 0x40010400, 0x400); + create_unimplemented_device("GPIOA", 0x40010800, 0x400); + create_unimplemented_device("GPIOB", 0x40010C00, 0x400); + create_unimplemented_device("GPIOC", 0x40011000, 0x400); + create_unimplemented_device("GPIOD", 0x40011400, 0x400); + create_unimplemented_device("GPIOE", 0x40011800, 0x400); + create_unimplemented_device("ADC1", 0x40012400, 0x400); + create_unimplemented_device("timer[1]", 0x40012C00, 0x400); + create_unimplemented_device("timer[15]", 0x40014000, 0x400); + create_unimplemented_device("timer[16]", 0x40014400, 0x400); + create_unimplemented_device("timer[17]", 
0x40014800, 0x400); + create_unimplemented_device("DMA", 0x40020000, 0x400); + create_unimplemented_device("RCC", 0x40021000, 0x400); + create_unimplemented_device("Flash Int", 0x40022000, 0x400); + create_unimplemented_device("CRC", 0x40023000, 0x400); +} + +static Property stm32f100_soc_properties[] = { + DEFINE_PROP_STRING("cpu-type", STM32F100State, cpu_type), + DEFINE_PROP_END_OF_LIST(), +}; + +static void stm32f100_soc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = stm32f100_soc_realize; + device_class_set_props(dc, stm32f100_soc_properties); +} + +static const TypeInfo stm32f100_soc_info = { + .name = TYPE_STM32F100_SOC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(STM32F100State), + .instance_init = stm32f100_soc_initfn, + .class_init = stm32f100_soc_class_init, +}; + +static void stm32f100_soc_types(void) +{ + type_register_static(&stm32f100_soc_info); +} + +type_init(stm32f100_soc_types) diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c index 9cd41bf56da..c6b75a381d9 100644 --- a/hw/arm/stm32f205_soc.c +++ b/hw/arm/stm32f205_soc.c @@ -29,6 +29,7 @@ #include "exec/address-spaces.h" #include "hw/arm/stm32f205_soc.h" #include "hw/qdev-properties.h" +#include "hw/qdev-clock.h" #include "sysemu/sysemu.h" /* At the moment only Timer 2 to 5 are modelled */ @@ -74,6 +75,9 @@ static void stm32f205_soc_initfn(Object *obj) for (i = 0; i < STM_NUM_SPIS; i++) { object_initialize_child(obj, "spi[*]", &s->spi[i], TYPE_STM32F2XX_SPI); } + + s->sysclk = qdev_init_clock_in(DEVICE(s), "sysclk", NULL, NULL, 0); + s->refclk = qdev_init_clock_in(DEVICE(s), "refclk", NULL, NULL, 0); } static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) @@ -84,26 +88,49 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) int i; MemoryRegion *system_memory = get_system_memory(); - MemoryRegion *sram = g_new(MemoryRegion, 1); - MemoryRegion *flash = g_new(MemoryRegion, 1); - MemoryRegion *flash_alias = g_new(MemoryRegion, 1); - memory_region_init_rom(flash, OBJECT(dev_soc), "STM32F205.flash", + /* + * We use s->refclk internally and only define it with qdev_init_clock_in() + * so it is correctly parented and not leaked on an init/deinit; it is not + * intended as an externally exposed clock. + */ + if (clock_has_source(s->refclk)) { + error_setg(errp, "refclk clock must not be wired up by the board code"); + return; + } + + if (!clock_has_source(s->sysclk)) { + error_setg(errp, "sysclk clock must be wired up by the board code"); + return; + } + + /* + * TODO: ideally we should model the SoC RCC and its ability to + * change the sysclk frequency and define different sysclk sources. 
+ */ + + /* The refclk always runs at frequency HCLK / 8 */ + clock_set_mul_div(s->refclk, 8, 1); + clock_set_source(s->refclk, s->sysclk); + + memory_region_init_rom(&s->flash, OBJECT(dev_soc), "STM32F205.flash", FLASH_SIZE, &error_fatal); - memory_region_init_alias(flash_alias, OBJECT(dev_soc), - "STM32F205.flash.alias", flash, 0, FLASH_SIZE); + memory_region_init_alias(&s->flash_alias, OBJECT(dev_soc), + "STM32F205.flash.alias", &s->flash, 0, FLASH_SIZE); - memory_region_add_subregion(system_memory, FLASH_BASE_ADDRESS, flash); - memory_region_add_subregion(system_memory, 0, flash_alias); + memory_region_add_subregion(system_memory, FLASH_BASE_ADDRESS, &s->flash); + memory_region_add_subregion(system_memory, 0, &s->flash_alias); - memory_region_init_ram(sram, NULL, "STM32F205.sram", SRAM_SIZE, + memory_region_init_ram(&s->sram, NULL, "STM32F205.sram", SRAM_SIZE, &error_fatal); - memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, sram); + memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, &s->sram); armv7m = DEVICE(&s->armv7m); qdev_prop_set_uint32(armv7m, "num-irq", 96); qdev_prop_set_string(armv7m, "cpu-type", s->cpu_type); qdev_prop_set_bit(armv7m, "enable-bitband", true); + qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk); + qdev_connect_clock_in(armv7m, "refclk", s->refclk); object_property_set_link(OBJECT(&s->armv7m), "memory", OBJECT(get_system_memory()), &error_abort); if (!sysbus_realize(SYS_BUS_DEVICE(&s->armv7m), errp)) { diff --git a/hw/arm/stm32f405_soc.c b/hw/arm/stm32f405_soc.c index cb04c111987..0019b7f4785 100644 --- a/hw/arm/stm32f405_soc.c +++ b/hw/arm/stm32f405_soc.c @@ -28,6 +28,7 @@ #include "exec/address-spaces.h" #include "sysemu/sysemu.h" #include "hw/arm/stm32f405_soc.h" +#include "hw/qdev-clock.h" #include "hw/misc/unimp.h" #define SYSCFG_ADD 0x40013800 @@ -80,6 +81,9 @@ static void stm32f405_soc_initfn(Object *obj) } object_initialize_child(obj, "exti", &s->exti, TYPE_STM32F4XX_EXTI); + + s->sysclk = qdev_init_clock_in(DEVICE(s), "sysclk", NULL, NULL, 0); + s->refclk = qdev_init_clock_in(DEVICE(s), "refclk", NULL, NULL, 0); } static void stm32f405_soc_realize(DeviceState *dev_soc, Error **errp) @@ -91,6 +95,30 @@ static void stm32f405_soc_realize(DeviceState *dev_soc, Error **errp) Error *err = NULL; int i; + /* + * We use s->refclk internally and only define it with qdev_init_clock_in() + * so it is correctly parented and not leaked on an init/deinit; it is not + * intended as an externally exposed clock. + */ + if (clock_has_source(s->refclk)) { + error_setg(errp, "refclk clock must not be wired up by the board code"); + return; + } + + if (!clock_has_source(s->sysclk)) { + error_setg(errp, "sysclk clock must be wired up by the board code"); + return; + } + + /* + * TODO: ideally we should model the SoC RCC and its ability to + * change the sysclk frequency and define different sysclk sources. 
+ */ + + /* The refclk always runs at frequency HCLK / 8 */ + clock_set_mul_div(s->refclk, 8, 1); + clock_set_source(s->refclk, s->sysclk); + memory_region_init_rom(&s->flash, OBJECT(dev_soc), "STM32F405.flash", FLASH_SIZE, &err); if (err != NULL) { @@ -116,6 +144,8 @@ static void stm32f405_soc_realize(DeviceState *dev_soc, Error **errp) qdev_prop_set_uint32(armv7m, "num-irq", 96); qdev_prop_set_string(armv7m, "cpu-type", s->cpu_type); qdev_prop_set_bit(armv7m, "enable-bitband", true); + qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk); + qdev_connect_clock_in(armv7m, "refclk", s->refclk); object_property_set_link(OBJECT(&s->armv7m), "memory", OBJECT(system_memory), &error_abort); if (!sysbus_realize(SYS_BUS_DEVICE(&s->armv7m), errp)) { diff --git a/hw/arm/stm32vldiscovery.c b/hw/arm/stm32vldiscovery.c new file mode 100644 index 00000000000..04036da3ee0 --- /dev/null +++ b/hw/arm/stm32vldiscovery.c @@ -0,0 +1,65 @@ +/* + * ST STM32VLDISCOVERY machine + * + * Copyright (c) 2021 Alexandre Iooss + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/boards.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-clock.h" +#include "qemu/error-report.h" +#include "hw/arm/stm32f100_soc.h" +#include "hw/arm/boot.h" + +/* stm32vldiscovery implementation is derived from netduinoplus2 */ + +/* Main SYSCLK frequency in Hz (24MHz) */ +#define SYSCLK_FRQ 24000000ULL + +static void stm32vldiscovery_init(MachineState *machine) +{ + DeviceState *dev; + Clock *sysclk; + + /* This clock doesn't need migration because it is fixed-frequency */ + sysclk = clock_new(OBJECT(machine), "SYSCLK"); + clock_set_hz(sysclk, SYSCLK_FRQ); + + dev = qdev_new(TYPE_STM32F100_SOC); + qdev_prop_set_string(dev, "cpu-type", ARM_CPU_TYPE_NAME("cortex-m3")); + qdev_connect_clock_in(dev, "sysclk", sysclk); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + + armv7m_load_kernel(ARM_CPU(first_cpu), + machine->kernel_filename, + FLASH_SIZE); +} + +static void stm32vldiscovery_machine_init(MachineClass *mc) +{ + mc->desc = "ST STM32VLDISCOVERY (Cortex-M3)"; + mc->init = stm32vldiscovery_init; +} + +DEFINE_MACHINE("stm32vldiscovery", stm32vldiscovery_machine_init) diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c index c7ca54bceaa..939a57dda55 100644 --- a/hw/arm/strongarm.c +++ b/hw/arm/strongarm.c @@ -30,7 +30,6 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "cpu.h" -#include "hw/boards.h" #include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" @@ -207,7 +206,7 @@ static int strongarm_pic_post_load(void *opaque, int version_id) return 0; } -static VMStateDescription vmstate_strongarm_pic_regs = { +static const VMStateDescription vmstate_strongarm_pic_regs = { .name = "strongarm_pic", .version_id = 0, .minimum_version_id = 0, diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c index 6b6906f4cfc..48c5fe9bf18 100644 --- a/hw/arm/sysbus-fdt.c +++ b/hw/arm/sysbus-fdt.c @@ -437,6 +437,7 @@ static bool vfio_platform_match(SysBusDevice *sbdev, #endif /* CONFIG_LINUX */ +#ifdef CONFIG_TPM /* * add_tpm_tis_fdt_node: Create a DT node for TPM TIS * @@ -467,6 +468,7 @@ static int add_tpm_tis_fdt_node(SysBusDevice *sbdev, void *opaque) g_free(nodename); return 0; } +#endif static int no_fdt_node(SysBusDevice *sbdev, void *opaque) { @@ -488,7 +490,9 @@ static const BindingEntry bindings[] = { TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node), VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node), #endif +#ifdef CONFIG_TPM TYPE_BINDING(TYPE_TPM_TIS_SYSBUS, add_tpm_tis_fdt_node), +#endif TYPE_BINDING(TYPE_RAMFB_DEVICE, no_fdt_node), TYPE_BINDING("", NULL), /* last element */ }; diff --git a/hw/arm/trace-events b/hw/arm/trace-events index b79a91af5fb..2dee296c8fb 100644 --- a/hw/arm/trace-events +++ b/hw/arm/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # virt-acpi-build.c virt_acpi_setup(void) "No fw cfg or ACPI disabled. Bailing out." 
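The clock conversions in the stm32f100/f205/f405 SoC hunks above and the stm32vldiscovery board all follow one pattern: the SoC declares sysclk/refclk as clock inputs at init time, refuses to realize unless the board wired sysclk (and did not wire refclk), derives refclk as HCLK/8, and forwards both to the armv7m container; the board merely creates a fixed-frequency clock and connects it. The following is a minimal sketch of that pattern, not part of the patch series; it assumes only the qdev clock API ("hw/qdev-clock.h") calls already used in the hunks above, and the MySoC/MYSOC/TYPE_MYSOC and myboard names plus the struct boilerplate are hypothetical placeholders for illustration.

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/sysbus.h"
#include "hw/qdev-clock.h"
#include "hw/arm/armv7m.h"
#include "hw/boards.h"

#define TYPE_MYSOC "mysoc"                     /* hypothetical example device */
OBJECT_DECLARE_SIMPLE_TYPE(MySoCState, MYSOC)

struct MySoCState {
    SysBusDevice parent_obj;
    ARMv7MState armv7m;
    Clock *sysclk;
    Clock *refclk;
};

static void mysoc_init(Object *obj)
{
    MySoCState *s = MYSOC(obj);

    /* Clock inputs with no callback: the SoC only consumes them */
    s->sysclk = qdev_init_clock_in(DEVICE(s), "sysclk", NULL, NULL, 0);
    s->refclk = qdev_init_clock_in(DEVICE(s), "refclk", NULL, NULL, 0);
}

static void mysoc_realize(DeviceState *dev_soc, Error **errp)
{
    MySoCState *s = MYSOC(dev_soc);

    /* refclk is internal only; the board must not wire it up */
    if (clock_has_source(s->refclk)) {
        error_setg(errp, "refclk clock must not be wired up by the board code");
        return;
    }
    /* sysclk must be provided by the board */
    if (!clock_has_source(s->sysclk)) {
        error_setg(errp, "sysclk clock must be wired up by the board code");
        return;
    }

    /* refclk tracks whatever the board feeds in, divided by 8 (HCLK / 8) */
    clock_set_mul_div(s->refclk, 8, 1);
    clock_set_source(s->refclk, s->sysclk);

    /* Hand both clocks to the armv7m container */
    qdev_connect_clock_in(DEVICE(&s->armv7m), "cpuclk", s->sysclk);
    qdev_connect_clock_in(DEVICE(&s->armv7m), "refclk", s->refclk);
    /* ... realize armv7m and the rest of the SoC here ... */
}

/* Board side: a fixed-frequency source, as stm32vldiscovery does above */
static void myboard_init(MachineState *machine)
{
    Clock *sysclk = clock_new(OBJECT(machine), "SYSCLK");
    DeviceState *soc;

    /* Fixed-frequency clock, so it does not need migration */
    clock_set_hz(sysclk, 24000000);
    soc = qdev_new(TYPE_MYSOC);
    qdev_connect_clock_in(soc, "sysclk", sysclk);
    sysbus_realize_and_unref(SYS_BUS_DEVICE(soc), &error_fatal);
}

The same split explains the error checks: fixed-clock SoCs such as nrf51 reject a board-wired sysclk and set the frequency themselves, whereas these STM32 SoCs require the board to supply it because different boards run the core at different speeds.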
diff --git a/hw/arm/versatilepb.c b/hw/arm/versatilepb.c index 1ea55346264..575399c4fc9 100644 --- a/hw/arm/versatilepb.c +++ b/hw/arm/versatilepb.c @@ -21,7 +21,6 @@ #include "hw/i2c/arm_sbcon_i2c.h" #include "hw/irq.h" #include "hw/boards.h" -#include "exec/address-spaces.h" #include "hw/block/flash.h" #include "qemu/error-report.h" #include "hw/char/pl011.h" diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c index 326a1a6db52..58481c07629 100644 --- a/hw/arm/vexpress.c +++ b/hw/arm/vexpress.c @@ -35,7 +35,6 @@ #include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/loader.h" -#include "exec/address-spaces.h" #include "hw/block/flash.h" #include "sysemu/device_tree.h" #include "qemu/error-report.h" diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 60fe2e65a76..674f9026525 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -44,6 +44,7 @@ #include "hw/acpi/tpm.h" #include "hw/pci/pcie_host.h" #include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" #include "hw/pci-host/gpex.h" #include "hw/arm/virt.h" #include "hw/mem/nvdimm.h" @@ -205,6 +206,7 @@ static void acpi_dsdt_add_gpio(Aml *scope, const MemMapEntry *gpio_memmap, aml_append(scope, dev); } +#ifdef CONFIG_TPM static void acpi_dsdt_add_tpm(Aml *scope, VirtMachineState *vms) { PlatformBusDevice *pbus = PLATFORM_BUS_DEVICE(vms->platform_bus_dev); @@ -236,180 +238,317 @@ static void acpi_dsdt_add_tpm(Aml *scope, VirtMachineState *vms) aml_append(dev, aml_name_decl("_CRS", crs)); aml_append(scope, dev); } +#endif +#define ID_MAPPING_ENTRY_SIZE 20 +#define SMMU_V3_ENTRY_SIZE 68 +#define ROOT_COMPLEX_ENTRY_SIZE 36 +#define IORT_NODE_OFFSET 48 + +static void build_iort_id_mapping(GArray *table_data, uint32_t input_base, + uint32_t id_count, uint32_t out_ref) +{ + /* Table 4 ID mapping format */ + build_append_int_noprefix(table_data, input_base, 4); /* Input base */ + build_append_int_noprefix(table_data, id_count, 4); /* Number of IDs */ + build_append_int_noprefix(table_data, input_base, 4); /* Output base */ + build_append_int_noprefix(table_data, out_ref, 4); /* Output Reference */ + /* Flags */ + build_append_int_noprefix(table_data, 0 /* Single mapping (disabled) */, 4); +} + +struct AcpiIortIdMapping { + uint32_t input_base; + uint32_t id_count; +}; +typedef struct AcpiIortIdMapping AcpiIortIdMapping; + +/* Build the iort ID mapping to SMMUv3 for a given PCI host bridge */ +static int +iort_host_bridges(Object *obj, void *opaque) +{ + GArray *idmap_blob = opaque; + + if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { + PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; + + if (bus && !pci_bus_bypass_iommu(bus)) { + int min_bus, max_bus; + + pci_bus_range(bus, &min_bus, &max_bus); + + AcpiIortIdMapping idmap = { + .input_base = min_bus << 8, + .id_count = (max_bus - min_bus + 1) << 8, + }; + g_array_append_val(idmap_blob, idmap); + } + } + + return 0; +} + +static int iort_idmap_compare(gconstpointer a, gconstpointer b) +{ + AcpiIortIdMapping *idmap_a = (AcpiIortIdMapping *)a; + AcpiIortIdMapping *idmap_b = (AcpiIortIdMapping *)b; + + return idmap_a->input_base - idmap_b->input_base; +} + +/* + * Input Output Remapping Table (IORT) + * Conforms to "IO Remapping Table System Software on ARM Platforms", + * Document number: ARM DEN 0049E.b, Feb 2021 + */ static void build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) { - int nb_nodes, iort_start = table_data->len; + int i, nb_nodes, rc_mapping_count; + const uint32_t iort_node_offset = IORT_NODE_OFFSET; + size_t node_size, 
smmu_offset = 0; AcpiIortIdMapping *idmap; - AcpiIortItsGroup *its; - AcpiIortTable *iort; - AcpiIortSmmu3 *smmu; - size_t node_size, iort_node_offset, iort_length, smmu_offset = 0; - AcpiIortRC *rc; + uint32_t id = 0; + GArray *smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping)); + GArray *its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping)); - iort = acpi_data_push(table_data, sizeof(*iort)); + AcpiTable table = { .sig = "IORT", .rev = 3, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; + /* Table 2 The IORT */ + acpi_table_begin(&table, table_data); if (vms->iommu == VIRT_IOMMU_SMMUV3) { + AcpiIortIdMapping next_range = {0}; + + object_child_foreach_recursive(object_get_root(), + iort_host_bridges, smmu_idmaps); + + /* Sort the smmu idmap by input_base */ + g_array_sort(smmu_idmaps, iort_idmap_compare); + + /* + * Split the whole RIDs by mapping from RC to SMMU, + * build the ID mapping from RC to ITS directly. + */ + for (i = 0; i < smmu_idmaps->len; i++) { + idmap = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i); + + if (next_range.input_base < idmap->input_base) { + next_range.id_count = idmap->input_base - next_range.input_base; + g_array_append_val(its_idmaps, next_range); + } + + next_range.input_base = idmap->input_base + idmap->id_count; + } + + /* Append the last RC -> ITS ID mapping */ + if (next_range.input_base < 0xFFFF) { + next_range.id_count = 0xFFFF - next_range.input_base; + g_array_append_val(its_idmaps, next_range); + } + nb_nodes = 3; /* RC, ITS, SMMUv3 */ + rc_mapping_count = smmu_idmaps->len + its_idmaps->len; } else { nb_nodes = 2; /* RC, ITS */ + rc_mapping_count = 1; } - - iort_length = sizeof(*iort); - iort->node_count = cpu_to_le32(nb_nodes); - /* - * Use a copy in case table_data->data moves during acpi_data_push - * operations. 
- */ - iort_node_offset = sizeof(*iort); - iort->node_offset = cpu_to_le32(iort_node_offset); - - /* ITS group node */ - node_size = sizeof(*its) + sizeof(uint32_t); - iort_length += node_size; - its = acpi_data_push(table_data, node_size); - - its->type = ACPI_IORT_NODE_ITS_GROUP; - its->length = cpu_to_le16(node_size); - its->its_count = cpu_to_le32(1); - its->identifiers[0] = 0; /* MADT translation_id */ + /* Number of IORT Nodes */ + build_append_int_noprefix(table_data, nb_nodes, 4); + + /* Offset to Array of IORT Nodes */ + build_append_int_noprefix(table_data, IORT_NODE_OFFSET, 4); + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ + + /* Table 12 ITS Group Format */ + build_append_int_noprefix(table_data, 0 /* ITS Group */, 1); /* Type */ + node_size = 20 /* fixed header size */ + 4 /* 1 GIC ITS Identifier */; + build_append_int_noprefix(table_data, node_size, 2); /* Length */ + build_append_int_noprefix(table_data, 1, 1); /* Revision */ + build_append_int_noprefix(table_data, id++, 4); /* Identifier */ + build_append_int_noprefix(table_data, 0, 4); /* Number of ID mappings */ + build_append_int_noprefix(table_data, 0, 4); /* Reference to ID Array */ + build_append_int_noprefix(table_data, 1, 4); /* Number of ITSs */ + /* GIC ITS Identifier Array */ + build_append_int_noprefix(table_data, 0 /* MADT translation_id */, 4); if (vms->iommu == VIRT_IOMMU_SMMUV3) { int irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE; - /* SMMUv3 node */ - smmu_offset = iort_node_offset + node_size; - node_size = sizeof(*smmu) + sizeof(*idmap); - iort_length += node_size; - smmu = acpi_data_push(table_data, node_size); - - smmu->type = ACPI_IORT_NODE_SMMU_V3; - smmu->length = cpu_to_le16(node_size); - smmu->mapping_count = cpu_to_le32(1); - smmu->mapping_offset = cpu_to_le32(sizeof(*smmu)); - smmu->base_address = cpu_to_le64(vms->memmap[VIRT_SMMU].base); - smmu->flags = cpu_to_le32(ACPI_IORT_SMMU_V3_COHACC_OVERRIDE); - smmu->event_gsiv = cpu_to_le32(irq); - smmu->pri_gsiv = cpu_to_le32(irq + 1); - smmu->sync_gsiv = cpu_to_le32(irq + 2); - smmu->gerr_gsiv = cpu_to_le32(irq + 3); - - /* Identity RID mapping covering the whole input RID range */ - idmap = &smmu->id_mapping_array[0]; - idmap->input_base = 0; - idmap->id_count = cpu_to_le32(0xFFFF); - idmap->output_base = 0; + smmu_offset = table_data->len - table.table_offset; + /* Table 9 SMMUv3 Format */ + build_append_int_noprefix(table_data, 4 /* SMMUv3 */, 1); /* Type */ + node_size = SMMU_V3_ENTRY_SIZE + ID_MAPPING_ENTRY_SIZE; + build_append_int_noprefix(table_data, node_size, 2); /* Length */ + build_append_int_noprefix(table_data, 4, 1); /* Revision */ + build_append_int_noprefix(table_data, id++, 4); /* Identifier */ + build_append_int_noprefix(table_data, 1, 4); /* Number of ID mappings */ + /* Reference to ID Array */ + build_append_int_noprefix(table_data, SMMU_V3_ENTRY_SIZE, 4); + /* Base address */ + build_append_int_noprefix(table_data, vms->memmap[VIRT_SMMU].base, 8); + /* Flags */ + build_append_int_noprefix(table_data, 1 /* COHACC Override */, 4); + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ + build_append_int_noprefix(table_data, 0, 8); /* VATOS address */ + /* Model */ + build_append_int_noprefix(table_data, 0 /* Generic SMMU-v3 */, 4); + build_append_int_noprefix(table_data, irq, 4); /* Event */ + build_append_int_noprefix(table_data, irq + 1, 4); /* PRI */ + build_append_int_noprefix(table_data, irq + 3, 4); /* GERR */ + build_append_int_noprefix(table_data, irq + 2, 4); /* Sync */ + 
build_append_int_noprefix(table_data, 0, 4); /* Proximity domain */ + /* DeviceID mapping index (ignored since interrupts are GSIV based) */ + build_append_int_noprefix(table_data, 0, 4); + /* output IORT node is the ITS group node (the first node) */ - idmap->output_reference = cpu_to_le32(iort_node_offset); + build_iort_id_mapping(table_data, 0, 0xFFFF, IORT_NODE_OFFSET); } - /* Root Complex Node */ - node_size = sizeof(*rc) + sizeof(*idmap); - iort_length += node_size; - rc = acpi_data_push(table_data, node_size); - - rc->type = ACPI_IORT_NODE_PCI_ROOT_COMPLEX; - rc->length = cpu_to_le16(node_size); - rc->mapping_count = cpu_to_le32(1); - rc->mapping_offset = cpu_to_le32(sizeof(*rc)); - - /* fully coherent device */ - rc->memory_properties.cache_coherency = cpu_to_le32(1); - rc->memory_properties.memory_flags = 0x3; /* CCA = CPM = DCAS = 1 */ - rc->pci_segment_number = 0; /* MCFG pci_segment */ - - /* Identity RID mapping covering the whole input RID range */ - idmap = &rc->id_mapping_array[0]; - idmap->input_base = 0; - idmap->id_count = cpu_to_le32(0xFFFF); - idmap->output_base = 0; - + /* Table 17 Root Complex Node */ + build_append_int_noprefix(table_data, 2 /* Root complex */, 1); /* Type */ + node_size = ROOT_COMPLEX_ENTRY_SIZE + + ID_MAPPING_ENTRY_SIZE * rc_mapping_count; + build_append_int_noprefix(table_data, node_size, 2); /* Length */ + build_append_int_noprefix(table_data, 3, 1); /* Revision */ + build_append_int_noprefix(table_data, id++, 4); /* Identifier */ + /* Number of ID mappings */ + build_append_int_noprefix(table_data, rc_mapping_count, 4); + /* Reference to ID Array */ + build_append_int_noprefix(table_data, ROOT_COMPLEX_ENTRY_SIZE, 4); + + /* Table 14 Memory access properties */ + /* CCA: Cache Coherent Attribute */ + build_append_int_noprefix(table_data, 1 /* fully coherent */, 4); + build_append_int_noprefix(table_data, 0, 1); /* AH: Note Allocation Hints */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + /* Table 15 Memory Access Flags */ + build_append_int_noprefix(table_data, 0x3 /* CCA = CPM = DACS = 1 */, 1); + + build_append_int_noprefix(table_data, 0, 4); /* ATS Attribute */ + /* MCFG pci_segment */ + build_append_int_noprefix(table_data, 0, 4); /* PCI Segment number */ + + /* Memory address size limit */ + build_append_int_noprefix(table_data, 64, 1); + + build_append_int_noprefix(table_data, 0, 3); /* Reserved */ + + /* Output Reference */ if (vms->iommu == VIRT_IOMMU_SMMUV3) { - /* output IORT node is the smmuv3 node */ - idmap->output_reference = cpu_to_le32(smmu_offset); + AcpiIortIdMapping *range; + + /* translated RIDs connect to SMMUv3 node: RC -> SMMUv3 -> ITS */ + for (i = 0; i < smmu_idmaps->len; i++) { + range = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i); + /* output IORT node is the smmuv3 node */ + build_iort_id_mapping(table_data, range->input_base, + range->id_count, smmu_offset); + } + + /* bypassed RIDs connect to ITS group node directly: RC -> ITS */ + for (i = 0; i < its_idmaps->len; i++) { + range = &g_array_index(its_idmaps, AcpiIortIdMapping, i); + /* output IORT node is the ITS group node (the first node) */ + build_iort_id_mapping(table_data, range->input_base, + range->id_count, iort_node_offset); + } } else { /* output IORT node is the ITS group node (the first node) */ - idmap->output_reference = cpu_to_le32(iort_node_offset); + build_iort_id_mapping(table_data, 0, 0xFFFF, IORT_NODE_OFFSET); } - /* - * Update the pointer address in case table_data->data moves during above - * acpi_data_push operations. 
- */ - iort = (AcpiIortTable *)(table_data->data + iort_start); - iort->length = cpu_to_le32(iort_length); - - build_header(linker, table_data, (void *)(table_data->data + iort_start), - "IORT", table_data->len - iort_start, 0, vms->oem_id, - vms->oem_table_id); + acpi_table_end(linker, &table); + g_array_free(smmu_idmaps, true); + g_array_free(its_idmaps, true); } +/* + * Serial Port Console Redirection Table (SPCR) + * Rev: 1.07 + */ static void build_spcr(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) { - AcpiSerialPortConsoleRedirection *spcr; - const MemMapEntry *uart_memmap = &vms->memmap[VIRT_UART]; - int irq = vms->irqmap[VIRT_UART] + ARM_SPI_BASE; - int spcr_start = table_data->len; - - spcr = acpi_data_push(table_data, sizeof(*spcr)); - - spcr->interface_type = 0x3; /* ARM PL011 UART */ - - spcr->base_address.space_id = AML_SYSTEM_MEMORY; - spcr->base_address.bit_width = 8; - spcr->base_address.bit_offset = 0; - spcr->base_address.access_width = 1; - spcr->base_address.address = cpu_to_le64(uart_memmap->base); - - spcr->interrupt_types = (1 << 3); /* Bit[3] ARMH GIC interrupt */ - spcr->gsi = cpu_to_le32(irq); /* Global System Interrupt */ - - spcr->baud = 3; /* Baud Rate: 3 = 9600 */ - spcr->parity = 0; /* No Parity */ - spcr->stopbits = 1; /* 1 Stop bit */ - spcr->flowctrl = (1 << 1); /* Bit[1] = RTS/CTS hardware flow control */ - spcr->term_type = 0; /* Terminal Type: 0 = VT100 */ - - spcr->pci_device_id = 0xffff; /* PCI Device ID: not a PCI device */ - spcr->pci_vendor_id = 0xffff; /* PCI Vendor ID: not a PCI device */ - - build_header(linker, table_data, (void *)(table_data->data + spcr_start), - "SPCR", table_data->len - spcr_start, 2, vms->oem_id, - vms->oem_table_id); + AcpiTable table = { .sig = "SPCR", .rev = 2, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; + + acpi_table_begin(&table, table_data); + + /* Interface Type */ + build_append_int_noprefix(table_data, 3, 1); /* ARM PL011 UART */ + build_append_int_noprefix(table_data, 0, 3); /* Reserved */ + /* Base Address */ + build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 8, 0, 1, + vms->memmap[VIRT_UART].base); + /* Interrupt Type */ + build_append_int_noprefix(table_data, + (1 << 3) /* Bit[3] ARMH GIC interrupt */, 1); + build_append_int_noprefix(table_data, 0, 1); /* IRQ */ + /* Global System Interrupt */ + build_append_int_noprefix(table_data, + vms->irqmap[VIRT_UART] + ARM_SPI_BASE, 4); + build_append_int_noprefix(table_data, 3 /* 9600 */, 1); /* Baud Rate */ + build_append_int_noprefix(table_data, 0 /* No Parity */, 1); /* Parity */ + /* Stop Bits */ + build_append_int_noprefix(table_data, 1 /* 1 Stop bit */, 1); + /* Flow Control */ + build_append_int_noprefix(table_data, + (1 << 1) /* RTS/CTS hardware flow control */, 1); + /* Terminal Type */ + build_append_int_noprefix(table_data, 0 /* VT100 */, 1); + build_append_int_noprefix(table_data, 0, 1); /* Language */ + /* PCI Device ID */ + build_append_int_noprefix(table_data, 0xffff /* not a PCI device*/, 2); + /* PCI Vendor ID */ + build_append_int_noprefix(table_data, 0xffff /* not a PCI device*/, 2); + build_append_int_noprefix(table_data, 0, 1); /* PCI Bus Number */ + build_append_int_noprefix(table_data, 0, 1); /* PCI Device Number */ + build_append_int_noprefix(table_data, 0, 1); /* PCI Function Number */ + build_append_int_noprefix(table_data, 0, 4); /* PCI Flags */ + build_append_int_noprefix(table_data, 0, 1); /* PCI Segment */ + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ + + acpi_table_end(linker, &table); 
} +/* + * ACPI spec, Revision 5.1 + * 5.2.16 System Resource Affinity Table (SRAT) + */ static void build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) { - AcpiSystemResourceAffinityTable *srat; - AcpiSratProcessorGiccAffinity *core; - AcpiSratMemoryAffinity *numamem; - int i, srat_start; + int i; uint64_t mem_base; MachineClass *mc = MACHINE_GET_CLASS(vms); MachineState *ms = MACHINE(vms); const CPUArchIdList *cpu_list = mc->possible_cpu_arch_ids(ms); + AcpiTable table = { .sig = "SRAT", .rev = 3, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; - srat_start = table_data->len; - srat = acpi_data_push(table_data, sizeof(*srat)); - srat->reserved1 = cpu_to_le32(1); + acpi_table_begin(&table, table_data); + build_append_int_noprefix(table_data, 1, 4); /* Reserved */ + build_append_int_noprefix(table_data, 0, 8); /* Reserved */ for (i = 0; i < cpu_list->len; ++i) { - core = acpi_data_push(table_data, sizeof(*core)); - core->type = ACPI_SRAT_PROCESSOR_GICC; - core->length = sizeof(*core); - core->proximity = cpu_to_le32(cpu_list->cpus[i].props.node_id); - core->acpi_processor_uid = cpu_to_le32(i); - core->flags = cpu_to_le32(1); + uint32_t nodeid = cpu_list->cpus[i].props.node_id; + /* + * 5.2.16.4 GICC Affinity Structure + */ + build_append_int_noprefix(table_data, 3, 1); /* Type */ + build_append_int_noprefix(table_data, 18, 1); /* Length */ + build_append_int_noprefix(table_data, nodeid, 4); /* Proximity Domain */ + build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ + /* Flags, Table 5-76 */ + build_append_int_noprefix(table_data, 1 /* Enabled */, 4); + build_append_int_noprefix(table_data, 0, 4); /* Clock Domain */ } mem_base = vms->memmap[VIRT_MEM].base; for (i = 0; i < ms->numa_state->num_nodes; ++i) { if (ms->numa_state->nodes[i].node_mem > 0) { - numamem = acpi_data_push(table_data, sizeof(*numamem)); - build_srat_memory(numamem, mem_base, + build_srat_memory(table_data, mem_base, ms->numa_state->nodes[i].node_mem, i, MEM_AFFINITY_ENABLED); mem_base += ms->numa_state->nodes[i].node_mem; @@ -421,142 +560,251 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) } if (ms->device_memory) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, ms->device_memory->base, + build_srat_memory(table_data, ms->device_memory->base, memory_region_size(&ms->device_memory->mr), ms->numa_state->num_nodes - 1, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); } - build_header(linker, table_data, (void *)(table_data->data + srat_start), - "SRAT", table_data->len - srat_start, 3, vms->oem_id, - vms->oem_table_id); + acpi_table_end(linker, &table); } -/* GTDT */ +/* + * ACPI spec, Revision 5.1 + * 5.2.24 Generic Timer Description Table (GTDT) + */ static void build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) { VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); - int gtdt_start = table_data->len; - AcpiGenericTimerTable *gtdt; - uint32_t irqflags; - - if (vmc->claim_edge_triggered_timers) { - irqflags = ACPI_GTDT_INTERRUPT_MODE_EDGE; - } else { - irqflags = ACPI_GTDT_INTERRUPT_MODE_LEVEL; - } - - gtdt = acpi_data_push(table_data, sizeof *gtdt); - /* The interrupt values are the same with the device tree when adding 16 */ - gtdt->secure_el1_interrupt = cpu_to_le32(ARCH_TIMER_S_EL1_IRQ + 16); - gtdt->secure_el1_flags = cpu_to_le32(irqflags); - - gtdt->non_secure_el1_interrupt = cpu_to_le32(ARCH_TIMER_NS_EL1_IRQ + 16); - gtdt->non_secure_el1_flags = cpu_to_le32(irqflags | - 
ACPI_GTDT_CAP_ALWAYS_ON); - - gtdt->virtual_timer_interrupt = cpu_to_le32(ARCH_TIMER_VIRT_IRQ + 16); - gtdt->virtual_timer_flags = cpu_to_le32(irqflags); + /* + * Table 5-117 Flag Definitions + * set only "Timer interrupt Mode" and assume "Timer Interrupt + * polarity" bit as '0: Interrupt is Active high' + */ + uint32_t irqflags = vmc->claim_edge_triggered_timers ? + 1 : /* Interrupt is Edge triggered */ + 0; /* Interrupt is Level triggered */ + AcpiTable table = { .sig = "GTDT", .rev = 2, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; + + acpi_table_begin(&table, table_data); + + /* CntControlBase Physical Address */ + /* FIXME: invalid value, should be 0xFFFFFFFFFFFFFFFF if not impl. ? */ + build_append_int_noprefix(table_data, 0, 8); + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ + /* + * FIXME: clarify comment: + * The interrupt values are the same with the device tree when adding 16 + */ + /* Secure EL1 timer GSIV */ + build_append_int_noprefix(table_data, ARCH_TIMER_S_EL1_IRQ + 16, 4); + /* Secure EL1 timer Flags */ + build_append_int_noprefix(table_data, irqflags, 4); + /* Non-Secure EL1 timer GSIV */ + build_append_int_noprefix(table_data, ARCH_TIMER_NS_EL1_IRQ + 16, 4); + /* Non-Secure EL1 timer Flags */ + build_append_int_noprefix(table_data, irqflags | + 1UL << 2, /* Always-on Capability */ + 4); + /* Virtual timer GSIV */ + build_append_int_noprefix(table_data, ARCH_TIMER_VIRT_IRQ + 16, 4); + /* Virtual Timer Flags */ + build_append_int_noprefix(table_data, irqflags, 4); + /* Non-Secure EL2 timer GSIV */ + build_append_int_noprefix(table_data, ARCH_TIMER_NS_EL2_IRQ + 16, 4); + /* Non-Secure EL2 timer Flags */ + build_append_int_noprefix(table_data, irqflags, 4); + /* CntReadBase Physical address */ + build_append_int_noprefix(table_data, 0, 8); + /* Platform Timer Count */ + build_append_int_noprefix(table_data, 0, 4); + /* Platform Timer Offset */ + build_append_int_noprefix(table_data, 0, 4); + + acpi_table_end(linker, &table); +} - gtdt->non_secure_el2_interrupt = cpu_to_le32(ARCH_TIMER_NS_EL2_IRQ + 16); - gtdt->non_secure_el2_flags = cpu_to_le32(irqflags); +/* Debug Port Table 2 (DBG2) */ +static void +build_dbg2(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) +{ + AcpiTable table = { .sig = "DBG2", .rev = 0, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; + int dbg2devicelength; + const char name[] = "COM0"; + const int namespace_length = sizeof(name); + + acpi_table_begin(&table, table_data); + + dbg2devicelength = 22 + /* BaseAddressRegister[] offset */ + 12 + /* BaseAddressRegister[] */ + 4 + /* AddressSize[] */ + namespace_length /* NamespaceString[] */; + + /* OffsetDbgDeviceInfo */ + build_append_int_noprefix(table_data, 44, 4); + /* NumberDbgDeviceInfo */ + build_append_int_noprefix(table_data, 1, 4); + + /* Table 2. 
Debug Device Information structure format */ + build_append_int_noprefix(table_data, 0, 1); /* Revision */ + build_append_int_noprefix(table_data, dbg2devicelength, 2); /* Length */ + /* NumberofGenericAddressRegisters */ + build_append_int_noprefix(table_data, 1, 1); + /* NameSpaceStringLength */ + build_append_int_noprefix(table_data, namespace_length, 2); + build_append_int_noprefix(table_data, 38, 2); /* NameSpaceStringOffset */ + build_append_int_noprefix(table_data, 0, 2); /* OemDataLength */ + /* OemDataOffset (0 means no OEM data) */ + build_append_int_noprefix(table_data, 0, 2); + + /* Port Type */ + build_append_int_noprefix(table_data, 0x8000 /* Serial */, 2); + /* Port Subtype */ + build_append_int_noprefix(table_data, 0x3 /* ARM PL011 UART */, 2); + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + /* BaseAddressRegisterOffset */ + build_append_int_noprefix(table_data, 22, 2); + /* AddressSizeOffset */ + build_append_int_noprefix(table_data, 34, 2); + + /* BaseAddressRegister[] */ + build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 8, 0, 1, + vms->memmap[VIRT_UART].base); + + /* AddressSize[] */ + build_append_int_noprefix(table_data, + vms->memmap[VIRT_UART].size, 4); + + /* NamespaceString[] */ + g_array_append_vals(table_data, name, namespace_length); + + acpi_table_end(linker, &table); +}; - build_header(linker, table_data, - (void *)(table_data->data + gtdt_start), "GTDT", - table_data->len - gtdt_start, 2, vms->oem_id, - vms->oem_table_id); +/* + * ACPI spec, Revision 5.1 Errata A + * 5.2.12 Multiple APIC Description Table (MADT) + */ +static void build_append_gicr(GArray *table_data, uint64_t base, uint32_t size) +{ + build_append_int_noprefix(table_data, 0xE, 1); /* Type */ + build_append_int_noprefix(table_data, 16, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + /* Discovery Range Base Addres */ + build_append_int_noprefix(table_data, base, 8); + build_append_int_noprefix(table_data, size, 4); /* Discovery Range Length */ } -/* MADT */ static void build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) { + int i; VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); - int madt_start = table_data->len; const MemMapEntry *memmap = vms->memmap; - const int *irqmap = vms->irqmap; - AcpiMadtGenericDistributor *gicd; - AcpiMadtGenericMsiFrame *gic_msi; - int i; - - acpi_data_push(table_data, sizeof(AcpiMultipleApicTable)); - - gicd = acpi_data_push(table_data, sizeof *gicd); - gicd->type = ACPI_APIC_GENERIC_DISTRIBUTOR; - gicd->length = sizeof(*gicd); - gicd->base_address = cpu_to_le64(memmap[VIRT_GIC_DIST].base); - gicd->version = vms->gic_version; + AcpiTable table = { .sig = "APIC", .rev = 3, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; + + acpi_table_begin(&table, table_data); + /* Local Interrupt Controller Address */ + build_append_int_noprefix(table_data, 0, 4); + build_append_int_noprefix(table_data, 0, 4); /* Flags */ + + /* 5.2.12.15 GIC Distributor Structure */ + build_append_int_noprefix(table_data, 0xC, 1); /* Type */ + build_append_int_noprefix(table_data, 24, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); /* GIC ID */ + /* Physical Base Address */ + build_append_int_noprefix(table_data, memmap[VIRT_GIC_DIST].base, 8); + build_append_int_noprefix(table_data, 0, 4); /* System Vector Base */ + /* GIC version */ + build_append_int_noprefix(table_data, vms->gic_version, 1); + 
build_append_int_noprefix(table_data, 0, 3); /* Reserved */ for (i = 0; i < MACHINE(vms)->smp.cpus; i++) { - AcpiMadtGenericCpuInterface *gicc = acpi_data_push(table_data, - sizeof(*gicc)); ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i)); + uint64_t physical_base_address = 0, gich = 0, gicv = 0; + uint32_t vgic_interrupt = vms->virt ? PPI(ARCH_GIC_MAINT_IRQ) : 0; + uint32_t pmu_interrupt = arm_feature(&armcpu->env, ARM_FEATURE_PMU) ? + PPI(VIRTUAL_PMU_IRQ) : 0; - gicc->type = ACPI_APIC_GENERIC_CPU_INTERFACE; - gicc->length = sizeof(*gicc); if (vms->gic_version == 2) { - gicc->base_address = cpu_to_le64(memmap[VIRT_GIC_CPU].base); - gicc->gich_base_address = cpu_to_le64(memmap[VIRT_GIC_HYP].base); - gicc->gicv_base_address = cpu_to_le64(memmap[VIRT_GIC_VCPU].base); + physical_base_address = memmap[VIRT_GIC_CPU].base; + gicv = memmap[VIRT_GIC_VCPU].base; + gich = memmap[VIRT_GIC_HYP].base; } - gicc->cpu_interface_number = cpu_to_le32(i); - gicc->arm_mpidr = cpu_to_le64(armcpu->mp_affinity); - gicc->uid = cpu_to_le32(i); - gicc->flags = cpu_to_le32(ACPI_MADT_GICC_ENABLED); - if (arm_feature(&armcpu->env, ARM_FEATURE_PMU)) { - gicc->performance_interrupt = cpu_to_le32(PPI(VIRTUAL_PMU_IRQ)); - } - if (vms->virt) { - gicc->vgic_interrupt = cpu_to_le32(PPI(ARCH_GIC_MAINT_IRQ)); - } + /* 5.2.12.14 GIC Structure */ + build_append_int_noprefix(table_data, 0xB, 1); /* Type */ + build_append_int_noprefix(table_data, 76, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + build_append_int_noprefix(table_data, i, 4); /* GIC ID */ + build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ + /* Flags */ + build_append_int_noprefix(table_data, 1, 4); /* Enabled */ + /* Parking Protocol Version */ + build_append_int_noprefix(table_data, 0, 4); + /* Performance Interrupt GSIV */ + build_append_int_noprefix(table_data, pmu_interrupt, 4); + build_append_int_noprefix(table_data, 0, 8); /* Parked Address */ + /* Physical Base Address */ + build_append_int_noprefix(table_data, physical_base_address, 8); + build_append_int_noprefix(table_data, gicv, 8); /* GICV */ + build_append_int_noprefix(table_data, gich, 8); /* GICH */ + /* VGIC Maintenance interrupt */ + build_append_int_noprefix(table_data, vgic_interrupt, 4); + build_append_int_noprefix(table_data, 0, 8); /* GICR Base Address*/ + /* MPIDR */ + build_append_int_noprefix(table_data, armcpu->mp_affinity, 8); } if (vms->gic_version == 3) { - AcpiMadtGenericTranslator *gic_its; - int nb_redist_regions = virt_gicv3_redist_region_count(vms); - AcpiMadtGenericRedistributor *gicr = acpi_data_push(table_data, - sizeof *gicr); - - gicr->type = ACPI_APIC_GENERIC_REDISTRIBUTOR; - gicr->length = sizeof(*gicr); - gicr->base_address = cpu_to_le64(memmap[VIRT_GIC_REDIST].base); - gicr->range_length = cpu_to_le32(memmap[VIRT_GIC_REDIST].size); - - if (nb_redist_regions == 2) { - gicr = acpi_data_push(table_data, sizeof(*gicr)); - gicr->type = ACPI_APIC_GENERIC_REDISTRIBUTOR; - gicr->length = sizeof(*gicr); - gicr->base_address = - cpu_to_le64(memmap[VIRT_HIGH_GIC_REDIST2].base); - gicr->range_length = - cpu_to_le32(memmap[VIRT_HIGH_GIC_REDIST2].size); + build_append_gicr(table_data, memmap[VIRT_GIC_REDIST].base, + memmap[VIRT_GIC_REDIST].size); + if (virt_gicv3_redist_region_count(vms) == 2) { + build_append_gicr(table_data, memmap[VIRT_HIGH_GIC_REDIST2].base, + memmap[VIRT_HIGH_GIC_REDIST2].size); } if (its_class_name() && !vmc->no_its) { - gic_its = acpi_data_push(table_data, sizeof *gic_its); - gic_its->type = 
ACPI_APIC_GENERIC_TRANSLATOR; - gic_its->length = sizeof(*gic_its); - gic_its->translation_id = 0; - gic_its->base_address = cpu_to_le64(memmap[VIRT_GIC_ITS].base); + /* + * FIXME: Structure is from Revision 6.0 where 'GIC Structure' + * has additional fields on top of implemented 5.1 Errata A, + * to make it consistent with v6.0 we need to bump everything + * to v6.0 + */ + /* + * ACPI spec, Revision 6.0 Errata A + * (original 6.0 definition has invalid Length) + * 5.2.12.18 GIC ITS Structure + */ + build_append_int_noprefix(table_data, 0xF, 1); /* Type */ + build_append_int_noprefix(table_data, 20, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); /* GIC ITS ID */ + /* Physical Base Address */ + build_append_int_noprefix(table_data, memmap[VIRT_GIC_ITS].base, 8); + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ } } else { - gic_msi = acpi_data_push(table_data, sizeof *gic_msi); - gic_msi->type = ACPI_APIC_GENERIC_MSI_FRAME; - gic_msi->length = sizeof(*gic_msi); - gic_msi->gic_msi_frame_id = 0; - gic_msi->base_address = cpu_to_le64(memmap[VIRT_GIC_V2M].base); - gic_msi->flags = cpu_to_le32(1); - gic_msi->spi_count = cpu_to_le16(NUM_GICV2M_SPIS); - gic_msi->spi_base = cpu_to_le16(irqmap[VIRT_GIC_V2M] + ARM_SPI_BASE); + const uint16_t spi_base = vms->irqmap[VIRT_GIC_V2M] + ARM_SPI_BASE; + + /* 5.2.12.16 GIC MSI Frame Structure */ + build_append_int_noprefix(table_data, 0xD, 1); /* Type */ + build_append_int_noprefix(table_data, 24, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); /* GIC MSI Frame ID */ + /* Physical Base Address */ + build_append_int_noprefix(table_data, memmap[VIRT_GIC_V2M].base, 8); + build_append_int_noprefix(table_data, 1, 4); /* Flags */ + /* SPI Count */ + build_append_int_noprefix(table_data, NUM_GICV2M_SPIS, 2); + build_append_int_noprefix(table_data, spi_base, 2); /* SPI Base */ } - - build_header(linker, table_data, - (void *)(table_data->data + madt_start), "APIC", - table_data->len - madt_start, 3, vms->oem_id, - vms->oem_table_id); + acpi_table_end(linker, &table); } /* FADT */ @@ -598,10 +846,11 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) MachineState *ms = MACHINE(vms); const MemMapEntry *memmap = vms->memmap; const int *irqmap = vms->irqmap; + AcpiTable table = { .sig = "DSDT", .rev = 2, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; + acpi_table_begin(&table, table_data); dsdt = init_aml_allocator(); - /* Reserve space for header */ - acpi_data_push(dsdt->buf, sizeof(AcpiTableHeader)); /* When booting the VM with UEFI, UEFI takes ownership of the RTC hardware. 
* While UEFI can use libfdt to disable the RTC device node in the DTB that @@ -642,16 +891,16 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) } acpi_dsdt_add_power_button(scope); +#ifdef CONFIG_TPM acpi_dsdt_add_tpm(scope, vms); +#endif aml_append(dsdt, scope); - /* copy AML table into ACPI tables blob and patch header there */ + /* copy AML table into ACPI tables blob */ g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); - build_header(linker, table_data, - (void *)(table_data->data + table_data->len - dsdt->buf->len), - "DSDT", dsdt->buf->len, 2, vms->oem_id, - vms->oem_table_id); + + acpi_table_end(linker, &table); free_aml_allocator(); } @@ -694,13 +943,19 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) dsdt = tables_blob->len; build_dsdt(tables_blob, tables->linker, vms); - /* FADT MADT GTDT MCFG SPCR pointed to by RSDT */ + /* FADT MADT PPTT GTDT MCFG SPCR DBG2 pointed to by RSDT */ acpi_add_table(table_offsets, tables_blob); build_fadt_rev5(tables_blob, tables->linker, vms, dsdt); acpi_add_table(table_offsets, tables_blob); build_madt(tables_blob, tables->linker, vms); + if (!vmc->no_cpu_topology) { + acpi_add_table(table_offsets, tables_blob); + build_pptt(tables_blob, tables->linker, ms, + vms->oem_id, vms->oem_table_id); + } + acpi_add_table(table_offsets, tables_blob); build_gtdt(tables_blob, tables->linker, vms); @@ -717,6 +972,9 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) acpi_add_table(table_offsets, tables_blob); build_spcr(tables_blob, tables->linker, vms); + acpi_add_table(table_offsets, tables_blob); + build_dbg2(tables_blob, tables->linker, vms); + if (vms->ras) { build_ghes_error_table(tables->hardware_errors, tables->linker); acpi_add_table(table_offsets, tables_blob); @@ -745,11 +1003,13 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) build_iort(tables_blob, tables->linker, vms); } +#ifdef CONFIG_TPM if (tpm_get_version(tpm_find()) == TPM_VERSION_2_0) { acpi_add_table(table_offsets, tables_blob); build_tpm2(tables_blob, tables->linker, tables->tcpalog, vms->oem_id, vms->oem_table_id); } +#endif /* XSDT is pointed to by RSDP */ xsdt = tables_blob->len; diff --git a/hw/arm/virt.c b/hw/arm/virt.c index a30a254600c..a94e906b2eb 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -36,7 +36,6 @@ #include "monitor/qdev.h" #include "qapi/error.h" #include "hw/sysbus.h" -#include "hw/boards.h" #include "hw/arm/boot.h" #include "hw/arm/primecell.h" #include "hw/arm/virt.h" @@ -48,11 +47,11 @@ #include "sysemu/device_tree.h" #include "sysemu/numa.h" #include "sysemu/runstate.h" -#include "sysemu/sysemu.h" #include "sysemu/tpm.h" #include "sysemu/kvm.h" +#include "sysemu/hvf.h" #include "hw/loader.h" -#include "exec/address-spaces.h" +#include "qapi/error.h" #include "qemu/bitops.h" #include "qemu/error-report.h" #include "qemu/module.h" @@ -207,6 +206,7 @@ static const char *valid_cpus[] = { ARM_CPU_TYPE_NAME("cortex-a57"), ARM_CPU_TYPE_NAME("cortex-a72"), ARM_CPU_TYPE_NAME("morello"), + ARM_CPU_TYPE_NAME("a64fx"), ARM_CPU_TYPE_NAME("host"), ARM_CPU_TYPE_NAME("max"), }; @@ -357,20 +357,21 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) int cpu; int addr_cells = 1; const MachineState *ms = MACHINE(vms); + const VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); int smp_cpus = ms->smp.cpus; /* - * From Documentation/devicetree/bindings/arm/cpus.txt - * On ARM v8 64-bit systems value should be set to 2, - * that corresponds to the MPIDR_EL1 register 
size. - * If MPIDR_EL1[63:32] value is equal to 0 on all CPUs - * in the system, #address-cells can be set to 1, since - * MPIDR_EL1[63:32] bits are not used for CPUs - * identification. + * See Linux Documentation/devicetree/bindings/arm/cpus.yaml + * On ARM v8 64-bit systems value should be set to 2, + * that corresponds to the MPIDR_EL1 register size. + * If MPIDR_EL1[63:32] value is equal to 0 on all CPUs + * in the system, #address-cells can be set to 1, since + * MPIDR_EL1[63:32] bits are not used for CPUs + * identification. * - * Here we actually don't know whether our system is 32- or 64-bit one. - * The simplest way to go is to examine affinity IDs of all our CPUs. If - * at least one of them has Aff3 populated, we set #address-cells to 2. + * Here we actually don't know whether our system is 32- or 64-bit one. + * The simplest way to go is to examine affinity IDs of all our CPUs. If + * at least one of them has Aff3 populated, we set #address-cells to 2. */ for (cpu = 0; cpu < smp_cpus; cpu++) { ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu)); @@ -413,8 +414,57 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) ms->possible_cpus->cpus[cs->cpu_index].props.node_id); } + if (!vmc->no_cpu_topology) { + qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", + qemu_fdt_alloc_phandle(ms->fdt)); + } + g_free(nodename); } + + if (!vmc->no_cpu_topology) { + /* + * Add vCPU topology description through fdt node cpu-map. + * + * See Linux Documentation/devicetree/bindings/cpu/cpu-topology.txt + * In a SMP system, the hierarchy of CPUs can be defined through + * four entities that are used to describe the layout of CPUs in + * the system: socket/cluster/core/thread. + * + * A socket node represents the boundary of system physical package + * and its child nodes must be one or more cluster nodes. A system + * can contain several layers of clustering within a single physical + * package and cluster nodes can be contained in parent cluster nodes. + * + * Given that cluster is not yet supported in the vCPU topology, + * we currently generate one cluster node within each socket node + * by default. 
+ */ + qemu_fdt_add_subnode(ms->fdt, "/cpus/cpu-map"); + + for (cpu = smp_cpus - 1; cpu >= 0; cpu--) { + char *cpu_path = g_strdup_printf("/cpus/cpu@%d", cpu); + char *map_path; + + if (ms->smp.threads > 1) { + map_path = g_strdup_printf( + "/cpus/cpu-map/socket%d/cluster0/core%d/thread%d", + cpu / (ms->smp.cores * ms->smp.threads), + (cpu / ms->smp.threads) % ms->smp.cores, + cpu % ms->smp.threads); + } else { + map_path = g_strdup_printf( + "/cpus/cpu-map/socket%d/cluster0/core%d", + cpu / ms->smp.cores, + cpu % ms->smp.cores); + } + qemu_fdt_add_path(ms->fdt, map_path); + qemu_fdt_setprop_phandle(ms->fdt, map_path, "cpu", cpu_path); + + g_free(map_path); + g_free(cpu_path); + } + } } static void fdt_add_its_gic_node(VirtMachineState *vms) @@ -590,6 +640,12 @@ static void create_its(VirtMachineState *vms) const char *itsclass = its_class_name(); DeviceState *dev; + if (!strcmp(itsclass, "arm-gicv3-its")) { + if (!vms->tcg_its) { + itsclass = NULL; + } + } + if (!itsclass) { /* Do nothing if not supported */ return; @@ -627,7 +683,7 @@ static void create_v2m(VirtMachineState *vms) vms->msi_controller = VIRT_MSI_CTRL_GICV2M; } -static void create_gic(VirtMachineState *vms) +static void create_gic(VirtMachineState *vms, MemoryRegion *mem) { MachineState *ms = MACHINE(vms); /* We create a standalone GIC */ @@ -661,6 +717,14 @@ static void create_gic(VirtMachineState *vms) nb_redist_regions); qdev_prop_set_uint32(vms->gic, "redist-region-count[0]", redist0_count); + if (!kvm_irqchip_in_kernel()) { + if (vms->tcg_its) { + object_property_set_link(OBJECT(vms->gic), "sysmem", + OBJECT(mem), &error_fatal); + qdev_prop_set_bit(vms->gic, "has-lpi", true); + } + } + if (nb_redist_regions == 2) { uint32_t redist1_capacity = vms->memmap[VIRT_HIGH_GIC_REDIST2].size / GICV3_REDIST_SIZE; @@ -902,6 +966,9 @@ static void create_gpio_devices(const VirtMachineState *vms, int gpio, MachineState *ms = MACHINE(vms); pl061_dev = qdev_new("pl061"); + /* Pull lines down to 0 if not driven by the PL061 */ + qdev_prop_set_uint32(pl061_dev, "pullups", 0); + qdev_prop_set_uint32(pl061_dev, "pulldowns", 0xff); s = SYS_BUS_DEVICE(pl061_dev); sysbus_realize_and_unref(s, &error_fatal); memory_region_add_subregion(mem, base, sysbus_mmio_get_region(s, 0)); @@ -1371,6 +1438,7 @@ static void create_pcie(VirtMachineState *vms) } pci = PCI_HOST_BRIDGE(dev); + pci->bypass_iommu = vms->default_bus_bypass_iommu; vms->bus = pci->bus; if (vms->bus) { for (i = 0; i < nb_nics; i++) { @@ -1447,7 +1515,7 @@ static void create_platform_bus(VirtMachineState *vms) MemoryRegion *sysmem = get_system_memory(); dev = qdev_new(TYPE_PLATFORM_BUS_DEVICE); - dev->id = TYPE_PLATFORM_BUS_DEVICE; + dev->id = g_strdup(TYPE_PLATFORM_BUS_DEVICE); qdev_prop_set_uint32(dev, "num_irqs", PLATFORM_BUS_NUM_IRQS); qdev_prop_set_uint32(dev, "mmio_size", vms->memmap[VIRT_PLATFORM_BUS].size); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); @@ -1534,8 +1602,10 @@ static void virt_build_smbios(VirtMachineState *vms) vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, true, SMBIOS_ENTRY_POINT_30); - smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, - &smbios_anchor, &smbios_anchor_len); + smbios_get_tables(MACHINE(vms), NULL, 0, + &smbios_tables, &smbios_tables_len, + &smbios_anchor, &smbios_anchor_len, + &error_fatal); if (smbios_anchor) { fw_cfg_add_file(vms->fw_cfg, "etc/smbios/smbios-tables", @@ -1858,11 +1928,6 @@ static void machvirt_init(MachineState *machine) } if (vms->secure) { - if (kvm_enabled()) { - error_report("mach-virt: KVM does not support Security extensions"); - exit(1); - } - /* * The Secure view of the world is the same as the NonSecure, * but with a few extra devices. Create it as a container region @@ -1915,15 +1980,17 @@ static void machvirt_init(MachineState *machine) exit(1); } - if (vms->virt && kvm_enabled()) { - error_report("mach-virt: KVM does not support providing " - "Virtualization extensions to the guest CPU"); + if (vms->virt && (kvm_enabled() || hvf_enabled())) { + error_report("mach-virt: %s does not support providing " + "Virtualization extensions to the guest CPU", + kvm_enabled() ? "KVM" : "HVF"); exit(1); } - if (vms->mte && kvm_enabled()) { - error_report("mach-virt: KVM does not support providing " - "MTE to the guest CPU"); + if (vms->mte && (kvm_enabled() || hvf_enabled())) { + error_report("mach-virt: %s does not support providing " + "MTE to the guest CPU", + kvm_enabled() ? "KVM" : "HVF"); exit(1); } @@ -2050,7 +2117,7 @@ static void machvirt_init(MachineState *machine) virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem); - create_gic(vms); + create_gic(vms, sysmem); virt_cpu_post_init(vms, sysmem); @@ -2330,6 +2397,21 @@ static void virt_set_iommu(Object *obj, const char *value, Error **errp) } } +static bool virt_get_default_bus_bypass_iommu(Object *obj, Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + return vms->default_bus_bypass_iommu; +} + +static void virt_set_default_bus_bypass_iommu(Object *obj, bool value, + Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + vms->default_bus_bypass_iommu = value; +} + static CpuInstanceProperties virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) { @@ -2610,7 +2692,9 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE); machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM); +#ifdef CONFIG_TPM machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); +#endif mc->block_default_type = IF_VIRTIO; mc->no_cdrom = 1; mc->pci_allow_0_address = true; @@ -2671,6 +2755,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) "Set the IOMMU type. 
" "Valid values are none and smmuv3"); + object_class_property_add_bool(oc, "default-bus-bypass-iommu", + virt_get_default_bus_bypass_iommu, + virt_set_default_bus_bypass_iommu); + object_class_property_set_description(oc, "default-bus-bypass-iommu", + "Set on/off to enable/disable " + "bypass_iommu for default root bus"); + object_class_property_add_bool(oc, "ras", virt_get_ras, virt_set_ras); object_class_property_set_description(oc, "ras", @@ -2733,11 +2824,20 @@ static void virt_instance_init(Object *obj) } else { /* Default allows ITS instantiation */ vms->its = true; + + if (vmc->no_tcg_its) { + vms->tcg_its = false; + } else { + vms->tcg_its = true; + } } /* Default disallows iommu instantiation */ vms->iommu = VIRT_IOMMU_NONE; + /* The default root bus is attached to iommu by default */ + vms->default_bus_bypass_iommu = false; + /* Default disallows RAS instantiation */ vms->ras = false; @@ -2772,10 +2872,31 @@ static void machvirt_machine_init(void) } type_init(machvirt_machine_init); +static void virt_machine_6_2_options(MachineClass *mc) +{ +} +DEFINE_VIRT_MACHINE_AS_LATEST(6, 2) + +static void virt_machine_6_1_options(MachineClass *mc) +{ + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + + virt_machine_6_2_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len); + mc->smp_props.prefer_sockets = true; + vmc->no_cpu_topology = true; + + /* qemu ITS was introduced with 6.2 */ + vmc->no_tcg_its = true; +} +DEFINE_VIRT_MACHINE(6, 1) + static void virt_machine_6_0_options(MachineClass *mc) { + virt_machine_6_1_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len); } -DEFINE_VIRT_MACHINE_AS_LATEST(6, 0) +DEFINE_VIRT_MACHINE(6, 0) static void virt_machine_5_2_options(MachineClass *mc) { diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c index 8db6cfd47f5..69c333e91b1 100644 --- a/hw/arm/xilinx_zynq.c +++ b/hw/arm/xilinx_zynq.c @@ -22,12 +22,11 @@ #include "hw/sysbus.h" #include "hw/arm/boot.h" #include "net/net.h" -#include "exec/address-spaces.h" #include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/block/flash.h" #include "hw/loader.h" -#include "hw/misc/zynq-xadc.h" +#include "hw/adc/zynq-xadc.h" #include "hw/ssi/ssi.h" #include "hw/usb/chipidea.h" #include "qemu/error-report.h" @@ -119,7 +118,7 @@ static void gem_init(NICInfo *nd, uint32_t base, qemu_irq irq) qemu_check_nic_model(nd, TYPE_CADENCE_GEM); qdev_set_nic_properties(dev, nd); } - object_property_set_int(OBJECT(dev), "phy-addr", 23, &error_abort); + object_property_set_int(OBJECT(dev), "phy-addr", 7, &error_abort); s = SYS_BUS_DEVICE(dev); sysbus_realize_and_unref(s, &error_fatal); sysbus_mmio_map(s, 0, base); @@ -313,6 +312,9 @@ static void zynq_init(MachineState *machine) sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[39-IRQ_OFFSET]); dev = qdev_new("pl330"); + object_property_set_link(OBJECT(dev), "memory", + OBJECT(address_space_mem), + &error_fatal); qdev_prop_set_uint8(dev, "num_chnls", 8); qdev_prop_set_uint8(dev, "num_periph_req", 4); qdev_prop_set_uint8(dev, "num_events", 16); diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c index 8482cd61960..d2f55e29b64 100644 --- a/hw/arm/xlnx-versal-virt.c +++ b/hw/arm/xlnx-versal-virt.c @@ -10,11 +10,9 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "sysemu/device_tree.h" -#include "exec/address-spaces.h" #include "hw/boards.h" #include "hw/sysbus.h" #include "hw/arm/sysbus-fdt.h" @@ -358,6 +356,61 @@ 
static void fdt_add_rtc_node(VersalVirt *s) g_free(name); } +static void fdt_add_bbram_node(VersalVirt *s) +{ + const char compat[] = TYPE_XLNX_BBRAM; + const char interrupt_names[] = "bbram-error"; + char *name = g_strdup_printf("/bbram@%x", MM_PMC_BBRAM_CTRL); + + qemu_fdt_add_subnode(s->fdt, name); + + qemu_fdt_setprop_cells(s->fdt, name, "interrupts", + GIC_FDT_IRQ_TYPE_SPI, VERSAL_BBRAM_APB_IRQ_0, + GIC_FDT_IRQ_FLAGS_LEVEL_HI); + qemu_fdt_setprop(s->fdt, name, "interrupt-names", + interrupt_names, sizeof(interrupt_names)); + qemu_fdt_setprop_sized_cells(s->fdt, name, "reg", + 2, MM_PMC_BBRAM_CTRL, + 2, MM_PMC_BBRAM_CTRL_SIZE); + qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat)); + g_free(name); +} + +static void fdt_add_efuse_ctrl_node(VersalVirt *s) +{ + const char compat[] = TYPE_XLNX_VERSAL_EFUSE_CTRL; + const char interrupt_names[] = "pmc_efuse"; + char *name = g_strdup_printf("/pmc_efuse@%x", MM_PMC_EFUSE_CTRL); + + qemu_fdt_add_subnode(s->fdt, name); + + qemu_fdt_setprop_cells(s->fdt, name, "interrupts", + GIC_FDT_IRQ_TYPE_SPI, VERSAL_EFUSE_IRQ, + GIC_FDT_IRQ_FLAGS_LEVEL_HI); + qemu_fdt_setprop(s->fdt, name, "interrupt-names", + interrupt_names, sizeof(interrupt_names)); + qemu_fdt_setprop_sized_cells(s->fdt, name, "reg", + 2, MM_PMC_EFUSE_CTRL, + 2, MM_PMC_EFUSE_CTRL_SIZE); + qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat)); + g_free(name); +} + +static void fdt_add_efuse_cache_node(VersalVirt *s) +{ + const char compat[] = TYPE_XLNX_VERSAL_EFUSE_CACHE; + char *name = g_strdup_printf("/xlnx_pmc_efuse_cache@%x", + MM_PMC_EFUSE_CACHE); + + qemu_fdt_add_subnode(s->fdt, name); + + qemu_fdt_setprop_sized_cells(s->fdt, name, "reg", + 2, MM_PMC_EFUSE_CACHE, + 2, MM_PMC_EFUSE_CACHE_SIZE); + qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat)); + g_free(name); +} + static void fdt_nop_memory_nodes(void *fdt, Error **errp) { Error *err = NULL; @@ -512,6 +565,30 @@ static void create_virtio_regions(VersalVirt *s) } } +static void bbram_attach_drive(XlnxBBRam *dev) +{ + DriveInfo *dinfo; + BlockBackend *blk; + + dinfo = drive_get_by_index(IF_PFLASH, 0); + blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL; + if (blk) { + qdev_prop_set_drive(DEVICE(dev), "drive", blk); + } +} + +static void efuse_attach_drive(XlnxEFuse *dev) +{ + DriveInfo *dinfo; + BlockBackend *blk; + + dinfo = drive_get_by_index(IF_PFLASH, 1); + blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL; + if (blk) { + qdev_prop_set_drive(DEVICE(dev), "drive", blk); + } +} + static void sd_plugin_card(SDHCIState *sd, DriveInfo *di) { BlockBackend *blk = di ? blk_by_legacy_dinfo(di) : NULL; @@ -572,6 +649,9 @@ static void versal_virt_init(MachineState *machine) fdt_add_usb_xhci_nodes(s); fdt_add_sd_nodes(s); fdt_add_rtc_node(s); + fdt_add_bbram_node(s); + fdt_add_efuse_ctrl_node(s); + fdt_add_efuse_cache_node(s); fdt_add_cpu_nodes(s, psci_conduit); fdt_add_clk_node(s, "/clk125", 125000000, s->phandle.clk_125Mhz); fdt_add_clk_node(s, "/clk25", 25000000, s->phandle.clk_25Mhz); @@ -581,6 +661,12 @@ static void versal_virt_init(MachineState *machine) memory_region_add_subregion_overlap(get_system_memory(), 0, &s->soc.fpd.apu.mr, 0); + /* Attach bbram backend, if given */ + bbram_attach_drive(&s->soc.pmc.bbram); + + /* Attach efuse backend, if given */ + efuse_attach_drive(&s->soc.pmc.efuse); + /* Plugin SD cards. 
*/ for (i = 0; i < ARRAY_SIZE(s->soc.pmc.iou.sd); i++) { sd_plugin_card(&s->soc.pmc.iou.sd[i], drive_get_next(IF_SD)); diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c index 79609692e4e..b2705b6925e 100644 --- a/hw/arm/xlnx-versal.c +++ b/hw/arm/xlnx-versal.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "qapi/error.h" -#include "qemu/log.h" #include "qemu/module.h" #include "hw/sysbus.h" #include "net/net.h" @@ -219,6 +218,8 @@ static void versal_create_admas(Versal *s, qemu_irq *pic) TYPE_XLNX_ZDMA); dev = DEVICE(&s->lpd.iou.adma[i]); object_property_set_int(OBJECT(dev), "bus-width", 128, &error_abort); + object_property_set_link(OBJECT(dev), "dma", + OBJECT(get_system_memory()), &error_fatal); sysbus_realize(SYS_BUS_DEVICE(dev), &error_fatal); mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0); @@ -313,6 +314,61 @@ static void versal_create_xrams(Versal *s, qemu_irq *pic) } } +static void versal_create_bbram(Versal *s, qemu_irq *pic) +{ + SysBusDevice *sbd; + + object_initialize_child_with_props(OBJECT(s), "bbram", &s->pmc.bbram, + sizeof(s->pmc.bbram), TYPE_XLNX_BBRAM, + &error_fatal, + "crc-zpads", "0", + NULL); + sbd = SYS_BUS_DEVICE(&s->pmc.bbram); + + sysbus_realize(sbd, &error_fatal); + memory_region_add_subregion(&s->mr_ps, MM_PMC_BBRAM_CTRL, + sysbus_mmio_get_region(sbd, 0)); + sysbus_connect_irq(sbd, 0, pic[VERSAL_BBRAM_APB_IRQ_0]); +} + +static void versal_realize_efuse_part(Versal *s, Object *dev, hwaddr base) +{ + SysBusDevice *part = SYS_BUS_DEVICE(dev); + + object_property_set_link(OBJECT(part), "efuse", + OBJECT(&s->pmc.efuse), &error_abort); + + sysbus_realize(part, &error_abort); + memory_region_add_subregion(&s->mr_ps, base, + sysbus_mmio_get_region(part, 0)); +} + +static void versal_create_efuse(Versal *s, qemu_irq *pic) +{ + Object *bits = OBJECT(&s->pmc.efuse); + Object *ctrl = OBJECT(&s->pmc.efuse_ctrl); + Object *cache = OBJECT(&s->pmc.efuse_cache); + + object_initialize_child(OBJECT(s), "efuse-ctrl", &s->pmc.efuse_ctrl, + TYPE_XLNX_VERSAL_EFUSE_CTRL); + + object_initialize_child(OBJECT(s), "efuse-cache", &s->pmc.efuse_cache, + TYPE_XLNX_VERSAL_EFUSE_CACHE); + + object_initialize_child_with_props(ctrl, "xlnx-efuse@0", bits, + sizeof(s->pmc.efuse), + TYPE_XLNX_EFUSE, &error_abort, + "efuse-nr", "3", + "efuse-size", "8192", + NULL); + + qdev_realize(DEVICE(bits), NULL, &error_abort); + versal_realize_efuse_part(s, ctrl, MM_PMC_EFUSE_CTRL); + versal_realize_efuse_part(s, cache, MM_PMC_EFUSE_CACHE); + + sysbus_connect_irq(SYS_BUS_DEVICE(ctrl), 0, pic[VERSAL_EFUSE_IRQ]); +} + /* This takes the board allocated linear DDR memory and creates aliases * for each split DDR range/aperture on the Versal address map. 
*/ @@ -377,6 +433,8 @@ static void versal_unimp(Versal *s) MM_CRL, MM_CRL_SIZE); versal_unimp_area(s, "crf", &s->mr_ps, MM_FPD_CRF, MM_FPD_CRF_SIZE); + versal_unimp_area(s, "apu", &s->mr_ps, + MM_FPD_FPD_APU, MM_FPD_FPD_APU_SIZE); versal_unimp_area(s, "crp", &s->mr_ps, MM_PMC_CRP, MM_PMC_CRP_SIZE); versal_unimp_area(s, "iou-scntr", &s->mr_ps, @@ -399,6 +457,8 @@ static void versal_realize(DeviceState *dev, Error **errp) versal_create_sds(s, pic); versal_create_rtc(s, pic); versal_create_xrams(s, pic); + versal_create_bbram(s, pic); + versal_create_efuse(s, pic); versal_map_ddr(s); versal_unimp(s); diff --git a/hw/arm/xlnx-zcu102.c b/hw/arm/xlnx-zcu102.c index a9db25eb99f..3dc2b5e8ca4 100644 --- a/hw/arm/xlnx-zcu102.c +++ b/hw/arm/xlnx-zcu102.c @@ -17,7 +17,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/arm/xlnx-zynqmp.h" #include "hw/boards.h" #include "qemu/error-report.h" @@ -99,6 +98,30 @@ static void zcu102_modify_dtb(const struct arm_boot_info *binfo, void *fdt) } } +static void bbram_attach_drive(XlnxBBRam *dev) +{ + DriveInfo *dinfo; + BlockBackend *blk; + + dinfo = drive_get_by_index(IF_PFLASH, 2); + blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL; + if (blk) { + qdev_prop_set_drive(DEVICE(dev), "drive", blk); + } +} + +static void efuse_attach_drive(XlnxEFuse *dev) +{ + DriveInfo *dinfo; + BlockBackend *blk; + + dinfo = drive_get_by_index(IF_PFLASH, 3); + blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL; + if (blk) { + qdev_prop_set_drive(DEVICE(dev), "drive", blk); + } +} + static void xlnx_zcu102_init(MachineState *machine) { XlnxZCU102 *s = ZCU102_MACHINE(machine); @@ -137,6 +160,12 @@ static void xlnx_zcu102_init(MachineState *machine) qdev_realize(DEVICE(&s->soc), NULL, &error_fatal); + /* Attach bbram backend, if given */ + bbram_attach_drive(&s->soc.bbram); + + /* Attach efuse backend, if given */ + efuse_attach_drive(&s->soc.efuse); + /* Create and plug in the SD cards */ for (i = 0; i < XLNX_ZYNQMP_NUM_SDHCI; i++) { BusState *bus; diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c index 7f01284a5c0..1c52a575aad 100644 --- a/hw/arm/xlnx-zynqmp.c +++ b/hw/arm/xlnx-zynqmp.c @@ -18,11 +18,10 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/module.h" -#include "cpu.h" #include "hw/arm/xlnx-zynqmp.h" #include "hw/intc/arm_gic_common.h" +#include "hw/misc/unimp.h" #include "hw/boards.h" -#include "exec/address-spaces.h" #include "sysemu/kvm.h" #include "sysemu/sysemu.h" #include "kvm_arm.h" @@ -58,12 +57,21 @@ #define DPDMA_ADDR 0xfd4c0000 #define DPDMA_IRQ 116 +#define APU_ADDR 0xfd5c0000 +#define APU_SIZE 0x100 + #define IPI_ADDR 0xFF300000 #define IPI_IRQ 64 #define RTC_ADDR 0xffa60000 #define RTC_IRQ 26 +#define BBRAM_ADDR 0xffcd0000 +#define BBRAM_IRQ 11 + +#define EFUSE_ADDR 0xffcc0000 +#define EFUSE_IRQ 87 + #define SDHCI_CAPABILITIES 0x280737ec6481 /* Datasheet: UG1085 (v1.7) */ static const uint64_t gem_addr[XLNX_ZYNQMP_NUM_GEMS] = { @@ -224,6 +232,73 @@ static void xlnx_zynqmp_create_rpu(MachineState *ms, XlnxZynqMPState *s, qdev_realize(DEVICE(&s->rpu_cluster), NULL, &error_fatal); } +static void xlnx_zynqmp_create_bbram(XlnxZynqMPState *s, qemu_irq *gic) +{ + SysBusDevice *sbd; + + object_initialize_child_with_props(OBJECT(s), "bbram", &s->bbram, + sizeof(s->bbram), TYPE_XLNX_BBRAM, + &error_fatal, + "crc-zpads", "1", + NULL); + sbd = SYS_BUS_DEVICE(&s->bbram); + + sysbus_realize(sbd, &error_fatal); + sysbus_mmio_map(sbd, 0, BBRAM_ADDR); + sysbus_connect_irq(sbd, 0, gic[BBRAM_IRQ]); +} + +static void 
xlnx_zynqmp_create_efuse(XlnxZynqMPState *s, qemu_irq *gic) +{ + Object *bits = OBJECT(&s->efuse); + Object *ctrl = OBJECT(&s->efuse_ctrl); + SysBusDevice *sbd; + + object_initialize_child(OBJECT(s), "efuse-ctrl", &s->efuse_ctrl, + TYPE_XLNX_ZYNQMP_EFUSE); + + object_initialize_child_with_props(ctrl, "xlnx-efuse@0", bits, + sizeof(s->efuse), + TYPE_XLNX_EFUSE, &error_abort, + "efuse-nr", "3", + "efuse-size", "2048", + NULL); + + qdev_realize(DEVICE(bits), NULL, &error_abort); + object_property_set_link(ctrl, "efuse", bits, &error_abort); + + sbd = SYS_BUS_DEVICE(ctrl); + sysbus_realize(sbd, &error_abort); + sysbus_mmio_map(sbd, 0, EFUSE_ADDR); + sysbus_connect_irq(sbd, 0, gic[EFUSE_IRQ]); +} + +static void xlnx_zynqmp_create_unimp_mmio(XlnxZynqMPState *s) +{ + static const struct UnimpInfo { + const char *name; + hwaddr base; + hwaddr size; + } unimp_areas[ARRAY_SIZE(s->mr_unimp)] = { + { .name = "apu", APU_ADDR, APU_SIZE }, + }; + unsigned int nr; + + for (nr = 0; nr < ARRAY_SIZE(unimp_areas); nr++) { + const struct UnimpInfo *info = &unimp_areas[nr]; + DeviceState *dev = qdev_new(TYPE_UNIMPLEMENTED_DEVICE); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + + assert(info->name && info->base && info->size > 0); + qdev_prop_set_string(dev, "name", info->name); + qdev_prop_set_uint64(dev, "size", info->size); + object_property_add_child(OBJECT(s), info->name, OBJECT(dev)); + + sysbus_realize_and_unref(sbd, &error_fatal); + sysbus_mmio_map(sbd, 0, info->base); + } +} + static void xlnx_zynqmp_init(Object *obj) { MachineState *ms = MACHINE(qdev_get_machine()); @@ -572,26 +647,6 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp) g_free(bus_name); } - if (!sysbus_realize(SYS_BUS_DEVICE(&s->qspi), errp)) { - return; - } - sysbus_mmio_map(SYS_BUS_DEVICE(&s->qspi), 0, QSPI_ADDR); - sysbus_mmio_map(SYS_BUS_DEVICE(&s->qspi), 1, LQSPI_ADDR); - sysbus_connect_irq(SYS_BUS_DEVICE(&s->qspi), 0, gic_spi[QSPI_IRQ]); - - for (i = 0; i < XLNX_ZYNQMP_NUM_QSPI_BUS; i++) { - gchar *bus_name; - gchar *target_bus; - - /* Alias controller SPI bus to the SoC itself */ - bus_name = g_strdup_printf("qspi%d", i); - target_bus = g_strdup_printf("spi%d", i); - object_property_add_alias(OBJECT(s), bus_name, - OBJECT(&s->qspi), target_bus); - g_free(bus_name); - g_free(target_bus); - } - if (!sysbus_realize(SYS_BUS_DEVICE(&s->dp), errp)) { return; } @@ -618,11 +673,19 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp) sysbus_mmio_map(SYS_BUS_DEVICE(&s->rtc), 0, RTC_ADDR); sysbus_connect_irq(SYS_BUS_DEVICE(&s->rtc), 0, gic_spi[RTC_IRQ]); + xlnx_zynqmp_create_bbram(s, gic_spi); + xlnx_zynqmp_create_efuse(s, gic_spi); + xlnx_zynqmp_create_unimp_mmio(s); + for (i = 0; i < XLNX_ZYNQMP_NUM_GDMA_CH; i++) { if (!object_property_set_uint(OBJECT(&s->gdma[i]), "bus-width", 128, errp)) { return; } + if (!object_property_set_link(OBJECT(&s->gdma[i]), "dma", + OBJECT(system_memory), errp)) { + return; + } if (!sysbus_realize(SYS_BUS_DEVICE(&s->gdma[i]), errp)) { return; } @@ -633,6 +696,10 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp) } for (i = 0; i < XLNX_ZYNQMP_NUM_ADMA_CH; i++) { + if (!object_property_set_link(OBJECT(&s->adma[i]), "dma", + OBJECT(system_memory), errp)) { + return; + } if (!sysbus_realize(SYS_BUS_DEVICE(&s->adma[i]), errp)) { return; } @@ -642,14 +709,36 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp) gic_spi[adma_ch_intr[i]]); } + if (!object_property_set_link(OBJECT(&s->qspi_dma), "dma", + OBJECT(system_memory), errp)) { + return; + } if 
(!sysbus_realize(SYS_BUS_DEVICE(&s->qspi_dma), errp)) { return; } sysbus_mmio_map(SYS_BUS_DEVICE(&s->qspi_dma), 0, QSPI_DMA_ADDR); sysbus_connect_irq(SYS_BUS_DEVICE(&s->qspi_dma), 0, gic_spi[QSPI_IRQ]); - object_property_set_link(OBJECT(&s->qspi), "stream-connected-dma", - OBJECT(&s->qspi_dma), errp); + + if (!object_property_set_link(OBJECT(&s->qspi), "stream-connected-dma", + OBJECT(&s->qspi_dma), errp)) { + return; + } + if (!sysbus_realize(SYS_BUS_DEVICE(&s->qspi), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->qspi), 0, QSPI_ADDR); + sysbus_mmio_map(SYS_BUS_DEVICE(&s->qspi), 1, LQSPI_ADDR); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->qspi), 0, gic_spi[QSPI_IRQ]); + + for (i = 0; i < XLNX_ZYNQMP_NUM_QSPI_BUS; i++) { + g_autofree gchar *bus_name = g_strdup_printf("qspi%d", i); + g_autofree gchar *target_bus = g_strdup_printf("spi%d", i); + + /* Alias controller SPI bus to the SoC itself */ + object_property_add_alias(OBJECT(s), bus_name, + OBJECT(&s->qspi), target_bus); + } } static Property xlnx_zynqmp_props[] = { diff --git a/hw/arm/z2.c b/hw/arm/z2.c index 5099bd83802..9c1e876207b 100644 --- a/hw/arm/z2.c +++ b/hw/arm/z2.c @@ -162,7 +162,7 @@ static void zipit_lcd_realize(SSIPeripheral *dev, Error **errp) z->pos = 0; } -static VMStateDescription vmstate_zipit_lcd_state = { +static const VMStateDescription vmstate_zipit_lcd_state = { .name = "zipit-lcd", .version_id = 2, .minimum_version_id = 2, @@ -268,7 +268,7 @@ static uint8_t aer915_recv(I2CSlave *slave) return retval; } -static VMStateDescription vmstate_aer915_state = { +static const VMStateDescription vmstate_aer915_state = { .name = "aer915", .version_id = 1, .minimum_version_id = 1, diff --git a/hw/audio/adlib.c b/hw/audio/adlib.c index 42d50d2fdc4..5f979b1487d 100644 --- a/hw/audio/adlib.c +++ b/hw/audio/adlib.c @@ -186,7 +186,7 @@ static int write_audio (AdlibState *s, int samples) static void adlib_callback (void *opaque, int free) { AdlibState *s = opaque; - int samples, net = 0, to_play, written; + int samples, to_play, written; samples = free >> SHIFT; if (!(s->active && s->enabled) || !samples) { @@ -219,7 +219,6 @@ static void adlib_callback (void *opaque, int free) written = write_audio (s, samples); if (written) { - net += written; samples -= written; s->pos = (s->pos + written) % s->samples; } diff --git a/hw/audio/intel-hda.c b/hw/audio/intel-hda.c index 4330213fff1..8ce9df64e3e 100644 --- a/hw/audio/intel-hda.c +++ b/hw/audio/intel-hda.c @@ -52,7 +52,7 @@ void hda_codec_bus_init(DeviceState *dev, HDACodecBus *bus, size_t bus_size, hda_codec_response_func response, hda_codec_xfer_func xfer) { - qbus_create_inplace(bus, bus_size, TYPE_HDA_BUS, dev, NULL); + qbus_init(bus, bus_size, TYPE_HDA_BUS, dev, NULL); bus->response = response; bus->xfer = xfer; } diff --git a/hw/audio/meson.build b/hw/audio/meson.build index 32c42bdebe4..e48a9fc73d5 100644 --- a/hw/audio/meson.build +++ b/hw/audio/meson.build @@ -7,7 +7,6 @@ softmmu_ss.add(when: 'CONFIG_ES1370', if_true: files('es1370.c')) softmmu_ss.add(when: 'CONFIG_GUS', if_true: files('gus.c', 'gusemu_hal.c', 'gusemu_mixer.c')) softmmu_ss.add(when: 'CONFIG_HDA', if_true: files('intel-hda.c', 'hda-codec.c')) softmmu_ss.add(when: 'CONFIG_MARVELL_88W8618', if_true: files('marvell_88w8618.c')) -softmmu_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-ac97.c')) softmmu_ss.add(when: 'CONFIG_PCSPK', if_true: files('pcspk.c')) softmmu_ss.add(when: 'CONFIG_PL041', if_true: files('pl041.c', 'lm4549.c')) softmmu_ss.add(when: 'CONFIG_SB16', if_true: 
files('sb16.c')) diff --git a/hw/audio/milkymist-ac97.c b/hw/audio/milkymist-ac97.c deleted file mode 100644 index 7d2e0570380..00000000000 --- a/hw/audio/milkymist-ac97.c +++ /dev/null @@ -1,360 +0,0 @@ -/* - * QEMU model of the Milkymist System Controller. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/ac97.pdf - */ - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "audio/audio.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -enum { - R_AC97_CTRL = 0, - R_AC97_ADDR, - R_AC97_DATAOUT, - R_AC97_DATAIN, - R_D_CTRL, - R_D_ADDR, - R_D_REMAINING, - R_RESERVED, - R_U_CTRL, - R_U_ADDR, - R_U_REMAINING, - R_MAX -}; - -enum { - AC97_CTRL_RQEN = (1<<0), - AC97_CTRL_WRITE = (1<<1), -}; - -enum { - CTRL_EN = (1<<0), -}; - -#define TYPE_MILKYMIST_AC97 "milkymist-ac97" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistAC97State, MILKYMIST_AC97) - -struct MilkymistAC97State { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - - QEMUSoundCard card; - SWVoiceIn *voice_in; - SWVoiceOut *voice_out; - - uint32_t regs[R_MAX]; - - qemu_irq crrequest_irq; - qemu_irq crreply_irq; - qemu_irq dmar_irq; - qemu_irq dmaw_irq; -}; - -static void update_voices(MilkymistAC97State *s) -{ - if (s->regs[R_D_CTRL] & CTRL_EN) { - AUD_set_active_out(s->voice_out, 1); - } else { - AUD_set_active_out(s->voice_out, 0); - } - - if (s->regs[R_U_CTRL] & CTRL_EN) { - AUD_set_active_in(s->voice_in, 1); - } else { - AUD_set_active_in(s->voice_in, 0); - } -} - -static uint64_t ac97_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistAC97State *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_AC97_CTRL: - case R_AC97_ADDR: - case R_AC97_DATAOUT: - case R_AC97_DATAIN: - case R_D_CTRL: - case R_D_ADDR: - case R_D_REMAINING: - case R_U_CTRL: - case R_U_ADDR: - case R_U_REMAINING: - r = s->regs[addr]; - break; - - default: - error_report("milkymist_ac97: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_ac97_memory_read(addr << 2, r); - - return r; -} - -static void ac97_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistAC97State *s = opaque; - - trace_milkymist_ac97_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_AC97_CTRL: - /* always raise an IRQ according to the direction */ - if (value & AC97_CTRL_RQEN) { - if (value & AC97_CTRL_WRITE) { - trace_milkymist_ac97_pulse_irq_crrequest(); - qemu_irq_pulse(s->crrequest_irq); - } else { - trace_milkymist_ac97_pulse_irq_crreply(); - qemu_irq_pulse(s->crreply_irq); - } - } - - /* RQEN is self clearing */ - s->regs[addr] = value & ~AC97_CTRL_RQEN; - break; - case R_D_CTRL: - case R_U_CTRL: - s->regs[addr] = value; - update_voices(s); - 
break; - case R_AC97_ADDR: - case R_AC97_DATAOUT: - case R_AC97_DATAIN: - case R_D_ADDR: - case R_D_REMAINING: - case R_U_ADDR: - case R_U_REMAINING: - s->regs[addr] = value; - break; - - default: - error_report("milkymist_ac97: write access to unknown register 0x" - TARGET_FMT_plx, addr); - break; - } - -} - -static const MemoryRegionOps ac97_mmio_ops = { - .read = ac97_read, - .write = ac97_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void ac97_in_cb(void *opaque, int avail_b) -{ - MilkymistAC97State *s = opaque; - uint8_t buf[4096]; - uint32_t remaining = s->regs[R_U_REMAINING]; - int temp = MIN(remaining, avail_b); - uint32_t addr = s->regs[R_U_ADDR]; - int transferred = 0; - - trace_milkymist_ac97_in_cb(avail_b, remaining); - - /* prevent from raising an IRQ */ - if (temp == 0) { - return; - } - - while (temp) { - int acquired, to_copy; - - to_copy = MIN(temp, sizeof(buf)); - acquired = AUD_read(s->voice_in, buf, to_copy); - if (!acquired) { - break; - } - - cpu_physical_memory_write(addr, buf, acquired); - - temp -= acquired; - addr += acquired; - transferred += acquired; - } - - trace_milkymist_ac97_in_cb_transferred(transferred); - - s->regs[R_U_ADDR] = addr; - s->regs[R_U_REMAINING] -= transferred; - - if ((s->regs[R_U_CTRL] & CTRL_EN) && (s->regs[R_U_REMAINING] == 0)) { - trace_milkymist_ac97_pulse_irq_dmaw(); - qemu_irq_pulse(s->dmaw_irq); - } -} - -static void ac97_out_cb(void *opaque, int free_b) -{ - MilkymistAC97State *s = opaque; - uint8_t buf[4096]; - uint32_t remaining = s->regs[R_D_REMAINING]; - int temp = MIN(remaining, free_b); - uint32_t addr = s->regs[R_D_ADDR]; - int transferred = 0; - - trace_milkymist_ac97_out_cb(free_b, remaining); - - /* prevent from raising an IRQ */ - if (temp == 0) { - return; - } - - while (temp) { - int copied, to_copy; - - to_copy = MIN(temp, sizeof(buf)); - cpu_physical_memory_read(addr, buf, to_copy); - copied = AUD_write(s->voice_out, buf, to_copy); - if (!copied) { - break; - } - temp -= copied; - addr += copied; - transferred += copied; - } - - trace_milkymist_ac97_out_cb_transferred(transferred); - - s->regs[R_D_ADDR] = addr; - s->regs[R_D_REMAINING] -= transferred; - - if ((s->regs[R_D_CTRL] & CTRL_EN) && (s->regs[R_D_REMAINING] == 0)) { - trace_milkymist_ac97_pulse_irq_dmar(); - qemu_irq_pulse(s->dmar_irq); - } -} - -static void milkymist_ac97_reset(DeviceState *d) -{ - MilkymistAC97State *s = MILKYMIST_AC97(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - - AUD_set_active_in(s->voice_in, 0); - AUD_set_active_out(s->voice_out, 0); -} - -static int ac97_post_load(void *opaque, int version_id) -{ - MilkymistAC97State *s = opaque; - - update_voices(s); - - return 0; -} - -static void milkymist_ac97_init(Object *obj) -{ - MilkymistAC97State *s = MILKYMIST_AC97(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); - - sysbus_init_irq(dev, &s->crrequest_irq); - sysbus_init_irq(dev, &s->crreply_irq); - sysbus_init_irq(dev, &s->dmar_irq); - sysbus_init_irq(dev, &s->dmaw_irq); - - memory_region_init_io(&s->regs_region, obj, &ac97_mmio_ops, s, - "milkymist-ac97", R_MAX * 4); - sysbus_init_mmio(dev, &s->regs_region); -} - -static void milkymist_ac97_realize(DeviceState *dev, Error **errp) -{ - MilkymistAC97State *s = MILKYMIST_AC97(dev); - struct audsettings as; - - AUD_register_card("Milkymist AC'97", &s->card); - - as.freq = 48000; - as.nchannels = 2; - as.fmt = AUDIO_FORMAT_S16; - as.endianness = 1; - - s->voice_in = AUD_open_in(&s->card, 
s->voice_in, - "mm_ac97.in", s, ac97_in_cb, &as); - s->voice_out = AUD_open_out(&s->card, s->voice_out, - "mm_ac97.out", s, ac97_out_cb, &as); -} - -static const VMStateDescription vmstate_milkymist_ac97 = { - .name = "milkymist-ac97", - .version_id = 1, - .minimum_version_id = 1, - .post_load = ac97_post_load, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistAC97State, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static Property milkymist_ac97_properties[] = { - DEFINE_AUDIO_PROPERTIES(MilkymistAC97State, card), - DEFINE_PROP_END_OF_LIST(), -}; - -static void milkymist_ac97_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_ac97_realize; - dc->reset = milkymist_ac97_reset; - dc->vmsd = &vmstate_milkymist_ac97; - device_class_set_props(dc, milkymist_ac97_properties); -} - -static const TypeInfo milkymist_ac97_info = { - .name = TYPE_MILKYMIST_AC97, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistAC97State), - .instance_init = milkymist_ac97_init, - .class_init = milkymist_ac97_class_init, -}; - -static void milkymist_ac97_register_types(void) -{ - type_register_static(&milkymist_ac97_info); -} - -type_init(milkymist_ac97_register_types) diff --git a/hw/audio/sb16.c b/hw/audio/sb16.c index 8b207004102..60f1f75e3ac 100644 --- a/hw/audio/sb16.c +++ b/hw/audio/sb16.c @@ -115,6 +115,9 @@ struct SB16State { PortioList portio_list; }; +#define SAMPLE_RATE_MIN 5000 +#define SAMPLE_RATE_MAX 45000 + static void SB_audio_callback (void *opaque, int free); static int magic_of_irq (int irq) @@ -226,6 +229,23 @@ static void continue_dma8 (SB16State *s) control (s, 1); } +static inline int restrict_sampling_rate(int freq) +{ + if (freq < SAMPLE_RATE_MIN) { + qemu_log_mask(LOG_GUEST_ERROR, + "sampling range too low: %d, increasing to %u\n", + freq, SAMPLE_RATE_MIN); + return SAMPLE_RATE_MIN; + } else if (freq > SAMPLE_RATE_MAX) { + qemu_log_mask(LOG_GUEST_ERROR, + "sampling range too high: %d, decreasing to %u\n", + freq, SAMPLE_RATE_MAX); + return SAMPLE_RATE_MAX; + } else { + return freq; + } +} + static void dma_cmd8 (SB16State *s, int mask, int dma_len) { s->fmt = AUDIO_FORMAT_U8; @@ -241,6 +261,7 @@ static void dma_cmd8 (SB16State *s, int mask, int dma_len) int tmp = (256 - s->time_const); s->freq = (1000000 + (tmp / 2)) / tmp; } + s->freq = restrict_sampling_rate(s->freq); if (dma_len != -1) { s->block_size = dma_len << s->fmt_stereo; @@ -754,7 +775,7 @@ static void complete (SB16State *s) * and FT2 sets output freq with this (go figure). Compare: * http://homepages.cae.wisc.edu/~brodskye/sb16doc/sb16doc.html#SamplingRate */ - s->freq = dsp_get_hilo (s); + s->freq = restrict_sampling_rate(dsp_get_hilo(s)); ldebug ("set freq %d\n", s->freq); break; diff --git a/hw/audio/trace-events b/hw/audio/trace-events index 60556b4a979..e0e71cd9b16 100644 --- a/hw/audio/trace-events +++ b/hw/audio/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# cs4231.c cs4231_mem_readl_dreg(uint32_t reg, uint32_t ret) "read dreg %d: 0x%02x" @@ -6,18 +6,6 @@ cs4231_mem_readl_reg(uint32_t reg, uint32_t ret) "read reg %d: 0x%08x" cs4231_mem_writel_reg(uint32_t reg, uint32_t old, uint32_t val) "write reg %d: 0x%08x -> 0x%08x" cs4231_mem_writel_dreg(uint32_t reg, uint32_t old, uint32_t val) "write dreg %d: 0x%02x -> 0x%02x" -# milkymist-ac97.c -milkymist_ac97_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_ac97_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_ac97_pulse_irq_crrequest(void) "Pulse IRQ CR request" -milkymist_ac97_pulse_irq_crreply(void) "Pulse IRQ CR reply" -milkymist_ac97_pulse_irq_dmaw(void) "Pulse IRQ DMA write" -milkymist_ac97_pulse_irq_dmar(void) "Pulse IRQ DMA read" -milkymist_ac97_in_cb(int avail, uint32_t remaining) "avail %d remaining %u" -milkymist_ac97_in_cb_transferred(int transferred) "transferred %d" -milkymist_ac97_out_cb(int free, uint32_t remaining) "free %d remaining %u" -milkymist_ac97_out_cb_transferred(int transferred) "transferred %d" - # hda-codec.c hda_audio_running(const char *stream, int nr, bool running) "st %s, nr %d, run %d" hda_audio_format(const char *stream, int chan, const char *fmt, int freq) "st %s, %d x %s @ %d Hz" diff --git a/hw/avr/arduino.c b/hw/avr/arduino.c index 3ff31492fa6..48ef478346e 100644 --- a/hw/avr/arduino.c +++ b/hw/avr/arduino.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "hw/boards.h" #include "atmega.h" #include "boot.h" #include "qom/object.h" diff --git a/hw/avr/atmega.c b/hw/avr/atmega.c index 44c6afebbb6..0608e2d475e 100644 --- a/hw/avr/atmega.c +++ b/hw/avr/atmega.c @@ -18,7 +18,6 @@ #include "hw/qdev-properties.h" #include "hw/sysbus.h" #include "qom/object.h" -#include "hw/boards.h" /* FIXME memory_region_allocate_system_memory for sram */ #include "hw/misc/unimp.h" #include "atmega.h" @@ -402,7 +401,7 @@ static void atmega1280_class_init(ObjectClass *oc, void *data) { AtmegaMcuClass *amc = ATMEGA_MCU_CLASS(oc); - amc->cpu_type = AVR_CPU_TYPE_NAME("avr6"); + amc->cpu_type = AVR_CPU_TYPE_NAME("avr51"); amc->flash_size = 128 * KiB; amc->eeprom_size = 4 * KiB; amc->sram_size = 8 * KiB; diff --git a/hw/block/Kconfig b/hw/block/Kconfig index 4fcd1521668..9e8f28f9824 100644 --- a/hw/block/Kconfig +++ b/hw/block/Kconfig @@ -1,8 +1,14 @@ config FDC bool - # FIXME: there is no separate file for the MMIO floppy disk controller, so - # select ISA_BUS here instead of polluting each board that requires one - select ISA_BUS + +config FDC_ISA + bool + depends on ISA_BUS + select FDC + +config FDC_SYSBUS + bool + select FDC config SSI_M25P80 bool @@ -25,11 +31,6 @@ config ONENAND config TC58128 bool -config NVME_PCI - bool - default y if PCI_DEVICES - depends on PCI - config VIRTIO_BLK bool default y diff --git a/hw/block/block.c b/hw/block/block.c index 1e34573da71..d47ebf005ad 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -65,24 +65,58 @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp) { BlockBackend *blk = conf->blk; BlockSizes blocksizes; - int backend_ret; + BlockDriverState *bs; + bool use_blocksizes; + bool use_bs; + + switch (conf->backend_defaults) { + case ON_OFF_AUTO_AUTO: + use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes); + use_bs = false; + break; + + case ON_OFF_AUTO_ON: + use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes); + bs = blk_bs(blk); + use_bs = bs; + break; + + case ON_OFF_AUTO_OFF: + use_blocksizes = false; + use_bs = false; + break; + 
+ default: + abort(); + } - backend_ret = blk_probe_blocksizes(blk, &blocksizes); /* fill in detected values if they are not defined via qemu command line */ if (!conf->physical_block_size) { - if (!backend_ret) { + if (use_blocksizes) { conf->physical_block_size = blocksizes.phys; } else { conf->physical_block_size = BDRV_SECTOR_SIZE; } } if (!conf->logical_block_size) { - if (!backend_ret) { + if (use_blocksizes) { conf->logical_block_size = blocksizes.log; } else { conf->logical_block_size = BDRV_SECTOR_SIZE; } } + if (use_bs) { + if (!conf->opt_io_size) { + conf->opt_io_size = bs->bl.opt_transfer; + } + if (conf->discard_granularity == -1) { + if (bs->bl.pdiscard_alignment) { + conf->discard_granularity = bs->bl.pdiscard_alignment; + } else if (bs->bl.request_alignment != 1) { + conf->discard_granularity = bs->bl.request_alignment; + } + } + } if (conf->logical_block_size > conf->physical_block_size) { error_setg(errp, diff --git a/hw/block/dataplane/trace-events b/hw/block/dataplane/trace-events index 843cc4e7b16..38fc3e75071 100644 --- a/hw/block/dataplane/trace-events +++ b/hw/block/dataplane/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # virtio-blk.c virtio_blk_data_plane_start(void *s) "dataplane %p" diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index e9050c8987e..ee5a5352dc8 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -198,19 +198,38 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) goto fail_guest_notifiers; } + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ + memory_region_transaction_begin(); + /* Set up virtqueue notify */ for (i = 0; i < nvqs; i++) { r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); if (r != 0) { + int j = i; + fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); while (i--) { virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); } - goto fail_guest_notifiers; + + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. 
+ */ + memory_region_transaction_commit(); + + while (j--) { + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j); + } + goto fail_host_notifiers; } } + memory_region_transaction_commit(); + s->starting = false; vblk->dataplane_started = true; trace_virtio_blk_data_plane_start(s); @@ -221,7 +240,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) aio_context_release(old_context); if (r < 0) { error_report_err(local_err); - goto fail_guest_notifiers; + goto fail_aio_context; } /* Process queued requests before the ones in vring */ @@ -245,6 +264,20 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) aio_context_release(s->ctx); return 0; + fail_aio_context: + memory_region_transaction_begin(); + + for (i = 0; i < nvqs; i++) { + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + } + + memory_region_transaction_commit(); + + for (i = 0; i < nvqs; i++) { + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); + } + fail_host_notifiers: + k->set_guest_notifiers(qbus->parent, nvqs, false); fail_guest_notifiers: /* * If we failed to set up the guest notifiers queued requests will be @@ -305,8 +338,23 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) aio_context_release(s->ctx); + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ + memory_region_transaction_begin(); + for (i = 0; i < nvqs; i++) { virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + } + + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ + memory_region_transaction_commit(); + + for (i = 0; i < nvqs; i++) { virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); } diff --git a/hw/block/ecc.c b/hw/block/ecc.c index 1a182367ee6..6e0d63842c1 100644 --- a/hw/block/ecc.c +++ b/hw/block/ecc.c @@ -78,7 +78,7 @@ void ecc_reset(ECCState *s) } /* Save/restore */ -VMStateDescription vmstate_ecc_state = { +const VMStateDescription vmstate_ecc_state = { .name = "ecc-state", .version_id = 0, .minimum_version_id = 0, diff --git a/hw/block/fdc-internal.h b/hw/block/fdc-internal.h new file mode 100644 index 00000000000..036392e9fc1 --- /dev/null +++ b/hw/block/fdc-internal.h @@ -0,0 +1,158 @@ +/* + * QEMU Floppy disk emulator (Intel 82078) + * + * Copyright (c) 2003, 2007 Jocelyn Mayer + * Copyright (c) 2008 Hervé Poussineau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef HW_BLOCK_FDC_INTERNAL_H +#define HW_BLOCK_FDC_INTERNAL_H + +#include "exec/memory.h" +#include "exec/ioport.h" +#include "hw/block/block.h" +#include "hw/block/fdc.h" +#include "qapi/qapi-types-block.h" + +typedef struct FDCtrl FDCtrl; + +/* Floppy bus emulation */ + +typedef struct FloppyBus { + BusState bus; + FDCtrl *fdc; +} FloppyBus; + +/* Floppy disk drive emulation */ + +typedef enum FDriveRate { + FDRIVE_RATE_500K = 0x00, /* 500 Kbps */ + FDRIVE_RATE_300K = 0x01, /* 300 Kbps */ + FDRIVE_RATE_250K = 0x02, /* 250 Kbps */ + FDRIVE_RATE_1M = 0x03, /* 1 Mbps */ +} FDriveRate; + +typedef enum FDriveSize { + FDRIVE_SIZE_UNKNOWN, + FDRIVE_SIZE_350, + FDRIVE_SIZE_525, +} FDriveSize; + +typedef struct FDFormat { + FloppyDriveType drive; + uint8_t last_sect; + uint8_t max_track; + uint8_t max_head; + FDriveRate rate; +} FDFormat; + +typedef enum FDiskFlags { + FDISK_DBL_SIDES = 0x01, +} FDiskFlags; + +typedef struct FDrive { + FDCtrl *fdctrl; + BlockBackend *blk; + BlockConf *conf; + /* Drive status */ + FloppyDriveType drive; /* CMOS drive type */ + uint8_t perpendicular; /* 2.88 MB access mode */ + /* Position */ + uint8_t head; + uint8_t track; + uint8_t sect; + /* Media */ + FloppyDriveType disk; /* Current disk type */ + FDiskFlags flags; + uint8_t last_sect; /* Nb sector per track */ + uint8_t max_track; /* Nb of tracks */ + uint16_t bps; /* Bytes per sector */ + uint8_t ro; /* Is read-only */ + uint8_t media_changed; /* Is media changed */ + uint8_t media_rate; /* Data rate of medium */ + + bool media_validated; /* Have we validated the media? */ +} FDrive; + +struct FDCtrl { + MemoryRegion iomem; + qemu_irq irq; + /* Controller state */ + QEMUTimer *result_timer; + int dma_chann; + uint8_t phase; + IsaDma *dma; + /* Controller's identification */ + uint8_t version; + /* HW */ + uint8_t sra; + uint8_t srb; + uint8_t dor; + uint8_t dor_vmstate; /* only used as temp during vmstate */ + uint8_t tdr; + uint8_t dsr; + uint8_t msr; + uint8_t cur_drv; + uint8_t status0; + uint8_t status1; + uint8_t status2; + /* Command FIFO */ + uint8_t *fifo; + int32_t fifo_size; + uint32_t data_pos; + uint32_t data_len; + uint8_t data_state; + uint8_t data_dir; + uint8_t eot; /* last wanted sector */ + /* States kept only to be returned back */ + /* precompensation */ + uint8_t precomp_trk; + uint8_t config; + uint8_t lock; + /* Power down config (also with status regB access mode */ + uint8_t pwrd; + /* Floppy drives */ + FloppyBus bus; + uint8_t num_floppies; + FDrive drives[MAX_FD]; + struct { + FloppyDriveType type; + } qdev_for_drives[MAX_FD]; + int reset_sensei; + FloppyDriveType fallback; /* type=auto failure fallback */ + /* Timers state */ + uint8_t timer0; + uint8_t timer1; + PortioList portio_list; +}; + +extern const FDFormat fd_formats[]; +extern const VMStateDescription vmstate_fdc; + +uint32_t fdctrl_read(void *opaque, uint32_t reg); +void fdctrl_write(void *opaque, uint32_t reg, uint32_t value); +void fdctrl_reset(FDCtrl *fdctrl, int do_irq); +void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp); + +int fdctrl_transfer_handler(void *opaque, int nchan, int dma_pos, int dma_len); + +void fdctrl_init_drives(FloppyBus *bus, DriveInfo **fds); + +#endif diff --git a/hw/block/fdc-isa.c b/hw/block/fdc-isa.c new file mode 100644 index 00000000000..3bf64e06657 --- /dev/null +++ b/hw/block/fdc-isa.c @@ -0,0 +1,320 @@ +/* + * QEMU Floppy disk emulator (Intel 82078) + * + * Copyright (c) 2003, 2007 Jocelyn Mayer + * Copyright (c) 2008 Hervé Poussineau + * + * 
Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +/* + * The controller is used in Sun4m systems in a slightly different + * way. There are changes in DOR register and DMA is not available. + */ + +#include "qemu/osdep.h" +#include "hw/block/fdc.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/timer.h" +#include "hw/acpi/aml-build.h" +#include "hw/irq.h" +#include "hw/isa/isa.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "migration/vmstate.h" +#include "hw/block/block.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "sysemu/sysemu.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "trace.h" +#include "qom/object.h" +#include "fdc-internal.h" + +OBJECT_DECLARE_SIMPLE_TYPE(FDCtrlISABus, ISA_FDC) + +struct FDCtrlISABus { + /*< private >*/ + ISADevice parent_obj; + /*< public >*/ + + uint32_t iobase; + uint32_t irq; + uint32_t dma; + struct FDCtrl state; + int32_t bootindexA; + int32_t bootindexB; +}; + +static void fdctrl_external_reset_isa(DeviceState *d) +{ + FDCtrlISABus *isa = ISA_FDC(d); + FDCtrl *s = &isa->state; + + fdctrl_reset(s, 0); +} + +void isa_fdc_init_drives(ISADevice *fdc, DriveInfo **fds) +{ + fdctrl_init_drives(&ISA_FDC(fdc)->state.bus, fds); +} + +static const MemoryRegionPortio fdc_portio_list[] = { + { 1, 5, 1, .read = fdctrl_read, .write = fdctrl_write }, + { 7, 1, 1, .read = fdctrl_read, .write = fdctrl_write }, + PORTIO_END_OF_LIST(), +}; + +static void isabus_fdc_realize(DeviceState *dev, Error **errp) +{ + ISADevice *isadev = ISA_DEVICE(dev); + FDCtrlISABus *isa = ISA_FDC(dev); + FDCtrl *fdctrl = &isa->state; + Error *err = NULL; + + isa_register_portio_list(isadev, &fdctrl->portio_list, + isa->iobase, fdc_portio_list, fdctrl, + "fdc"); + + isa_init_irq(isadev, &fdctrl->irq, isa->irq); + fdctrl->dma_chann = isa->dma; + if (fdctrl->dma_chann != -1) { + IsaDmaClass *k; + fdctrl->dma = isa_get_dma(isa_bus_from_device(isadev), isa->dma); + if (!fdctrl->dma) { + error_setg(errp, "ISA controller does not support DMA"); + return; + } + k = ISADMA_GET_CLASS(fdctrl->dma); + k->register_channel(fdctrl->dma, fdctrl->dma_chann, + &fdctrl_transfer_handler, fdctrl); + } + + qdev_set_legacy_instance_id(dev, isa->iobase, 2); + + fdctrl_realize_common(dev, fdctrl, &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } +} + +FloppyDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i) +{ + FDCtrlISABus *isa = 
ISA_FDC(fdc); + + return isa->state.drives[i].drive; +} + +static void isa_fdc_get_drive_max_chs(FloppyDriveType type, uint8_t *maxc, + uint8_t *maxh, uint8_t *maxs) +{ + const FDFormat *fdf; + + *maxc = *maxh = *maxs = 0; + for (fdf = fd_formats; fdf->drive != FLOPPY_DRIVE_TYPE_NONE; fdf++) { + if (fdf->drive != type) { + continue; + } + if (*maxc < fdf->max_track) { + *maxc = fdf->max_track; + } + if (*maxh < fdf->max_head) { + *maxh = fdf->max_head; + } + if (*maxs < fdf->last_sect) { + *maxs = fdf->last_sect; + } + } + (*maxc)--; +} + +static Aml *build_fdinfo_aml(int idx, FloppyDriveType type) +{ + Aml *dev, *fdi; + uint8_t maxc, maxh, maxs; + + isa_fdc_get_drive_max_chs(type, &maxc, &maxh, &maxs); + + dev = aml_device("FLP%c", 'A' + idx); + + aml_append(dev, aml_name_decl("_ADR", aml_int(idx))); + + fdi = aml_package(16); + aml_append(fdi, aml_int(idx)); /* Drive Number */ + aml_append(fdi, + aml_int(cmos_get_fd_drive_type(type))); /* Device Type */ + /* + * the values below are the limits of the drive, and are thus independent + * of the inserted media + */ + aml_append(fdi, aml_int(maxc)); /* Maximum Cylinder Number */ + aml_append(fdi, aml_int(maxs)); /* Maximum Sector Number */ + aml_append(fdi, aml_int(maxh)); /* Maximum Head Number */ + /* + * SeaBIOS returns the below values for int 0x13 func 0x08 regardless of + * the drive type, so shall we + */ + aml_append(fdi, aml_int(0xAF)); /* disk_specify_1 */ + aml_append(fdi, aml_int(0x02)); /* disk_specify_2 */ + aml_append(fdi, aml_int(0x25)); /* disk_motor_wait */ + aml_append(fdi, aml_int(0x02)); /* disk_sector_siz */ + aml_append(fdi, aml_int(0x12)); /* disk_eot */ + aml_append(fdi, aml_int(0x1B)); /* disk_rw_gap */ + aml_append(fdi, aml_int(0xFF)); /* disk_dtl */ + aml_append(fdi, aml_int(0x6C)); /* disk_formt_gap */ + aml_append(fdi, aml_int(0xF6)); /* disk_fill */ + aml_append(fdi, aml_int(0x0F)); /* disk_head_sttl */ + aml_append(fdi, aml_int(0x08)); /* disk_motor_strt */ + + aml_append(dev, aml_name_decl("_FDI", fdi)); + return dev; +} + +int cmos_get_fd_drive_type(FloppyDriveType fd0) +{ + int val; + + switch (fd0) { + case FLOPPY_DRIVE_TYPE_144: + /* 1.44 Mb 3"5 drive */ + val = 4; + break; + case FLOPPY_DRIVE_TYPE_288: + /* 2.88 Mb 3"5 drive */ + val = 5; + break; + case FLOPPY_DRIVE_TYPE_120: + /* 1.2 Mb 5"5 drive */ + val = 2; + break; + case FLOPPY_DRIVE_TYPE_NONE: + default: + val = 0; + break; + } + return val; +} + +static void fdc_isa_build_aml(ISADevice *isadev, Aml *scope) +{ + Aml *dev; + Aml *crs; + int i; + +#define ACPI_FDE_MAX_FD 4 + uint32_t fde_buf[5] = { + 0, 0, 0, 0, /* presence of floppy drives #0 - #3 */ + cpu_to_le32(2) /* tape presence (2 == never present) */ + }; + + crs = aml_resource_template(); + aml_append(crs, aml_io(AML_DECODE16, 0x03F2, 0x03F2, 0x00, 0x04)); + aml_append(crs, aml_io(AML_DECODE16, 0x03F7, 0x03F7, 0x00, 0x01)); + aml_append(crs, aml_irq_no_flags(6)); + aml_append(crs, + aml_dma(AML_COMPATIBILITY, AML_NOTBUSMASTER, AML_TRANSFER8, 2)); + + dev = aml_device("FDC0"); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0700"))); + aml_append(dev, aml_name_decl("_CRS", crs)); + + for (i = 0; i < MIN(MAX_FD, ACPI_FDE_MAX_FD); i++) { + FloppyDriveType type = isa_fdc_get_drive_type(isadev, i); + + if (type < FLOPPY_DRIVE_TYPE_NONE) { + fde_buf[i] = cpu_to_le32(1); /* drive present */ + aml_append(dev, build_fdinfo_aml(i, type)); + } + } + aml_append(dev, aml_name_decl("_FDE", + aml_buffer(sizeof(fde_buf), (uint8_t *)fde_buf))); + + aml_append(scope, dev); +} + +static const 
VMStateDescription vmstate_isa_fdc = { + .name = "fdc", + .version_id = 2, + .minimum_version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(state, FDCtrlISABus, 0, vmstate_fdc, FDCtrl), + VMSTATE_END_OF_LIST() + } +}; + +static Property isa_fdc_properties[] = { + DEFINE_PROP_UINT32("iobase", FDCtrlISABus, iobase, 0x3f0), + DEFINE_PROP_UINT32("irq", FDCtrlISABus, irq, 6), + DEFINE_PROP_UINT32("dma", FDCtrlISABus, dma, 2), + DEFINE_PROP_SIGNED("fdtypeA", FDCtrlISABus, state.qdev_for_drives[0].type, + FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_SIGNED("fdtypeB", FDCtrlISABus, state.qdev_for_drives[1].type, + FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_SIGNED("fallback", FDCtrlISABus, state.fallback, + FLOPPY_DRIVE_TYPE_288, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_END_OF_LIST(), +}; + +static void isabus_fdc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ISADeviceClass *isa = ISA_DEVICE_CLASS(klass); + + dc->desc = "virtual floppy controller"; + dc->realize = isabus_fdc_realize; + dc->fw_name = "fdc"; + dc->reset = fdctrl_external_reset_isa; + dc->vmsd = &vmstate_isa_fdc; + isa->build_aml = fdc_isa_build_aml; + device_class_set_props(dc, isa_fdc_properties); + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static void isabus_fdc_instance_init(Object *obj) +{ + FDCtrlISABus *isa = ISA_FDC(obj); + + device_add_bootindex_property(obj, &isa->bootindexA, + "bootindexA", "/floppy@0", + DEVICE(obj)); + device_add_bootindex_property(obj, &isa->bootindexB, + "bootindexB", "/floppy@1", + DEVICE(obj)); +} + +static const TypeInfo isa_fdc_info = { + .name = TYPE_ISA_FDC, + .parent = TYPE_ISA_DEVICE, + .instance_size = sizeof(FDCtrlISABus), + .class_init = isabus_fdc_class_init, + .instance_init = isabus_fdc_instance_init, +}; + +static void isa_fdc_register_types(void) +{ + type_register_static(&isa_fdc_info); +} + +type_init(isa_fdc_register_types) diff --git a/hw/block/fdc-sysbus.c b/hw/block/fdc-sysbus.c new file mode 100644 index 00000000000..57fc8773f12 --- /dev/null +++ b/hw/block/fdc-sysbus.c @@ -0,0 +1,251 @@ +/* + * QEMU Floppy disk emulator (Intel 82078) + * + * Copyright (c) 2003, 2007 Jocelyn Mayer + * Copyright (c) 2008 Hervé Poussineau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qom/object.h" +#include "hw/sysbus.h" +#include "hw/block/fdc.h" +#include "migration/vmstate.h" +#include "fdc-internal.h" +#include "trace.h" + +#define TYPE_SYSBUS_FDC "base-sysbus-fdc" +typedef struct FDCtrlSysBusClass FDCtrlSysBusClass; +typedef struct FDCtrlSysBus FDCtrlSysBus; +DECLARE_OBJ_CHECKERS(FDCtrlSysBus, FDCtrlSysBusClass, + SYSBUS_FDC, TYPE_SYSBUS_FDC) + +struct FDCtrlSysBusClass { + /*< private >*/ + SysBusDeviceClass parent_class; + /*< public >*/ + + bool use_strict_io; +}; + +struct FDCtrlSysBus { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + struct FDCtrl state; +}; + +static uint64_t fdctrl_read_mem(void *opaque, hwaddr reg, unsigned ize) +{ + return fdctrl_read(opaque, (uint32_t)reg); +} + +static void fdctrl_write_mem(void *opaque, hwaddr reg, + uint64_t value, unsigned size) +{ + fdctrl_write(opaque, (uint32_t)reg, value); +} + +static const MemoryRegionOps fdctrl_mem_ops = { + .read = fdctrl_read_mem, + .write = fdctrl_write_mem, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const MemoryRegionOps fdctrl_mem_strict_ops = { + .read = fdctrl_read_mem, + .write = fdctrl_write_mem, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 1, + }, +}; + +static void fdctrl_external_reset_sysbus(DeviceState *d) +{ + FDCtrlSysBus *sys = SYSBUS_FDC(d); + FDCtrl *s = &sys->state; + + fdctrl_reset(s, 0); +} + +static void fdctrl_handle_tc(void *opaque, int irq, int level) +{ + trace_fdctrl_tc_pulse(level); +} + +void fdctrl_init_sysbus(qemu_irq irq, int dma_chann, + hwaddr mmio_base, DriveInfo **fds) +{ + FDCtrl *fdctrl; + DeviceState *dev; + SysBusDevice *sbd; + FDCtrlSysBus *sys; + + dev = qdev_new("sysbus-fdc"); + sys = SYSBUS_FDC(dev); + fdctrl = &sys->state; + fdctrl->dma_chann = dma_chann; /* FIXME */ + sbd = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(sbd, &error_fatal); + sysbus_connect_irq(sbd, 0, irq); + sysbus_mmio_map(sbd, 0, mmio_base); + + fdctrl_init_drives(&sys->state.bus, fds); +} + +void sun4m_fdctrl_init(qemu_irq irq, hwaddr io_base, + DriveInfo **fds, qemu_irq *fdc_tc) +{ + DeviceState *dev; + FDCtrlSysBus *sys; + + dev = qdev_new("sun-fdtwo"); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sys = SYSBUS_FDC(dev); + sysbus_connect_irq(SYS_BUS_DEVICE(sys), 0, irq); + sysbus_mmio_map(SYS_BUS_DEVICE(sys), 0, io_base); + *fdc_tc = qdev_get_gpio_in(dev, 0); + + fdctrl_init_drives(&sys->state.bus, fds); +} + +static void sysbus_fdc_common_instance_init(Object *obj) +{ + DeviceState *dev = DEVICE(obj); + FDCtrlSysBusClass *sbdc = SYSBUS_FDC_GET_CLASS(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + FDCtrlSysBus *sys = SYSBUS_FDC(obj); + FDCtrl *fdctrl = &sys->state; + + qdev_set_legacy_instance_id(dev, 0 /* io */, 2); /* FIXME */ + + memory_region_init_io(&fdctrl->iomem, obj, + sbdc->use_strict_io ? 
&fdctrl_mem_strict_ops + : &fdctrl_mem_ops, + fdctrl, "fdc", 0x08); + sysbus_init_mmio(sbd, &fdctrl->iomem); + + sysbus_init_irq(sbd, &fdctrl->irq); + qdev_init_gpio_in(dev, fdctrl_handle_tc, 1); +} + +static void sysbus_fdc_realize(DeviceState *dev, Error **errp) +{ + FDCtrlSysBus *sys = SYSBUS_FDC(dev); + FDCtrl *fdctrl = &sys->state; + + fdctrl_realize_common(dev, fdctrl, errp); +} + +static const VMStateDescription vmstate_sysbus_fdc = { + .name = "fdc", + .version_id = 2, + .minimum_version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(state, FDCtrlSysBus, 0, vmstate_fdc, FDCtrl), + VMSTATE_END_OF_LIST() + } +}; + +static void sysbus_fdc_common_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = sysbus_fdc_realize; + dc->reset = fdctrl_external_reset_sysbus; + dc->vmsd = &vmstate_sysbus_fdc; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static const TypeInfo sysbus_fdc_common_typeinfo = { + .name = TYPE_SYSBUS_FDC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(FDCtrlSysBus), + .instance_init = sysbus_fdc_common_instance_init, + .abstract = true, + .class_init = sysbus_fdc_common_class_init, + .class_size = sizeof(FDCtrlSysBusClass), +}; + +static Property sysbus_fdc_properties[] = { + DEFINE_PROP_SIGNED("fdtypeA", FDCtrlSysBus, state.qdev_for_drives[0].type, + FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_SIGNED("fdtypeB", FDCtrlSysBus, state.qdev_for_drives[1].type, + FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_SIGNED("fallback", FDCtrlSysBus, state.fallback, + FLOPPY_DRIVE_TYPE_144, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_END_OF_LIST(), +}; + +static void sysbus_fdc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "virtual floppy controller"; + device_class_set_props(dc, sysbus_fdc_properties); +} + +static const TypeInfo sysbus_fdc_typeinfo = { + .name = "sysbus-fdc", + .parent = TYPE_SYSBUS_FDC, + .class_init = sysbus_fdc_class_init, +}; + +static Property sun4m_fdc_properties[] = { + DEFINE_PROP_SIGNED("fdtype", FDCtrlSysBus, state.qdev_for_drives[0].type, + FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_SIGNED("fallback", FDCtrlSysBus, state.fallback, + FLOPPY_DRIVE_TYPE_144, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_END_OF_LIST(), +}; + +static void sun4m_fdc_class_init(ObjectClass *klass, void *data) +{ + FDCtrlSysBusClass *sbdc = SYSBUS_FDC_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + sbdc->use_strict_io = true; + dc->desc = "virtual floppy controller"; + device_class_set_props(dc, sun4m_fdc_properties); +} + +static const TypeInfo sun4m_fdc_typeinfo = { + .name = "sun-fdtwo", + .parent = TYPE_SYSBUS_FDC, + .class_init = sun4m_fdc_class_init, +}; + +static void sysbus_fdc_register_types(void) +{ + type_register_static(&sysbus_fdc_common_typeinfo); + type_register_static(&sysbus_fdc_typeinfo); + type_register_static(&sun4m_fdc_typeinfo); +} + +type_init(sysbus_fdc_register_types) diff --git a/hw/block/fdc.c b/hw/block/fdc.c index a825c2acbae..21d18ac2e36 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -32,12 +32,10 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/timer.h" -#include "hw/acpi/aml-build.h" #include "hw/irq.h" #include "hw/isa/isa.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" -#include "hw/sysbus.h" 
#include "migration/vmstate.h" #include "hw/block/block.h" #include "sysemu/block-backend.h" @@ -48,6 +46,7 @@ #include "qemu/module.h" #include "trace.h" #include "qom/object.h" +#include "fdc-internal.h" /********************************************************/ /* debug Floppy devices */ @@ -62,21 +61,20 @@ } while (0) +/* Anonymous BlockBackend for empty drive */ +static BlockBackend *blk_create_empty_drive(void) +{ + return blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); +} + /********************************************************/ /* qdev floppy bus */ #define TYPE_FLOPPY_BUS "floppy-bus" OBJECT_DECLARE_SIMPLE_TYPE(FloppyBus, FLOPPY_BUS) -typedef struct FDCtrl FDCtrl; -typedef struct FDrive FDrive; static FDrive *get_drv(FDCtrl *fdctrl, int unit); -struct FloppyBus { - BusState bus; - FDCtrl *fdc; -}; - static const TypeInfo floppy_bus_info = { .name = TYPE_FLOPPY_BUS, .parent = TYPE_BUS, @@ -85,7 +83,7 @@ static const TypeInfo floppy_bus_info = { static void floppy_bus_create(FDCtrl *fdc, FloppyBus *bus, DeviceState *dev) { - qbus_create_inplace(bus, sizeof(FloppyBus), TYPE_FLOPPY_BUS, dev, NULL); + qbus_init(bus, sizeof(FloppyBus), TYPE_FLOPPY_BUS, dev, NULL); bus->fdc = fdc; } @@ -93,32 +91,11 @@ static void floppy_bus_create(FDCtrl *fdc, FloppyBus *bus, DeviceState *dev) /********************************************************/ /* Floppy drive emulation */ -typedef enum FDriveRate { - FDRIVE_RATE_500K = 0x00, /* 500 Kbps */ - FDRIVE_RATE_300K = 0x01, /* 300 Kbps */ - FDRIVE_RATE_250K = 0x02, /* 250 Kbps */ - FDRIVE_RATE_1M = 0x03, /* 1 Mbps */ -} FDriveRate; - -typedef enum FDriveSize { - FDRIVE_SIZE_UNKNOWN, - FDRIVE_SIZE_350, - FDRIVE_SIZE_525, -} FDriveSize; - -typedef struct FDFormat { - FloppyDriveType drive; - uint8_t last_sect; - uint8_t max_track; - uint8_t max_head; - FDriveRate rate; -} FDFormat; - /* In many cases, the total sector size of a format is enough to uniquely * identify it. However, there are some total sector collisions between * formats of different physical size, and these are noted below by * highlighting the total sector size for entries with collisions. */ -static const FDFormat fd_formats[] = { +const FDFormat fd_formats[] = { /* First entry is default format */ /* 1.44 MB 3"1/2 floppy disks */ { FLOPPY_DRIVE_TYPE_144, 18, 80, 1, FDRIVE_RATE_500K, }, /* 3.5" 2880 */ @@ -186,35 +163,6 @@ static FDriveSize drive_size(FloppyDriveType drive) #define FD_SECTOR_SC 2 /* Sector size code */ #define FD_RESET_SENSEI_COUNT 4 /* Number of sense interrupts on RESET */ -/* Floppy disk drive emulation */ -typedef enum FDiskFlags { - FDISK_DBL_SIDES = 0x01, -} FDiskFlags; - -struct FDrive { - FDCtrl *fdctrl; - BlockBackend *blk; - BlockConf *conf; - /* Drive status */ - FloppyDriveType drive; /* CMOS drive type */ - uint8_t perpendicular; /* 2.88 MB access mode */ - /* Position */ - uint8_t head; - uint8_t track; - uint8_t sect; - /* Media */ - FloppyDriveType disk; /* Current disk type */ - FDiskFlags flags; - uint8_t last_sect; /* Nb sector per track */ - uint8_t max_track; /* Nb of tracks */ - uint16_t bps; /* Bytes per sector */ - uint8_t ro; /* Is read-only */ - uint8_t media_changed; /* Is media changed */ - uint8_t media_rate; /* Data rate of medium */ - - bool media_validated; /* Have we validated the media? 
*/ -}; - static FloppyDriveType get_fallback_drive_type(FDrive *drv); @@ -544,8 +492,7 @@ static void floppy_drive_realize(DeviceState *qdev, Error **errp) } if (!dev->conf.blk) { - /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); + dev->conf.blk = blk_create_empty_drive(); ret = blk_attach_dev(dev->conf.blk, qdev); assert(ret == 0); @@ -626,10 +573,7 @@ static const TypeInfo floppy_drive_info = { /********************************************************/ /* Intel 82078 floppy disk controller emulation */ -static void fdctrl_reset(FDCtrl *fdctrl, int do_irq); static void fdctrl_to_command_phase(FDCtrl *fdctrl); -static int fdctrl_transfer_handler (void *opaque, int nchan, - int dma_pos, int dma_len); static void fdctrl_raise_irq(FDCtrl *fdctrl); static FDrive *get_cur_drv(FDCtrl *fdctrl); @@ -828,88 +772,12 @@ enum { #define FD_MULTI_TRACK(state) ((state) & FD_STATE_MULTI) #define FD_FORMAT_CMD(state) ((state) & FD_STATE_FORMAT) -struct FDCtrl { - MemoryRegion iomem; - qemu_irq irq; - /* Controller state */ - QEMUTimer *result_timer; - int dma_chann; - uint8_t phase; - IsaDma *dma; - /* Controller's identification */ - uint8_t version; - /* HW */ - uint8_t sra; - uint8_t srb; - uint8_t dor; - uint8_t dor_vmstate; /* only used as temp during vmstate */ - uint8_t tdr; - uint8_t dsr; - uint8_t msr; - uint8_t cur_drv; - uint8_t status0; - uint8_t status1; - uint8_t status2; - /* Command FIFO */ - uint8_t *fifo; - int32_t fifo_size; - uint32_t data_pos; - uint32_t data_len; - uint8_t data_state; - uint8_t data_dir; - uint8_t eot; /* last wanted sector */ - /* States kept only to be returned back */ - /* precompensation */ - uint8_t precomp_trk; - uint8_t config; - uint8_t lock; - /* Power down config (also with status regB access mode */ - uint8_t pwrd; - /* Floppy drives */ - FloppyBus bus; - uint8_t num_floppies; - FDrive drives[MAX_FD]; - struct { - FloppyDriveType type; - } qdev_for_drives[MAX_FD]; - int reset_sensei; - FloppyDriveType fallback; /* type=auto failure fallback */ - /* Timers state */ - uint8_t timer0; - uint8_t timer1; - PortioList portio_list; -}; - static FloppyDriveType get_fallback_drive_type(FDrive *drv) { return drv->fdctrl->fallback; } -#define TYPE_SYSBUS_FDC "base-sysbus-fdc" -OBJECT_DECLARE_SIMPLE_TYPE(FDCtrlSysBus, SYSBUS_FDC) - -struct FDCtrlSysBus { - /*< private >*/ - SysBusDevice parent_obj; - /*< public >*/ - - struct FDCtrl state; -}; - -OBJECT_DECLARE_SIMPLE_TYPE(FDCtrlISABus, ISA_FDC) - -struct FDCtrlISABus { - ISADevice parent_obj; - - uint32_t iobase; - uint32_t irq; - uint32_t dma; - struct FDCtrl state; - int32_t bootindexA; - int32_t bootindexB; -}; - -static uint32_t fdctrl_read (void *opaque, uint32_t reg) +uint32_t fdctrl_read(void *opaque, uint32_t reg) { FDCtrl *fdctrl = opaque; uint32_t retval; @@ -946,7 +814,7 @@ static uint32_t fdctrl_read (void *opaque, uint32_t reg) return retval; } -static void fdctrl_write (void *opaque, uint32_t reg, uint32_t value) +void fdctrl_write(void *opaque, uint32_t reg, uint32_t value) { FDCtrl *fdctrl = opaque; @@ -973,34 +841,6 @@ static void fdctrl_write (void *opaque, uint32_t reg, uint32_t value) } } -static uint64_t fdctrl_read_mem (void *opaque, hwaddr reg, - unsigned ize) -{ - return fdctrl_read(opaque, (uint32_t)reg); -} - -static void fdctrl_write_mem (void *opaque, hwaddr reg, - uint64_t value, unsigned size) -{ - fdctrl_write(opaque, (uint32_t)reg, value); -} - -static const MemoryRegionOps fdctrl_mem_ops = { - .read = fdctrl_read_mem, 
- .write = fdctrl_write_mem, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static const MemoryRegionOps fdctrl_mem_strict_ops = { - .read = fdctrl_read_mem, - .write = fdctrl_write_mem, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 1, - }, -}; - static bool fdrive_media_changed_needed(void *opaque) { FDrive *drive = opaque; @@ -1178,7 +1018,7 @@ static const VMStateDescription vmstate_fdc_phase = { } }; -static const VMStateDescription vmstate_fdc = { +const VMStateDescription vmstate_fdc = { .name = "fdc", .version_id = 2, .minimum_version_id = 2, @@ -1224,32 +1064,6 @@ static const VMStateDescription vmstate_fdc = { } }; -static void fdctrl_external_reset_sysbus(DeviceState *d) -{ - FDCtrlSysBus *sys = SYSBUS_FDC(d); - FDCtrl *s = &sys->state; - - fdctrl_reset(s, 0); -} - -static void fdctrl_external_reset_isa(DeviceState *d) -{ - FDCtrlISABus *isa = ISA_FDC(d); - FDCtrl *s = &isa->state; - - fdctrl_reset(s, 0); -} - -static void fdctrl_handle_tc(void *opaque, int irq, int level) -{ - //FDCtrl *s = opaque; - - if (level) { - // XXX - FLOPPY_DPRINTF("TC pulsed\n"); - } -} - /* Change IRQ state */ static void fdctrl_reset_irq(FDCtrl *fdctrl) { @@ -1273,7 +1087,7 @@ static void fdctrl_raise_irq(FDCtrl *fdctrl) } /* Reset controller */ -static void fdctrl_reset(FDCtrl *fdctrl, int do_irq) +void fdctrl_reset(FDCtrl *fdctrl, int do_irq) { int i; @@ -1352,7 +1166,19 @@ static FDrive *get_drv(FDCtrl *fdctrl, int unit) static FDrive *get_cur_drv(FDCtrl *fdctrl) { - return get_drv(fdctrl, fdctrl->cur_drv); + FDrive *cur_drv = get_drv(fdctrl, fdctrl->cur_drv); + + if (!cur_drv->blk) { + /* + * Kludge: empty drive line selected. Create an anonymous + * BlockBackend to avoid NULL deref with various BlockBackend + * API calls within this model (CVE-2021-20196). + * Due to the controller QOM model limitations, we don't + * attach the created to the controller device. 
+ */ + cur_drv->blk = blk_create_empty_drive(); + } + return cur_drv; } /* Status A register : 0x00 (read-only) */ @@ -1752,8 +1578,7 @@ static void fdctrl_start_transfer_del(FDCtrl *fdctrl, int direction) } /* handlers for DMA transfers */ -static int fdctrl_transfer_handler (void *opaque, int nchan, - int dma_pos, int dma_len) +int fdctrl_transfer_handler(void *opaque, int nchan, int dma_pos, int dma_len) { FDCtrl *fdctrl; FDrive *cur_drv; @@ -2489,7 +2314,7 @@ static void fdctrl_result_timer(void *opaque) /* Init functions */ -static void fdctrl_init_drives(FloppyBus *bus, DriveInfo **fds) +void fdctrl_init_drives(FloppyBus *bus, DriveInfo **fds) { DeviceState *dev; int i; @@ -2506,49 +2331,7 @@ static void fdctrl_init_drives(FloppyBus *bus, DriveInfo **fds) } } -void isa_fdc_init_drives(ISADevice *fdc, DriveInfo **fds) -{ - fdctrl_init_drives(&ISA_FDC(fdc)->state.bus, fds); -} - -void fdctrl_init_sysbus(qemu_irq irq, int dma_chann, - hwaddr mmio_base, DriveInfo **fds) -{ - FDCtrl *fdctrl; - DeviceState *dev; - SysBusDevice *sbd; - FDCtrlSysBus *sys; - - dev = qdev_new("sysbus-fdc"); - sys = SYSBUS_FDC(dev); - fdctrl = &sys->state; - fdctrl->dma_chann = dma_chann; /* FIXME */ - sbd = SYS_BUS_DEVICE(dev); - sysbus_realize_and_unref(sbd, &error_fatal); - sysbus_connect_irq(sbd, 0, irq); - sysbus_mmio_map(sbd, 0, mmio_base); - - fdctrl_init_drives(&sys->state.bus, fds); -} - -void sun4m_fdctrl_init(qemu_irq irq, hwaddr io_base, - DriveInfo **fds, qemu_irq *fdc_tc) -{ - DeviceState *dev; - FDCtrlSysBus *sys; - - dev = qdev_new("sun-fdtwo"); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sys = SYSBUS_FDC(dev); - sysbus_connect_irq(SYS_BUS_DEVICE(sys), 0, irq); - sysbus_mmio_map(SYS_BUS_DEVICE(sys), 0, io_base); - *fdc_tc = qdev_get_gpio_in(dev, 0); - - fdctrl_init_drives(&sys->state.bus, fds); -} - -static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, - Error **errp) +void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) { int i, j; FDrive *drive; @@ -2582,14 +2365,6 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, fdctrl->config = FD_CONFIG_EIS | FD_CONFIG_EFIFO; /* Implicit seek, polling & FIFO enabled */ fdctrl->num_floppies = MAX_FD; - if (fdctrl->dma_chann != -1) { - IsaDmaClass *k; - assert(fdctrl->dma); - k = ISADMA_GET_CLASS(fdctrl->dma); - k->register_channel(fdctrl->dma, fdctrl->dma_chann, - &fdctrl_transfer_handler, fdctrl); - } - floppy_bus_create(fdctrl, &fdctrl->bus, dev); for (i = 0; i < MAX_FD; i++) { @@ -2600,369 +2375,8 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, } } -static const MemoryRegionPortio fdc_portio_list[] = { - { 1, 5, 1, .read = fdctrl_read, .write = fdctrl_write }, - { 7, 1, 1, .read = fdctrl_read, .write = fdctrl_write }, - PORTIO_END_OF_LIST(), -}; - -static void isabus_fdc_realize(DeviceState *dev, Error **errp) -{ - ISADevice *isadev = ISA_DEVICE(dev); - FDCtrlISABus *isa = ISA_FDC(dev); - FDCtrl *fdctrl = &isa->state; - Error *err = NULL; - - isa_register_portio_list(isadev, &fdctrl->portio_list, - isa->iobase, fdc_portio_list, fdctrl, - "fdc"); - - isa_init_irq(isadev, &fdctrl->irq, isa->irq); - fdctrl->dma_chann = isa->dma; - if (fdctrl->dma_chann != -1) { - fdctrl->dma = isa_get_dma(isa_bus_from_device(isadev), isa->dma); - if (!fdctrl->dma) { - error_setg(errp, "ISA controller does not support DMA"); - return; - } - } - - qdev_set_legacy_instance_id(dev, isa->iobase, 2); - fdctrl_realize_common(dev, fdctrl, &err); - if (err != NULL) { - 
error_propagate(errp, err); - return; - } -} - -static void sysbus_fdc_initfn(Object *obj) -{ - SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - FDCtrlSysBus *sys = SYSBUS_FDC(obj); - FDCtrl *fdctrl = &sys->state; - - fdctrl->dma_chann = -1; - - memory_region_init_io(&fdctrl->iomem, obj, &fdctrl_mem_ops, fdctrl, - "fdc", 0x08); - sysbus_init_mmio(sbd, &fdctrl->iomem); -} - -static void sun4m_fdc_initfn(Object *obj) -{ - SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - FDCtrlSysBus *sys = SYSBUS_FDC(obj); - FDCtrl *fdctrl = &sys->state; - - fdctrl->dma_chann = -1; - - memory_region_init_io(&fdctrl->iomem, obj, &fdctrl_mem_strict_ops, - fdctrl, "fdctrl", 0x08); - sysbus_init_mmio(sbd, &fdctrl->iomem); -} - -static void sysbus_fdc_common_initfn(Object *obj) -{ - DeviceState *dev = DEVICE(obj); - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - FDCtrlSysBus *sys = SYSBUS_FDC(obj); - FDCtrl *fdctrl = &sys->state; - - qdev_set_legacy_instance_id(dev, 0 /* io */, 2); /* FIXME */ - - sysbus_init_irq(sbd, &fdctrl->irq); - qdev_init_gpio_in(dev, fdctrl_handle_tc, 1); -} - -static void sysbus_fdc_common_realize(DeviceState *dev, Error **errp) -{ - FDCtrlSysBus *sys = SYSBUS_FDC(dev); - FDCtrl *fdctrl = &sys->state; - - fdctrl_realize_common(dev, fdctrl, errp); -} - -FloppyDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i) -{ - FDCtrlISABus *isa = ISA_FDC(fdc); - - return isa->state.drives[i].drive; -} - -static void isa_fdc_get_drive_max_chs(FloppyDriveType type, uint8_t *maxc, - uint8_t *maxh, uint8_t *maxs) -{ - const FDFormat *fdf; - - *maxc = *maxh = *maxs = 0; - for (fdf = fd_formats; fdf->drive != FLOPPY_DRIVE_TYPE_NONE; fdf++) { - if (fdf->drive != type) { - continue; - } - if (*maxc < fdf->max_track) { - *maxc = fdf->max_track; - } - if (*maxh < fdf->max_head) { - *maxh = fdf->max_head; - } - if (*maxs < fdf->last_sect) { - *maxs = fdf->last_sect; - } - } - (*maxc)--; -} - -static Aml *build_fdinfo_aml(int idx, FloppyDriveType type) -{ - Aml *dev, *fdi; - uint8_t maxc, maxh, maxs; - - isa_fdc_get_drive_max_chs(type, &maxc, &maxh, &maxs); - - dev = aml_device("FLP%c", 'A' + idx); - - aml_append(dev, aml_name_decl("_ADR", aml_int(idx))); - - fdi = aml_package(16); - aml_append(fdi, aml_int(idx)); /* Drive Number */ - aml_append(fdi, - aml_int(cmos_get_fd_drive_type(type))); /* Device Type */ - /* - * the values below are the limits of the drive, and are thus independent - * of the inserted media - */ - aml_append(fdi, aml_int(maxc)); /* Maximum Cylinder Number */ - aml_append(fdi, aml_int(maxs)); /* Maximum Sector Number */ - aml_append(fdi, aml_int(maxh)); /* Maximum Head Number */ - /* - * SeaBIOS returns the below values for int 0x13 func 0x08 regardless of - * the drive type, so shall we - */ - aml_append(fdi, aml_int(0xAF)); /* disk_specify_1 */ - aml_append(fdi, aml_int(0x02)); /* disk_specify_2 */ - aml_append(fdi, aml_int(0x25)); /* disk_motor_wait */ - aml_append(fdi, aml_int(0x02)); /* disk_sector_siz */ - aml_append(fdi, aml_int(0x12)); /* disk_eot */ - aml_append(fdi, aml_int(0x1B)); /* disk_rw_gap */ - aml_append(fdi, aml_int(0xFF)); /* disk_dtl */ - aml_append(fdi, aml_int(0x6C)); /* disk_formt_gap */ - aml_append(fdi, aml_int(0xF6)); /* disk_fill */ - aml_append(fdi, aml_int(0x0F)); /* disk_head_sttl */ - aml_append(fdi, aml_int(0x08)); /* disk_motor_strt */ - - aml_append(dev, aml_name_decl("_FDI", fdi)); - return dev; -} - -int cmos_get_fd_drive_type(FloppyDriveType fd0) -{ - int val; - - switch (fd0) { - case FLOPPY_DRIVE_TYPE_144: - /* 1.44 Mb 3"5 drive */ - val = 4; - break; - case 
FLOPPY_DRIVE_TYPE_288: - /* 2.88 Mb 3"5 drive */ - val = 5; - break; - case FLOPPY_DRIVE_TYPE_120: - /* 1.2 Mb 5"5 drive */ - val = 2; - break; - case FLOPPY_DRIVE_TYPE_NONE: - default: - val = 0; - break; - } - return val; -} - -static void fdc_isa_build_aml(ISADevice *isadev, Aml *scope) -{ - Aml *dev; - Aml *crs; - int i; - -#define ACPI_FDE_MAX_FD 4 - uint32_t fde_buf[5] = { - 0, 0, 0, 0, /* presence of floppy drives #0 - #3 */ - cpu_to_le32(2) /* tape presence (2 == never present) */ - }; - - crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, 0x03F2, 0x03F2, 0x00, 0x04)); - aml_append(crs, aml_io(AML_DECODE16, 0x03F7, 0x03F7, 0x00, 0x01)); - aml_append(crs, aml_irq_no_flags(6)); - aml_append(crs, - aml_dma(AML_COMPATIBILITY, AML_NOTBUSMASTER, AML_TRANSFER8, 2)); - - dev = aml_device("FDC0"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0700"))); - aml_append(dev, aml_name_decl("_CRS", crs)); - - for (i = 0; i < MIN(MAX_FD, ACPI_FDE_MAX_FD); i++) { - FloppyDriveType type = isa_fdc_get_drive_type(isadev, i); - - if (type < FLOPPY_DRIVE_TYPE_NONE) { - fde_buf[i] = cpu_to_le32(1); /* drive present */ - aml_append(dev, build_fdinfo_aml(i, type)); - } - } - aml_append(dev, aml_name_decl("_FDE", - aml_buffer(sizeof(fde_buf), (uint8_t *)fde_buf))); - - aml_append(scope, dev); -} - -static const VMStateDescription vmstate_isa_fdc ={ - .name = "fdc", - .version_id = 2, - .minimum_version_id = 2, - .fields = (VMStateField[]) { - VMSTATE_STRUCT(state, FDCtrlISABus, 0, vmstate_fdc, FDCtrl), - VMSTATE_END_OF_LIST() - } -}; - -static Property isa_fdc_properties[] = { - DEFINE_PROP_UINT32("iobase", FDCtrlISABus, iobase, 0x3f0), - DEFINE_PROP_UINT32("irq", FDCtrlISABus, irq, 6), - DEFINE_PROP_UINT32("dma", FDCtrlISABus, dma, 2), - DEFINE_PROP_SIGNED("fdtypeA", FDCtrlISABus, state.qdev_for_drives[0].type, - FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_SIGNED("fdtypeB", FDCtrlISABus, state.qdev_for_drives[1].type, - FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_SIGNED("fallback", FDCtrlISABus, state.fallback, - FLOPPY_DRIVE_TYPE_288, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_END_OF_LIST(), -}; - -static void isabus_fdc_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - ISADeviceClass *isa = ISA_DEVICE_CLASS(klass); - - dc->realize = isabus_fdc_realize; - dc->fw_name = "fdc"; - dc->reset = fdctrl_external_reset_isa; - dc->vmsd = &vmstate_isa_fdc; - isa->build_aml = fdc_isa_build_aml; - device_class_set_props(dc, isa_fdc_properties); - set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); -} - -static void isabus_fdc_instance_init(Object *obj) -{ - FDCtrlISABus *isa = ISA_FDC(obj); - - device_add_bootindex_property(obj, &isa->bootindexA, - "bootindexA", "/floppy@0", - DEVICE(obj)); - device_add_bootindex_property(obj, &isa->bootindexB, - "bootindexB", "/floppy@1", - DEVICE(obj)); -} - -static const TypeInfo isa_fdc_info = { - .name = TYPE_ISA_FDC, - .parent = TYPE_ISA_DEVICE, - .instance_size = sizeof(FDCtrlISABus), - .class_init = isabus_fdc_class_init, - .instance_init = isabus_fdc_instance_init, -}; - -static const VMStateDescription vmstate_sysbus_fdc ={ - .name = "fdc", - .version_id = 2, - .minimum_version_id = 2, - .fields = (VMStateField[]) { - VMSTATE_STRUCT(state, FDCtrlSysBus, 0, vmstate_fdc, FDCtrl), - VMSTATE_END_OF_LIST() - } -}; - -static Property sysbus_fdc_properties[] = { - DEFINE_PROP_SIGNED("fdtypeA", FDCtrlSysBus, 
state.qdev_for_drives[0].type, - FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_SIGNED("fdtypeB", FDCtrlSysBus, state.qdev_for_drives[1].type, - FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_SIGNED("fallback", FDCtrlSysBus, state.fallback, - FLOPPY_DRIVE_TYPE_144, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_END_OF_LIST(), -}; - -static void sysbus_fdc_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - device_class_set_props(dc, sysbus_fdc_properties); - set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); -} - -static const TypeInfo sysbus_fdc_info = { - .name = "sysbus-fdc", - .parent = TYPE_SYSBUS_FDC, - .instance_init = sysbus_fdc_initfn, - .class_init = sysbus_fdc_class_init, -}; - -static Property sun4m_fdc_properties[] = { - DEFINE_PROP_SIGNED("fdtype", FDCtrlSysBus, state.qdev_for_drives[0].type, - FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_SIGNED("fallback", FDCtrlSysBus, state.fallback, - FLOPPY_DRIVE_TYPE_144, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_END_OF_LIST(), -}; - -static void sun4m_fdc_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - device_class_set_props(dc, sun4m_fdc_properties); - set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); -} - -static const TypeInfo sun4m_fdc_info = { - .name = "sun-fdtwo", - .parent = TYPE_SYSBUS_FDC, - .instance_init = sun4m_fdc_initfn, - .class_init = sun4m_fdc_class_init, -}; - -static void sysbus_fdc_common_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = sysbus_fdc_common_realize; - dc->reset = fdctrl_external_reset_sysbus; - dc->vmsd = &vmstate_sysbus_fdc; -} - -static const TypeInfo sysbus_fdc_type_info = { - .name = TYPE_SYSBUS_FDC, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(FDCtrlSysBus), - .instance_init = sysbus_fdc_common_initfn, - .abstract = true, - .class_init = sysbus_fdc_common_class_init, -}; - static void fdc_register_types(void) { - type_register_static(&isa_fdc_info); - type_register_static(&sysbus_fdc_type_info); - type_register_static(&sysbus_fdc_info); - type_register_static(&sun4m_fdc_info); type_register_static(&floppy_bus_info); type_register_static(&floppy_drive_info); } diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index 183d3f44c25..b77503dc845 100644 --- a/hw/block/m25p80.c +++ b/hw/block/m25p80.c @@ -259,6 +259,8 @@ static const FlashPartInfo known_devices[] = { { INFO_STACKED("n25q00a", 0x20bb21, 0x1000, 64 << 10, 2048, ER_4K, 4) }, { INFO_STACKED("mt25ql01g", 0x20ba21, 0x1040, 64 << 10, 2048, ER_4K, 2) }, { INFO_STACKED("mt25qu01g", 0x20bb21, 0x1040, 64 << 10, 2048, ER_4K, 2) }, + { INFO_STACKED("mt25ql02g", 0x20ba22, 0x1040, 64 << 10, 4096, ER_4K | ER_32K, 2) }, + { INFO_STACKED("mt25qu02g", 0x20bb22, 0x1040, 64 << 10, 4096, ER_4K | ER_32K, 2) }, /* Spansion -- single (large) sector size only, at least * for the chips listed here (without boot sectors). 
diff --git a/hw/block/meson.build b/hw/block/meson.build index 5b4a7699f98..2389326112a 100644 --- a/hw/block/meson.build +++ b/hw/block/meson.build @@ -5,6 +5,8 @@ softmmu_ss.add(files( )) softmmu_ss.add(when: 'CONFIG_ECC', if_true: files('ecc.c')) softmmu_ss.add(when: 'CONFIG_FDC', if_true: files('fdc.c')) +softmmu_ss.add(when: 'CONFIG_FDC_ISA', if_true: files('fdc-isa.c')) +softmmu_ss.add(when: 'CONFIG_FDC_SYSBUS', if_true: files('fdc-sysbus.c')) softmmu_ss.add(when: 'CONFIG_NAND', if_true: files('nand.c')) softmmu_ss.add(when: 'CONFIG_ONENAND', if_true: files('onenand.c')) softmmu_ss.add(when: 'CONFIG_PFLASH_CFI01', if_true: files('pflash_cfi01.c')) @@ -13,7 +15,6 @@ softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c')) softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c')) softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c')) softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c')) -softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 'nvme-subsys.c', 'nvme-dif.c')) specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk.c')) diff --git a/hw/block/nvme-dif.c b/hw/block/nvme-dif.c deleted file mode 100644 index 81b0a4cb138..00000000000 --- a/hw/block/nvme-dif.c +++ /dev/null @@ -1,518 +0,0 @@ -/* - * QEMU NVM Express End-to-End Data Protection support - * - * Copyright (c) 2021 Samsung Electronics Co., Ltd. - * - * Authors: - * Klaus Jensen - * Gollu Appalanaidu - */ - -#include "qemu/osdep.h" -#include "hw/block/block.h" -#include "sysemu/dma.h" -#include "sysemu/block-backend.h" -#include "qapi/error.h" -#include "trace.h" -#include "nvme.h" -#include "nvme-dif.h" - -uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, - uint32_t reftag) -{ - if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) && - (ctrl & NVME_RW_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) { - return NVME_INVALID_PROT_INFO | NVME_DNR; - } - - return NVME_SUCCESS; -} - -/* from Linux kernel (crypto/crct10dif_common.c) */ -static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer, - size_t len) -{ - unsigned int i; - - for (i = 0; i < len; i++) { - crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; - } - - return crc; -} - -void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t apptag, - uint32_t reftag) -{ - uint8_t *end = buf + len; - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - int16_t pil = 0; - - if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); - } - - trace_pci_nvme_dif_pract_generate_dif(len, lsize, lsize + pil, apptag, - reftag); - - for (; buf < end; buf += lsize, mbuf += msize) { - NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); - uint16_t crc = crc_t10dif(0x0, buf, lsize); - - if (pil) { - crc = crc_t10dif(crc, mbuf, pil); - } - - dif->guard = cpu_to_be16(crc); - dif->apptag = cpu_to_be16(apptag); - dif->reftag = cpu_to_be32(reftag); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { - reftag++; - } - } -} - -static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, - uint8_t *buf, uint8_t *mbuf, size_t pil, - uint16_t ctrl, uint16_t apptag, - uint16_t appmask, uint32_t reftag) -{ - switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - case NVME_ID_NS_DPS_TYPE_3: - if (be32_to_cpu(dif->reftag) != 0xffffffff) { 
- break; - } - - /* fallthrough */ - case NVME_ID_NS_DPS_TYPE_1: - case NVME_ID_NS_DPS_TYPE_2: - if (be16_to_cpu(dif->apptag) != 0xffff) { - break; - } - - trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag), - be32_to_cpu(dif->reftag)); - - return NVME_SUCCESS; - } - - if (ctrl & NVME_RW_PRINFO_PRCHK_GUARD) { - uint16_t crc = crc_t10dif(0x0, buf, nvme_lsize(ns)); - - if (pil) { - crc = crc_t10dif(crc, mbuf, pil); - } - - trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc); - - if (be16_to_cpu(dif->guard) != crc) { - return NVME_E2E_GUARD_ERROR; - } - } - - if (ctrl & NVME_RW_PRINFO_PRCHK_APP) { - trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag, - appmask); - - if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) { - return NVME_E2E_APP_ERROR; - } - } - - if (ctrl & NVME_RW_PRINFO_PRCHK_REF) { - trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag); - - if (be32_to_cpu(dif->reftag) != reftag) { - return NVME_E2E_REF_ERROR; - } - } - - return NVME_SUCCESS; -} - -uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t ctrl, - uint64_t slba, uint16_t apptag, - uint16_t appmask, uint32_t reftag) -{ - uint8_t *end = buf + len; - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - int16_t pil = 0; - uint16_t status; - - status = nvme_check_prinfo(ns, ctrl, slba, reftag); - if (status) { - return status; - } - - if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); - } - - trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), lsize + pil); - - for (; buf < end; buf += lsize, mbuf += msize) { - NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); - - status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, ctrl, apptag, - appmask, reftag); - if (status) { - return status; - } - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { - reftag++; - } - } - - return NVME_SUCCESS; -} - -uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, - uint64_t slba) -{ - BlockBackend *blk = ns->blkconf.blk; - BlockDriverState *bs = blk_bs(blk); - - size_t msize = nvme_msize(ns); - size_t lsize = nvme_lsize(ns); - int64_t moffset = 0, offset = nvme_l2b(ns, slba); - uint8_t *mbufp, *end; - bool zeroed; - int16_t pil = 0; - int64_t bytes = (mlen / msize) * lsize; - int64_t pnum = 0; - - Error *err = NULL; - - - if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); - } - - do { - int ret; - - bytes -= pnum; - - ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL); - if (ret < 0) { - error_setg_errno(&err, -ret, "unable to get block status"); - error_report_err(err); - - return NVME_INTERNAL_DEV_ERROR; - } - - zeroed = !!(ret & BDRV_BLOCK_ZERO); - - trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed); - - if (zeroed) { - mbufp = mbuf + moffset; - mlen = (pnum / lsize) * msize; - end = mbufp + mlen; - - for (; mbufp < end; mbufp += msize) { - memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple)); - } - } - - moffset += (pnum / lsize) * msize; - offset += pnum; - } while (pnum != bytes); - - return NVME_SUCCESS; -} - -static void nvme_dif_rw_cb(void *opaque, int ret) -{ - NvmeBounceContext *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - - trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk)); - - qemu_iovec_destroy(&ctx->data.iov); - g_free(ctx->data.bounce); - - qemu_iovec_destroy(&ctx->mdata.iov); - 
g_free(ctx->mdata.bounce); - - g_free(ctx); - - nvme_rw_complete_cb(req, ret); -} - -static void nvme_dif_rw_check_cb(void *opaque, int ret) -{ - NvmeBounceContext *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - NvmeCtrl *n = nvme_ctrl(req); - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint64_t slba = le64_to_cpu(rw->slba); - uint16_t ctrl = le16_to_cpu(rw->control); - uint16_t apptag = le16_to_cpu(rw->apptag); - uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); - uint16_t status; - - trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag, - appmask, reftag); - - if (ret) { - goto out; - } - - status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size, - slba); - if (status) { - req->status = status; - goto out; - } - - status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, - ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, - slba, apptag, appmask, reftag); - if (status) { - req->status = status; - goto out; - } - - status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size, - NVME_TX_DIRECTION_FROM_DEVICE, req); - if (status) { - req->status = status; - goto out; - } - - if (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8) { - goto out; - } - - status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, - NVME_TX_DIRECTION_FROM_DEVICE, req); - if (status) { - req->status = status; - } - -out: - nvme_dif_rw_cb(ctx, ret); -} - -static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret) -{ - NvmeBounceContext *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - size_t mlen = nvme_m2b(ns, nlb); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); - BlockBackend *blk = ns->blkconf.blk; - - trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk)); - - if (ret) { - goto out; - } - - ctx->mdata.bounce = g_malloc(mlen); - - qemu_iovec_reset(&ctx->mdata.iov); - qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); - - req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, - nvme_dif_rw_check_cb, ctx); - return; - -out: - nvme_dif_rw_cb(ctx, ret); -} - -static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret) -{ - NvmeBounceContext *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint64_t slba = le64_to_cpu(rw->slba); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); - BlockBackend *blk = ns->blkconf.blk; - - trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk)); - - if (ret) { - goto out; - } - - req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0, - nvme_dif_rw_cb, ctx); - return; - -out: - nvme_dif_rw_cb(ctx, ret); -} - -uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES; - uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - uint64_t slba = le64_to_cpu(rw->slba); - size_t len = nvme_l2b(ns, nlb); - size_t mlen = nvme_m2b(ns, nlb); - size_t mapped_len = len; - int64_t offset = nvme_l2b(ns, slba); - uint16_t ctrl = le16_to_cpu(rw->control); - uint16_t apptag = le16_to_cpu(rw->apptag); - uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); - bool pract = !!(ctrl & NVME_RW_PRINFO_PRACT); - NvmeBounceContext 
*ctx; - uint16_t status; - - trace_pci_nvme_dif_rw(pract, NVME_RW_PRINFO(ctrl)); - - ctx = g_new0(NvmeBounceContext, 1); - ctx->req = req; - - if (wrz) { - BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP; - - if (ctrl & NVME_RW_PRINFO_PRCHK_MASK) { - status = NVME_INVALID_PROT_INFO | NVME_DNR; - goto err; - } - - if (pract) { - uint8_t *mbuf, *end; - size_t msize = nvme_msize(ns); - int16_t pil = msize - sizeof(NvmeDifTuple); - - status = nvme_check_prinfo(ns, ctrl, slba, reftag); - if (status) { - goto err; - } - - flags = 0; - - ctx->mdata.bounce = g_malloc0(mlen); - - qemu_iovec_init(&ctx->mdata.iov, 1); - qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); - - mbuf = ctx->mdata.bounce; - end = mbuf + mlen; - - if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) { - pil = 0; - } - - for (; mbuf < end; mbuf += msize) { - NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); - - dif->apptag = cpu_to_be16(apptag); - dif->reftag = cpu_to_be32(reftag); - - switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - case NVME_ID_NS_DPS_TYPE_1: - case NVME_ID_NS_DPS_TYPE_2: - reftag++; - } - } - } - - req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags, - nvme_dif_rw_mdata_out_cb, ctx); - return NVME_NO_COMPLETE; - } - - if (nvme_ns_ext(ns) && !(pract && nvme_msize(ns) == 8)) { - mapped_len += mlen; - } - - status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd); - if (status) { - goto err; - } - - ctx->data.bounce = g_malloc(len); - - qemu_iovec_init(&ctx->data.iov, 1); - qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len); - - if (req->cmd.opcode == NVME_CMD_READ) { - block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, - BLOCK_ACCT_READ); - - req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0, - nvme_dif_rw_mdata_in_cb, ctx); - return NVME_NO_COMPLETE; - } - - status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size, - NVME_TX_DIRECTION_TO_DEVICE, req); - if (status) { - goto err; - } - - ctx->mdata.bounce = g_malloc(mlen); - - qemu_iovec_init(&ctx->mdata.iov, 1); - qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); - - if (!(pract && nvme_msize(ns) == 8)) { - status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, - NVME_TX_DIRECTION_TO_DEVICE, req); - if (status) { - goto err; - } - } - - status = nvme_check_prinfo(ns, ctrl, slba, reftag); - if (status) { - goto err; - } - - if (pract) { - /* splice generated protection information into the buffer */ - nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size, - ctx->mdata.bounce, ctx->mdata.iov.size, - apptag, reftag); - } else { - status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, - ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, - slba, apptag, appmask, reftag); - if (status) { - goto err; - } - } - - block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, - BLOCK_ACCT_WRITE); - - req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0, - nvme_dif_rw_mdata_out_cb, ctx); - - return NVME_NO_COMPLETE; - -err: - qemu_iovec_destroy(&ctx->data.iov); - g_free(ctx->data.bounce); - - qemu_iovec_destroy(&ctx->mdata.iov); - g_free(ctx->mdata.bounce); - - g_free(ctx); - - return status; -} diff --git a/hw/block/nvme-dif.h b/hw/block/nvme-dif.h deleted file mode 100644 index 524faffbd7a..00000000000 --- a/hw/block/nvme-dif.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * QEMU NVM Express End-to-End Data Protection support - * - * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
- * - * Authors: - * Klaus Jensen - * Gollu Appalanaidu - */ - -#ifndef HW_NVME_DIF_H -#define HW_NVME_DIF_H - -/* from Linux kernel (crypto/crct10dif_common.c) */ -static const uint16_t t10_dif_crc_table[256] = { - 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, - 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, - 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, - 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, - 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, - 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, - 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, - 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, - 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, - 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, - 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, - 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, - 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, - 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, - 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, - 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, - 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, - 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, - 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, - 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, - 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, - 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, - 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, - 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, - 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, - 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, - 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, - 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, - 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, - 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, - 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, - 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 -}; - -uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, - uint32_t reftag); -uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, - uint64_t slba); -void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t apptag, - uint32_t reftag); -uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t ctrl, - uint64_t slba, uint16_t apptag, - uint16_t appmask, uint32_t reftag); -uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); - -#endif /* HW_NVME_DIF_H */ diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c deleted file mode 100644 index 7bb618f1820..00000000000 --- a/hw/block/nvme-ns.c +++ /dev/null @@ -1,594 +0,0 @@ -/* - * QEMU NVM Express Virtual Namespace - * - * Copyright (c) 2019 CNEX Labs - * Copyright (c) 2020 Samsung Electronics - * - * Authors: - * Klaus Jensen - * - * This work is licensed under the terms of the GNU GPL, version 2. See the - * COPYING file in the top-level directory. 
- * - */ - -#include "qemu/osdep.h" -#include "qemu/units.h" -#include "qemu/cutils.h" -#include "qemu/log.h" -#include "qemu/error-report.h" -#include "hw/block/block.h" -#include "hw/pci/pci.h" -#include "sysemu/sysemu.h" -#include "sysemu/block-backend.h" -#include "qapi/error.h" - -#include "hw/qdev-properties.h" -#include "hw/qdev-core.h" - -#include "trace.h" -#include "nvme.h" -#include "nvme-ns.h" - -#define MIN_DISCARD_GRANULARITY (4 * KiB) - -void nvme_ns_init_format(NvmeNamespace *ns) -{ - NvmeIdNs *id_ns = &ns->id_ns; - BlockDriverInfo bdi; - int npdg, nlbas, ret; - - nlbas = nvme_ns_nlbas(ns); - - id_ns->nsze = cpu_to_le64(nlbas); - - /* no thin provisioning */ - id_ns->ncap = id_ns->nsze; - id_ns->nuse = id_ns->ncap; - - ns->mdata_offset = nvme_l2b(ns, nlbas); - - npdg = ns->blkconf.discard_granularity / nvme_lsize(ns); - - ret = bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi); - if (ret >= 0 && bdi.cluster_size > ns->blkconf.discard_granularity) { - npdg = bdi.cluster_size / nvme_lsize(ns); - } - - id_ns->npda = id_ns->npdg = npdg - 1; -} - -static int nvme_ns_init(NvmeNamespace *ns, Error **errp) -{ - NvmeIdNs *id_ns = &ns->id_ns; - uint8_t ds; - uint16_t ms; - int i; - - ns->csi = NVME_CSI_NVM; - ns->status = 0x0; - - ns->id_ns.dlfeat = 0x1; - - /* support DULBE and I/O optimization fields */ - id_ns->nsfeat |= (0x4 | 0x10); - - if (ns->params.shared) { - id_ns->nmic |= NVME_NMIC_NS_SHARED; - } - - /* simple copy */ - id_ns->mssrl = cpu_to_le16(ns->params.mssrl); - id_ns->mcl = cpu_to_le32(ns->params.mcl); - id_ns->msrc = ns->params.msrc; - - ds = 31 - clz32(ns->blkconf.logical_block_size); - ms = ns->params.ms; - - if (ns->params.ms) { - id_ns->mc = 0x3; - - if (ns->params.mset) { - id_ns->flbas |= 0x10; - } - - id_ns->dpc = 0x1f; - id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; - - NvmeLBAF lbaf[16] = { - [0] = { .ds = 9 }, - [1] = { .ds = 9, .ms = 8 }, - [2] = { .ds = 9, .ms = 16 }, - [3] = { .ds = 9, .ms = 64 }, - [4] = { .ds = 12 }, - [5] = { .ds = 12, .ms = 8 }, - [6] = { .ds = 12, .ms = 16 }, - [7] = { .ds = 12, .ms = 64 }, - }; - - memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); - id_ns->nlbaf = 7; - } else { - NvmeLBAF lbaf[16] = { - [0] = { .ds = 9 }, - [1] = { .ds = 12 }, - }; - - memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); - id_ns->nlbaf = 1; - } - - for (i = 0; i <= id_ns->nlbaf; i++) { - NvmeLBAF *lbaf = &id_ns->lbaf[i]; - if (lbaf->ds == ds) { - if (lbaf->ms == ms) { - id_ns->flbas |= i; - goto lbaf_found; - } - } - } - - /* add non-standard lba format */ - id_ns->nlbaf++; - id_ns->lbaf[id_ns->nlbaf].ds = ds; - id_ns->lbaf[id_ns->nlbaf].ms = ms; - id_ns->flbas |= id_ns->nlbaf; - -lbaf_found: - nvme_ns_init_format(ns); - - return 0; -} - -static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp) -{ - bool read_only; - - if (!blkconf_blocksizes(&ns->blkconf, errp)) { - return -1; - } - - read_only = !blk_supports_write_perm(ns->blkconf.blk); - if (!blkconf_apply_backend_options(&ns->blkconf, read_only, false, errp)) { - return -1; - } - - if (ns->blkconf.discard_granularity == -1) { - ns->blkconf.discard_granularity = - MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY); - } - - ns->size = blk_getlength(ns->blkconf.blk); - if (ns->size < 0) { - error_setg_errno(errp, -ns->size, "could not get blockdev size"); - return -1; - } - - return 0; -} - -static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) -{ - uint64_t zone_size, zone_cap; - uint32_t lbasz = nvme_lsize(ns); - - /* Make sure that the values of ZNS properties 
are sane */ - if (ns->params.zone_size_bs) { - zone_size = ns->params.zone_size_bs; - } else { - zone_size = NVME_DEFAULT_ZONE_SIZE; - } - if (ns->params.zone_cap_bs) { - zone_cap = ns->params.zone_cap_bs; - } else { - zone_cap = zone_size; - } - if (zone_cap > zone_size) { - error_setg(errp, "zone capacity %"PRIu64"B exceeds " - "zone size %"PRIu64"B", zone_cap, zone_size); - return -1; - } - if (zone_size < lbasz) { - error_setg(errp, "zone size %"PRIu64"B too small, " - "must be at least %"PRIu32"B", zone_size, lbasz); - return -1; - } - if (zone_cap < lbasz) { - error_setg(errp, "zone capacity %"PRIu64"B too small, " - "must be at least %"PRIu32"B", zone_cap, lbasz); - return -1; - } - - /* - * Save the main zone geometry values to avoid - * calculating them later again. - */ - ns->zone_size = zone_size / lbasz; - ns->zone_capacity = zone_cap / lbasz; - ns->num_zones = nvme_ns_nlbas(ns) / ns->zone_size; - - /* Do a few more sanity checks of ZNS properties */ - if (!ns->num_zones) { - error_setg(errp, - "insufficient drive capacity, must be at least the size " - "of one zone (%"PRIu64"B)", zone_size); - return -1; - } - - if (ns->params.max_open_zones > ns->num_zones) { - error_setg(errp, - "max_open_zones value %u exceeds the number of zones %u", - ns->params.max_open_zones, ns->num_zones); - return -1; - } - if (ns->params.max_active_zones > ns->num_zones) { - error_setg(errp, - "max_active_zones value %u exceeds the number of zones %u", - ns->params.max_active_zones, ns->num_zones); - return -1; - } - - if (ns->params.max_active_zones) { - if (ns->params.max_open_zones > ns->params.max_active_zones) { - error_setg(errp, "max_open_zones (%u) exceeds max_active_zones (%u)", - ns->params.max_open_zones, ns->params.max_active_zones); - return -1; - } - - if (!ns->params.max_open_zones) { - ns->params.max_open_zones = ns->params.max_active_zones; - } - } - - if (ns->params.zd_extension_size) { - if (ns->params.zd_extension_size & 0x3f) { - error_setg(errp, - "zone descriptor extension size must be a multiple of 64B"); - return -1; - } - if ((ns->params.zd_extension_size >> 6) > 0xff) { - error_setg(errp, "zone descriptor extension size is too large"); - return -1; - } - } - - return 0; -} - -static void nvme_ns_zoned_init_state(NvmeNamespace *ns) -{ - uint64_t start = 0, zone_size = ns->zone_size; - uint64_t capacity = ns->num_zones * zone_size; - NvmeZone *zone; - int i; - - ns->zone_array = g_new0(NvmeZone, ns->num_zones); - if (ns->params.zd_extension_size) { - ns->zd_extensions = g_malloc0(ns->params.zd_extension_size * - ns->num_zones); - } - - QTAILQ_INIT(&ns->exp_open_zones); - QTAILQ_INIT(&ns->imp_open_zones); - QTAILQ_INIT(&ns->closed_zones); - QTAILQ_INIT(&ns->full_zones); - - zone = ns->zone_array; - for (i = 0; i < ns->num_zones; i++, zone++) { - if (start + zone_size > capacity) { - zone_size = capacity - start; - } - zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE; - nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); - zone->d.za = 0; - zone->d.zcap = ns->zone_capacity; - zone->d.zslba = start; - zone->d.wp = start; - zone->w_ptr = start; - start += zone_size; - } - - ns->zone_size_log2 = 0; - if (is_power_of_2(ns->zone_size)) { - ns->zone_size_log2 = 63 - clz64(ns->zone_size); - } -} - -static void nvme_ns_init_zoned(NvmeNamespace *ns) -{ - NvmeIdNsZoned *id_ns_z; - int i; - - nvme_ns_zoned_init_state(ns); - - id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned)); - - /* MAR/MOR are zeroes-based, 0xffffffff means no limit */ - id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1); - 
id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1); - id_ns_z->zoc = 0; - id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00; - - for (i = 0; i <= ns->id_ns.nlbaf; i++) { - id_ns_z->lbafe[i].zsze = cpu_to_le64(ns->zone_size); - id_ns_z->lbafe[i].zdes = - ns->params.zd_extension_size >> 6; /* Units of 64B */ - } - - ns->csi = NVME_CSI_ZONED; - ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size); - ns->id_ns.ncap = ns->id_ns.nsze; - ns->id_ns.nuse = ns->id_ns.ncap; - - /* - * The device uses the BDRV_BLOCK_ZERO flag to determine the "deallocated" - * status of logical blocks. Since the spec defines that logical blocks - * SHALL be deallocated when then zone is in the Empty or Offline states, - * we can only support DULBE if the zone size is a multiple of the - * calculated NPDG. - */ - if (ns->zone_size % (ns->id_ns.npdg + 1)) { - warn_report("the zone size (%"PRIu64" blocks) is not a multiple of " - "the calculated deallocation granularity (%d blocks); " - "DULBE support disabled", - ns->zone_size, ns->id_ns.npdg + 1); - - ns->id_ns.nsfeat &= ~0x4; - } - - ns->id_ns_zoned = id_ns_z; -} - -static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone) -{ - uint8_t state; - - zone->w_ptr = zone->d.wp; - state = nvme_get_zone_state(zone); - if (zone->d.wp != zone->d.zslba || - (zone->d.za & NVME_ZA_ZD_EXT_VALID)) { - if (state != NVME_ZONE_STATE_CLOSED) { - trace_pci_nvme_clear_ns_close(state, zone->d.zslba); - nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED); - } - nvme_aor_inc_active(ns); - QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry); - } else { - trace_pci_nvme_clear_ns_reset(state, zone->d.zslba); - nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); - } -} - -/* - * Close all the zones that are currently open. - */ -static void nvme_zoned_ns_shutdown(NvmeNamespace *ns) -{ - NvmeZone *zone, *next; - - QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) { - QTAILQ_REMOVE(&ns->closed_zones, zone, entry); - nvme_aor_dec_active(ns); - nvme_clear_zone(ns, zone); - } - QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) { - QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); - nvme_aor_dec_open(ns); - nvme_aor_dec_active(ns); - nvme_clear_zone(ns, zone); - } - QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) { - QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry); - nvme_aor_dec_open(ns); - nvme_aor_dec_active(ns); - nvme_clear_zone(ns, zone); - } - - assert(ns->nr_open_zones == 0); -} - -static int nvme_ns_check_constraints(NvmeCtrl *n, NvmeNamespace *ns, - Error **errp) -{ - if (!ns->blkconf.blk) { - error_setg(errp, "block backend not configured"); - return -1; - } - - if (ns->params.pi && ns->params.ms < 8) { - error_setg(errp, "at least 8 bytes of metadata required to enable " - "protection information"); - return -1; - } - - if (ns->params.nsid > NVME_MAX_NAMESPACES) { - error_setg(errp, "invalid namespace id (must be between 0 and %d)", - NVME_MAX_NAMESPACES); - return -1; - } - - if (!n->subsys) { - if (ns->params.detached) { - error_setg(errp, "detached requires that the nvme device is " - "linked to an nvme-subsys device"); - return -1; - } - - if (ns->params.shared) { - error_setg(errp, "shared requires that the nvme device is " - "linked to an nvme-subsys device"); - return -1; - } - } - - return 0; -} - -int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) -{ - if (nvme_ns_check_constraints(n, ns, errp)) { - return -1; - } - - if (nvme_ns_init_blk(ns, errp)) { - return -1; - } - - if (nvme_ns_init(ns, errp)) { - return -1; - } 
- if (ns->params.zoned) { - if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) { - return -1; - } - nvme_ns_init_zoned(ns); - } - - return 0; -} - -void nvme_ns_drain(NvmeNamespace *ns) -{ - blk_drain(ns->blkconf.blk); -} - -void nvme_ns_shutdown(NvmeNamespace *ns) -{ - blk_flush(ns->blkconf.blk); - if (ns->params.zoned) { - nvme_zoned_ns_shutdown(ns); - } -} - -void nvme_ns_cleanup(NvmeNamespace *ns) -{ - if (ns->params.zoned) { - g_free(ns->id_ns_zoned); - g_free(ns->zone_array); - g_free(ns->zd_extensions); - } -} - -static void nvme_ns_realize(DeviceState *dev, Error **errp) -{ - NvmeNamespace *ns = NVME_NS(dev); - BusState *s = qdev_get_parent_bus(dev); - NvmeCtrl *n = NVME(s->parent); - NvmeSubsystem *subsys = n->subsys; - uint32_t nsid = ns->params.nsid; - int i; - - if (nvme_ns_setup(n, ns, errp)) { - return; - } - - if (!nsid) { - for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { - if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) { - continue; - } - - nsid = ns->params.nsid = i; - break; - } - - if (!nsid) { - error_setg(errp, "no free namespace id"); - return; - } - } else { - if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) { - error_setg(errp, "namespace id '%d' already allocated", nsid); - return; - } - } - - if (subsys) { - subsys->namespaces[nsid] = ns; - - if (ns->params.detached) { - return; - } - - if (ns->params.shared) { - for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) { - NvmeCtrl *ctrl = subsys->ctrls[i]; - - if (ctrl) { - nvme_attach_ns(ctrl, ns); - } - } - - return; - } - } - - nvme_attach_ns(n, ns); -} - -static Property nvme_ns_props[] = { - DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf), - DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false), - DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, false), - DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0), - DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid), - DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0), - DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0), - DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0), - DEFINE_PROP_UINT8("pil", NvmeNamespace, params.pil, 0), - DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128), - DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128), - DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127), - DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false), - DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs, - NVME_DEFAULT_ZONE_SIZE), - DEFINE_PROP_SIZE("zoned.zone_capacity", NvmeNamespace, params.zone_cap_bs, - 0), - DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace, - params.cross_zone_read, false), - DEFINE_PROP_UINT32("zoned.max_active", NvmeNamespace, - params.max_active_zones, 0), - DEFINE_PROP_UINT32("zoned.max_open", NvmeNamespace, - params.max_open_zones, 0), - DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace, - params.zd_extension_size, 0), - DEFINE_PROP_END_OF_LIST(), -}; - -static void nvme_ns_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - - set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); - - dc->bus_type = TYPE_NVME_BUS; - dc->realize = nvme_ns_realize; - device_class_set_props(dc, nvme_ns_props); - dc->desc = "Virtual NVMe namespace"; -} - -static void nvme_ns_instance_init(Object *obj) -{ - NvmeNamespace *ns = NVME_NS(obj); - char *bootindex = g_strdup_printf("/namespace@%d,0", ns->params.nsid); - - device_add_bootindex_property(obj, &ns->bootindex, "bootindex", - bootindex, DEVICE(obj)); - - g_free(bootindex); -} - 
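A minimal standalone sketch (illustrative values, not QEMU code) of the two encodings used by nvme_ns_init_zoned() above: the zeroes-based MAR/MOR fields, where a configured limit of 0 wraps to 0xffffffff and is read as "no limit", and the zone descriptor extension size, which is reported in 64-byte units:

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t max_active_zones = 0;     /* 0 is the device default: no limit */
        uint32_t zd_extension_size = 128;  /* bytes; must be a multiple of 64 */

        /* MAR/MOR are zeroes-based: a limit of N is reported as N - 1, so a
         * configured limit of 0 wraps to 0xffffffff ("no limit"). */
        uint32_t mar = max_active_zones - 1;

        /* The zone descriptor extension size is reported in units of 64 bytes. */
        uint32_t zdes = zd_extension_size >> 6;

        /* Prints: mar=0xffffffff zdes=2 */
        printf("mar=0x%08" PRIx32 " zdes=%" PRIu32 "\n", mar, zdes);
        return 0;
    }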
-static const TypeInfo nvme_ns_info = { - .name = TYPE_NVME_NS, - .parent = TYPE_DEVICE, - .class_init = nvme_ns_class_init, - .instance_size = sizeof(NvmeNamespace), - .instance_init = nvme_ns_instance_init, -}; - -static void nvme_ns_register_types(void) -{ - type_register_static(&nvme_ns_info); -} - -type_init(nvme_ns_register_types) diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h deleted file mode 100644 index fb0a41f912e..00000000000 --- a/hw/block/nvme-ns.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * QEMU NVM Express Virtual Namespace - * - * Copyright (c) 2019 CNEX Labs - * Copyright (c) 2020 Samsung Electronics - * - * Authors: - * Klaus Jensen - * - * This work is licensed under the terms of the GNU GPL, version 2. See the - * COPYING file in the top-level directory. - * - */ - -#ifndef NVME_NS_H -#define NVME_NS_H - -#include "qemu/uuid.h" - -#define TYPE_NVME_NS "nvme-ns" -#define NVME_NS(obj) \ - OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS) - -typedef struct NvmeZone { - NvmeZoneDescr d; - uint64_t w_ptr; - QTAILQ_ENTRY(NvmeZone) entry; -} NvmeZone; - -typedef struct NvmeNamespaceParams { - bool detached; - bool shared; - uint32_t nsid; - QemuUUID uuid; - - uint16_t ms; - uint8_t mset; - uint8_t pi; - uint8_t pil; - - uint16_t mssrl; - uint32_t mcl; - uint8_t msrc; - - bool zoned; - bool cross_zone_read; - uint64_t zone_size_bs; - uint64_t zone_cap_bs; - uint32_t max_active_zones; - uint32_t max_open_zones; - uint32_t zd_extension_size; -} NvmeNamespaceParams; - -typedef struct NvmeNamespace { - DeviceState parent_obj; - BlockConf blkconf; - int32_t bootindex; - int64_t size; - int64_t mdata_offset; - NvmeIdNs id_ns; - const uint32_t *iocs; - uint8_t csi; - uint16_t status; - int attached; - - QTAILQ_ENTRY(NvmeNamespace) entry; - - NvmeIdNsZoned *id_ns_zoned; - NvmeZone *zone_array; - QTAILQ_HEAD(, NvmeZone) exp_open_zones; - QTAILQ_HEAD(, NvmeZone) imp_open_zones; - QTAILQ_HEAD(, NvmeZone) closed_zones; - QTAILQ_HEAD(, NvmeZone) full_zones; - uint32_t num_zones; - uint64_t zone_size; - uint64_t zone_capacity; - uint32_t zone_size_log2; - uint8_t *zd_extensions; - int32_t nr_open_zones; - int32_t nr_active_zones; - - NvmeNamespaceParams params; - - struct { - uint32_t err_rec; - } features; -} NvmeNamespace; - -static inline uint16_t nvme_ns_status(NvmeNamespace *ns) -{ - return ns->status; -} - -static inline uint32_t nvme_nsid(NvmeNamespace *ns) -{ - if (ns) { - return ns->params.nsid; - } - - return 0; -} - -static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns) -{ - NvmeIdNs *id_ns = &ns->id_ns; - return &id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; -} - -static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) -{ - return nvme_ns_lbaf(ns)->ds; -} - -/* convert an LBA to the equivalent in bytes */ -static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba) -{ - return lba << nvme_ns_lbads(ns); -} - -static inline size_t nvme_lsize(NvmeNamespace *ns) -{ - return 1 << nvme_ns_lbads(ns); -} - -static inline uint16_t nvme_msize(NvmeNamespace *ns) -{ - return nvme_ns_lbaf(ns)->ms; -} - -static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba) -{ - return nvme_msize(ns) * lba; -} - -static inline bool nvme_ns_ext(NvmeNamespace *ns) -{ - return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas); -} - -/* calculate the number of LBAs that the namespace can accomodate */ -static inline uint64_t nvme_ns_nlbas(NvmeNamespace *ns) -{ - if (nvme_msize(ns)) { - return ns->size / (nvme_lsize(ns) + nvme_msize(ns)); - } - return ns->size >> nvme_ns_lbads(ns); -} - -typedef 
struct NvmeCtrl NvmeCtrl; - -static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone) -{ - return zone->d.zs >> 4; -} - -static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state) -{ - zone->d.zs = state << 4; -} - -static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone) -{ - return zone->d.zslba + ns->zone_size; -} - -static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone) -{ - return zone->d.zslba + zone->d.zcap; -} - -static inline bool nvme_wp_is_valid(NvmeZone *zone) -{ - uint8_t st = nvme_get_zone_state(zone); - - return st != NVME_ZONE_STATE_FULL && - st != NVME_ZONE_STATE_READ_ONLY && - st != NVME_ZONE_STATE_OFFLINE; -} - -static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns, - uint32_t zone_idx) -{ - return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size]; -} - -static inline void nvme_aor_inc_open(NvmeNamespace *ns) -{ - assert(ns->nr_open_zones >= 0); - if (ns->params.max_open_zones) { - ns->nr_open_zones++; - assert(ns->nr_open_zones <= ns->params.max_open_zones); - } -} - -static inline void nvme_aor_dec_open(NvmeNamespace *ns) -{ - if (ns->params.max_open_zones) { - assert(ns->nr_open_zones > 0); - ns->nr_open_zones--; - } - assert(ns->nr_open_zones >= 0); -} - -static inline void nvme_aor_inc_active(NvmeNamespace *ns) -{ - assert(ns->nr_active_zones >= 0); - if (ns->params.max_active_zones) { - ns->nr_active_zones++; - assert(ns->nr_active_zones <= ns->params.max_active_zones); - } -} - -static inline void nvme_aor_dec_active(NvmeNamespace *ns) -{ - if (ns->params.max_active_zones) { - assert(ns->nr_active_zones > 0); - ns->nr_active_zones--; - assert(ns->nr_active_zones >= ns->nr_open_zones); - } - assert(ns->nr_active_zones >= 0); -} - -void nvme_ns_init_format(NvmeNamespace *ns); -int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp); -void nvme_ns_drain(NvmeNamespace *ns); -void nvme_ns_shutdown(NvmeNamespace *ns); -void nvme_ns_cleanup(NvmeNamespace *ns); - -#endif /* NVME_NS_H */ diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c deleted file mode 100644 index 283a97b79d5..00000000000 --- a/hw/block/nvme-subsys.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * QEMU NVM Express Subsystem: nvme-subsys - * - * Copyright (c) 2021 Minwoo Im - * - * This code is licensed under the GNU GPL v2. Refer COPYING. - */ - -#include "qemu/units.h" -#include "qemu/osdep.h" -#include "qemu/uuid.h" -#include "qemu/iov.h" -#include "qemu/cutils.h" -#include "qapi/error.h" -#include "hw/qdev-properties.h" -#include "hw/qdev-core.h" -#include "hw/block/block.h" -#include "block/aio.h" -#include "block/accounting.h" -#include "sysemu/sysemu.h" -#include "hw/pci/pci.h" -#include "nvme.h" -#include "nvme-subsys.h" - -int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) -{ - NvmeSubsystem *subsys = n->subsys; - int cntlid; - - for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) { - if (!subsys->ctrls[cntlid]) { - break; - } - } - - if (cntlid == ARRAY_SIZE(subsys->ctrls)) { - error_setg(errp, "no more free controller id"); - return -1; - } - - subsys->ctrls[cntlid] = n; - - return cntlid; -} - -static void nvme_subsys_setup(NvmeSubsystem *subsys) -{ - const char *nqn = subsys->params.nqn ? 
- subsys->params.nqn : subsys->parent_obj.id; - - snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn), - "nqn.2019-08.org.qemu:%s", nqn); -} - -static void nvme_subsys_realize(DeviceState *dev, Error **errp) -{ - NvmeSubsystem *subsys = NVME_SUBSYS(dev); - - nvme_subsys_setup(subsys); -} - -static Property nvme_subsystem_props[] = { - DEFINE_PROP_STRING("nqn", NvmeSubsystem, params.nqn), - DEFINE_PROP_END_OF_LIST(), -}; - -static void nvme_subsys_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - - set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); - - dc->realize = nvme_subsys_realize; - dc->desc = "Virtual NVMe subsystem"; - - device_class_set_props(dc, nvme_subsystem_props); -} - -static const TypeInfo nvme_subsys_info = { - .name = TYPE_NVME_SUBSYS, - .parent = TYPE_DEVICE, - .class_init = nvme_subsys_class_init, - .instance_size = sizeof(NvmeSubsystem), -}; - -static void nvme_subsys_register_types(void) -{ - type_register_static(&nvme_subsys_info); -} - -type_init(nvme_subsys_register_types) diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h deleted file mode 100644 index 7d7ef5f7f12..00000000000 --- a/hw/block/nvme-subsys.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * QEMU NVM Express Subsystem: nvme-subsys - * - * Copyright (c) 2021 Minwoo Im - * - * This code is licensed under the GNU GPL v2. Refer COPYING. - */ - -#ifndef NVME_SUBSYS_H -#define NVME_SUBSYS_H - -#define TYPE_NVME_SUBSYS "nvme-subsys" -#define NVME_SUBSYS(obj) \ - OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) - -#define NVME_SUBSYS_MAX_CTRLS 32 -#define NVME_MAX_NAMESPACES 256 - -typedef struct NvmeCtrl NvmeCtrl; -typedef struct NvmeNamespace NvmeNamespace; -typedef struct NvmeSubsystem { - DeviceState parent_obj; - uint8_t subnqn[256]; - - NvmeCtrl *ctrls[NVME_SUBSYS_MAX_CTRLS]; - /* Allocated namespaces for this subsystem */ - NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; - - struct { - char *nqn; - } params; -} NvmeSubsystem; - -int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp); - -static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, - uint32_t cntlid) -{ - if (!subsys || cntlid >= NVME_SUBSYS_MAX_CTRLS) { - return NULL; - } - - return subsys->ctrls[cntlid]; -} - -/* - * Return allocated namespace of the specified nsid in the subsystem. - */ -static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys, - uint32_t nsid) -{ - if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) { - return NULL; - } - - return subsys->namespaces[nsid]; -} - -#endif /* NVME_SUBSYS_H */ diff --git a/hw/block/nvme.c b/hw/block/nvme.c deleted file mode 100644 index 5fe082ec34c..00000000000 --- a/hw/block/nvme.c +++ /dev/null @@ -1,6363 +0,0 @@ -/* - * QEMU NVM Express Controller - * - * Copyright (c) 2012, Intel Corporation - * - * Written by Keith Busch - * - * This code is licensed under the GNU GPL v2 or later. - */ - -/** - * Reference Specs: http://www.nvmexpress.org, 1.4, 1.3, 1.2, 1.1, 1.0e - * - * https://nvmexpress.org/developers/nvme-specification/ - */ - -/** - * Usage: add options: - * -drive file=,if=none,id= - * -device nvme-subsys,id=,nqn= - * -device nvme,serial=,id=, \ - * cmb_size_mb=, \ - * [pmrdev=,] \ - * max_ioqpairs=, \ - * aerl=,aer_max_queued=, \ - * mdts=,vsl=, \ - * zoned.zasl=, \ - * subsys= - * -device nvme-ns,drive=,bus=,nsid=,\ - * zoned=, \ - * subsys=,detached= - * - * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at - * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. 
By default, the - * device will use the "v1.4 CMB scheme" - use the `legacy-cmb` parameter to - * always enable the CMBLOC and CMBSZ registers (v1.3 behavior). - * - * Enabling pmr emulation can be achieved by pointing to memory-backend-file. - * For example: - * -object memory-backend-file,id=,share=on,mem-path=, \ - * size= .... -device nvme,...,pmrdev= - * - * The PMR will use BAR 4/5 exclusively. - * - * To place controller(s) and namespace(s) to a subsystem, then provide - * nvme-subsys device as above. - * - * nvme subsystem device parameters - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - `nqn` - * This parameter provides the `` part of the string - * `nqn.2019-08.org.qemu:` which will be reported in the SUBNQN field - * of subsystem controllers. Note that `` should be unique per - * subsystem, but this is not enforced by QEMU. If not specified, it will - * default to the value of the `id` parameter (``). - * - * nvme device parameters - * ~~~~~~~~~~~~~~~~~~~~~~ - * - `subsys` - * Specifying this parameter attaches the controller to the subsystem and - * the SUBNQN field in the controller will report the NQN of the subsystem - * device. This also enables multi controller capability represented in - * Identify Controller data structure in CMIC (Controller Multi-path I/O and - * Namesapce Sharing Capabilities). - * - * - `aerl` - * The Asynchronous Event Request Limit (AERL). Indicates the maximum number - * of concurrently outstanding Asynchronous Event Request commands support - * by the controller. This is a 0's based value. - * - * - `aer_max_queued` - * This is the maximum number of events that the device will enqueue for - * completion when there are no outstanding AERs. When the maximum number of - * enqueued events are reached, subsequent events will be dropped. - * - * - `mdts` - * Indicates the maximum data transfer size for a command that transfers data - * between host-accessible memory and the controller. The value is specified - * as a power of two (2^n) and is in units of the minimum memory page size - * (CAP.MPSMIN). The default value is 7 (i.e. 512 KiB). - * - * - `vsl` - * Indicates the maximum data size limit for the Verify command. Like `mdts`, - * this value is specified as a power of two (2^n) and is in units of the - * minimum memory page size (CAP.MPSMIN). The default value is 7 (i.e. 512 - * KiB). - * - * - `zoned.zasl` - * Indicates the maximum data transfer size for the Zone Append command. Like - * `mdts`, the value is specified as a power of two (2^n) and is in units of - * the minimum memory page size (CAP.MPSMIN). The default value is 0 (i.e. - * defaulting to the value of `mdts`). - * - * nvme namespace device parameters - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - `shared` - * When the parent nvme device (as defined explicitly by the 'bus' parameter - * or implicitly by the most recently defined NvmeBus) is linked to an - * nvme-subsys device, the namespace will be attached to all controllers in - * the subsystem. If set to 'off' (the default), the namespace will remain a - * private namespace and may only be attached to a single controller at a - * time. - * - * - `detached` - * This parameter is only valid together with the `subsys` parameter. If left - * at the default value (`false/off`), the namespace will be attached to all - * controllers in the NVMe subsystem at boot-up. If set to `true/on`, the - * namespace will be be available in the subsystem not not attached to any - * controllers. 
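A minimal standalone sketch of the power-of-two sizing convention shared by `mdts`, `vsl` and `zoned.zasl` above, assuming CAP.MPSMIN corresponds to a 4 KiB page (illustrative only, not QEMU code); with that assumption the default mdts=7 works out to the 512 KiB mentioned above:

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t mpsmin = 4096;  /* assumed CAP.MPSMIN page size: 4 KiB */
        const unsigned mdts = 7;       /* default value quoted in the text above */

        /* mdts/vsl/zasl are expressed as 2^n units of the minimum page size. */
        uint64_t max_xfer = mpsmin << mdts;

        /* Prints: max transfer = 524288 bytes (512 KiB) */
        printf("max transfer = %" PRIu64 " bytes (%" PRIu64 " KiB)\n",
               max_xfer, max_xfer / 1024);
        return 0;
    }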
- * - * Setting `zoned` to true selects Zoned Command Set at the namespace. - * In this case, the following namespace properties are available to configure - * zoned operation: - * zoned.zone_size= - * The number may be followed by K, M, G as in kilo-, mega- or giga-. - * - * zoned.zone_capacity= - * The value 0 (default) forces zone capacity to be the same as zone - * size. The value of this property may not exceed zone size. - * - * zoned.descr_ext_size= - * This value needs to be specified in 64B units. If it is zero, - * namespace(s) will not support zone descriptor extensions. - * - * zoned.max_active= - * The default value means there is no limit to the number of - * concurrently active zones. - * - * zoned.max_open= - * The default value means there is no limit to the number of - * concurrently open zones. - * - * zoned.cross_read= - * Setting this property to true enables Read Across Zone Boundaries. - */ - -#include "qemu/osdep.h" -#include "qemu/units.h" -#include "qemu/error-report.h" -#include "hw/block/block.h" -#include "hw/pci/msix.h" -#include "hw/pci/pci.h" -#include "hw/qdev-properties.h" -#include "migration/vmstate.h" -#include "sysemu/sysemu.h" -#include "qapi/error.h" -#include "qapi/visitor.h" -#include "sysemu/hostmem.h" -#include "sysemu/block-backend.h" -#include "exec/memory.h" -#include "qemu/log.h" -#include "qemu/module.h" -#include "qemu/cutils.h" -#include "trace.h" -#include "nvme.h" -#include "nvme-ns.h" -#include "nvme-dif.h" - -#define NVME_MAX_IOQPAIRS 0xffff -#define NVME_DB_SIZE 4 -#define NVME_SPEC_VER 0x00010400 -#define NVME_CMB_BIR 2 -#define NVME_PMR_BIR 4 -#define NVME_TEMPERATURE 0x143 -#define NVME_TEMPERATURE_WARNING 0x157 -#define NVME_TEMPERATURE_CRITICAL 0x175 -#define NVME_NUM_FW_SLOTS 1 - -#define NVME_GUEST_ERR(trace, fmt, ...) 
\ - do { \ - (trace_##trace)(__VA_ARGS__); \ - qemu_log_mask(LOG_GUEST_ERROR, #trace \ - " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \ - } while (0) - -static const bool nvme_feature_support[NVME_FID_MAX] = { - [NVME_ARBITRATION] = true, - [NVME_POWER_MANAGEMENT] = true, - [NVME_TEMPERATURE_THRESHOLD] = true, - [NVME_ERROR_RECOVERY] = true, - [NVME_VOLATILE_WRITE_CACHE] = true, - [NVME_NUMBER_OF_QUEUES] = true, - [NVME_INTERRUPT_COALESCING] = true, - [NVME_INTERRUPT_VECTOR_CONF] = true, - [NVME_WRITE_ATOMICITY] = true, - [NVME_ASYNCHRONOUS_EVENT_CONF] = true, - [NVME_TIMESTAMP] = true, -}; - -static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { - [NVME_TEMPERATURE_THRESHOLD] = NVME_FEAT_CAP_CHANGE, - [NVME_ERROR_RECOVERY] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS, - [NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE, - [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE, - [NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE, - [NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE, -}; - -static const uint32_t nvme_cse_acs[256] = { - [NVME_ADM_CMD_DELETE_SQ] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_CREATE_SQ] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_DELETE_CQ] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_CREATE_CQ] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_IDENTIFY] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_ABORT] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, - [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, -}; - -static const uint32_t nvme_cse_iocs_none[256]; - -static const uint32_t nvme_cse_iocs_nvm[256] = { - [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP, - [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, - [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, -}; - -static const uint32_t nvme_cse_iocs_zoned[256] = { - [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP, - [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, - [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, - [NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP, -}; - -static void nvme_process_sq(void *opaque); - -static uint16_t nvme_sqid(NvmeRequest *req) -{ - return le16_to_cpu(req->sq->sqid); -} - -static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone, - NvmeZoneState state) -{ - if (QTAILQ_IN_USE(zone, entry)) { - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry); - break; - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); - break; - case NVME_ZONE_STATE_CLOSED: - QTAILQ_REMOVE(&ns->closed_zones, zone, entry); - break; - case NVME_ZONE_STATE_FULL: - 
QTAILQ_REMOVE(&ns->full_zones, zone, entry); - default: - ; - } - } - - nvme_set_zone_state(zone, state); - - switch (state) { - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - QTAILQ_INSERT_TAIL(&ns->exp_open_zones, zone, entry); - break; - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - QTAILQ_INSERT_TAIL(&ns->imp_open_zones, zone, entry); - break; - case NVME_ZONE_STATE_CLOSED: - QTAILQ_INSERT_TAIL(&ns->closed_zones, zone, entry); - break; - case NVME_ZONE_STATE_FULL: - QTAILQ_INSERT_TAIL(&ns->full_zones, zone, entry); - case NVME_ZONE_STATE_READ_ONLY: - break; - default: - zone->d.za = 0; - } -} - -/* - * Check if we can open a zone without exceeding open/active limits. - * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5). - */ -static int nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn) -{ - if (ns->params.max_active_zones != 0 && - ns->nr_active_zones + act > ns->params.max_active_zones) { - trace_pci_nvme_err_insuff_active_res(ns->params.max_active_zones); - return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR; - } - if (ns->params.max_open_zones != 0 && - ns->nr_open_zones + opn > ns->params.max_open_zones) { - trace_pci_nvme_err_insuff_open_res(ns->params.max_open_zones); - return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR; - } - - return NVME_SUCCESS; -} - -static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr) -{ - hwaddr hi, lo; - - if (!n->cmb.cmse) { - return false; - } - - lo = n->params.legacy_cmb ? n->cmb.mem.addr : n->cmb.cba; - hi = lo + int128_get64(n->cmb.mem.size); - - return addr >= lo && addr < hi; -} - -static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr) -{ - hwaddr base = n->params.legacy_cmb ? n->cmb.mem.addr : n->cmb.cba; - return &n->cmb.buf[addr - base]; -} - -static bool nvme_addr_is_pmr(NvmeCtrl *n, hwaddr addr) -{ - hwaddr hi; - - if (!n->pmr.cmse) { - return false; - } - - hi = n->pmr.cba + int128_get64(n->pmr.dev->mr.size); - - return addr >= n->pmr.cba && addr < hi; -} - -static inline void *nvme_addr_to_pmr(NvmeCtrl *n, hwaddr addr) -{ - return memory_region_get_ram_ptr(&n->pmr.dev->mr) + (addr - n->pmr.cba); -} - -static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) -{ - hwaddr hi = addr + size - 1; - if (hi < addr) { - return 1; - } - - if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) { - memcpy(buf, nvme_addr_to_cmb(n, addr), size); - return 0; - } - - if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) { - memcpy(buf, nvme_addr_to_pmr(n, addr), size); - return 0; - } - - return pci_dma_read(&n->parent_obj, addr, buf, size); -} - -static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size) -{ - hwaddr hi = addr + size - 1; - if (hi < addr) { - return 1; - } - - if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) { - memcpy(nvme_addr_to_cmb(n, addr), buf, size); - return 0; - } - - if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) { - memcpy(nvme_addr_to_pmr(n, addr), buf, size); - return 0; - } - - return pci_dma_write(&n->parent_obj, addr, buf, size); -} - -static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid) -{ - return nsid && (nsid == NVME_NSID_BROADCAST || nsid <= n->num_namespaces); -} - -static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) -{ - return sqid < n->params.max_ioqpairs + 1 && n->sq[sqid] != NULL ? 0 : -1; -} - -static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid) -{ - return cqid < n->params.max_ioqpairs + 1 && n->cq[cqid] != NULL ? 
0 : -1; -} - -static void nvme_inc_cq_tail(NvmeCQueue *cq) -{ - cq->tail++; - if (cq->tail >= cq->size) { - cq->tail = 0; - cq->phase = !cq->phase; - } -} - -static void nvme_inc_sq_head(NvmeSQueue *sq) -{ - sq->head = (sq->head + 1) % sq->size; -} - -static uint8_t nvme_cq_full(NvmeCQueue *cq) -{ - return (cq->tail + 1) % cq->size == cq->head; -} - -static uint8_t nvme_sq_empty(NvmeSQueue *sq) -{ - return sq->head == sq->tail; -} - -static void nvme_irq_check(NvmeCtrl *n) -{ - if (msix_enabled(&(n->parent_obj))) { - return; - } - if (~n->bar.intms & n->irq_status) { - pci_irq_assert(&n->parent_obj); - } else { - pci_irq_deassert(&n->parent_obj); - } -} - -static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq) -{ - if (cq->irq_enabled) { - if (msix_enabled(&(n->parent_obj))) { - trace_pci_nvme_irq_msix(cq->vector); - msix_notify(&(n->parent_obj), cq->vector); - } else { - trace_pci_nvme_irq_pin(); - assert(cq->vector < 32); - n->irq_status |= 1 << cq->vector; - nvme_irq_check(n); - } - } else { - trace_pci_nvme_irq_masked(); - } -} - -static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) -{ - if (cq->irq_enabled) { - if (msix_enabled(&(n->parent_obj))) { - return; - } else { - assert(cq->vector < 32); - n->irq_status &= ~(1 << cq->vector); - nvme_irq_check(n); - } - } -} - -static void nvme_req_clear(NvmeRequest *req) -{ - req->ns = NULL; - req->opaque = NULL; - req->aiocb = NULL; - memset(&req->cqe, 0x0, sizeof(req->cqe)); - req->status = NVME_SUCCESS; -} - -static inline void nvme_sg_init(NvmeCtrl *n, NvmeSg *sg, bool dma) -{ - if (dma) { - pci_dma_sglist_init(&sg->qsg, &n->parent_obj, 0); - sg->flags = NVME_SG_DMA; - } else { - qemu_iovec_init(&sg->iov, 0); - } - - sg->flags |= NVME_SG_ALLOC; -} - -static inline void nvme_sg_unmap(NvmeSg *sg) -{ - if (!(sg->flags & NVME_SG_ALLOC)) { - return; - } - - if (sg->flags & NVME_SG_DMA) { - qemu_sglist_destroy(&sg->qsg); - } else { - qemu_iovec_destroy(&sg->iov); - } - - memset(sg, 0x0, sizeof(*sg)); -} - -/* - * When metadata is transfered as extended LBAs, the DPTR mapped into `sg` - * holds both data and metadata. This function splits the data and metadata - * into two separate QSG/IOVs. - */ -static void nvme_sg_split(NvmeSg *sg, NvmeNamespace *ns, NvmeSg *data, - NvmeSg *mdata) -{ - NvmeSg *dst = data; - size_t size = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - uint32_t trans_len, count = size; - uint64_t offset = 0; - bool dma = sg->flags & NVME_SG_DMA; - size_t sge_len; - size_t sg_len = dma ? sg->qsg.size : sg->iov.size; - int sg_idx = 0; - - assert(sg->flags & NVME_SG_ALLOC); - - while (sg_len) { - sge_len = dma ? sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len; - - trans_len = MIN(sg_len, count); - trans_len = MIN(trans_len, sge_len - offset); - - if (dst) { - if (dma) { - qemu_sglist_add(&dst->qsg, sg->qsg.sg[sg_idx].base + offset, - trans_len); - } else { - qemu_iovec_add(&dst->iov, - sg->iov.iov[sg_idx].iov_base + offset, - trans_len); - } - } - - sg_len -= trans_len; - count -= trans_len; - offset += trans_len; - - if (count == 0) { - dst = (dst == data) ? mdata : data; - count = (dst == data) ? 
size : msize; - } - - if (sge_len == offset) { - offset = 0; - sg_idx++; - } - } -} - -static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr, - size_t len) -{ - if (!len) { - return NVME_SUCCESS; - } - - trace_pci_nvme_map_addr_cmb(addr, len); - - if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len - 1)) { - return NVME_DATA_TRAS_ERROR; - } - - qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len); - - return NVME_SUCCESS; -} - -static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr, - size_t len) -{ - if (!len) { - return NVME_SUCCESS; - } - - if (!nvme_addr_is_pmr(n, addr) || !nvme_addr_is_pmr(n, addr + len - 1)) { - return NVME_DATA_TRAS_ERROR; - } - - qemu_iovec_add(iov, nvme_addr_to_pmr(n, addr), len); - - return NVME_SUCCESS; -} - -static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len) -{ - bool cmb = false, pmr = false; - - if (!len) { - return NVME_SUCCESS; - } - - trace_pci_nvme_map_addr(addr, len); - - if (nvme_addr_is_cmb(n, addr)) { - cmb = true; - } else if (nvme_addr_is_pmr(n, addr)) { - pmr = true; - } - - if (cmb || pmr) { - if (sg->flags & NVME_SG_DMA) { - return NVME_INVALID_USE_OF_CMB | NVME_DNR; - } - - if (cmb) { - return nvme_map_addr_cmb(n, &sg->iov, addr, len); - } else { - return nvme_map_addr_pmr(n, &sg->iov, addr, len); - } - } - - if (!(sg->flags & NVME_SG_DMA)) { - return NVME_INVALID_USE_OF_CMB | NVME_DNR; - } - - qemu_sglist_add(&sg->qsg, addr, len); - - return NVME_SUCCESS; -} - -static inline bool nvme_addr_is_dma(NvmeCtrl *n, hwaddr addr) -{ - return !(nvme_addr_is_cmb(n, addr) || nvme_addr_is_pmr(n, addr)); -} - -static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1, - uint64_t prp2, uint32_t len) -{ - hwaddr trans_len = n->page_size - (prp1 % n->page_size); - trans_len = MIN(len, trans_len); - int num_prps = (len >> n->page_bits) + 1; - uint16_t status; - int ret; - - trace_pci_nvme_map_prp(trans_len, len, prp1, prp2, num_prps); - - nvme_sg_init(n, sg, nvme_addr_is_dma(n, prp1)); - - status = nvme_map_addr(n, sg, prp1, trans_len); - if (status) { - goto unmap; - } - - len -= trans_len; - if (len) { - if (len > n->page_size) { - uint64_t prp_list[n->max_prp_ents]; - uint32_t nents, prp_trans; - int i = 0; - - /* - * The first PRP list entry, pointed to by PRP2 may contain offset. - * Hence, we need to calculate the number of entries in based on - * that offset. 
- */ - nents = (n->page_size - (prp2 & (n->page_size - 1))) >> 3; - prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); - ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans); - if (ret) { - trace_pci_nvme_err_addr_read(prp2); - status = NVME_DATA_TRAS_ERROR; - goto unmap; - } - while (len != 0) { - uint64_t prp_ent = le64_to_cpu(prp_list[i]); - - if (i == nents - 1 && len > n->page_size) { - if (unlikely(prp_ent & (n->page_size - 1))) { - trace_pci_nvme_err_invalid_prplist_ent(prp_ent); - status = NVME_INVALID_PRP_OFFSET | NVME_DNR; - goto unmap; - } - - i = 0; - nents = (len + n->page_size - 1) >> n->page_bits; - nents = MIN(nents, n->max_prp_ents); - prp_trans = nents * sizeof(uint64_t); - ret = nvme_addr_read(n, prp_ent, (void *)prp_list, - prp_trans); - if (ret) { - trace_pci_nvme_err_addr_read(prp_ent); - status = NVME_DATA_TRAS_ERROR; - goto unmap; - } - prp_ent = le64_to_cpu(prp_list[i]); - } - - if (unlikely(prp_ent & (n->page_size - 1))) { - trace_pci_nvme_err_invalid_prplist_ent(prp_ent); - status = NVME_INVALID_PRP_OFFSET | NVME_DNR; - goto unmap; - } - - trans_len = MIN(len, n->page_size); - status = nvme_map_addr(n, sg, prp_ent, trans_len); - if (status) { - goto unmap; - } - - len -= trans_len; - i++; - } - } else { - if (unlikely(prp2 & (n->page_size - 1))) { - trace_pci_nvme_err_invalid_prp2_align(prp2); - status = NVME_INVALID_PRP_OFFSET | NVME_DNR; - goto unmap; - } - status = nvme_map_addr(n, sg, prp2, len); - if (status) { - goto unmap; - } - } - } - - return NVME_SUCCESS; - -unmap: - nvme_sg_unmap(sg); - return status; -} - -/* - * Map 'nsgld' data descriptors from 'segment'. The function will subtract the - * number of bytes mapped in len. - */ -static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg, - NvmeSglDescriptor *segment, uint64_t nsgld, - size_t *len, NvmeCmd *cmd) -{ - dma_addr_t addr, trans_len; - uint32_t dlen; - uint16_t status; - - for (int i = 0; i < nsgld; i++) { - uint8_t type = NVME_SGL_TYPE(segment[i].type); - - switch (type) { - case NVME_SGL_DESCR_TYPE_BIT_BUCKET: - if (cmd->opcode == NVME_CMD_WRITE) { - continue; - } - case NVME_SGL_DESCR_TYPE_DATA_BLOCK: - break; - case NVME_SGL_DESCR_TYPE_SEGMENT: - case NVME_SGL_DESCR_TYPE_LAST_SEGMENT: - return NVME_INVALID_NUM_SGL_DESCRS | NVME_DNR; - default: - return NVME_SGL_DESCR_TYPE_INVALID | NVME_DNR; - } - - dlen = le32_to_cpu(segment[i].len); - - if (!dlen) { - continue; - } - - if (*len == 0) { - /* - * All data has been mapped, but the SGL contains additional - * segments and/or descriptors. The controller might accept - * ignoring the rest of the SGL. - */ - uint32_t sgls = le32_to_cpu(n->id_ctrl.sgls); - if (sgls & NVME_CTRL_SGLS_EXCESS_LENGTH) { - break; - } - - trace_pci_nvme_err_invalid_sgl_excess_length(dlen); - return NVME_DATA_SGL_LEN_INVALID | NVME_DNR; - } - - trans_len = MIN(*len, dlen); - - if (type == NVME_SGL_DESCR_TYPE_BIT_BUCKET) { - goto next; - } - - addr = le64_to_cpu(segment[i].addr); - - if (UINT64_MAX - addr < dlen) { - return NVME_DATA_SGL_LEN_INVALID | NVME_DNR; - } - - status = nvme_map_addr(n, sg, addr, trans_len); - if (status) { - return status; - } - -next: - *len -= trans_len; - } - - return NVME_SUCCESS; -} - -static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl, - size_t len, NvmeCmd *cmd) -{ - /* - * Read the segment in chunks of 256 descriptors (one 4k page) to avoid - * dynamically allocating a potentially huge SGL. 
The spec allows the SGL - * to be larger (as in number of bytes required to describe the SGL - * descriptors and segment chain) than the command transfer size, so it is - * not bounded by MDTS. - */ - const int SEG_CHUNK_SIZE = 256; - - NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld; - uint64_t nsgld; - uint32_t seg_len; - uint16_t status; - hwaddr addr; - int ret; - - sgld = &sgl; - addr = le64_to_cpu(sgl.addr); - - trace_pci_nvme_map_sgl(NVME_SGL_TYPE(sgl.type), len); - - nvme_sg_init(n, sg, nvme_addr_is_dma(n, addr)); - - /* - * If the entire transfer can be described with a single data block it can - * be mapped directly. - */ - if (NVME_SGL_TYPE(sgl.type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) { - status = nvme_map_sgl_data(n, sg, sgld, 1, &len, cmd); - if (status) { - goto unmap; - } - - goto out; - } - - for (;;) { - switch (NVME_SGL_TYPE(sgld->type)) { - case NVME_SGL_DESCR_TYPE_SEGMENT: - case NVME_SGL_DESCR_TYPE_LAST_SEGMENT: - break; - default: - return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR; - } - - seg_len = le32_to_cpu(sgld->len); - - /* check the length of the (Last) Segment descriptor */ - if ((!seg_len || seg_len & 0xf) && - (NVME_SGL_TYPE(sgld->type) != NVME_SGL_DESCR_TYPE_BIT_BUCKET)) { - return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR; - } - - if (UINT64_MAX - addr < seg_len) { - return NVME_DATA_SGL_LEN_INVALID | NVME_DNR; - } - - nsgld = seg_len / sizeof(NvmeSglDescriptor); - - while (nsgld > SEG_CHUNK_SIZE) { - if (nvme_addr_read(n, addr, segment, sizeof(segment))) { - trace_pci_nvme_err_addr_read(addr); - status = NVME_DATA_TRAS_ERROR; - goto unmap; - } - - status = nvme_map_sgl_data(n, sg, segment, SEG_CHUNK_SIZE, - &len, cmd); - if (status) { - goto unmap; - } - - nsgld -= SEG_CHUNK_SIZE; - addr += SEG_CHUNK_SIZE * sizeof(NvmeSglDescriptor); - } - - ret = nvme_addr_read(n, addr, segment, nsgld * - sizeof(NvmeSglDescriptor)); - if (ret) { - trace_pci_nvme_err_addr_read(addr); - status = NVME_DATA_TRAS_ERROR; - goto unmap; - } - - last_sgld = &segment[nsgld - 1]; - - /* - * If the segment ends with a Data Block or Bit Bucket Descriptor Type, - * then we are done. - */ - switch (NVME_SGL_TYPE(last_sgld->type)) { - case NVME_SGL_DESCR_TYPE_DATA_BLOCK: - case NVME_SGL_DESCR_TYPE_BIT_BUCKET: - status = nvme_map_sgl_data(n, sg, segment, nsgld, &len, cmd); - if (status) { - goto unmap; - } - - goto out; - - default: - break; - } - - /* - * If the last descriptor was not a Data Block or Bit Bucket, then the - * current segment must not be a Last Segment. - */ - if (NVME_SGL_TYPE(sgld->type) == NVME_SGL_DESCR_TYPE_LAST_SEGMENT) { - status = NVME_INVALID_SGL_SEG_DESCR | NVME_DNR; - goto unmap; - } - - sgld = last_sgld; - addr = le64_to_cpu(sgld->addr); - - /* - * Do not map the last descriptor; it will be a Segment or Last Segment - * descriptor and is handled by the next iteration. 
- */ - status = nvme_map_sgl_data(n, sg, segment, nsgld - 1, &len, cmd); - if (status) { - goto unmap; - } - } - -out: - /* if there is any residual left in len, the SGL was too short */ - if (len) { - status = NVME_DATA_SGL_LEN_INVALID | NVME_DNR; - goto unmap; - } - - return NVME_SUCCESS; - -unmap: - nvme_sg_unmap(sg); - return status; -} - -uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, - NvmeCmd *cmd) -{ - uint64_t prp1, prp2; - - switch (NVME_CMD_FLAGS_PSDT(cmd->flags)) { - case NVME_PSDT_PRP: - prp1 = le64_to_cpu(cmd->dptr.prp1); - prp2 = le64_to_cpu(cmd->dptr.prp2); - - return nvme_map_prp(n, sg, prp1, prp2, len); - case NVME_PSDT_SGL_MPTR_CONTIGUOUS: - case NVME_PSDT_SGL_MPTR_SGL: - return nvme_map_sgl(n, sg, cmd->dptr.sgl, len, cmd); - default: - return NVME_INVALID_FIELD; - } -} - -static uint16_t nvme_map_mptr(NvmeCtrl *n, NvmeSg *sg, size_t len, - NvmeCmd *cmd) -{ - int psdt = NVME_CMD_FLAGS_PSDT(cmd->flags); - hwaddr mptr = le64_to_cpu(cmd->mptr); - uint16_t status; - - if (psdt == NVME_PSDT_SGL_MPTR_SGL) { - NvmeSglDescriptor sgl; - - if (nvme_addr_read(n, mptr, &sgl, sizeof(sgl))) { - return NVME_DATA_TRAS_ERROR; - } - - status = nvme_map_sgl(n, sg, sgl, len, cmd); - if (status && (status & 0x7ff) == NVME_DATA_SGL_LEN_INVALID) { - status = NVME_MD_SGL_LEN_INVALID | NVME_DNR; - } - - return status; - } - - nvme_sg_init(n, sg, nvme_addr_is_dma(n, mptr)); - status = nvme_map_addr(n, sg, mptr, len); - if (status) { - nvme_sg_unmap(sg); - } - - return status; -} - -static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) -{ - NvmeNamespace *ns = req->ns; - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint16_t ctrl = le16_to_cpu(rw->control); - size_t len = nvme_l2b(ns, nlb); - uint16_t status; - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && - (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) { - goto out; - } - - if (nvme_ns_ext(ns)) { - NvmeSg sg; - - len += nvme_m2b(ns, nlb); - - status = nvme_map_dptr(n, &sg, len, &req->cmd); - if (status) { - return status; - } - - nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA); - nvme_sg_split(&sg, ns, &req->sg, NULL); - nvme_sg_unmap(&sg); - - return NVME_SUCCESS; - } - -out: - return nvme_map_dptr(n, &req->sg, len, &req->cmd); -} - -static uint16_t nvme_map_mdata(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) -{ - NvmeNamespace *ns = req->ns; - size_t len = nvme_m2b(ns, nlb); - uint16_t status; - - if (nvme_ns_ext(ns)) { - NvmeSg sg; - - len += nvme_l2b(ns, nlb); - - status = nvme_map_dptr(n, &sg, len, &req->cmd); - if (status) { - return status; - } - - nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA); - nvme_sg_split(&sg, ns, NULL, &req->sg); - nvme_sg_unmap(&sg); - - return NVME_SUCCESS; - } - - return nvme_map_mptr(n, &req->sg, len, &req->cmd); -} - -static uint16_t nvme_tx_interleaved(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, - uint32_t len, uint32_t bytes, - int32_t skip_bytes, int64_t offset, - NvmeTxDirection dir) -{ - hwaddr addr; - uint32_t trans_len, count = bytes; - bool dma = sg->flags & NVME_SG_DMA; - int64_t sge_len; - int sg_idx = 0; - int ret; - - assert(sg->flags & NVME_SG_ALLOC); - - while (len) { - sge_len = dma ? 
sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len; - - if (sge_len - offset < 0) { - offset -= sge_len; - sg_idx++; - continue; - } - - if (sge_len == offset) { - offset = 0; - sg_idx++; - continue; - } - - trans_len = MIN(len, count); - trans_len = MIN(trans_len, sge_len - offset); - - if (dma) { - addr = sg->qsg.sg[sg_idx].base + offset; - } else { - addr = (hwaddr)(uintptr_t)sg->iov.iov[sg_idx].iov_base + offset; - } - - if (dir == NVME_TX_DIRECTION_TO_DEVICE) { - ret = nvme_addr_read(n, addr, ptr, trans_len); - } else { - ret = nvme_addr_write(n, addr, ptr, trans_len); - } - - if (ret) { - return NVME_DATA_TRAS_ERROR; - } - - ptr += trans_len; - len -= trans_len; - count -= trans_len; - offset += trans_len; - - if (count == 0) { - count = bytes; - offset += skip_bytes; - } - } - - return NVME_SUCCESS; -} - -static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir) -{ - assert(sg->flags & NVME_SG_ALLOC); - - if (sg->flags & NVME_SG_DMA) { - uint64_t residual; - - if (dir == NVME_TX_DIRECTION_TO_DEVICE) { - residual = dma_buf_write(ptr, len, &sg->qsg); - } else { - residual = dma_buf_read(ptr, len, &sg->qsg); - } - - if (unlikely(residual)) { - trace_pci_nvme_err_invalid_dma(); - return NVME_INVALID_FIELD | NVME_DNR; - } - } else { - size_t bytes; - - if (dir == NVME_TX_DIRECTION_TO_DEVICE) { - bytes = qemu_iovec_to_buf(&sg->iov, 0, ptr, len); - } else { - bytes = qemu_iovec_from_buf(&sg->iov, 0, ptr, len); - } - - if (unlikely(bytes != len)) { - trace_pci_nvme_err_invalid_dma(); - return NVME_INVALID_FIELD | NVME_DNR; - } - } - - return NVME_SUCCESS; -} - -static inline uint16_t nvme_c2h(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeRequest *req) -{ - uint16_t status; - - status = nvme_map_dptr(n, &req->sg, len, &req->cmd); - if (status) { - return status; - } - - return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_FROM_DEVICE); -} - -static inline uint16_t nvme_h2c(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeRequest *req) -{ - uint16_t status; - - status = nvme_map_dptr(n, &req->sg, len, &req->cmd); - if (status) { - return status; - } - - return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_TO_DEVICE); -} - -uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir, NvmeRequest *req) -{ - NvmeNamespace *ns = req->ns; - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint16_t ctrl = le16_to_cpu(rw->control); - - if (nvme_ns_ext(ns) && - !(ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) { - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - - return nvme_tx_interleaved(n, &req->sg, ptr, len, lsize, msize, 0, - dir); - } - - return nvme_tx(n, &req->sg, ptr, len, dir); -} - -uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir, NvmeRequest *req) -{ - NvmeNamespace *ns = req->ns; - uint16_t status; - - if (nvme_ns_ext(ns)) { - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - - return nvme_tx_interleaved(n, &req->sg, ptr, len, msize, lsize, lsize, - dir); - } - - nvme_sg_unmap(&req->sg); - - status = nvme_map_mptr(n, &req->sg, len, &req->cmd); - if (status) { - return status; - } - - return nvme_tx(n, &req->sg, ptr, len, dir); -} - -static inline void nvme_blk_read(BlockBackend *blk, int64_t offset, - BlockCompletionFunc *cb, NvmeRequest *req) -{ - assert(req->sg.flags & NVME_SG_ALLOC); - - if (req->sg.flags & NVME_SG_DMA) { - req->aiocb = dma_blk_read(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE, - cb, req); - } else { - req->aiocb = 
blk_aio_preadv(blk, offset, &req->sg.iov, 0, cb, req); - } -} - -static inline void nvme_blk_write(BlockBackend *blk, int64_t offset, - BlockCompletionFunc *cb, NvmeRequest *req) -{ - assert(req->sg.flags & NVME_SG_ALLOC); - - if (req->sg.flags & NVME_SG_DMA) { - req->aiocb = dma_blk_write(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE, - cb, req); - } else { - req->aiocb = blk_aio_pwritev(blk, offset, &req->sg.iov, 0, cb, req); - } -} - -static void nvme_post_cqes(void *opaque) -{ - NvmeCQueue *cq = opaque; - NvmeCtrl *n = cq->ctrl; - NvmeRequest *req, *next; - int ret; - - QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) { - NvmeSQueue *sq; - hwaddr addr; - - if (nvme_cq_full(cq)) { - break; - } - - sq = req->sq; - req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase); - req->cqe.sq_id = cpu_to_le16(sq->sqid); - req->cqe.sq_head = cpu_to_le16(sq->head); - addr = cq->dma_addr + cq->tail * n->cqe_size; - ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe, - sizeof(req->cqe)); - if (ret) { - trace_pci_nvme_err_addr_write(addr); - trace_pci_nvme_err_cfs(); - n->bar.csts = NVME_CSTS_FAILED; - break; - } - QTAILQ_REMOVE(&cq->req_list, req, entry); - nvme_inc_cq_tail(cq); - nvme_sg_unmap(&req->sg); - QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); - } - if (cq->tail != cq->head) { - nvme_irq_assert(n, cq); - } -} - -static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) -{ - assert(cq->cqid == req->sq->cqid); - trace_pci_nvme_enqueue_req_completion(nvme_cid(req), cq->cqid, - req->status); - - if (req->status) { - trace_pci_nvme_err_req_status(nvme_cid(req), nvme_nsid(req->ns), - req->status, req->cmd.opcode); - } - - QTAILQ_REMOVE(&req->sq->out_req_list, req, entry); - QTAILQ_INSERT_TAIL(&cq->req_list, req, entry); - timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); -} - -static void nvme_process_aers(void *opaque) -{ - NvmeCtrl *n = opaque; - NvmeAsyncEvent *event, *next; - - trace_pci_nvme_process_aers(n->aer_queued); - - QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) { - NvmeRequest *req; - NvmeAerResult *result; - - /* can't post cqe if there is nothing to complete */ - if (!n->outstanding_aers) { - trace_pci_nvme_no_outstanding_aers(); - break; - } - - /* ignore if masked (cqe posted, but event not cleared) */ - if (n->aer_mask & (1 << event->result.event_type)) { - trace_pci_nvme_aer_masked(event->result.event_type, n->aer_mask); - continue; - } - - QTAILQ_REMOVE(&n->aer_queue, event, entry); - n->aer_queued--; - - n->aer_mask |= 1 << event->result.event_type; - n->outstanding_aers--; - - req = n->aer_reqs[n->outstanding_aers]; - - result = (NvmeAerResult *) &req->cqe.result; - result->event_type = event->result.event_type; - result->event_info = event->result.event_info; - result->log_page = event->result.log_page; - g_free(event); - - trace_pci_nvme_aer_post_cqe(result->event_type, result->event_info, - result->log_page); - - nvme_enqueue_req_completion(&n->admin_cq, req); - } -} - -static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type, - uint8_t event_info, uint8_t log_page) -{ - NvmeAsyncEvent *event; - - trace_pci_nvme_enqueue_event(event_type, event_info, log_page); - - if (n->aer_queued == n->params.aer_max_queued) { - trace_pci_nvme_enqueue_event_noqueue(n->aer_queued); - return; - } - - event = g_new(NvmeAsyncEvent, 1); - event->result = (NvmeAerResult) { - .event_type = event_type, - .event_info = event_info, - .log_page = log_page, - }; - - QTAILQ_INSERT_TAIL(&n->aer_queue, event, entry); - n->aer_queued++; - 
- nvme_process_aers(n); -} - -static void nvme_smart_event(NvmeCtrl *n, uint8_t event) -{ - uint8_t aer_info; - - /* Ref SPEC */ - if (!(NVME_AEC_SMART(n->features.async_config) & event)) { - return; - } - - switch (event) { - case NVME_SMART_SPARE: - aer_info = NVME_AER_INFO_SMART_SPARE_THRESH; - break; - case NVME_SMART_TEMPERATURE: - aer_info = NVME_AER_INFO_SMART_TEMP_THRESH; - break; - case NVME_SMART_RELIABILITY: - case NVME_SMART_MEDIA_READ_ONLY: - case NVME_SMART_FAILED_VOLATILE_MEDIA: - case NVME_SMART_PMR_UNRELIABLE: - aer_info = NVME_AER_INFO_SMART_RELIABILITY; - break; - default: - return; - } - - nvme_enqueue_event(n, NVME_AER_TYPE_SMART, aer_info, NVME_LOG_SMART_INFO); -} - -static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type) -{ - n->aer_mask &= ~(1 << event_type); - if (!QTAILQ_EMPTY(&n->aer_queue)) { - nvme_process_aers(n); - } -} - -static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len) -{ - uint8_t mdts = n->params.mdts; - - if (mdts && len > n->page_size << mdts) { - trace_pci_nvme_err_mdts(len); - return NVME_INVALID_FIELD | NVME_DNR; - } - - return NVME_SUCCESS; -} - -static inline uint16_t nvme_check_bounds(NvmeNamespace *ns, uint64_t slba, - uint32_t nlb) -{ - uint64_t nsze = le64_to_cpu(ns->id_ns.nsze); - - if (unlikely(UINT64_MAX - slba < nlb || slba + nlb > nsze)) { - return NVME_LBA_RANGE | NVME_DNR; - } - - return NVME_SUCCESS; -} - -static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba, - uint32_t nlb) -{ - BlockDriverState *bs = blk_bs(ns->blkconf.blk); - - int64_t pnum = 0, bytes = nvme_l2b(ns, nlb); - int64_t offset = nvme_l2b(ns, slba); - bool zeroed; - int ret; - - Error *local_err = NULL; - - /* - * `pnum` holds the number of bytes after offset that shares the same - * allocation status as the byte at offset. If `pnum` is different from - * `bytes`, we should check the allocation status of the next range and - * continue this until all bytes have been checked. - */ - do { - bytes -= pnum; - - ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL); - if (ret < 0) { - error_setg_errno(&local_err, -ret, "unable to get block status"); - error_report_err(local_err); - - return NVME_INTERNAL_DEV_ERROR; - } - - zeroed = !!(ret & BDRV_BLOCK_ZERO); - - trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed); - - if (zeroed) { - return NVME_DULB; - } - - offset += pnum; - } while (pnum != bytes); - - return NVME_SUCCESS; -} - -static void nvme_aio_err(NvmeRequest *req, int ret) -{ - uint16_t status = NVME_SUCCESS; - Error *local_err = NULL; - - switch (req->cmd.opcode) { - case NVME_CMD_READ: - status = NVME_UNRECOVERED_READ; - break; - case NVME_CMD_FLUSH: - case NVME_CMD_WRITE: - case NVME_CMD_WRITE_ZEROES: - case NVME_CMD_ZONE_APPEND: - status = NVME_WRITE_FAULT; - break; - default: - status = NVME_INTERNAL_DEV_ERROR; - break; - } - - trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), status); - - error_setg_errno(&local_err, -ret, "aio failed"); - error_report_err(local_err); - - /* - * Set the command status code to the first encountered error but allow a - * subsequent Internal Device Error to trump it. - */ - if (req->status && status != NVME_INTERNAL_DEV_ERROR) { - return; - } - - req->status = status; -} - -static inline uint32_t nvme_zone_idx(NvmeNamespace *ns, uint64_t slba) -{ - return ns->zone_size_log2 > 0 ? 
slba >> ns->zone_size_log2 : - slba / ns->zone_size; -} - -static inline NvmeZone *nvme_get_zone_by_slba(NvmeNamespace *ns, uint64_t slba) -{ - uint32_t zone_idx = nvme_zone_idx(ns, slba); - - assert(zone_idx < ns->num_zones); - return &ns->zone_array[zone_idx]; -} - -static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone) -{ - uint64_t zslba = zone->d.zslba; - - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_EMPTY: - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - case NVME_ZONE_STATE_CLOSED: - return NVME_SUCCESS; - case NVME_ZONE_STATE_FULL: - trace_pci_nvme_err_zone_is_full(zslba); - return NVME_ZONE_FULL; - case NVME_ZONE_STATE_OFFLINE: - trace_pci_nvme_err_zone_is_offline(zslba); - return NVME_ZONE_OFFLINE; - case NVME_ZONE_STATE_READ_ONLY: - trace_pci_nvme_err_zone_is_read_only(zslba); - return NVME_ZONE_READ_ONLY; - default: - assert(false); - } - - return NVME_INTERNAL_DEV_ERROR; -} - -static uint16_t nvme_check_zone_write(NvmeNamespace *ns, NvmeZone *zone, - uint64_t slba, uint32_t nlb) -{ - uint64_t zcap = nvme_zone_wr_boundary(zone); - uint16_t status; - - status = nvme_check_zone_state_for_write(zone); - if (status) { - return status; - } - - if (unlikely(slba != zone->w_ptr)) { - trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba, zone->w_ptr); - return NVME_ZONE_INVALID_WRITE; - } - - if (unlikely((slba + nlb) > zcap)) { - trace_pci_nvme_err_zone_boundary(slba, nlb, zcap); - return NVME_ZONE_BOUNDARY_ERROR; - } - - return NVME_SUCCESS; -} - -static uint16_t nvme_check_zone_state_for_read(NvmeZone *zone) -{ - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_EMPTY: - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - case NVME_ZONE_STATE_FULL: - case NVME_ZONE_STATE_CLOSED: - case NVME_ZONE_STATE_READ_ONLY: - return NVME_SUCCESS; - case NVME_ZONE_STATE_OFFLINE: - trace_pci_nvme_err_zone_is_offline(zone->d.zslba); - return NVME_ZONE_OFFLINE; - default: - assert(false); - } - - return NVME_INTERNAL_DEV_ERROR; -} - -static uint16_t nvme_check_zone_read(NvmeNamespace *ns, uint64_t slba, - uint32_t nlb) -{ - NvmeZone *zone = nvme_get_zone_by_slba(ns, slba); - uint64_t bndry = nvme_zone_rd_boundary(ns, zone); - uint64_t end = slba + nlb; - uint16_t status; - - status = nvme_check_zone_state_for_read(zone); - if (status) { - ; - } else if (unlikely(end > bndry)) { - if (!ns->params.cross_zone_read) { - status = NVME_ZONE_BOUNDARY_ERROR; - } else { - /* - * Read across zone boundary - check that all subsequent - * zones that are being read have an appropriate state. 
- */ - do { - zone++; - status = nvme_check_zone_state_for_read(zone); - if (status) { - break; - } - } while (end > nvme_zone_rd_boundary(ns, zone)); - } - } - - return status; -} - -static uint16_t nvme_zrm_finish(NvmeNamespace *ns, NvmeZone *zone) -{ - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_FULL: - return NVME_SUCCESS; - - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - nvme_aor_dec_open(ns); - /* fallthrough */ - case NVME_ZONE_STATE_CLOSED: - nvme_aor_dec_active(ns); - /* fallthrough */ - case NVME_ZONE_STATE_EMPTY: - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL); - return NVME_SUCCESS; - - default: - return NVME_ZONE_INVAL_TRANSITION; - } -} - -static uint16_t nvme_zrm_close(NvmeNamespace *ns, NvmeZone *zone) -{ - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - nvme_aor_dec_open(ns); - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED); - /* fall through */ - case NVME_ZONE_STATE_CLOSED: - return NVME_SUCCESS; - - default: - return NVME_ZONE_INVAL_TRANSITION; - } -} - -static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns) -{ - NvmeZone *zone; - - if (ns->params.max_open_zones && - ns->nr_open_zones == ns->params.max_open_zones) { - zone = QTAILQ_FIRST(&ns->imp_open_zones); - if (zone) { - /* - * Automatically close this implicitly open zone. - */ - QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); - nvme_zrm_close(ns, zone); - } - } -} - -static uint16_t __nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone, - bool implicit) -{ - int act = 0; - uint16_t status; - - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_EMPTY: - act = 1; - - /* fallthrough */ - - case NVME_ZONE_STATE_CLOSED: - nvme_zrm_auto_transition_zone(ns); - status = nvme_aor_check(ns, act, 1); - if (status) { - return status; - } - - if (act) { - nvme_aor_inc_active(ns); - } - - nvme_aor_inc_open(ns); - - if (implicit) { - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN); - return NVME_SUCCESS; - } - - /* fallthrough */ - - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - if (implicit) { - return NVME_SUCCESS; - } - - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN); - - /* fallthrough */ - - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - return NVME_SUCCESS; - - default: - return NVME_ZONE_INVAL_TRANSITION; - } -} - -static inline uint16_t nvme_zrm_auto(NvmeNamespace *ns, NvmeZone *zone) -{ - return __nvme_zrm_open(ns, zone, true); -} - -static inline uint16_t nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone) -{ - return __nvme_zrm_open(ns, zone, false); -} - -static void __nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone, - uint32_t nlb) -{ - zone->d.wp += nlb; - - if (zone->d.wp == nvme_zone_wr_boundary(zone)) { - nvme_zrm_finish(ns, zone); - } -} - -static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req) -{ - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - NvmeZone *zone; - uint64_t slba; - uint32_t nlb; - - slba = le64_to_cpu(rw->slba); - nlb = le16_to_cpu(rw->nlb) + 1; - zone = nvme_get_zone_by_slba(ns, slba); - - __nvme_advance_zone_wp(ns, zone, nlb); -} - -static inline bool nvme_is_write(NvmeRequest *req) -{ - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - - return rw->opcode == NVME_CMD_WRITE || - rw->opcode == NVME_CMD_ZONE_APPEND || - rw->opcode == NVME_CMD_WRITE_ZEROES; -} - -static void nvme_misc_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - - BlockBackend *blk = ns->blkconf.blk; 
- BlockAcctCookie *acct = &req->acct; - BlockAcctStats *stats = blk_get_stats(blk); - - trace_pci_nvme_misc_cb(nvme_cid(req), blk_name(blk)); - - if (ret) { - block_acct_failed(stats, acct); - nvme_aio_err(req, ret); - } else { - block_acct_done(stats, acct); - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -void nvme_rw_complete_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - BlockAcctCookie *acct = &req->acct; - BlockAcctStats *stats = blk_get_stats(blk); - - trace_pci_nvme_rw_complete_cb(nvme_cid(req), blk_name(blk)); - - if (ret) { - block_acct_failed(stats, acct); - nvme_aio_err(req, ret); - } else { - block_acct_done(stats, acct); - } - - if (ns->params.zoned && nvme_is_write(req)) { - nvme_finalize_zoned_write(ns, req); - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static void nvme_rw_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - - BlockBackend *blk = ns->blkconf.blk; - - trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk)); - - if (ret) { - goto out; - } - - if (nvme_msize(ns)) { - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); - - if (req->cmd.opcode == NVME_CMD_WRITE_ZEROES) { - size_t mlen = nvme_m2b(ns, nlb); - - req->aiocb = blk_aio_pwrite_zeroes(blk, offset, mlen, - BDRV_REQ_MAY_UNMAP, - nvme_rw_complete_cb, req); - return; - } - - if (nvme_ns_ext(ns) || req->cmd.mptr) { - uint16_t status; - - nvme_sg_unmap(&req->sg); - status = nvme_map_mdata(nvme_ctrl(req), nlb, req); - if (status) { - ret = -EFAULT; - goto out; - } - - if (req->cmd.opcode == NVME_CMD_READ) { - return nvme_blk_read(blk, offset, nvme_rw_complete_cb, req); - } - - return nvme_blk_write(blk, offset, nvme_rw_complete_cb, req); - } - } - -out: - nvme_rw_complete_cb(req, ret); -} - -struct nvme_aio_format_ctx { - NvmeRequest *req; - NvmeNamespace *ns; - - /* number of outstanding write zeroes for this namespace */ - int *count; -}; - -static void nvme_aio_format_cb(void *opaque, int ret) -{ - struct nvme_aio_format_ctx *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = ctx->ns; - uintptr_t *num_formats = (uintptr_t *)&req->opaque; - int *count = ctx->count; - - g_free(ctx); - - if (ret) { - nvme_aio_err(req, ret); - } - - if (--(*count)) { - return; - } - - g_free(count); - ns->status = 0x0; - - if (--(*num_formats)) { - return; - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -struct nvme_aio_flush_ctx { - NvmeRequest *req; - NvmeNamespace *ns; - BlockAcctCookie acct; -}; - -static void nvme_aio_flush_cb(void *opaque, int ret) -{ - struct nvme_aio_flush_ctx *ctx = opaque; - NvmeRequest *req = ctx->req; - uintptr_t *num_flushes = (uintptr_t *)&req->opaque; - - BlockBackend *blk = ctx->ns->blkconf.blk; - BlockAcctCookie *acct = &ctx->acct; - BlockAcctStats *stats = blk_get_stats(blk); - - trace_pci_nvme_aio_flush_cb(nvme_cid(req), blk_name(blk)); - - if (!ret) { - block_acct_done(stats, acct); - } else { - block_acct_failed(stats, acct); - nvme_aio_err(req, ret); - } - - (*num_flushes)--; - g_free(ctx); - - if (*num_flushes) { - return; - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static void nvme_verify_cb(void *opaque, int ret) -{ - NvmeBounceContext *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - 
BlockAcctCookie *acct = &req->acct; - BlockAcctStats *stats = blk_get_stats(blk); - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint64_t slba = le64_to_cpu(rw->slba); - uint16_t ctrl = le16_to_cpu(rw->control); - uint16_t apptag = le16_to_cpu(rw->apptag); - uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); - uint16_t status; - - trace_pci_nvme_verify_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag, - appmask, reftag); - - if (ret) { - block_acct_failed(stats, acct); - nvme_aio_err(req, ret); - goto out; - } - - block_acct_done(stats, acct); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, - ctx->mdata.iov.size, slba); - if (status) { - req->status = status; - goto out; - } - - req->status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, - ctx->mdata.bounce, ctx->mdata.iov.size, - ctrl, slba, apptag, appmask, reftag); - } - -out: - qemu_iovec_destroy(&ctx->data.iov); - g_free(ctx->data.bounce); - - qemu_iovec_destroy(&ctx->mdata.iov); - g_free(ctx->mdata.bounce); - - g_free(ctx); - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - - -static void nvme_verify_mdata_in_cb(void *opaque, int ret) -{ - NvmeBounceContext *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - size_t mlen = nvme_m2b(ns, nlb); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); - BlockBackend *blk = ns->blkconf.blk; - - trace_pci_nvme_verify_mdata_in_cb(nvme_cid(req), blk_name(blk)); - - if (ret) { - goto out; - } - - ctx->mdata.bounce = g_malloc(mlen); - - qemu_iovec_reset(&ctx->mdata.iov); - qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); - - req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, - nvme_verify_cb, ctx); - return; - -out: - nvme_verify_cb(ctx, ret); -} - -static void nvme_aio_discard_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - uintptr_t *discards = (uintptr_t *)&req->opaque; - - trace_pci_nvme_aio_discard_cb(nvme_cid(req)); - - if (ret) { - nvme_aio_err(req, ret); - } - - (*discards)--; - - if (*discards) { - return; - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -struct nvme_zone_reset_ctx { - NvmeRequest *req; - NvmeZone *zone; -}; - -static void nvme_aio_zone_reset_complete_cb(void *opaque, int ret) -{ - struct nvme_zone_reset_ctx *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - NvmeZone *zone = ctx->zone; - uintptr_t *resets = (uintptr_t *)&req->opaque; - - if (ret) { - nvme_aio_err(req, ret); - goto out; - } - - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - nvme_aor_dec_open(ns); - /* fall through */ - case NVME_ZONE_STATE_CLOSED: - nvme_aor_dec_active(ns); - /* fall through */ - case NVME_ZONE_STATE_FULL: - zone->w_ptr = zone->d.zslba; - zone->d.wp = zone->w_ptr; - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY); - /* fall through */ - default: - break; - } - -out: - g_free(ctx); - - (*resets)--; - - if (*resets) { - return; - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static void nvme_aio_zone_reset_cb(void *opaque, int ret) -{ - struct nvme_zone_reset_ctx *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - NvmeZone *zone = ctx->zone; - - trace_pci_nvme_aio_zone_reset_cb(nvme_cid(req), zone->d.zslba); - - if (ret) { - goto out; - } - - if 
(nvme_msize(ns)) { - int64_t offset = ns->mdata_offset + nvme_m2b(ns, zone->d.zslba); - - blk_aio_pwrite_zeroes(ns->blkconf.blk, offset, - nvme_m2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP, - nvme_aio_zone_reset_complete_cb, ctx); - return; - } - -out: - nvme_aio_zone_reset_complete_cb(opaque, ret); -} - -struct nvme_copy_ctx { - int copies; - uint8_t *bounce; - uint8_t *mbounce; - uint32_t nlb; - NvmeCopySourceRange *ranges; -}; - -struct nvme_copy_in_ctx { - NvmeRequest *req; - QEMUIOVector iov; - NvmeCopySourceRange *range; -}; - -static void nvme_copy_complete_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - struct nvme_copy_ctx *ctx = req->opaque; - - if (ret) { - block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct); - nvme_aio_err(req, ret); - goto out; - } - - block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct); - -out: - if (ns->params.zoned) { - NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; - uint64_t sdlba = le64_to_cpu(copy->sdlba); - NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba); - - __nvme_advance_zone_wp(ns, zone, ctx->nlb); - } - - g_free(ctx->bounce); - g_free(ctx->mbounce); - g_free(ctx); - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static void nvme_copy_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - struct nvme_copy_ctx *ctx = req->opaque; - - trace_pci_nvme_copy_cb(nvme_cid(req)); - - if (ret) { - goto out; - } - - if (nvme_msize(ns)) { - NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; - uint64_t sdlba = le64_to_cpu(copy->sdlba); - int64_t offset = ns->mdata_offset + nvme_m2b(ns, sdlba); - - qemu_iovec_reset(&req->sg.iov); - qemu_iovec_add(&req->sg.iov, ctx->mbounce, nvme_m2b(ns, ctx->nlb)); - - req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &req->sg.iov, 0, - nvme_copy_complete_cb, req); - return; - } - -out: - nvme_copy_complete_cb(opaque, ret); -} - -static void nvme_copy_in_complete(NvmeRequest *req) -{ - NvmeNamespace *ns = req->ns; - NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; - struct nvme_copy_ctx *ctx = req->opaque; - uint64_t sdlba = le64_to_cpu(copy->sdlba); - uint16_t status; - - trace_pci_nvme_copy_in_complete(nvme_cid(req)); - - block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - uint16_t prinfor = (copy->control[0] >> 4) & 0xf; - uint16_t prinfow = (copy->control[2] >> 2) & 0xf; - uint16_t nr = copy->nr + 1; - NvmeCopySourceRange *range; - uint64_t slba; - uint32_t nlb; - uint16_t apptag, appmask; - uint32_t reftag; - uint8_t *buf = ctx->bounce, *mbuf = ctx->mbounce; - size_t len, mlen; - int i; - - /* - * The dif helpers expects prinfo to be similar to the control field of - * the NvmeRwCmd, so shift by 10 to fake it. 
- */ - prinfor = prinfor << 10; - prinfow = prinfow << 10; - - for (i = 0; i < nr; i++) { - range = &ctx->ranges[i]; - slba = le64_to_cpu(range->slba); - nlb = le16_to_cpu(range->nlb) + 1; - len = nvme_l2b(ns, nlb); - mlen = nvme_m2b(ns, nlb); - apptag = le16_to_cpu(range->apptag); - appmask = le16_to_cpu(range->appmask); - reftag = le32_to_cpu(range->reftag); - - status = nvme_dif_check(ns, buf, len, mbuf, mlen, prinfor, slba, - apptag, appmask, reftag); - if (status) { - goto invalid; - } - - buf += len; - mbuf += mlen; - } - - apptag = le16_to_cpu(copy->apptag); - appmask = le16_to_cpu(copy->appmask); - reftag = le32_to_cpu(copy->reftag); - - if (prinfow & NVME_RW_PRINFO_PRACT) { - size_t len = nvme_l2b(ns, ctx->nlb); - size_t mlen = nvme_m2b(ns, ctx->nlb); - - status = nvme_check_prinfo(ns, prinfow, sdlba, reftag); - if (status) { - goto invalid; - } - - nvme_dif_pract_generate_dif(ns, ctx->bounce, len, ctx->mbounce, - mlen, apptag, reftag); - } else { - status = nvme_dif_check(ns, ctx->bounce, len, ctx->mbounce, mlen, - prinfow, sdlba, apptag, appmask, reftag); - if (status) { - goto invalid; - } - } - } - - status = nvme_check_bounds(ns, sdlba, ctx->nlb); - if (status) { - trace_pci_nvme_err_invalid_lba_range(sdlba, ctx->nlb, ns->id_ns.nsze); - goto invalid; - } - - if (ns->params.zoned) { - NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba); - - status = nvme_check_zone_write(ns, zone, sdlba, ctx->nlb); - if (status) { - goto invalid; - } - - status = nvme_zrm_auto(ns, zone); - if (status) { - goto invalid; - } - - zone->w_ptr += ctx->nlb; - } - - qemu_iovec_init(&req->sg.iov, 1); - qemu_iovec_add(&req->sg.iov, ctx->bounce, nvme_l2b(ns, ctx->nlb)); - - block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0, - BLOCK_ACCT_WRITE); - - req->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, sdlba), - &req->sg.iov, 0, nvme_copy_cb, req); - - return; - -invalid: - req->status = status; - - g_free(ctx->bounce); - g_free(ctx); - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static void nvme_aio_copy_in_cb(void *opaque, int ret) -{ - struct nvme_copy_in_ctx *in_ctx = opaque; - NvmeRequest *req = in_ctx->req; - NvmeNamespace *ns = req->ns; - struct nvme_copy_ctx *ctx = req->opaque; - - qemu_iovec_destroy(&in_ctx->iov); - g_free(in_ctx); - - trace_pci_nvme_aio_copy_in_cb(nvme_cid(req)); - - if (ret) { - nvme_aio_err(req, ret); - } - - ctx->copies--; - - if (ctx->copies) { - return; - } - - if (req->status) { - block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct); - - g_free(ctx->bounce); - g_free(ctx->mbounce); - g_free(ctx); - - nvme_enqueue_req_completion(nvme_cq(req), req); - - return; - } - - nvme_copy_in_complete(req); -} - -struct nvme_compare_ctx { - struct { - QEMUIOVector iov; - uint8_t *bounce; - } data; - - struct { - QEMUIOVector iov; - uint8_t *bounce; - } mdata; -}; - -static void nvme_compare_mdata_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - NvmeCtrl *n = nvme_ctrl(req); - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint16_t ctrl = le16_to_cpu(rw->control); - uint16_t apptag = le16_to_cpu(rw->apptag); - uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); - struct nvme_compare_ctx *ctx = req->opaque; - g_autofree uint8_t *buf = NULL; - uint16_t status = NVME_SUCCESS; - - trace_pci_nvme_compare_mdata_cb(nvme_cid(req)); - - buf = g_malloc(ctx->mdata.iov.size); - - status = nvme_bounce_mdata(n, buf, ctx->mdata.iov.size, - NVME_TX_DIRECTION_TO_DEVICE, req); - if (status) { 
- req->status = status; - goto out; - } - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - uint64_t slba = le64_to_cpu(rw->slba); - uint8_t *bufp; - uint8_t *mbufp = ctx->mdata.bounce; - uint8_t *end = mbufp + ctx->mdata.iov.size; - size_t msize = nvme_msize(ns); - int16_t pil = 0; - - status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, - ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, - slba, apptag, appmask, reftag); - if (status) { - req->status = status; - goto out; - } - - /* - * When formatted with protection information, do not compare the DIF - * tuple. - */ - if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); - } - - for (bufp = buf; mbufp < end; bufp += msize, mbufp += msize) { - if (memcmp(bufp + pil, mbufp + pil, msize - pil)) { - req->status = NVME_CMP_FAILURE; - goto out; - } - } - - goto out; - } - - if (memcmp(buf, ctx->mdata.bounce, ctx->mdata.iov.size)) { - req->status = NVME_CMP_FAILURE; - goto out; - } - -out: - qemu_iovec_destroy(&ctx->data.iov); - g_free(ctx->data.bounce); - - qemu_iovec_destroy(&ctx->mdata.iov); - g_free(ctx->mdata.bounce); - - g_free(ctx); - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static void nvme_compare_data_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeCtrl *n = nvme_ctrl(req); - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - BlockAcctCookie *acct = &req->acct; - BlockAcctStats *stats = blk_get_stats(blk); - - struct nvme_compare_ctx *ctx = req->opaque; - g_autofree uint8_t *buf = NULL; - uint16_t status; - - trace_pci_nvme_compare_data_cb(nvme_cid(req)); - - if (ret) { - block_acct_failed(stats, acct); - nvme_aio_err(req, ret); - goto out; - } - - buf = g_malloc(ctx->data.iov.size); - - status = nvme_bounce_data(n, buf, ctx->data.iov.size, - NVME_TX_DIRECTION_TO_DEVICE, req); - if (status) { - req->status = status; - goto out; - } - - if (memcmp(buf, ctx->data.bounce, ctx->data.iov.size)) { - req->status = NVME_CMP_FAILURE; - goto out; - } - - if (nvme_msize(ns)) { - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - size_t mlen = nvme_m2b(ns, nlb); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); - - ctx->mdata.bounce = g_malloc(mlen); - - qemu_iovec_init(&ctx->mdata.iov, 1); - qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); - - req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, - nvme_compare_mdata_cb, req); - return; - } - - block_acct_done(stats, acct); - -out: - qemu_iovec_destroy(&ctx->data.iov); - g_free(ctx->data.bounce); - g_free(ctx); - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeNamespace *ns = req->ns; - NvmeDsmCmd *dsm = (NvmeDsmCmd *) &req->cmd; - - uint32_t attr = le32_to_cpu(dsm->attributes); - uint32_t nr = (le32_to_cpu(dsm->nr) & 0xff) + 1; - - uint16_t status = NVME_SUCCESS; - - trace_pci_nvme_dsm(nvme_cid(req), nvme_nsid(ns), nr, attr); - - if (attr & NVME_DSMGMT_AD) { - int64_t offset; - size_t len; - NvmeDsmRange range[nr]; - uintptr_t *discards = (uintptr_t *)&req->opaque; - - status = nvme_h2c(n, (uint8_t *)range, sizeof(range), req); - if (status) { - return status; - } - - /* - * AIO callbacks may be called immediately, so initialize discards to 1 - * to make sure the the callback does not complete the request before - * all discards have been issued. 
- */ - *discards = 1; - - for (int i = 0; i < nr; i++) { - uint64_t slba = le64_to_cpu(range[i].slba); - uint32_t nlb = le32_to_cpu(range[i].nlb); - - if (nvme_check_bounds(ns, slba, nlb)) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, - ns->id_ns.nsze); - continue; - } - - trace_pci_nvme_dsm_deallocate(nvme_cid(req), nvme_nsid(ns), slba, - nlb); - - if (nlb > n->dmrsl) { - trace_pci_nvme_dsm_single_range_limit_exceeded(nlb, n->dmrsl); - } - - offset = nvme_l2b(ns, slba); - len = nvme_l2b(ns, nlb); - - while (len) { - size_t bytes = MIN(BDRV_REQUEST_MAX_BYTES, len); - - (*discards)++; - - blk_aio_pdiscard(ns->blkconf.blk, offset, bytes, - nvme_aio_discard_cb, req); - - offset += bytes; - len -= bytes; - } - } - - /* account for the 1-initialization */ - (*discards)--; - - if (*discards) { - status = NVME_NO_COMPLETE; - } else { - status = req->status; - } - } - - return status; -} - -static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - size_t len = nvme_l2b(ns, nlb); - int64_t offset = nvme_l2b(ns, slba); - uint16_t ctrl = le16_to_cpu(rw->control); - uint32_t reftag = le32_to_cpu(rw->reftag); - NvmeBounceContext *ctx = NULL; - uint16_t status; - - trace_pci_nvme_verify(nvme_cid(req), nvme_nsid(ns), slba, nlb); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - status = nvme_check_prinfo(ns, ctrl, slba, reftag); - if (status) { - return status; - } - - if (ctrl & NVME_RW_PRINFO_PRACT) { - return NVME_INVALID_PROT_INFO | NVME_DNR; - } - } - - if (len > n->page_size << n->params.vsl) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - status = nvme_check_bounds(ns, slba, nlb); - if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); - return status; - } - - if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { - status = nvme_check_dulbe(ns, slba, nlb); - if (status) { - return status; - } - } - - ctx = g_new0(NvmeBounceContext, 1); - ctx->req = req; - - ctx->data.bounce = g_malloc(len); - - qemu_iovec_init(&ctx->data.iov, 1); - qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len); - - block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, - BLOCK_ACCT_READ); - - req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0, - nvme_verify_mdata_in_cb, ctx); - return NVME_NO_COMPLETE; -} - -static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeNamespace *ns = req->ns; - NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; - - uint16_t nr = copy->nr + 1; - uint8_t format = copy->control[0] & 0xf; - - /* - * Shift the PRINFOR/PRINFOW values by 10 to allow reusing the - * NVME_RW_PRINFO constants. 
- */ - uint16_t prinfor = ((copy->control[0] >> 4) & 0xf) << 10; - uint16_t prinfow = ((copy->control[2] >> 2) & 0xf) << 10; - - uint32_t nlb = 0; - uint8_t *bounce = NULL, *bouncep = NULL; - uint8_t *mbounce = NULL, *mbouncep = NULL; - struct nvme_copy_ctx *ctx; - uint16_t status; - int i; - - trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && - ((prinfor & NVME_RW_PRINFO_PRACT) != (prinfow & NVME_RW_PRINFO_PRACT))) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (!(n->id_ctrl.ocfs & (1 << format))) { - trace_pci_nvme_err_copy_invalid_format(format); - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (nr > ns->id_ns.msrc + 1) { - return NVME_CMD_SIZE_LIMIT | NVME_DNR; - } - - ctx = g_new(struct nvme_copy_ctx, 1); - ctx->ranges = g_new(NvmeCopySourceRange, nr); - - status = nvme_h2c(n, (uint8_t *)ctx->ranges, - nr * sizeof(NvmeCopySourceRange), req); - if (status) { - goto out; - } - - for (i = 0; i < nr; i++) { - uint64_t slba = le64_to_cpu(ctx->ranges[i].slba); - uint32_t _nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1; - - if (_nlb > le16_to_cpu(ns->id_ns.mssrl)) { - status = NVME_CMD_SIZE_LIMIT | NVME_DNR; - goto out; - } - - status = nvme_check_bounds(ns, slba, _nlb); - if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, _nlb, ns->id_ns.nsze); - goto out; - } - - if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { - status = nvme_check_dulbe(ns, slba, _nlb); - if (status) { - goto out; - } - } - - if (ns->params.zoned) { - status = nvme_check_zone_read(ns, slba, _nlb); - if (status) { - goto out; - } - } - - nlb += _nlb; - } - - if (nlb > le32_to_cpu(ns->id_ns.mcl)) { - status = NVME_CMD_SIZE_LIMIT | NVME_DNR; - goto out; - } - - bounce = bouncep = g_malloc(nvme_l2b(ns, nlb)); - if (nvme_msize(ns)) { - mbounce = mbouncep = g_malloc(nvme_m2b(ns, nlb)); - } - - block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0, - BLOCK_ACCT_READ); - - ctx->bounce = bounce; - ctx->mbounce = mbounce; - ctx->nlb = nlb; - ctx->copies = 1; - - req->opaque = ctx; - - for (i = 0; i < nr; i++) { - uint64_t slba = le64_to_cpu(ctx->ranges[i].slba); - uint32_t nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1; - - size_t len = nvme_l2b(ns, nlb); - int64_t offset = nvme_l2b(ns, slba); - - trace_pci_nvme_copy_source_range(slba, nlb); - - struct nvme_copy_in_ctx *in_ctx = g_new(struct nvme_copy_in_ctx, 1); - in_ctx->req = req; - - qemu_iovec_init(&in_ctx->iov, 1); - qemu_iovec_add(&in_ctx->iov, bouncep, len); - - ctx->copies++; - - blk_aio_preadv(ns->blkconf.blk, offset, &in_ctx->iov, 0, - nvme_aio_copy_in_cb, in_ctx); - - bouncep += len; - - if (nvme_msize(ns)) { - len = nvme_m2b(ns, nlb); - offset = ns->mdata_offset + nvme_m2b(ns, slba); - - in_ctx = g_new(struct nvme_copy_in_ctx, 1); - in_ctx->req = req; - - qemu_iovec_init(&in_ctx->iov, 1); - qemu_iovec_add(&in_ctx->iov, mbouncep, len); - - ctx->copies++; - - blk_aio_preadv(ns->blkconf.blk, offset, &in_ctx->iov, 0, - nvme_aio_copy_in_cb, in_ctx); - - mbouncep += len; - } - } - - /* account for the 1-initialization */ - ctx->copies--; - - if (!ctx->copies) { - nvme_copy_in_complete(req); - } - - return NVME_NO_COMPLETE; - -out: - g_free(ctx->ranges); - g_free(ctx); - - return status; -} - -static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - uint16_t ctrl = le16_to_cpu(rw->control); - size_t 
data_len = nvme_l2b(ns, nlb); - size_t len = data_len; - int64_t offset = nvme_l2b(ns, slba); - struct nvme_compare_ctx *ctx = NULL; - uint16_t status; - - trace_pci_nvme_compare(nvme_cid(req), nvme_nsid(ns), slba, nlb); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (ctrl & NVME_RW_PRINFO_PRACT)) { - return NVME_INVALID_PROT_INFO | NVME_DNR; - } - - if (nvme_ns_ext(ns)) { - len += nvme_m2b(ns, nlb); - } - - status = nvme_check_mdts(n, len); - if (status) { - return status; - } - - status = nvme_check_bounds(ns, slba, nlb); - if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); - return status; - } - - if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { - status = nvme_check_dulbe(ns, slba, nlb); - if (status) { - return status; - } - } - - status = nvme_map_dptr(n, &req->sg, len, &req->cmd); - if (status) { - return status; - } - - ctx = g_new(struct nvme_compare_ctx, 1); - ctx->data.bounce = g_malloc(data_len); - - req->opaque = ctx; - - qemu_iovec_init(&ctx->data.iov, 1); - qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, data_len); - - block_acct_start(blk_get_stats(blk), &req->acct, data_len, - BLOCK_ACCT_READ); - req->aiocb = blk_aio_preadv(blk, offset, &ctx->data.iov, 0, - nvme_compare_data_cb, req); - - return NVME_NO_COMPLETE; -} - -static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) -{ - uint32_t nsid = le32_to_cpu(req->cmd.nsid); - uintptr_t *num_flushes = (uintptr_t *)&req->opaque; - uint16_t status; - struct nvme_aio_flush_ctx *ctx; - NvmeNamespace *ns; - - trace_pci_nvme_flush(nvme_cid(req), nsid); - - if (nsid != NVME_NSID_BROADCAST) { - req->ns = nvme_ns(n, nsid); - if (unlikely(!req->ns)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0, - BLOCK_ACCT_FLUSH); - req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_misc_cb, req); - return NVME_NO_COMPLETE; - } - - /* 1-initialize; see comment in nvme_dsm */ - *num_flushes = 1; - - for (int i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - ctx = g_new(struct nvme_aio_flush_ctx, 1); - ctx->req = req; - ctx->ns = ns; - - (*num_flushes)++; - - block_acct_start(blk_get_stats(ns->blkconf.blk), &ctx->acct, 0, - BLOCK_ACCT_FLUSH); - blk_aio_flush(ns->blkconf.blk, nvme_aio_flush_cb, ctx); - } - - /* account for the 1-initialization */ - (*num_flushes)--; - - if (*num_flushes) { - status = NVME_NO_COMPLETE; - } else { - status = req->status; - } - - return status; -} - -static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - NvmeNamespace *ns = req->ns; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; - uint16_t ctrl = le16_to_cpu(rw->control); - uint64_t data_size = nvme_l2b(ns, nlb); - uint64_t mapped_size = data_size; - uint64_t data_offset; - BlockBackend *blk = ns->blkconf.blk; - uint16_t status; - - if (nvme_ns_ext(ns)) { - mapped_size += nvme_m2b(ns, nlb); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - bool pract = ctrl & NVME_RW_PRINFO_PRACT; - - if (pract && nvme_msize(ns) == 8) { - mapped_size = data_size; - } - } - } - - trace_pci_nvme_read(nvme_cid(req), nvme_nsid(ns), nlb, mapped_size, slba); - - status = nvme_check_mdts(n, mapped_size); - if (status) { - goto invalid; - } - - status = nvme_check_bounds(ns, slba, nlb); - if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); - goto invalid; - } - - if (ns->params.zoned) { - status = nvme_check_zone_read(ns, slba, 
nlb); - if (status) { - trace_pci_nvme_err_zone_read_not_ok(slba, nlb, status); - goto invalid; - } - } - - if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { - status = nvme_check_dulbe(ns, slba, nlb); - if (status) { - goto invalid; - } - } - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - return nvme_dif_rw(n, req); - } - - status = nvme_map_data(n, nlb, req); - if (status) { - goto invalid; - } - - data_offset = nvme_l2b(ns, slba); - - block_acct_start(blk_get_stats(blk), &req->acct, data_size, - BLOCK_ACCT_READ); - nvme_blk_read(blk, data_offset, nvme_rw_cb, req); - return NVME_NO_COMPLETE; - -invalid: - block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_READ); - return status | NVME_DNR; -} - -static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, - bool wrz) -{ - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - NvmeNamespace *ns = req->ns; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; - uint16_t ctrl = le16_to_cpu(rw->control); - uint64_t data_size = nvme_l2b(ns, nlb); - uint64_t mapped_size = data_size; - uint64_t data_offset; - NvmeZone *zone; - NvmeZonedResult *res = (NvmeZonedResult *)&req->cqe; - BlockBackend *blk = ns->blkconf.blk; - uint16_t status; - - if (nvme_ns_ext(ns)) { - mapped_size += nvme_m2b(ns, nlb); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - bool pract = ctrl & NVME_RW_PRINFO_PRACT; - - if (pract && nvme_msize(ns) == 8) { - mapped_size -= nvme_m2b(ns, nlb); - } - } - } - - trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode), - nvme_nsid(ns), nlb, mapped_size, slba); - - if (!wrz) { - status = nvme_check_mdts(n, mapped_size); - if (status) { - goto invalid; - } - } - - status = nvme_check_bounds(ns, slba, nlb); - if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); - goto invalid; - } - - if (ns->params.zoned) { - zone = nvme_get_zone_by_slba(ns, slba); - - if (append) { - bool piremap = !!(ctrl & NVME_RW_PIREMAP); - - if (unlikely(slba != zone->d.zslba)) { - trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba); - status = NVME_INVALID_FIELD; - goto invalid; - } - - if (n->params.zasl && - data_size > (uint64_t)n->page_size << n->params.zasl) { - trace_pci_nvme_err_zasl(data_size); - return NVME_INVALID_FIELD | NVME_DNR; - } - - slba = zone->w_ptr; - rw->slba = cpu_to_le64(slba); - res->slba = cpu_to_le64(slba); - - switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - case NVME_ID_NS_DPS_TYPE_1: - if (!piremap) { - return NVME_INVALID_PROT_INFO | NVME_DNR; - } - - /* fallthrough */ - - case NVME_ID_NS_DPS_TYPE_2: - if (piremap) { - uint32_t reftag = le32_to_cpu(rw->reftag); - rw->reftag = cpu_to_le32(reftag + (slba - zone->d.zslba)); - } - - break; - - case NVME_ID_NS_DPS_TYPE_3: - if (piremap) { - return NVME_INVALID_PROT_INFO | NVME_DNR; - } - - break; - } - } - - status = nvme_check_zone_write(ns, zone, slba, nlb); - if (status) { - goto invalid; - } - - status = nvme_zrm_auto(ns, zone); - if (status) { - goto invalid; - } - - zone->w_ptr += nlb; - } - - data_offset = nvme_l2b(ns, slba); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - return nvme_dif_rw(n, req); - } - - if (!wrz) { - status = nvme_map_data(n, nlb, req); - if (status) { - goto invalid; - } - - block_acct_start(blk_get_stats(blk), &req->acct, data_size, - BLOCK_ACCT_WRITE); - nvme_blk_write(blk, data_offset, nvme_rw_cb, req); - } else { - req->aiocb = blk_aio_pwrite_zeroes(blk, data_offset, data_size, - BDRV_REQ_MAY_UNMAP, nvme_rw_cb, - req); - } - - return NVME_NO_COMPLETE; - -invalid: - 
block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_WRITE); - return status | NVME_DNR; -} - -static inline uint16_t nvme_write(NvmeCtrl *n, NvmeRequest *req) -{ - return nvme_do_write(n, req, false, false); -} - -static inline uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req) -{ - return nvme_do_write(n, req, false, true); -} - -static inline uint16_t nvme_zone_append(NvmeCtrl *n, NvmeRequest *req) -{ - return nvme_do_write(n, req, true, false); -} - -static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeNamespace *ns, NvmeCmd *c, - uint64_t *slba, uint32_t *zone_idx) -{ - uint32_t dw10 = le32_to_cpu(c->cdw10); - uint32_t dw11 = le32_to_cpu(c->cdw11); - - if (!ns->params.zoned) { - trace_pci_nvme_err_invalid_opc(c->opcode); - return NVME_INVALID_OPCODE | NVME_DNR; - } - - *slba = ((uint64_t)dw11) << 32 | dw10; - if (unlikely(*slba >= ns->id_ns.nsze)) { - trace_pci_nvme_err_invalid_lba_range(*slba, 0, ns->id_ns.nsze); - *slba = 0; - return NVME_LBA_RANGE | NVME_DNR; - } - - *zone_idx = nvme_zone_idx(ns, *slba); - assert(*zone_idx < ns->num_zones); - - return NVME_SUCCESS; -} - -typedef uint16_t (*op_handler_t)(NvmeNamespace *, NvmeZone *, NvmeZoneState, - NvmeRequest *); - -enum NvmeZoneProcessingMask { - NVME_PROC_CURRENT_ZONE = 0, - NVME_PROC_OPENED_ZONES = 1 << 0, - NVME_PROC_CLOSED_ZONES = 1 << 1, - NVME_PROC_READ_ONLY_ZONES = 1 << 2, - NVME_PROC_FULL_ZONES = 1 << 3, -}; - -static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone, - NvmeZoneState state, NvmeRequest *req) -{ - return nvme_zrm_open(ns, zone); -} - -static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone, - NvmeZoneState state, NvmeRequest *req) -{ - return nvme_zrm_close(ns, zone); -} - -static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone, - NvmeZoneState state, NvmeRequest *req) -{ - return nvme_zrm_finish(ns, zone); -} - -static uint16_t nvme_reset_zone(NvmeNamespace *ns, NvmeZone *zone, - NvmeZoneState state, NvmeRequest *req) -{ - uintptr_t *resets = (uintptr_t *)&req->opaque; - struct nvme_zone_reset_ctx *ctx; - - switch (state) { - case NVME_ZONE_STATE_EMPTY: - return NVME_SUCCESS; - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - case NVME_ZONE_STATE_CLOSED: - case NVME_ZONE_STATE_FULL: - break; - default: - return NVME_ZONE_INVAL_TRANSITION; - } - - /* - * The zone reset aio callback needs to know the zone that is being reset - * in order to transition the zone on completion. 
- */ - ctx = g_new(struct nvme_zone_reset_ctx, 1); - ctx->req = req; - ctx->zone = zone; - - (*resets)++; - - blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_l2b(ns, zone->d.zslba), - nvme_l2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP, - nvme_aio_zone_reset_cb, ctx); - - return NVME_NO_COMPLETE; -} - -static uint16_t nvme_offline_zone(NvmeNamespace *ns, NvmeZone *zone, - NvmeZoneState state, NvmeRequest *req) -{ - switch (state) { - case NVME_ZONE_STATE_READ_ONLY: - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_OFFLINE); - /* fall through */ - case NVME_ZONE_STATE_OFFLINE: - return NVME_SUCCESS; - default: - return NVME_ZONE_INVAL_TRANSITION; - } -} - -static uint16_t nvme_set_zd_ext(NvmeNamespace *ns, NvmeZone *zone) -{ - uint16_t status; - uint8_t state = nvme_get_zone_state(zone); - - if (state == NVME_ZONE_STATE_EMPTY) { - status = nvme_aor_check(ns, 1, 0); - if (status) { - return status; - } - nvme_aor_inc_active(ns); - zone->d.za |= NVME_ZA_ZD_EXT_VALID; - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED); - return NVME_SUCCESS; - } - - return NVME_ZONE_INVAL_TRANSITION; -} - -static uint16_t nvme_bulk_proc_zone(NvmeNamespace *ns, NvmeZone *zone, - enum NvmeZoneProcessingMask proc_mask, - op_handler_t op_hndlr, NvmeRequest *req) -{ - uint16_t status = NVME_SUCCESS; - NvmeZoneState zs = nvme_get_zone_state(zone); - bool proc_zone; - - switch (zs) { - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - proc_zone = proc_mask & NVME_PROC_OPENED_ZONES; - break; - case NVME_ZONE_STATE_CLOSED: - proc_zone = proc_mask & NVME_PROC_CLOSED_ZONES; - break; - case NVME_ZONE_STATE_READ_ONLY: - proc_zone = proc_mask & NVME_PROC_READ_ONLY_ZONES; - break; - case NVME_ZONE_STATE_FULL: - proc_zone = proc_mask & NVME_PROC_FULL_ZONES; - break; - default: - proc_zone = false; - } - - if (proc_zone) { - status = op_hndlr(ns, zone, zs, req); - } - - return status; -} - -static uint16_t nvme_do_zone_op(NvmeNamespace *ns, NvmeZone *zone, - enum NvmeZoneProcessingMask proc_mask, - op_handler_t op_hndlr, NvmeRequest *req) -{ - NvmeZone *next; - uint16_t status = NVME_SUCCESS; - int i; - - if (!proc_mask) { - status = op_hndlr(ns, zone, nvme_get_zone_state(zone), req); - } else { - if (proc_mask & NVME_PROC_CLOSED_ZONES) { - QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) { - status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, - req); - if (status && status != NVME_NO_COMPLETE) { - goto out; - } - } - } - if (proc_mask & NVME_PROC_OPENED_ZONES) { - QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) { - status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, - req); - if (status && status != NVME_NO_COMPLETE) { - goto out; - } - } - - QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) { - status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, - req); - if (status && status != NVME_NO_COMPLETE) { - goto out; - } - } - } - if (proc_mask & NVME_PROC_FULL_ZONES) { - QTAILQ_FOREACH_SAFE(zone, &ns->full_zones, entry, next) { - status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, - req); - if (status && status != NVME_NO_COMPLETE) { - goto out; - } - } - } - - if (proc_mask & NVME_PROC_READ_ONLY_ZONES) { - for (i = 0; i < ns->num_zones; i++, zone++) { - status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, - req); - if (status && status != NVME_NO_COMPLETE) { - goto out; - } - } - } - } - -out: - return status; -} - -static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeCmd *cmd = (NvmeCmd *)&req->cmd; - 
NvmeNamespace *ns = req->ns; - NvmeZone *zone; - uintptr_t *resets; - uint8_t *zd_ext; - uint32_t dw13 = le32_to_cpu(cmd->cdw13); - uint64_t slba = 0; - uint32_t zone_idx = 0; - uint16_t status; - uint8_t action; - bool all; - enum NvmeZoneProcessingMask proc_mask = NVME_PROC_CURRENT_ZONE; - - action = dw13 & 0xff; - all = dw13 & 0x100; - - req->status = NVME_SUCCESS; - - if (!all) { - status = nvme_get_mgmt_zone_slba_idx(ns, cmd, &slba, &zone_idx); - if (status) { - return status; - } - } - - zone = &ns->zone_array[zone_idx]; - if (slba != zone->d.zslba) { - trace_pci_nvme_err_unaligned_zone_cmd(action, slba, zone->d.zslba); - return NVME_INVALID_FIELD | NVME_DNR; - } - - switch (action) { - - case NVME_ZONE_ACTION_OPEN: - if (all) { - proc_mask = NVME_PROC_CLOSED_ZONES; - } - trace_pci_nvme_open_zone(slba, zone_idx, all); - status = nvme_do_zone_op(ns, zone, proc_mask, nvme_open_zone, req); - break; - - case NVME_ZONE_ACTION_CLOSE: - if (all) { - proc_mask = NVME_PROC_OPENED_ZONES; - } - trace_pci_nvme_close_zone(slba, zone_idx, all); - status = nvme_do_zone_op(ns, zone, proc_mask, nvme_close_zone, req); - break; - - case NVME_ZONE_ACTION_FINISH: - if (all) { - proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES; - } - trace_pci_nvme_finish_zone(slba, zone_idx, all); - status = nvme_do_zone_op(ns, zone, proc_mask, nvme_finish_zone, req); - break; - - case NVME_ZONE_ACTION_RESET: - resets = (uintptr_t *)&req->opaque; - - if (all) { - proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES | - NVME_PROC_FULL_ZONES; - } - trace_pci_nvme_reset_zone(slba, zone_idx, all); - - *resets = 1; - - status = nvme_do_zone_op(ns, zone, proc_mask, nvme_reset_zone, req); - - (*resets)--; - - return *resets ? NVME_NO_COMPLETE : req->status; - - case NVME_ZONE_ACTION_OFFLINE: - if (all) { - proc_mask = NVME_PROC_READ_ONLY_ZONES; - } - trace_pci_nvme_offline_zone(slba, zone_idx, all); - status = nvme_do_zone_op(ns, zone, proc_mask, nvme_offline_zone, req); - break; - - case NVME_ZONE_ACTION_SET_ZD_EXT: - trace_pci_nvme_set_descriptor_extension(slba, zone_idx); - if (all || !ns->params.zd_extension_size) { - return NVME_INVALID_FIELD | NVME_DNR; - } - zd_ext = nvme_get_zd_extension(ns, zone_idx); - status = nvme_h2c(n, zd_ext, ns->params.zd_extension_size, req); - if (status) { - trace_pci_nvme_err_zd_extension_map_error(zone_idx); - return status; - } - - status = nvme_set_zd_ext(ns, zone); - if (status == NVME_SUCCESS) { - trace_pci_nvme_zd_extension_set(zone_idx); - return status; - } - break; - - default: - trace_pci_nvme_err_invalid_mgmt_action(action); - status = NVME_INVALID_FIELD; - } - - if (status == NVME_ZONE_INVAL_TRANSITION) { - trace_pci_nvme_err_invalid_zone_state_transition(action, slba, - zone->d.za); - } - if (status) { - status |= NVME_DNR; - } - - return status; -} - -static bool nvme_zone_matches_filter(uint32_t zafs, NvmeZone *zl) -{ - NvmeZoneState zs = nvme_get_zone_state(zl); - - switch (zafs) { - case NVME_ZONE_REPORT_ALL: - return true; - case NVME_ZONE_REPORT_EMPTY: - return zs == NVME_ZONE_STATE_EMPTY; - case NVME_ZONE_REPORT_IMPLICITLY_OPEN: - return zs == NVME_ZONE_STATE_IMPLICITLY_OPEN; - case NVME_ZONE_REPORT_EXPLICITLY_OPEN: - return zs == NVME_ZONE_STATE_EXPLICITLY_OPEN; - case NVME_ZONE_REPORT_CLOSED: - return zs == NVME_ZONE_STATE_CLOSED; - case NVME_ZONE_REPORT_FULL: - return zs == NVME_ZONE_STATE_FULL; - case NVME_ZONE_REPORT_READ_ONLY: - return zs == NVME_ZONE_STATE_READ_ONLY; - case NVME_ZONE_REPORT_OFFLINE: - return zs == NVME_ZONE_STATE_OFFLINE; - 
default: - return false; - } -} - -static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeCmd *cmd = (NvmeCmd *)&req->cmd; - NvmeNamespace *ns = req->ns; - /* cdw12 is zero-based number of dwords to return. Convert to bytes */ - uint32_t data_size = (le32_to_cpu(cmd->cdw12) + 1) << 2; - uint32_t dw13 = le32_to_cpu(cmd->cdw13); - uint32_t zone_idx, zra, zrasf, partial; - uint64_t max_zones, nr_zones = 0; - uint16_t status; - uint64_t slba; - NvmeZoneDescr *z; - NvmeZone *zone; - NvmeZoneReportHeader *header; - void *buf, *buf_p; - size_t zone_entry_sz; - int i; - - req->status = NVME_SUCCESS; - - status = nvme_get_mgmt_zone_slba_idx(ns, cmd, &slba, &zone_idx); - if (status) { - return status; - } - - zra = dw13 & 0xff; - if (zra != NVME_ZONE_REPORT && zra != NVME_ZONE_REPORT_EXTENDED) { - return NVME_INVALID_FIELD | NVME_DNR; - } - if (zra == NVME_ZONE_REPORT_EXTENDED && !ns->params.zd_extension_size) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - zrasf = (dw13 >> 8) & 0xff; - if (zrasf > NVME_ZONE_REPORT_OFFLINE) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (data_size < sizeof(NvmeZoneReportHeader)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - status = nvme_check_mdts(n, data_size); - if (status) { - return status; - } - - partial = (dw13 >> 16) & 0x01; - - zone_entry_sz = sizeof(NvmeZoneDescr); - if (zra == NVME_ZONE_REPORT_EXTENDED) { - zone_entry_sz += ns->params.zd_extension_size; - } - - max_zones = (data_size - sizeof(NvmeZoneReportHeader)) / zone_entry_sz; - buf = g_malloc0(data_size); - - zone = &ns->zone_array[zone_idx]; - for (i = zone_idx; i < ns->num_zones; i++) { - if (partial && nr_zones >= max_zones) { - break; - } - if (nvme_zone_matches_filter(zrasf, zone++)) { - nr_zones++; - } - } - header = (NvmeZoneReportHeader *)buf; - header->nr_zones = cpu_to_le64(nr_zones); - - buf_p = buf + sizeof(NvmeZoneReportHeader); - for (; zone_idx < ns->num_zones && max_zones > 0; zone_idx++) { - zone = &ns->zone_array[zone_idx]; - if (nvme_zone_matches_filter(zrasf, zone)) { - z = (NvmeZoneDescr *)buf_p; - buf_p += sizeof(NvmeZoneDescr); - - z->zt = zone->d.zt; - z->zs = zone->d.zs; - z->zcap = cpu_to_le64(zone->d.zcap); - z->zslba = cpu_to_le64(zone->d.zslba); - z->za = zone->d.za; - - if (nvme_wp_is_valid(zone)) { - z->wp = cpu_to_le64(zone->d.wp); - } else { - z->wp = cpu_to_le64(~0ULL); - } - - if (zra == NVME_ZONE_REPORT_EXTENDED) { - if (zone->d.za & NVME_ZA_ZD_EXT_VALID) { - memcpy(buf_p, nvme_get_zd_extension(ns, zone_idx), - ns->params.zd_extension_size); - } - buf_p += ns->params.zd_extension_size; - } - - max_zones--; - } - } - - status = nvme_c2h(n, (uint8_t *)buf, data_size, req); - - g_free(buf); - - return status; -} - -static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) -{ - uint32_t nsid = le32_to_cpu(req->cmd.nsid); - uint16_t status; - - trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), - req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode)); - - if (!nvme_nsid_valid(n, nsid)) { - return NVME_INVALID_NSID | NVME_DNR; - } - - /* - * In the base NVM command set, Flush may apply to all namespaces - * (indicated by NSID being set to 0xFFFFFFFF). But if that feature is used - * along with TP 4056 (Namespace Types), it may be pretty screwed up. - * - * If NSID is indeed set to 0xFFFFFFFF, we simply cannot associate the - * opcode with a specific command since we cannot determine a unique I/O - * command set. 
Opcode 0x0 could have any other meaning than something - * equivalent to flushing and, say, it DOES have completely different - * semantics in some other command set - does an NSID of 0xFFFFFFFF then - * mean "for all namespaces, apply whatever command set specific command - * that uses the 0x0 opcode?" Or does it mean "for all namespaces, apply - * whatever command that uses the 0x0 opcode if, and only if, it allows - * NSID to be 0xFFFFFFFF"? - * - * Anyway (and luckily), for now, we do not care about this since the - * device only supports namespace types that include the NVM Flush command - * (NVM and Zoned), so always do an NVM Flush. - */ - if (req->cmd.opcode == NVME_CMD_FLUSH) { - return nvme_flush(n, req); - } - - req->ns = nvme_ns(n, nsid); - if (unlikely(!req->ns)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (!(req->ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { - trace_pci_nvme_err_invalid_opc(req->cmd.opcode); - return NVME_INVALID_OPCODE | NVME_DNR; - } - - status = nvme_ns_status(req->ns); - if (unlikely(status)) { - return status; - } - - switch (req->cmd.opcode) { - case NVME_CMD_WRITE_ZEROES: - return nvme_write_zeroes(n, req); - case NVME_CMD_ZONE_APPEND: - return nvme_zone_append(n, req); - case NVME_CMD_WRITE: - return nvme_write(n, req); - case NVME_CMD_READ: - return nvme_read(n, req); - case NVME_CMD_COMPARE: - return nvme_compare(n, req); - case NVME_CMD_DSM: - return nvme_dsm(n, req); - case NVME_CMD_VERIFY: - return nvme_verify(n, req); - case NVME_CMD_COPY: - return nvme_copy(n, req); - case NVME_CMD_ZONE_MGMT_SEND: - return nvme_zone_mgmt_send(n, req); - case NVME_CMD_ZONE_MGMT_RECV: - return nvme_zone_mgmt_recv(n, req); - default: - assert(false); - } - - return NVME_INVALID_OPCODE | NVME_DNR; -} - -static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n) -{ - n->sq[sq->sqid] = NULL; - timer_free(sq->timer); - g_free(sq->io_req); - if (sq->sqid) { - g_free(sq); - } -} - -static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd; - NvmeRequest *r, *next; - NvmeSQueue *sq; - NvmeCQueue *cq; - uint16_t qid = le16_to_cpu(c->qid); - uint32_t nsid; - - if (unlikely(!qid || nvme_check_sqid(n, qid))) { - trace_pci_nvme_err_invalid_del_sq(qid); - return NVME_INVALID_QID | NVME_DNR; - } - - trace_pci_nvme_del_sq(qid); - - sq = n->sq[qid]; - while (!QTAILQ_EMPTY(&sq->out_req_list)) { - r = QTAILQ_FIRST(&sq->out_req_list); - if (r->aiocb) { - blk_aio_cancel(r->aiocb); - } - } - - /* - * Drain all namespaces if there are still outstanding requests that we - could not cancel explicitly.
- */ - if (!QTAILQ_EMPTY(&sq->out_req_list)) { - for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { - NvmeNamespace *ns = nvme_ns(n, nsid); - if (ns) { - nvme_ns_drain(ns); - } - } - } - - assert(QTAILQ_EMPTY(&sq->out_req_list)); - - if (!nvme_check_cqid(n, sq->cqid)) { - cq = n->cq[sq->cqid]; - QTAILQ_REMOVE(&cq->sq_list, sq, entry); - - nvme_post_cqes(cq); - QTAILQ_FOREACH_SAFE(r, &cq->req_list, entry, next) { - if (r->sq == sq) { - QTAILQ_REMOVE(&cq->req_list, r, entry); - QTAILQ_INSERT_TAIL(&sq->req_list, r, entry); - } - } - } - - nvme_free_sq(sq, n); - return NVME_SUCCESS; -} - -static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, - uint16_t sqid, uint16_t cqid, uint16_t size) -{ - int i; - NvmeCQueue *cq; - - sq->ctrl = n; - sq->dma_addr = dma_addr; - sq->sqid = sqid; - sq->size = size; - sq->cqid = cqid; - sq->head = sq->tail = 0; - sq->io_req = g_new0(NvmeRequest, sq->size); - - QTAILQ_INIT(&sq->req_list); - QTAILQ_INIT(&sq->out_req_list); - for (i = 0; i < sq->size; i++) { - sq->io_req[i].sq = sq; - QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); - } - sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq); - - assert(n->cq[cqid]); - cq = n->cq[cqid]; - QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry); - n->sq[sqid] = sq; -} - -static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeSQueue *sq; - NvmeCreateSq *c = (NvmeCreateSq *)&req->cmd; - - uint16_t cqid = le16_to_cpu(c->cqid); - uint16_t sqid = le16_to_cpu(c->sqid); - uint16_t qsize = le16_to_cpu(c->qsize); - uint16_t qflags = le16_to_cpu(c->sq_flags); - uint64_t prp1 = le64_to_cpu(c->prp1); - - trace_pci_nvme_create_sq(prp1, sqid, cqid, qsize, qflags); - - if (unlikely(!cqid || nvme_check_cqid(n, cqid))) { - trace_pci_nvme_err_invalid_create_sq_cqid(cqid); - return NVME_INVALID_CQID | NVME_DNR; - } - if (unlikely(!sqid || sqid > n->params.max_ioqpairs || - n->sq[sqid] != NULL)) { - trace_pci_nvme_err_invalid_create_sq_sqid(sqid); - return NVME_INVALID_QID | NVME_DNR; - } - if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { - trace_pci_nvme_err_invalid_create_sq_size(qsize); - return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; - } - if (unlikely(prp1 & (n->page_size - 1))) { - trace_pci_nvme_err_invalid_create_sq_addr(prp1); - return NVME_INVALID_PRP_OFFSET | NVME_DNR; - } - if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) { - trace_pci_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags)); - return NVME_INVALID_FIELD | NVME_DNR; - } - sq = g_malloc0(sizeof(*sq)); - nvme_init_sq(sq, n, prp1, sqid, cqid, qsize + 1); - return NVME_SUCCESS; -} - -struct nvme_stats { - uint64_t units_read; - uint64_t units_written; - uint64_t read_commands; - uint64_t write_commands; -}; - -static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats) -{ - BlockAcctStats *s = blk_get_stats(ns->blkconf.blk); - - stats->units_read += s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS; - stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS; - stats->read_commands += s->nr_ops[BLOCK_ACCT_READ]; - stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE]; -} - -static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, - uint64_t off, NvmeRequest *req) -{ - uint32_t nsid = le32_to_cpu(req->cmd.nsid); - struct nvme_stats stats = { 0 }; - NvmeSmartLog smart = { 0 }; - uint32_t trans_len; - NvmeNamespace *ns; - time_t current_ms; - - if (off >= sizeof(smart)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (nsid != 0xffffffff) { - ns = 
nvme_ns(n, nsid); - if (!ns) { - return NVME_INVALID_NSID | NVME_DNR; - } - nvme_set_blk_stats(ns, &stats); - } else { - int i; - - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - nvme_set_blk_stats(ns, &stats); - } - } - - trans_len = MIN(sizeof(smart) - off, buf_len); - smart.critical_warning = n->smart_critical_warning; - - smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read, - 1000)); - smart.data_units_written[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_written, - 1000)); - smart.host_read_commands[0] = cpu_to_le64(stats.read_commands); - smart.host_write_commands[0] = cpu_to_le64(stats.write_commands); - - smart.temperature = cpu_to_le16(n->temperature); - - if ((n->temperature >= n->features.temp_thresh_hi) || - (n->temperature <= n->features.temp_thresh_low)) { - smart.critical_warning |= NVME_SMART_TEMPERATURE; - } - - current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); - smart.power_on_hours[0] = - cpu_to_le64((((current_ms - n->starttime_ms) / 1000) / 60) / 60); - - if (!rae) { - nvme_clear_events(n, NVME_AER_TYPE_SMART); - } - - return nvme_c2h(n, (uint8_t *) &smart + off, trans_len, req); -} - -static uint16_t nvme_fw_log_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off, - NvmeRequest *req) -{ - uint32_t trans_len; - NvmeFwSlotInfoLog fw_log = { - .afi = 0x1, - }; - - if (off >= sizeof(fw_log)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' '); - trans_len = MIN(sizeof(fw_log) - off, buf_len); - - return nvme_c2h(n, (uint8_t *) &fw_log + off, trans_len, req); -} - -static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, - uint64_t off, NvmeRequest *req) -{ - uint32_t trans_len; - NvmeErrorLog errlog; - - if (off >= sizeof(errlog)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (!rae) { - nvme_clear_events(n, NVME_AER_TYPE_ERROR); - } - - memset(&errlog, 0x0, sizeof(errlog)); - trans_len = MIN(sizeof(errlog) - off, buf_len); - - return nvme_c2h(n, (uint8_t *)&errlog, trans_len, req); -} - -static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, - uint64_t off, NvmeRequest *req) -{ - uint32_t nslist[1024]; - uint32_t trans_len; - int i = 0; - uint32_t nsid; - - memset(nslist, 0x0, sizeof(nslist)); - trans_len = MIN(sizeof(nslist) - off, buf_len); - - while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) != - NVME_CHANGED_NSID_SIZE) { - /* - * If there are more than 1024 namespaces, the first entry in the log - * page should be set to 0xffffffff and the others to 0, as the spec - * requires. - */ - if (i == ARRAY_SIZE(nslist)) { - memset(nslist, 0x0, sizeof(nslist)); - nslist[0] = 0xffffffff; - break; - } - - nslist[i++] = nsid; - clear_bit(nsid, n->changed_nsids); - } - - /* - * Remove all the remaining changed NSIDs in case the loop above exited early - because there are more than 1024 namespaces.
- */ - if (nslist[0] == 0xffffffff) { - bitmap_zero(n->changed_nsids, NVME_CHANGED_NSID_SIZE); - } - - if (!rae) { - nvme_clear_events(n, NVME_AER_TYPE_NOTICE); - } - - return nvme_c2h(n, ((uint8_t *)nslist) + off, trans_len, req); -} - -static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, - uint64_t off, NvmeRequest *req) -{ - NvmeEffectsLog log = {}; - const uint32_t *src_iocs = NULL; - uint32_t trans_len; - - if (off >= sizeof(log)) { - trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(log)); - return NVME_INVALID_FIELD | NVME_DNR; - } - - switch (NVME_CC_CSS(n->bar.cc)) { - case NVME_CC_CSS_NVM: - src_iocs = nvme_cse_iocs_nvm; - /* fall through */ - case NVME_CC_CSS_ADMIN_ONLY: - break; - case NVME_CC_CSS_CSI: - switch (csi) { - case NVME_CSI_NVM: - src_iocs = nvme_cse_iocs_nvm; - break; - case NVME_CSI_ZONED: - src_iocs = nvme_cse_iocs_zoned; - break; - } - } - - memcpy(log.acs, nvme_cse_acs, sizeof(nvme_cse_acs)); - - if (src_iocs) { - memcpy(log.iocs, src_iocs, sizeof(log.iocs)); - } - - trans_len = MIN(sizeof(log) - off, buf_len); - - return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req); -} - -static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeCmd *cmd = &req->cmd; - - uint32_t dw10 = le32_to_cpu(cmd->cdw10); - uint32_t dw11 = le32_to_cpu(cmd->cdw11); - uint32_t dw12 = le32_to_cpu(cmd->cdw12); - uint32_t dw13 = le32_to_cpu(cmd->cdw13); - uint8_t lid = dw10 & 0xff; - uint8_t lsp = (dw10 >> 8) & 0xf; - uint8_t rae = (dw10 >> 15) & 0x1; - uint8_t csi = le32_to_cpu(cmd->cdw14) >> 24; - uint32_t numdl, numdu; - uint64_t off, lpol, lpou; - size_t len; - uint16_t status; - - numdl = (dw10 >> 16); - numdu = (dw11 & 0xffff); - lpol = dw12; - lpou = dw13; - - len = (((numdu << 16) | numdl) + 1) << 2; - off = (lpou << 32ULL) | lpol; - - if (off & 0x3) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - trace_pci_nvme_get_log(nvme_cid(req), lid, lsp, rae, len, off); - - status = nvme_check_mdts(n, len); - if (status) { - return status; - } - - switch (lid) { - case NVME_LOG_ERROR_INFO: - return nvme_error_info(n, rae, len, off, req); - case NVME_LOG_SMART_INFO: - return nvme_smart_info(n, rae, len, off, req); - case NVME_LOG_FW_SLOT_INFO: - return nvme_fw_log_info(n, len, off, req); - case NVME_LOG_CHANGED_NSLIST: - return nvme_changed_nslist(n, rae, len, off, req); - case NVME_LOG_CMD_EFFECTS: - return nvme_cmd_effects(n, csi, len, off, req); - default: - trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); - return NVME_INVALID_FIELD | NVME_DNR; - } -} - -static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) -{ - n->cq[cq->cqid] = NULL; - timer_free(cq->timer); - if (msix_enabled(&n->parent_obj)) { - msix_vector_unuse(&n->parent_obj, cq->vector); - } - if (cq->cqid) { - g_free(cq); - } -} - -static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd; - NvmeCQueue *cq; - uint16_t qid = le16_to_cpu(c->qid); - - if (unlikely(!qid || nvme_check_cqid(n, qid))) { - trace_pci_nvme_err_invalid_del_cq_cqid(qid); - return NVME_INVALID_CQID | NVME_DNR; - } - - cq = n->cq[qid]; - if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) { - trace_pci_nvme_err_invalid_del_cq_notempty(qid); - return NVME_INVALID_QUEUE_DEL; - } - nvme_irq_deassert(n, cq); - trace_pci_nvme_del_cq(qid); - nvme_free_cq(cq, n); - return NVME_SUCCESS; -} - -static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, - uint16_t cqid, uint16_t vector, uint16_t size, - uint16_t irq_enabled) -{ - int ret; - - if 
(msix_enabled(&n->parent_obj)) { - ret = msix_vector_use(&n->parent_obj, vector); - assert(ret == 0); - } - cq->ctrl = n; - cq->cqid = cqid; - cq->size = size; - cq->dma_addr = dma_addr; - cq->phase = 1; - cq->irq_enabled = irq_enabled; - cq->vector = vector; - cq->head = cq->tail = 0; - QTAILQ_INIT(&cq->req_list); - QTAILQ_INIT(&cq->sq_list); - n->cq[cqid] = cq; - cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq); -} - -static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeCQueue *cq; - NvmeCreateCq *c = (NvmeCreateCq *)&req->cmd; - uint16_t cqid = le16_to_cpu(c->cqid); - uint16_t vector = le16_to_cpu(c->irq_vector); - uint16_t qsize = le16_to_cpu(c->qsize); - uint16_t qflags = le16_to_cpu(c->cq_flags); - uint64_t prp1 = le64_to_cpu(c->prp1); - - trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags, - NVME_CQ_FLAGS_IEN(qflags) != 0); - - if (unlikely(!cqid || cqid > n->params.max_ioqpairs || - n->cq[cqid] != NULL)) { - trace_pci_nvme_err_invalid_create_cq_cqid(cqid); - return NVME_INVALID_QID | NVME_DNR; - } - if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { - trace_pci_nvme_err_invalid_create_cq_size(qsize); - return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; - } - if (unlikely(prp1 & (n->page_size - 1))) { - trace_pci_nvme_err_invalid_create_cq_addr(prp1); - return NVME_INVALID_PRP_OFFSET | NVME_DNR; - } - if (unlikely(!msix_enabled(&n->parent_obj) && vector)) { - trace_pci_nvme_err_invalid_create_cq_vector(vector); - return NVME_INVALID_IRQ_VECTOR | NVME_DNR; - } - if (unlikely(vector >= n->params.msix_qsize)) { - trace_pci_nvme_err_invalid_create_cq_vector(vector); - return NVME_INVALID_IRQ_VECTOR | NVME_DNR; - } - if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) { - trace_pci_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags)); - return NVME_INVALID_FIELD | NVME_DNR; - } - - cq = g_malloc0(sizeof(*cq)); - nvme_init_cq(cq, n, prp1, cqid, vector, qsize + 1, - NVME_CQ_FLAGS_IEN(qflags)); - - /* - * It is only required to set qs_created when creating a completion queue; - * creating a submission queue without a matching completion queue will - * fail. 
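 *
 * One observable consequence of this flag (the snippet is taken verbatim
 * from nvme_set_feature() further down): once any I/O queue exists,
 * renegotiating the queue count is refused with a Command Sequence Error:
 *
 *     case NVME_NUMBER_OF_QUEUES:
 *         if (n->qs_created) {
 *             return NVME_CMD_SEQ_ERROR | NVME_DNR;
 *         }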
- */ - n->qs_created = true; - return NVME_SUCCESS; -} - -static uint16_t nvme_rpt_empty_id_struct(NvmeCtrl *n, NvmeRequest *req) -{ - uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {}; - - return nvme_c2h(n, id, sizeof(id), req); -} - -static inline bool nvme_csi_has_nvm_support(NvmeNamespace *ns) -{ - switch (ns->csi) { - case NVME_CSI_NVM: - case NVME_CSI_ZONED: - return true; - } - return false; -} - -static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req) -{ - trace_pci_nvme_identify_ctrl(); - - return nvme_c2h(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), req); -} - -static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {}; - NvmeIdCtrlNvm *id_nvm = (NvmeIdCtrlNvm *)&id; - - trace_pci_nvme_identify_ctrl_csi(c->csi); - - switch (c->csi) { - case NVME_CSI_NVM: - id_nvm->vsl = n->params.vsl; - id_nvm->dmrsl = cpu_to_le32(n->dmrsl); - break; - - case NVME_CSI_ZONED: - ((NvmeIdCtrlZoned *)&id)->zasl = n->params.zasl; - break; - - default: - return NVME_INVALID_FIELD | NVME_DNR; - } - - return nvme_c2h(n, id, sizeof(id), req); -} - -static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) -{ - NvmeNamespace *ns; - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - uint32_t nsid = le32_to_cpu(c->nsid); - - trace_pci_nvme_identify_ns(nsid); - - if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { - return NVME_INVALID_NSID | NVME_DNR; - } - - ns = nvme_ns(n, nsid); - if (unlikely(!ns)) { - if (!active) { - ns = nvme_subsys_ns(n->subsys, nsid); - if (!ns) { - return nvme_rpt_empty_id_struct(n, req); - } - } else { - return nvme_rpt_empty_id_struct(n, req); - } - } - - if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) { - return nvme_c2h(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req); - } - - return NVME_INVALID_CMD_SET | NVME_DNR; -} - -static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - uint16_t min_id = le16_to_cpu(c->ctrlid); - uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {}; - uint16_t *ids = &list[1]; - NvmeNamespace *ns; - NvmeCtrl *ctrl; - int cntlid, nr_ids = 0; - - trace_pci_nvme_identify_ns_attached_list(min_id); - - if (c->nsid == NVME_NSID_BROADCAST) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - ns = nvme_subsys_ns(n->subsys, c->nsid); - if (!ns) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) { - ctrl = nvme_subsys_ctrl(n->subsys, cntlid); - if (!ctrl) { - continue; - } - - if (!nvme_ns(ctrl, c->nsid)) { - continue; - } - - ids[nr_ids++] = cntlid; - } - - list[0] = nr_ids; - - return nvme_c2h(n, (uint8_t *)list, sizeof(list), req); -} - -static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, - bool active) -{ - NvmeNamespace *ns; - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - uint32_t nsid = le32_to_cpu(c->nsid); - - trace_pci_nvme_identify_ns_csi(nsid, c->csi); - - if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { - return NVME_INVALID_NSID | NVME_DNR; - } - - ns = nvme_ns(n, nsid); - if (unlikely(!ns)) { - if (!active) { - ns = nvme_subsys_ns(n->subsys, nsid); - if (!ns) { - return nvme_rpt_empty_id_struct(n, req); - } - } else { - return nvme_rpt_empty_id_struct(n, req); - } - } - - if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) { - return nvme_rpt_empty_id_struct(n, req); - } else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) { 
- return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned), - req); - } - - return NVME_INVALID_FIELD | NVME_DNR; -} - -static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, - bool active) -{ - NvmeNamespace *ns; - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - uint32_t min_nsid = le32_to_cpu(c->nsid); - uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; - static const int data_len = sizeof(list); - uint32_t *list_ptr = (uint32_t *)list; - int i, j = 0; - - trace_pci_nvme_identify_nslist(min_nsid); - - /* - * Both 0xffffffff (NVME_NSID_BROADCAST) and 0xfffffffe are invalid values - * since the Active Namespace ID List should return namespaces with ids - * *higher* than the NSID specified in the command. This is also specified - * in the spec (NVM Express v1.3d, Section 5.15.4). - */ - if (min_nsid >= NVME_NSID_BROADCAST - 1) { - return NVME_INVALID_NSID | NVME_DNR; - } - - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - if (!active) { - ns = nvme_subsys_ns(n->subsys, i); - if (!ns) { - continue; - } - } else { - continue; - } - } - if (ns->params.nsid <= min_nsid) { - continue; - } - list_ptr[j++] = cpu_to_le32(ns->params.nsid); - if (j == data_len / sizeof(uint32_t)) { - break; - } - } - - return nvme_c2h(n, list, data_len, req); -} - -static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, - bool active) -{ - NvmeNamespace *ns; - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - uint32_t min_nsid = le32_to_cpu(c->nsid); - uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; - static const int data_len = sizeof(list); - uint32_t *list_ptr = (uint32_t *)list; - int i, j = 0; - - trace_pci_nvme_identify_nslist_csi(min_nsid, c->csi); - - /* - * Same as in nvme_identify_nslist(), 0xffffffff/0xfffffffe are invalid. - */ - if (min_nsid >= NVME_NSID_BROADCAST - 1) { - return NVME_INVALID_NSID | NVME_DNR; - } - - if (c->csi != NVME_CSI_NVM && c->csi != NVME_CSI_ZONED) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - if (!active) { - ns = nvme_subsys_ns(n->subsys, i); - if (!ns) { - continue; - } - } else { - continue; - } - } - if (ns->params.nsid <= min_nsid || c->csi != ns->csi) { - continue; - } - list_ptr[j++] = cpu_to_le32(ns->params.nsid); - if (j == data_len / sizeof(uint32_t)) { - break; - } - } - - return nvme_c2h(n, list, data_len, req); -} - -static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeNamespace *ns; - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - uint32_t nsid = le32_to_cpu(c->nsid); - uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; - - struct data { - struct { - NvmeIdNsDescr hdr; - uint8_t v[NVME_NIDL_UUID]; - } uuid; - struct { - NvmeIdNsDescr hdr; - uint8_t v; - } csi; - }; - - struct data *ns_descrs = (struct data *)list; - - trace_pci_nvme_identify_ns_descr_list(nsid); - - if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { - return NVME_INVALID_NSID | NVME_DNR; - } - - ns = nvme_ns(n, nsid); - if (unlikely(!ns)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - /* - * Because the NGUID and EUI64 fields are 0 in the Identify Namespace data - * structure, a Namespace UUID (nidt = 0x3) must be reported in the - * Namespace Identification Descriptor. Add the namespace UUID here. 
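 *
 * For reference, the layout produced below (assuming the 4-byte descriptor
 * header of NvmeIdNsDescr) is:
 *
 *     bytes  0..3    NIDT = 0x3 (UUID), NIDL = 16
 *     bytes  4..19   the namespace UUID
 *     bytes 20..23   NIDT = 0x4 (CSI), NIDL = 1
 *     byte  24       the Command Set Identifier
 *
 * with the remainder of the 4096-byte buffer left zeroed, which terminates
 * the descriptor list.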
- */ - ns_descrs->uuid.hdr.nidt = NVME_NIDT_UUID; - ns_descrs->uuid.hdr.nidl = NVME_NIDL_UUID; - memcpy(&ns_descrs->uuid.v, ns->params.uuid.data, NVME_NIDL_UUID); - - ns_descrs->csi.hdr.nidt = NVME_NIDT_CSI; - ns_descrs->csi.hdr.nidl = NVME_NIDL_CSI; - ns_descrs->csi.v = ns->csi; - - return nvme_c2h(n, list, sizeof(list), req); -} - -static uint16_t nvme_identify_cmd_set(NvmeCtrl *n, NvmeRequest *req) -{ - uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; - static const int data_len = sizeof(list); - - trace_pci_nvme_identify_cmd_set(); - - NVME_SET_CSI(*list, NVME_CSI_NVM); - NVME_SET_CSI(*list, NVME_CSI_ZONED); - - return nvme_c2h(n, list, data_len, req); -} - -static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - - trace_pci_nvme_identify(nvme_cid(req), c->cns, le16_to_cpu(c->ctrlid), - c->csi); - - switch (c->cns) { - case NVME_ID_CNS_NS: - return nvme_identify_ns(n, req, true); - case NVME_ID_CNS_NS_PRESENT: - return nvme_identify_ns(n, req, false); - case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST: - return nvme_identify_ns_attached_list(n, req); - case NVME_ID_CNS_CS_NS: - return nvme_identify_ns_csi(n, req, true); - case NVME_ID_CNS_CS_NS_PRESENT: - return nvme_identify_ns_csi(n, req, false); - case NVME_ID_CNS_CTRL: - return nvme_identify_ctrl(n, req); - case NVME_ID_CNS_CS_CTRL: - return nvme_identify_ctrl_csi(n, req); - case NVME_ID_CNS_NS_ACTIVE_LIST: - return nvme_identify_nslist(n, req, true); - case NVME_ID_CNS_NS_PRESENT_LIST: - return nvme_identify_nslist(n, req, false); - case NVME_ID_CNS_CS_NS_ACTIVE_LIST: - return nvme_identify_nslist_csi(n, req, true); - case NVME_ID_CNS_CS_NS_PRESENT_LIST: - return nvme_identify_nslist_csi(n, req, false); - case NVME_ID_CNS_NS_DESCR_LIST: - return nvme_identify_ns_descr_list(n, req); - case NVME_ID_CNS_IO_COMMAND_SET: - return nvme_identify_cmd_set(n, req); - default: - trace_pci_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); - return NVME_INVALID_FIELD | NVME_DNR; - } -} - -static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req) -{ - uint16_t sqid = le32_to_cpu(req->cmd.cdw10) & 0xffff; - - req->cqe.result = 1; - if (nvme_check_sqid(n, sqid)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - return NVME_SUCCESS; -} - -static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts) -{ - trace_pci_nvme_setfeat_timestamp(ts); - - n->host_timestamp = le64_to_cpu(ts); - n->timestamp_set_qemu_clock_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); -} - -static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n) -{ - uint64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); - uint64_t elapsed_time = current_time - n->timestamp_set_qemu_clock_ms; - - union nvme_timestamp { - struct { - uint64_t timestamp:48; - uint64_t sync:1; - uint64_t origin:3; - uint64_t rsvd1:12; - }; - uint64_t all; - }; - - union nvme_timestamp ts; - ts.all = 0; - ts.timestamp = n->host_timestamp + elapsed_time; - - /* If the host timestamp is non-zero, set the timestamp origin */ - ts.origin = n->host_timestamp ? 
0x01 : 0x00; - - trace_pci_nvme_getfeat_timestamp(ts.all); - - return cpu_to_le64(ts.all); -} - -static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) -{ - uint64_t timestamp = nvme_get_timestamp(n); - - return nvme_c2h(n, (uint8_t *)&timestamp, sizeof(timestamp), req); -} - -static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeCmd *cmd = &req->cmd; - uint32_t dw10 = le32_to_cpu(cmd->cdw10); - uint32_t dw11 = le32_to_cpu(cmd->cdw11); - uint32_t nsid = le32_to_cpu(cmd->nsid); - uint32_t result; - uint8_t fid = NVME_GETSETFEAT_FID(dw10); - NvmeGetFeatureSelect sel = NVME_GETFEAT_SELECT(dw10); - uint16_t iv; - NvmeNamespace *ns; - int i; - - static const uint32_t nvme_feature_default[NVME_FID_MAX] = { - [NVME_ARBITRATION] = NVME_ARB_AB_NOLIMIT, - }; - - trace_pci_nvme_getfeat(nvme_cid(req), nsid, fid, sel, dw11); - - if (!nvme_feature_support[fid]) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (nvme_feature_cap[fid] & NVME_FEAT_CAP_NS) { - if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { - /* - * The Reservation Notification Mask and Reservation Persistence - * features require a status code of Invalid Field in Command when - * NSID is 0xFFFFFFFF. Since the device does not support those - * features we can always return Invalid Namespace or Format as we - * should do for all other features. - */ - return NVME_INVALID_NSID | NVME_DNR; - } - - if (!nvme_ns(n, nsid)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - } - - switch (sel) { - case NVME_GETFEAT_SELECT_CURRENT: - break; - case NVME_GETFEAT_SELECT_SAVED: - /* no features are saveable by the controller; fallthrough */ - case NVME_GETFEAT_SELECT_DEFAULT: - goto defaults; - case NVME_GETFEAT_SELECT_CAP: - result = nvme_feature_cap[fid]; - goto out; - } - - switch (fid) { - case NVME_TEMPERATURE_THRESHOLD: - result = 0; - - /* - * The controller only implements the Composite Temperature sensor, so - * return 0 for all other sensors. - */ - if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) { - goto out; - } - - switch (NVME_TEMP_THSEL(dw11)) { - case NVME_TEMP_THSEL_OVER: - result = n->features.temp_thresh_hi; - goto out; - case NVME_TEMP_THSEL_UNDER: - result = n->features.temp_thresh_low; - goto out; - } - - return NVME_INVALID_FIELD | NVME_DNR; - case NVME_ERROR_RECOVERY: - if (!nvme_nsid_valid(n, nsid)) { - return NVME_INVALID_NSID | NVME_DNR; - } - - ns = nvme_ns(n, nsid); - if (unlikely(!ns)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - result = ns->features.err_rec; - goto out; - case NVME_VOLATILE_WRITE_CACHE: - result = 0; - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - result = blk_enable_write_cache(ns->blkconf.blk); - if (result) { - break; - } - } - trace_pci_nvme_getfeat_vwcache(result ? 
"enabled" : "disabled"); - goto out; - case NVME_ASYNCHRONOUS_EVENT_CONF: - result = n->features.async_config; - goto out; - case NVME_TIMESTAMP: - return nvme_get_feature_timestamp(n, req); - default: - break; - } - -defaults: - switch (fid) { - case NVME_TEMPERATURE_THRESHOLD: - result = 0; - - if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) { - break; - } - - if (NVME_TEMP_THSEL(dw11) == NVME_TEMP_THSEL_OVER) { - result = NVME_TEMPERATURE_WARNING; - } - - break; - case NVME_NUMBER_OF_QUEUES: - result = (n->params.max_ioqpairs - 1) | - ((n->params.max_ioqpairs - 1) << 16); - trace_pci_nvme_getfeat_numq(result); - break; - case NVME_INTERRUPT_VECTOR_CONF: - iv = dw11 & 0xffff; - if (iv >= n->params.max_ioqpairs + 1) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - result = iv; - if (iv == n->admin_cq.vector) { - result |= NVME_INTVC_NOCOALESCING; - } - break; - case NVME_COMMAND_SET_PROFILE: - result = 0; - break; - default: - result = nvme_feature_default[fid]; - break; - } - -out: - req->cqe.result = cpu_to_le32(result); - return NVME_SUCCESS; -} - -static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) -{ - uint16_t ret; - uint64_t timestamp; - - ret = nvme_h2c(n, (uint8_t *)&timestamp, sizeof(timestamp), req); - if (ret) { - return ret; - } - - nvme_set_timestamp(n, timestamp); - - return NVME_SUCCESS; -} - -static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeNamespace *ns = NULL; - - NvmeCmd *cmd = &req->cmd; - uint32_t dw10 = le32_to_cpu(cmd->cdw10); - uint32_t dw11 = le32_to_cpu(cmd->cdw11); - uint32_t nsid = le32_to_cpu(cmd->nsid); - uint8_t fid = NVME_GETSETFEAT_FID(dw10); - uint8_t save = NVME_SETFEAT_SAVE(dw10); - int i; - - trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11); - - if (save && !(nvme_feature_cap[fid] & NVME_FEAT_CAP_SAVE)) { - return NVME_FID_NOT_SAVEABLE | NVME_DNR; - } - - if (!nvme_feature_support[fid]) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (nvme_feature_cap[fid] & NVME_FEAT_CAP_NS) { - if (nsid != NVME_NSID_BROADCAST) { - if (!nvme_nsid_valid(n, nsid)) { - return NVME_INVALID_NSID | NVME_DNR; - } - - ns = nvme_ns(n, nsid); - if (unlikely(!ns)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - } - } else if (nsid && nsid != NVME_NSID_BROADCAST) { - if (!nvme_nsid_valid(n, nsid)) { - return NVME_INVALID_NSID | NVME_DNR; - } - - return NVME_FEAT_NOT_NS_SPEC | NVME_DNR; - } - - if (!(nvme_feature_cap[fid] & NVME_FEAT_CAP_CHANGE)) { - return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR; - } - - switch (fid) { - case NVME_TEMPERATURE_THRESHOLD: - if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) { - break; - } - - switch (NVME_TEMP_THSEL(dw11)) { - case NVME_TEMP_THSEL_OVER: - n->features.temp_thresh_hi = NVME_TEMP_TMPTH(dw11); - break; - case NVME_TEMP_THSEL_UNDER: - n->features.temp_thresh_low = NVME_TEMP_TMPTH(dw11); - break; - default: - return NVME_INVALID_FIELD | NVME_DNR; - } - - if ((n->temperature >= n->features.temp_thresh_hi) || - (n->temperature <= n->features.temp_thresh_low)) { - nvme_smart_event(n, NVME_AER_INFO_SMART_TEMP_THRESH); - } - - break; - case NVME_ERROR_RECOVERY: - if (nsid == NVME_NSID_BROADCAST) { - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - - if (!ns) { - continue; - } - - if (NVME_ID_NS_NSFEAT_DULBE(ns->id_ns.nsfeat)) { - ns->features.err_rec = dw11; - } - } - - break; - } - - assert(ns); - if (NVME_ID_NS_NSFEAT_DULBE(ns->id_ns.nsfeat)) { - ns->features.err_rec = dw11; - } - break; - case NVME_VOLATILE_WRITE_CACHE: - for (i = 1; i <= 
n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - if (!(dw11 & 0x1) && blk_enable_write_cache(ns->blkconf.blk)) { - blk_flush(ns->blkconf.blk); - } - - blk_set_enable_write_cache(ns->blkconf.blk, dw11 & 1); - } - - break; - - case NVME_NUMBER_OF_QUEUES: - if (n->qs_created) { - return NVME_CMD_SEQ_ERROR | NVME_DNR; - } - - /* - * NVMe v1.3, Section 5.21.1.7: 0xffff is not an allowed value for NCQR - * and NSQR. - */ - if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - trace_pci_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, - ((dw11 >> 16) & 0xFFFF) + 1, - n->params.max_ioqpairs, - n->params.max_ioqpairs); - req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) | - ((n->params.max_ioqpairs - 1) << 16)); - break; - case NVME_ASYNCHRONOUS_EVENT_CONF: - n->features.async_config = dw11; - break; - case NVME_TIMESTAMP: - return nvme_set_feature_timestamp(n, req); - case NVME_COMMAND_SET_PROFILE: - if (dw11 & 0x1ff) { - trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff); - return NVME_CMD_SET_CMB_REJECTED | NVME_DNR; - } - break; - default: - return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR; - } - return NVME_SUCCESS; -} - -static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req) -{ - trace_pci_nvme_aer(nvme_cid(req)); - - if (n->outstanding_aers > n->params.aerl) { - trace_pci_nvme_aer_aerl_exceeded(); - return NVME_AER_LIMIT_EXCEEDED; - } - - n->aer_reqs[n->outstanding_aers] = req; - n->outstanding_aers++; - - if (!QTAILQ_EMPTY(&n->aer_queue)) { - nvme_process_aers(n); - } - - return NVME_NO_COMPLETE; -} - -static void nvme_update_dmrsl(NvmeCtrl *n) -{ - int nsid; - - for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { - NvmeNamespace *ns = nvme_ns(n, nsid); - if (!ns) { - continue; - } - - n->dmrsl = MIN_NON_ZERO(n->dmrsl, - BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); - } -} - -static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns); -static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeNamespace *ns; - NvmeCtrl *ctrl; - uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {}; - uint32_t nsid = le32_to_cpu(req->cmd.nsid); - uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); - bool attach = !(dw10 & 0xf); - uint16_t *nr_ids = &list[0]; - uint16_t *ids = &list[1]; - uint16_t ret; - int i; - - trace_pci_nvme_ns_attachment(nvme_cid(req), dw10 & 0xf); - - if (!nvme_nsid_valid(n, nsid)) { - return NVME_INVALID_NSID | NVME_DNR; - } - - ns = nvme_subsys_ns(n->subsys, nsid); - if (!ns) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - ret = nvme_h2c(n, (uint8_t *)list, 4096, req); - if (ret) { - return ret; - } - - if (!*nr_ids) { - return NVME_NS_CTRL_LIST_INVALID | NVME_DNR; - } - - *nr_ids = MIN(*nr_ids, NVME_CONTROLLER_LIST_SIZE - 1); - for (i = 0; i < *nr_ids; i++) { - ctrl = nvme_subsys_ctrl(n->subsys, ids[i]); - if (!ctrl) { - return NVME_NS_CTRL_LIST_INVALID | NVME_DNR; - } - - if (attach) { - if (nvme_ns(ctrl, nsid)) { - return NVME_NS_ALREADY_ATTACHED | NVME_DNR; - } - - if (ns->attached && !ns->params.shared) { - return NVME_NS_PRIVATE | NVME_DNR; - } - - nvme_attach_ns(ctrl, ns); - __nvme_select_ns_iocs(ctrl, ns); - } else { - if (!nvme_ns(ctrl, nsid)) { - return NVME_NS_NOT_ATTACHED | NVME_DNR; - } - - ctrl->namespaces[nsid - 1] = NULL; - ns->attached--; - - nvme_update_dmrsl(ctrl); - } - - /* - * Add namespace id to the changed namespace id list for event clearing - * via Get Log Page command. 
- */ - if (!test_and_set_bit(nsid, ctrl->changed_nsids)) { - nvme_enqueue_event(ctrl, NVME_AER_TYPE_NOTICE, - NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED, - NVME_LOG_CHANGED_NSLIST); - } - } - - return NVME_SUCCESS; -} - -static uint16_t nvme_format_ns(NvmeCtrl *n, NvmeNamespace *ns, uint8_t lbaf, - uint8_t mset, uint8_t pi, uint8_t pil, - NvmeRequest *req) -{ - int64_t len, offset; - struct nvme_aio_format_ctx *ctx; - BlockBackend *blk = ns->blkconf.blk; - uint16_t ms; - uintptr_t *num_formats = (uintptr_t *)&req->opaque; - int *count; - - if (ns->params.zoned) { - return NVME_INVALID_FORMAT | NVME_DNR; - } - - trace_pci_nvme_format_ns(nvme_cid(req), nvme_nsid(ns), lbaf, mset, pi, pil); - - if (lbaf > ns->id_ns.nlbaf) { - return NVME_INVALID_FORMAT | NVME_DNR; - } - - ms = ns->id_ns.lbaf[lbaf].ms; - - if (pi && (ms < sizeof(NvmeDifTuple))) { - return NVME_INVALID_FORMAT | NVME_DNR; - } - - if (pi && pi > NVME_ID_NS_DPS_TYPE_3) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - nvme_ns_drain(ns); - nvme_ns_shutdown(ns); - nvme_ns_cleanup(ns); - - ns->id_ns.dps = (pil << 3) | pi; - ns->id_ns.flbas = lbaf | (mset << 4); - - nvme_ns_init_format(ns); - - ns->status = NVME_FORMAT_IN_PROGRESS; - - len = ns->size; - offset = 0; - - count = g_new(int, 1); - *count = 1; - - (*num_formats)++; - - while (len) { - ctx = g_new(struct nvme_aio_format_ctx, 1); - ctx->req = req; - ctx->ns = ns; - ctx->count = count; - - size_t bytes = MIN(BDRV_REQUEST_MAX_BYTES, len); - - (*count)++; - - blk_aio_pwrite_zeroes(blk, offset, bytes, BDRV_REQ_MAY_UNMAP, - nvme_aio_format_cb, ctx); - - offset += bytes; - len -= bytes; - - } - - if (--(*count)) { - return NVME_NO_COMPLETE; - } - - g_free(count); - ns->status = 0x0; - (*num_formats)--; - - return NVME_SUCCESS; -} - -static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeNamespace *ns; - uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); - uint32_t nsid = le32_to_cpu(req->cmd.nsid); - uint8_t lbaf = dw10 & 0xf; - uint8_t mset = (dw10 >> 4) & 0x1; - uint8_t pi = (dw10 >> 5) & 0x7; - uint8_t pil = (dw10 >> 8) & 0x1; - uintptr_t *num_formats = (uintptr_t *)&req->opaque; - uint16_t status; - int i; - - trace_pci_nvme_format(nvme_cid(req), nsid, lbaf, mset, pi, pil); - - /* 1-initialize; see the comment in nvme_dsm */ - *num_formats = 1; - - if (nsid != NVME_NSID_BROADCAST) { - if (!nvme_nsid_valid(n, nsid)) { - return NVME_INVALID_NSID | NVME_DNR; - } - - ns = nvme_ns(n, nsid); - if (!ns) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req); - if (status && status != NVME_NO_COMPLETE) { - req->status = status; - } - } else { - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req); - if (status && status != NVME_NO_COMPLETE) { - req->status = status; - break; - } - } - } - - /* account for the 1-initialization */ - if (--(*num_formats)) { - return NVME_NO_COMPLETE; - } - - return req->status; -} - -static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) -{ - trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, - nvme_adm_opc_str(req->cmd.opcode)); - - if (!(nvme_cse_acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { - trace_pci_nvme_err_invalid_admin_opc(req->cmd.opcode); - return NVME_INVALID_OPCODE | NVME_DNR; - } - - /* SGLs shall not be used for Admin commands in NVMe over PCIe */ - if (NVME_CMD_FLAGS_PSDT(req->cmd.flags) != NVME_PSDT_PRP) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - 
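    /*
     * For reference (spec field layout, nothing new functionally): PSDT is
     * bits 07:06 of the command Dword 0 flags byte tested just above,
     * roughly
     *
     *     psdt = (req->cmd.flags >> 6) & 0x3;   // 0 = PRP, 1/2 = SGL
     *
     * which is why any value other than NVME_PSDT_PRP is rejected for admin
     * commands here.
     */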
switch (req->cmd.opcode) { - case NVME_ADM_CMD_DELETE_SQ: - return nvme_del_sq(n, req); - case NVME_ADM_CMD_CREATE_SQ: - return nvme_create_sq(n, req); - case NVME_ADM_CMD_GET_LOG_PAGE: - return nvme_get_log(n, req); - case NVME_ADM_CMD_DELETE_CQ: - return nvme_del_cq(n, req); - case NVME_ADM_CMD_CREATE_CQ: - return nvme_create_cq(n, req); - case NVME_ADM_CMD_IDENTIFY: - return nvme_identify(n, req); - case NVME_ADM_CMD_ABORT: - return nvme_abort(n, req); - case NVME_ADM_CMD_SET_FEATURES: - return nvme_set_feature(n, req); - case NVME_ADM_CMD_GET_FEATURES: - return nvme_get_feature(n, req); - case NVME_ADM_CMD_ASYNC_EV_REQ: - return nvme_aer(n, req); - case NVME_ADM_CMD_NS_ATTACHMENT: - return nvme_ns_attachment(n, req); - case NVME_ADM_CMD_FORMAT_NVM: - return nvme_format(n, req); - default: - assert(false); - } - - return NVME_INVALID_OPCODE | NVME_DNR; -} - -static void nvme_process_sq(void *opaque) -{ - NvmeSQueue *sq = opaque; - NvmeCtrl *n = sq->ctrl; - NvmeCQueue *cq = n->cq[sq->cqid]; - - uint16_t status; - hwaddr addr; - NvmeCmd cmd; - NvmeRequest *req; - - while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) { - addr = sq->dma_addr + sq->head * n->sqe_size; - if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) { - trace_pci_nvme_err_addr_read(addr); - trace_pci_nvme_err_cfs(); - n->bar.csts = NVME_CSTS_FAILED; - break; - } - nvme_inc_sq_head(sq); - - req = QTAILQ_FIRST(&sq->req_list); - QTAILQ_REMOVE(&sq->req_list, req, entry); - QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry); - nvme_req_clear(req); - req->cqe.cid = cmd.cid; - memcpy(&req->cmd, &cmd, sizeof(NvmeCmd)); - - status = sq->sqid ? nvme_io_cmd(n, req) : - nvme_admin_cmd(n, req); - if (status != NVME_NO_COMPLETE) { - req->status = status; - nvme_enqueue_req_completion(cq, req); - } - } -} - -static void nvme_ctrl_reset(NvmeCtrl *n) -{ - NvmeNamespace *ns; - int i; - - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - nvme_ns_drain(ns); - } - - for (i = 0; i < n->params.max_ioqpairs + 1; i++) { - if (n->sq[i] != NULL) { - nvme_free_sq(n->sq[i], n); - } - } - for (i = 0; i < n->params.max_ioqpairs + 1; i++) { - if (n->cq[i] != NULL) { - nvme_free_cq(n->cq[i], n); - } - } - - while (!QTAILQ_EMPTY(&n->aer_queue)) { - NvmeAsyncEvent *event = QTAILQ_FIRST(&n->aer_queue); - QTAILQ_REMOVE(&n->aer_queue, event, entry); - g_free(event); - } - - n->aer_queued = 0; - n->outstanding_aers = 0; - n->qs_created = false; - - n->bar.cc = 0; -} - -static void nvme_ctrl_shutdown(NvmeCtrl *n) -{ - NvmeNamespace *ns; - int i; - - if (n->pmr.dev) { - memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); - } - - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - nvme_ns_shutdown(ns); - } -} - -static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns) -{ - ns->iocs = nvme_cse_iocs_none; - switch (ns->csi) { - case NVME_CSI_NVM: - if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) { - ns->iocs = nvme_cse_iocs_nvm; - } - break; - case NVME_CSI_ZONED: - if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) { - ns->iocs = nvme_cse_iocs_zoned; - } else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) { - ns->iocs = nvme_cse_iocs_nvm; - } - break; - } -} - -static void nvme_select_ns_iocs(NvmeCtrl *n) -{ - NvmeNamespace *ns; - int i; - - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - __nvme_select_ns_iocs(n, ns); - } -} - -static int nvme_start_ctrl(NvmeCtrl *n) -{ - uint32_t page_bits = 
NVME_CC_MPS(n->bar.cc) + 12; - uint32_t page_size = 1 << page_bits; - - if (unlikely(n->cq[0])) { - trace_pci_nvme_err_startfail_cq(); - return -1; - } - if (unlikely(n->sq[0])) { - trace_pci_nvme_err_startfail_sq(); - return -1; - } - if (unlikely(!n->bar.asq)) { - trace_pci_nvme_err_startfail_nbarasq(); - return -1; - } - if (unlikely(!n->bar.acq)) { - trace_pci_nvme_err_startfail_nbaracq(); - return -1; - } - if (unlikely(n->bar.asq & (page_size - 1))) { - trace_pci_nvme_err_startfail_asq_misaligned(n->bar.asq); - return -1; - } - if (unlikely(n->bar.acq & (page_size - 1))) { - trace_pci_nvme_err_startfail_acq_misaligned(n->bar.acq); - return -1; - } - if (unlikely(!(NVME_CAP_CSS(n->bar.cap) & (1 << NVME_CC_CSS(n->bar.cc))))) { - trace_pci_nvme_err_startfail_css(NVME_CC_CSS(n->bar.cc)); - return -1; - } - if (unlikely(NVME_CC_MPS(n->bar.cc) < - NVME_CAP_MPSMIN(n->bar.cap))) { - trace_pci_nvme_err_startfail_page_too_small( - NVME_CC_MPS(n->bar.cc), - NVME_CAP_MPSMIN(n->bar.cap)); - return -1; - } - if (unlikely(NVME_CC_MPS(n->bar.cc) > - NVME_CAP_MPSMAX(n->bar.cap))) { - trace_pci_nvme_err_startfail_page_too_large( - NVME_CC_MPS(n->bar.cc), - NVME_CAP_MPSMAX(n->bar.cap)); - return -1; - } - if (unlikely(NVME_CC_IOCQES(n->bar.cc) < - NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) { - trace_pci_nvme_err_startfail_cqent_too_small( - NVME_CC_IOCQES(n->bar.cc), - NVME_CTRL_CQES_MIN(n->bar.cap)); - return -1; - } - if (unlikely(NVME_CC_IOCQES(n->bar.cc) > - NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) { - trace_pci_nvme_err_startfail_cqent_too_large( - NVME_CC_IOCQES(n->bar.cc), - NVME_CTRL_CQES_MAX(n->bar.cap)); - return -1; - } - if (unlikely(NVME_CC_IOSQES(n->bar.cc) < - NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) { - trace_pci_nvme_err_startfail_sqent_too_small( - NVME_CC_IOSQES(n->bar.cc), - NVME_CTRL_SQES_MIN(n->bar.cap)); - return -1; - } - if (unlikely(NVME_CC_IOSQES(n->bar.cc) > - NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) { - trace_pci_nvme_err_startfail_sqent_too_large( - NVME_CC_IOSQES(n->bar.cc), - NVME_CTRL_SQES_MAX(n->bar.cap)); - return -1; - } - if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) { - trace_pci_nvme_err_startfail_asqent_sz_zero(); - return -1; - } - if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) { - trace_pci_nvme_err_startfail_acqent_sz_zero(); - return -1; - } - - n->page_bits = page_bits; - n->page_size = page_size; - n->max_prp_ents = n->page_size / sizeof(uint64_t); - n->cqe_size = 1 << NVME_CC_IOCQES(n->bar.cc); - n->sqe_size = 1 << NVME_CC_IOSQES(n->bar.cc); - nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0, - NVME_AQA_ACQS(n->bar.aqa) + 1, 1); - nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0, - NVME_AQA_ASQS(n->bar.aqa) + 1); - - nvme_set_timestamp(n, 0ULL); - - QTAILQ_INIT(&n->aer_queue); - - nvme_select_ns_iocs(n); - - return 0; -} - -static void nvme_cmb_enable_regs(NvmeCtrl *n) -{ - NVME_CMBLOC_SET_CDPCILS(n->bar.cmbloc, 1); - NVME_CMBLOC_SET_CDPMLS(n->bar.cmbloc, 1); - NVME_CMBLOC_SET_BIR(n->bar.cmbloc, NVME_CMB_BIR); - - NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); - NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ - NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); -} - -static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, - unsigned size) -{ - if (unlikely(offset & (sizeof(uint32_t) - 1))) { - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_misaligned32, - "MMIO write not 32-bit aligned," - " offset=0x%"PRIx64"", offset); - /* should 
be ignored, fall through for now */ - } - - if (unlikely(size < sizeof(uint32_t))) { - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_toosmall, - "MMIO write smaller than 32-bits," - " offset=0x%"PRIx64", size=%u", - offset, size); - /* should be ignored, fall through for now */ - } - - switch (offset) { - case 0xc: /* INTMS */ - if (unlikely(msix_enabled(&(n->parent_obj)))) { - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, - "undefined access to interrupt mask set" - " when MSI-X is enabled"); - /* should be ignored, fall through for now */ - } - n->bar.intms |= data & 0xffffffff; - n->bar.intmc = n->bar.intms; - trace_pci_nvme_mmio_intm_set(data & 0xffffffff, n->bar.intmc); - nvme_irq_check(n); - break; - case 0x10: /* INTMC */ - if (unlikely(msix_enabled(&(n->parent_obj)))) { - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, - "undefined access to interrupt mask clr" - " when MSI-X is enabled"); - /* should be ignored, fall through for now */ - } - n->bar.intms &= ~(data & 0xffffffff); - n->bar.intmc = n->bar.intms; - trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, n->bar.intmc); - nvme_irq_check(n); - break; - case 0x14: /* CC */ - trace_pci_nvme_mmio_cfg(data & 0xffffffff); - /* Windows first sends data, then sends enable bit */ - if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) && - !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc)) - { - n->bar.cc = data; - } - - if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) { - n->bar.cc = data; - if (unlikely(nvme_start_ctrl(n))) { - trace_pci_nvme_err_startfail(); - n->bar.csts = NVME_CSTS_FAILED; - } else { - trace_pci_nvme_mmio_start_success(); - n->bar.csts = NVME_CSTS_READY; - } - } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) { - trace_pci_nvme_mmio_stopped(); - nvme_ctrl_reset(n); - n->bar.csts &= ~NVME_CSTS_READY; - } - if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) { - trace_pci_nvme_mmio_shutdown_set(); - nvme_ctrl_shutdown(n); - n->bar.cc = data; - n->bar.csts |= NVME_CSTS_SHST_COMPLETE; - } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) { - trace_pci_nvme_mmio_shutdown_cleared(); - n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; - n->bar.cc = data; - } - break; - case 0x1C: /* CSTS */ - if (data & (1 << 4)) { - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported, - "attempted to W1C CSTS.NSSRO" - " but CAP.NSSRS is zero (not supported)"); - } else if (data != 0) { - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ro_csts, - "attempted to set a read only bit" - " of controller status"); - } - break; - case 0x20: /* NSSR */ - if (data == 0x4E564D65) { - trace_pci_nvme_ub_mmiowr_ssreset_unsupported(); - } else { - /* The spec says that writes of other values have no effect */ - return; - } - break; - case 0x24: /* AQA */ - n->bar.aqa = data & 0xffffffff; - trace_pci_nvme_mmio_aqattr(data & 0xffffffff); - break; - case 0x28: /* ASQ */ - n->bar.asq = size == 8 ? data : - (n->bar.asq & ~0xffffffffULL) | (data & 0xffffffff); - trace_pci_nvme_mmio_asqaddr(data); - break; - case 0x2c: /* ASQ hi */ - n->bar.asq = (n->bar.asq & 0xffffffff) | (data << 32); - trace_pci_nvme_mmio_asqaddr_hi(data, n->bar.asq); - break; - case 0x30: /* ACQ */ - trace_pci_nvme_mmio_acqaddr(data); - n->bar.acq = size == 8 ? 
data : - (n->bar.acq & ~0xffffffffULL) | (data & 0xffffffff); - break; - case 0x34: /* ACQ hi */ - n->bar.acq = (n->bar.acq & 0xffffffff) | (data << 32); - trace_pci_nvme_mmio_acqaddr_hi(data, n->bar.acq); - break; - case 0x38: /* CMBLOC */ - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbloc_reserved, - "invalid write to reserved CMBLOC" - " when CMBSZ is zero, ignored"); - return; - case 0x3C: /* CMBSZ */ - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly, - "invalid write to read only CMBSZ, ignored"); - return; - case 0x50: /* CMBMSC */ - if (!NVME_CAP_CMBS(n->bar.cap)) { - return; - } - - n->bar.cmbmsc = size == 8 ? data : - (n->bar.cmbmsc & ~0xffffffff) | (data & 0xffffffff); - n->cmb.cmse = false; - - if (NVME_CMBMSC_CRE(data)) { - nvme_cmb_enable_regs(n); - - if (NVME_CMBMSC_CMSE(data)) { - hwaddr cba = NVME_CMBMSC_CBA(data) << CMBMSC_CBA_SHIFT; - if (cba + int128_get64(n->cmb.mem.size) < cba) { - NVME_CMBSTS_SET_CBAI(n->bar.cmbsts, 1); - return; - } - - n->cmb.cba = cba; - n->cmb.cmse = true; - } - } else { - n->bar.cmbsz = 0; - n->bar.cmbloc = 0; - } - - return; - case 0x54: /* CMBMSC hi */ - n->bar.cmbmsc = (n->bar.cmbmsc & 0xffffffff) | (data << 32); - return; - - case 0xE00: /* PMRCAP */ - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly, - "invalid write to PMRCAP register, ignored"); - return; - case 0xE04: /* PMRCTL */ - n->bar.pmrctl = data; - if (NVME_PMRCTL_EN(data)) { - memory_region_set_enabled(&n->pmr.dev->mr, true); - n->bar.pmrsts = 0; - } else { - memory_region_set_enabled(&n->pmr.dev->mr, false); - NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 1); - n->pmr.cmse = false; - } - return; - case 0xE08: /* PMRSTS */ - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly, - "invalid write to PMRSTS register, ignored"); - return; - case 0xE0C: /* PMREBS */ - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly, - "invalid write to PMREBS register, ignored"); - return; - case 0xE10: /* PMRSWTP */ - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly, - "invalid write to PMRSWTP register, ignored"); - return; - case 0xE14: /* PMRMSCL */ - if (!NVME_CAP_PMRS(n->bar.cap)) { - return; - } - - n->bar.pmrmsc = (n->bar.pmrmsc & ~0xffffffff) | (data & 0xffffffff); - n->pmr.cmse = false; - - if (NVME_PMRMSC_CMSE(n->bar.pmrmsc)) { - hwaddr cba = NVME_PMRMSC_CBA(n->bar.pmrmsc) << PMRMSC_CBA_SHIFT; - if (cba + int128_get64(n->pmr.dev->mr.size) < cba) { - NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 1); - return; - } - - n->pmr.cmse = true; - n->pmr.cba = cba; - } - - return; - case 0xE18: /* PMRMSCU */ - if (!NVME_CAP_PMRS(n->bar.cap)) { - return; - } - - n->bar.pmrmsc = (n->bar.pmrmsc & 0xffffffff) | (data << 32); - return; - default: - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid, - "invalid MMIO write," - " offset=0x%"PRIx64", data=%"PRIx64"", - offset, data); - break; - } -} - -static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) -{ - NvmeCtrl *n = (NvmeCtrl *)opaque; - uint8_t *ptr = (uint8_t *)&n->bar; - uint64_t val = 0; - - trace_pci_nvme_mmio_read(addr, size); - - if (unlikely(addr & (sizeof(uint32_t) - 1))) { - NVME_GUEST_ERR(pci_nvme_ub_mmiord_misaligned32, - "MMIO read not 32-bit aligned," - " offset=0x%"PRIx64"", addr); - /* should RAZ, fall through for now */ - } else if (unlikely(size < sizeof(uint32_t))) { - NVME_GUEST_ERR(pci_nvme_ub_mmiord_toosmall, - "MMIO read smaller than 32-bits," - " offset=0x%"PRIx64"", addr); - /* should RAZ, fall through for now */ - } - - if (addr < sizeof(n->bar)) { - /* - * When PMRWBM bit 1 is set then reads from - PMRSTS should ensure prior writes - 
* made it to persistent media - */ - if (addr == 0xE08 && - (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { - memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); - } - memcpy(&val, ptr + addr, size); - } else { - NVME_GUEST_ERR(pci_nvme_ub_mmiord_invalid_ofs, - "MMIO read beyond last register," - " offset=0x%"PRIx64", returning 0", addr); - } - - return val; -} - -static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) -{ - uint32_t qid; - - if (unlikely(addr & ((1 << 2) - 1))) { - NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned, - "doorbell write not 32-bit aligned," - " offset=0x%"PRIx64", ignoring", addr); - return; - } - - if (((addr - 0x1000) >> 2) & 1) { - /* Completion queue doorbell write */ - - uint16_t new_head = val & 0xffff; - int start_sqs; - NvmeCQueue *cq; - - qid = (addr - (0x1000 + (1 << 2))) >> 3; - if (unlikely(nvme_check_cqid(n, qid))) { - NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cq, - "completion queue doorbell write" - " for nonexistent queue," - " sqid=%"PRIu32", ignoring", qid); - - /* - * NVM Express v1.3d, Section 4.1 state: "If host software writes - * an invalid value to the Submission Queue Tail Doorbell or - * Completion Queue Head Doorbell regiter and an Asynchronous Event - * Request command is outstanding, then an asynchronous event is - * posted to the Admin Completion Queue with a status code of - * Invalid Doorbell Write Value." - * - * Also note that the spec includes the "Invalid Doorbell Register" - * status code, but nowhere does it specify when to use it. - * However, it seems reasonable to use it here in a similar - * fashion. - */ - if (n->outstanding_aers) { - nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, - NVME_AER_INFO_ERR_INVALID_DB_REGISTER, - NVME_LOG_ERROR_INFO); - } - - return; - } - - cq = n->cq[qid]; - if (unlikely(new_head >= cq->size)) { - NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cqhead, - "completion queue doorbell write value" - " beyond queue size, sqid=%"PRIu32"," - " new_head=%"PRIu16", ignoring", - qid, new_head); - - if (n->outstanding_aers) { - nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, - NVME_AER_INFO_ERR_INVALID_DB_VALUE, - NVME_LOG_ERROR_INFO); - } - - return; - } - - trace_pci_nvme_mmio_doorbell_cq(cq->cqid, new_head); - - start_sqs = nvme_cq_full(cq) ? 
1 : 0; - cq->head = new_head; - if (start_sqs) { - NvmeSQueue *sq; - QTAILQ_FOREACH(sq, &cq->sq_list, entry) { - timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); - } - timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); - } - - if (cq->tail == cq->head) { - nvme_irq_deassert(n, cq); - } - } else { - /* Submission queue doorbell write */ - - uint16_t new_tail = val & 0xffff; - NvmeSQueue *sq; - - qid = (addr - 0x1000) >> 3; - if (unlikely(nvme_check_sqid(n, qid))) { - NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sq, - "submission queue doorbell write" - " for nonexistent queue," - " sqid=%"PRIu32", ignoring", qid); - - if (n->outstanding_aers) { - nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, - NVME_AER_INFO_ERR_INVALID_DB_REGISTER, - NVME_LOG_ERROR_INFO); - } - - return; - } - - sq = n->sq[qid]; - if (unlikely(new_tail >= sq->size)) { - NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sqtail, - "submission queue doorbell write value" - " beyond queue size, sqid=%"PRIu32"," - " new_tail=%"PRIu16", ignoring", - qid, new_tail); - - if (n->outstanding_aers) { - nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, - NVME_AER_INFO_ERR_INVALID_DB_VALUE, - NVME_LOG_ERROR_INFO); - } - - return; - } - - trace_pci_nvme_mmio_doorbell_sq(sq->sqid, new_tail); - - sq->tail = new_tail; - timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); - } -} - -static void nvme_mmio_write(void *opaque, hwaddr addr, uint64_t data, - unsigned size) -{ - NvmeCtrl *n = (NvmeCtrl *)opaque; - - trace_pci_nvme_mmio_write(addr, data, size); - - if (addr < sizeof(n->bar)) { - nvme_write_bar(n, addr, data, size); - } else { - nvme_process_db(n, addr, data); - } -} - -static const MemoryRegionOps nvme_mmio_ops = { - .read = nvme_mmio_read, - .write = nvme_mmio_write, - .endianness = DEVICE_LITTLE_ENDIAN, - .impl = { - .min_access_size = 2, - .max_access_size = 8, - }, -}; - -static void nvme_cmb_write(void *opaque, hwaddr addr, uint64_t data, - unsigned size) -{ - NvmeCtrl *n = (NvmeCtrl *)opaque; - stn_le_p(&n->cmb.buf[addr], size, data); -} - -static uint64_t nvme_cmb_read(void *opaque, hwaddr addr, unsigned size) -{ - NvmeCtrl *n = (NvmeCtrl *)opaque; - return ldn_le_p(&n->cmb.buf[addr], size); -} - -static const MemoryRegionOps nvme_cmb_ops = { - .read = nvme_cmb_read, - .write = nvme_cmb_write, - .endianness = DEVICE_LITTLE_ENDIAN, - .impl = { - .min_access_size = 1, - .max_access_size = 8, - }, -}; - -static void nvme_check_constraints(NvmeCtrl *n, Error **errp) -{ - NvmeParams *params = &n->params; - - if (params->num_queues) { - warn_report("num_queues is deprecated; please use max_ioqpairs " - "instead"); - - params->max_ioqpairs = params->num_queues - 1; - } - - if (n->namespace.blkconf.blk && n->subsys) { - error_setg(errp, "subsystem support is unavailable with legacy " - "namespace ('drive' property)"); - return; - } - - if (params->max_ioqpairs < 1 || - params->max_ioqpairs > NVME_MAX_IOQPAIRS) { - error_setg(errp, "max_ioqpairs must be between 1 and %d", - NVME_MAX_IOQPAIRS); - return; - } - - if (params->msix_qsize < 1 || - params->msix_qsize > PCI_MSIX_FLAGS_QSIZE + 1) { - error_setg(errp, "msix_qsize must be between 1 and %d", - PCI_MSIX_FLAGS_QSIZE + 1); - return; - } - - if (!params->serial) { - error_setg(errp, "serial property not set"); - return; - } - - if (n->pmr.dev) { - if (host_memory_backend_is_mapped(n->pmr.dev)) { - error_setg(errp, "can't use already busy memdev: %s", - object_get_canonical_path_component(OBJECT(n->pmr.dev))); - return; - } - - if 
(!is_power_of_2(n->pmr.dev->size)) { - error_setg(errp, "pmr backend size needs to be power of 2 in size"); - return; - } - - host_memory_backend_set_mapped(n->pmr.dev, true); - } - - if (n->params.zasl > n->params.mdts) { - error_setg(errp, "zoned.zasl (Zone Append Size Limit) must be less " - "than or equal to mdts (Maximum Data Transfer Size)"); - return; - } - - if (!n->params.vsl) { - error_setg(errp, "vsl must be non-zero"); - return; - } -} - -static void nvme_init_state(NvmeCtrl *n) -{ - n->num_namespaces = NVME_MAX_NAMESPACES; - /* add one to max_ioqpairs to account for the admin queue pair */ - n->reg_size = pow2ceil(sizeof(NvmeBar) + - 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE); - n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1); - n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1); - n->temperature = NVME_TEMPERATURE; - n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING; - n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); - n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1); -} - -static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) -{ - uint64_t cmb_size = n->params.cmb_size_mb * MiB; - - n->cmb.buf = g_malloc0(cmb_size); - memory_region_init_io(&n->cmb.mem, OBJECT(n), &nvme_cmb_ops, n, - "nvme-cmb", cmb_size); - pci_register_bar(pci_dev, NVME_CMB_BIR, - PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_TYPE_64 | - PCI_BASE_ADDRESS_MEM_PREFETCH, &n->cmb.mem); - - NVME_CAP_SET_CMBS(n->bar.cap, 1); - - if (n->params.legacy_cmb) { - nvme_cmb_enable_regs(n); - n->cmb.cmse = true; - } -} - -static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) -{ - NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 1); - NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 1); - NVME_PMRCAP_SET_BIR(n->bar.pmrcap, NVME_PMR_BIR); - /* Turn on bit 1 support */ - NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02); - NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 1); - - pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap), - PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_TYPE_64 | - PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmr.dev->mr); - - memory_region_set_enabled(&n->pmr.dev->mr, false); -} - -static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) -{ - uint8_t *pci_conf = pci_dev->config; - uint64_t bar_size, msix_table_size, msix_pba_size; - unsigned msix_table_offset, msix_pba_offset; - int ret; - - Error *err = NULL; - - pci_conf[PCI_INTERRUPT_PIN] = 1; - pci_config_set_prog_interface(pci_conf, 0x2); - - if (n->params.use_intel_id) { - pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL); - pci_config_set_device_id(pci_conf, 0x5845); - } else { - pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT); - pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME); - } - - pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); - pcie_endpoint_cap_init(pci_dev, 0x80); - - bar_size = QEMU_ALIGN_UP(n->reg_size, 4 * KiB); - msix_table_offset = bar_size; - msix_table_size = PCI_MSIX_ENTRY_SIZE * n->params.msix_qsize; - - bar_size += msix_table_size; - bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB); - msix_pba_offset = bar_size; - msix_pba_size = QEMU_ALIGN_UP(n->params.msix_qsize, 64) / 8; - - bar_size += msix_pba_size; - bar_size = pow2ceil(bar_size); - - memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size); - memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", - n->reg_size); - memory_region_add_subregion(&n->bar0, 0, &n->iomem); - - pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0); - ret = 
msix_init(pci_dev, n->params.msix_qsize, - &n->bar0, 0, msix_table_offset, - &n->bar0, 0, msix_pba_offset, 0, &err); - if (ret < 0) { - if (ret == -ENOTSUP) { - warn_report_err(err); - } else { - error_propagate(errp, err); - return ret; - } - } - - if (n->params.cmb_size_mb) { - nvme_init_cmb(n, pci_dev); - } - - if (n->pmr.dev) { - nvme_init_pmr(n, pci_dev); - } - - return 0; -} - -static void nvme_init_subnqn(NvmeCtrl *n) -{ - NvmeSubsystem *subsys = n->subsys; - NvmeIdCtrl *id = &n->id_ctrl; - - if (!subsys) { - snprintf((char *)id->subnqn, sizeof(id->subnqn), - "nqn.2019-08.org.qemu:%s", n->params.serial); - } else { - pstrcpy((char *)id->subnqn, sizeof(id->subnqn), (char*)subsys->subnqn); - } -} - -static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) -{ - NvmeIdCtrl *id = &n->id_ctrl; - uint8_t *pci_conf = pci_dev->config; - - id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); - id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); - strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); - strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' '); - strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' '); - - id->cntlid = cpu_to_le16(n->cntlid); - - id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR); - - id->rab = 6; - - if (n->params.use_intel_id) { - id->ieee[0] = 0xb3; - id->ieee[1] = 0x02; - id->ieee[2] = 0x00; - } else { - id->ieee[0] = 0x00; - id->ieee[1] = 0x54; - id->ieee[2] = 0x52; - } - - id->mdts = n->params.mdts; - id->ver = cpu_to_le32(NVME_SPEC_VER); - id->oacs = cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT); - id->cntrltype = 0x1; - - /* - * Because the controller always completes the Abort command immediately, - * there can never be more than one concurrently executing Abort command, - * so this value is never used for anything. Note that there can easily be - * many Abort commands in the queues, but they are not considered - * "executing" until processed by nvme_abort. - * - * The specification recommends a value of 3 for Abort Command Limit (four - * concurrently outstanding Abort commands), so lets use that though it is - * inconsequential. - */ - id->acl = 3; - id->aerl = n->params.aerl; - id->frmw = (NVME_NUM_FW_SLOTS << 1) | NVME_FRMW_SLOT1_RO; - id->lpa = NVME_LPA_NS_SMART | NVME_LPA_CSE | NVME_LPA_EXTENDED; - - /* recommended default value (~70 C) */ - id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING); - id->cctemp = cpu_to_le16(NVME_TEMPERATURE_CRITICAL); - - id->sqes = (0x6 << 4) | 0x6; - id->cqes = (0x4 << 4) | 0x4; - id->nn = cpu_to_le32(n->num_namespaces); - id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP | - NVME_ONCS_FEATURES | NVME_ONCS_DSM | - NVME_ONCS_COMPARE | NVME_ONCS_COPY); - - /* - * NOTE: If this device ever supports a command set that does NOT use 0x0 - * as a Flush-equivalent operation, support for the broadcast NSID in Flush - * should probably be removed. - * - * See comment in nvme_io_cmd. 
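/*
 * Illustrative sketch, not part of the patch: how nvme_init_pci() above
 * sizes BAR0.  The register/doorbell block (reg_size, itself rounded up
 * in nvme_init_state() to cover NvmeBar plus two 4-byte doorbells per
 * queue pair) is aligned to 4 KiB, the MSI-X table (16 bytes per
 * vector) and the pending-bit array (one bit per vector, padded to 64)
 * are appended, and the total is rounded up to a power of two as PCI
 * BAR sizes require.  Standalone approximation with invented names.
 */
#include <stdint.h>

#define DEMO_KIB 1024ULL
#define DEMO_ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

static uint64_t demo_pow2ceil(uint64_t v)
{
    uint64_t p = 1;

    while (p < v) {
        p <<= 1;
    }
    return p;
}

static uint64_t demo_nvme_bar0_size(uint64_t reg_size, unsigned msix_qsize)
{
    uint64_t bar = DEMO_ALIGN_UP(reg_size, 4 * DEMO_KIB); /* registers + doorbells */

    bar += 16ULL * msix_qsize;                            /* MSI-X table */
    bar = DEMO_ALIGN_UP(bar, 4 * DEMO_KIB);
    bar += DEMO_ALIGN_UP((uint64_t)msix_qsize, 64) / 8;   /* MSI-X PBA   */

    return demo_pow2ceil(bar);
}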
- */ - id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT; - - id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0); - id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN | - NVME_CTRL_SGLS_BITBUCKET); - - nvme_init_subnqn(n); - - id->psd[0].mp = cpu_to_le16(0x9c4); - id->psd[0].enlat = cpu_to_le32(0x10); - id->psd[0].exlat = cpu_to_le32(0x4); - - if (n->subsys) { - id->cmic |= NVME_CMIC_MULTI_CTRL; - } - - NVME_CAP_SET_MQES(n->bar.cap, 0x7ff); - NVME_CAP_SET_CQR(n->bar.cap, 1); - NVME_CAP_SET_TO(n->bar.cap, 0xf); - NVME_CAP_SET_CSS(n->bar.cap, NVME_CAP_CSS_NVM); - NVME_CAP_SET_CSS(n->bar.cap, NVME_CAP_CSS_CSI_SUPP); - NVME_CAP_SET_CSS(n->bar.cap, NVME_CAP_CSS_ADMIN_ONLY); - NVME_CAP_SET_MPSMAX(n->bar.cap, 4); - NVME_CAP_SET_CMBS(n->bar.cap, n->params.cmb_size_mb ? 1 : 0); - NVME_CAP_SET_PMRS(n->bar.cap, n->pmr.dev ? 1 : 0); - - n->bar.vs = NVME_SPEC_VER; - n->bar.intmc = n->bar.intms = 0; -} - -static int nvme_init_subsys(NvmeCtrl *n, Error **errp) -{ - int cntlid; - - if (!n->subsys) { - return 0; - } - - cntlid = nvme_subsys_register_ctrl(n, errp); - if (cntlid < 0) { - return -1; - } - - n->cntlid = cntlid; - - return 0; -} - -void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns) -{ - uint32_t nsid = ns->params.nsid; - assert(nsid && nsid <= NVME_MAX_NAMESPACES); - - n->namespaces[nsid - 1] = ns; - ns->attached++; - - n->dmrsl = MIN_NON_ZERO(n->dmrsl, - BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); -} - -static void nvme_realize(PCIDevice *pci_dev, Error **errp) -{ - NvmeCtrl *n = NVME(pci_dev); - NvmeNamespace *ns; - Error *local_err = NULL; - - nvme_check_constraints(n, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - - qbus_create_inplace(&n->bus, sizeof(NvmeBus), TYPE_NVME_BUS, - &pci_dev->qdev, n->parent_obj.qdev.id); - - nvme_init_state(n); - if (nvme_init_pci(n, pci_dev, errp)) { - return; - } - - if (nvme_init_subsys(n, errp)) { - error_propagate(errp, local_err); - return; - } - nvme_init_ctrl(n, pci_dev); - - /* setup a namespace if the controller drive property was given */ - if (n->namespace.blkconf.blk) { - ns = &n->namespace; - ns->params.nsid = 1; - - if (nvme_ns_setup(n, ns, errp)) { - return; - } - - nvme_attach_ns(n, ns); - } -} - -static void nvme_exit(PCIDevice *pci_dev) -{ - NvmeCtrl *n = NVME(pci_dev); - NvmeNamespace *ns; - int i; - - nvme_ctrl_reset(n); - - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - nvme_ns_cleanup(ns); - } - - g_free(n->cq); - g_free(n->sq); - g_free(n->aer_reqs); - - if (n->params.cmb_size_mb) { - g_free(n->cmb.buf); - } - - if (n->pmr.dev) { - host_memory_backend_set_mapped(n->pmr.dev, false); - } - msix_uninit(pci_dev, &n->bar0, &n->bar0); - memory_region_del_subregion(&n->bar0, &n->iomem); -} - -static Property nvme_props[] = { - DEFINE_BLOCK_PROPERTIES(NvmeCtrl, namespace.blkconf), - DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmr.dev, TYPE_MEMORY_BACKEND, - HostMemoryBackend *), - DEFINE_PROP_LINK("subsys", NvmeCtrl, subsys, TYPE_NVME_SUBSYS, - NvmeSubsystem *), - DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial), - DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0), - DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0), - DEFINE_PROP_UINT32("max_ioqpairs", NvmeCtrl, params.max_ioqpairs, 64), - DEFINE_PROP_UINT16("msix_qsize", NvmeCtrl, params.msix_qsize, 65), - DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3), - DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64), - DEFINE_PROP_UINT8("mdts", 
NvmeCtrl, params.mdts, 7), - DEFINE_PROP_UINT8("vsl", NvmeCtrl, params.vsl, 7), - DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false), - DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false), - DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0), - DEFINE_PROP_END_OF_LIST(), -}; - -static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - NvmeCtrl *n = NVME(obj); - uint8_t value = n->smart_critical_warning; - - visit_type_uint8(v, name, &value, errp); -} - -static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - NvmeCtrl *n = NVME(obj); - uint8_t value, old_value, cap = 0, index, event; - - if (!visit_type_uint8(v, name, &value, errp)) { - return; - } - - cap = NVME_SMART_SPARE | NVME_SMART_TEMPERATURE | NVME_SMART_RELIABILITY - | NVME_SMART_MEDIA_READ_ONLY | NVME_SMART_FAILED_VOLATILE_MEDIA; - if (NVME_CAP_PMRS(n->bar.cap)) { - cap |= NVME_SMART_PMR_UNRELIABLE; - } - - if ((value & cap) != value) { - error_setg(errp, "unsupported smart critical warning bits: 0x%x", - value & ~cap); - return; - } - - old_value = n->smart_critical_warning; - n->smart_critical_warning = value; - - /* only inject new bits of smart critical warning */ - for (index = 0; index < NVME_SMART_WARN_MAX; index++) { - event = 1 << index; - if (value & ~old_value & event) - nvme_smart_event(n, event); - } -} - -static const VMStateDescription nvme_vmstate = { - .name = "nvme", - .unmigratable = 1, -}; - -static void nvme_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); - - pc->realize = nvme_realize; - pc->exit = nvme_exit; - pc->class_id = PCI_CLASS_STORAGE_EXPRESS; - pc->revision = 2; - - set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); - dc->desc = "Non-Volatile Memory Express"; - device_class_set_props(dc, nvme_props); - dc->vmsd = &nvme_vmstate; -} - -static void nvme_instance_init(Object *obj) -{ - NvmeCtrl *n = NVME(obj); - - device_add_bootindex_property(obj, &n->namespace.blkconf.bootindex, - "bootindex", "/namespace@1,0", - DEVICE(obj)); - - object_property_add(obj, "smart_critical_warning", "uint8", - nvme_get_smart_warning, - nvme_set_smart_warning, NULL, NULL); -} - -static const TypeInfo nvme_info = { - .name = TYPE_NVME, - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(NvmeCtrl), - .instance_init = nvme_instance_init, - .class_init = nvme_class_init, - .interfaces = (InterfaceInfo[]) { - { INTERFACE_PCIE_DEVICE }, - { } - }, -}; - -static const TypeInfo nvme_bus_info = { - .name = TYPE_NVME_BUS, - .parent = TYPE_BUS, - .instance_size = sizeof(NvmeBus), -}; - -static void nvme_register_types(void) -{ - type_register_static(&nvme_info); - type_register_static(&nvme_bus_info); -} - -type_init(nvme_register_types) diff --git a/hw/block/nvme.h b/hw/block/nvme.h deleted file mode 100644 index 5d05ec368f7..00000000000 --- a/hw/block/nvme.h +++ /dev/null @@ -1,266 +0,0 @@ -#ifndef HW_NVME_H -#define HW_NVME_H - -#include "block/nvme.h" -#include "hw/pci/pci.h" -#include "nvme-subsys.h" -#include "nvme-ns.h" - -#define NVME_DEFAULT_ZONE_SIZE (128 * MiB) -#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB) - -typedef struct NvmeParams { - char *serial; - uint32_t num_queues; /* deprecated since 5.1 */ - uint32_t max_ioqpairs; - uint16_t msix_qsize; - uint32_t cmb_size_mb; - uint8_t aerl; - uint32_t aer_max_queued; - uint8_t mdts; - uint8_t vsl; - bool use_intel_id; - uint8_t zasl; - bool 
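/*
 * Illustrative sketch, not part of the patch: the smart_critical_warning
 * setter above raises an NVMe SMART event only for bits that transition
 * from 0 to 1, i.e. bits set in the new value but not in the old one.
 * The helper name is invented for the example.
 */
#include <stdint.h>

static uint8_t demo_newly_set_bits(uint8_t old_value, uint8_t new_value)
{
    return new_value & (uint8_t)~old_value; /* keeps only 0 -> 1 transitions */
}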
legacy_cmb; -} NvmeParams; - -typedef struct NvmeAsyncEvent { - QTAILQ_ENTRY(NvmeAsyncEvent) entry; - NvmeAerResult result; -} NvmeAsyncEvent; - -enum { - NVME_SG_ALLOC = 1 << 0, - NVME_SG_DMA = 1 << 1, -}; - -typedef struct NvmeSg { - int flags; - - union { - QEMUSGList qsg; - QEMUIOVector iov; - }; -} NvmeSg; - -typedef struct NvmeRequest { - struct NvmeSQueue *sq; - struct NvmeNamespace *ns; - BlockAIOCB *aiocb; - uint16_t status; - void *opaque; - NvmeCqe cqe; - NvmeCmd cmd; - BlockAcctCookie acct; - NvmeSg sg; - QTAILQ_ENTRY(NvmeRequest)entry; -} NvmeRequest; - -typedef struct NvmeBounceContext { - NvmeRequest *req; - - struct { - QEMUIOVector iov; - uint8_t *bounce; - } data, mdata; -} NvmeBounceContext; - -static inline const char *nvme_adm_opc_str(uint8_t opc) -{ - switch (opc) { - case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ"; - case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ"; - case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE"; - case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ"; - case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ"; - case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY"; - case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT"; - case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES"; - case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; - case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; - case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT"; - case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; - default: return "NVME_ADM_CMD_UNKNOWN"; - } -} - -static inline const char *nvme_io_opc_str(uint8_t opc) -{ - switch (opc) { - case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH"; - case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE"; - case NVME_CMD_READ: return "NVME_NVM_CMD_READ"; - case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE"; - case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES"; - case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM"; - case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY"; - case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY"; - case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND"; - case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV"; - case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND"; - default: return "NVME_NVM_CMD_UNKNOWN"; - } -} - -typedef struct NvmeSQueue { - struct NvmeCtrl *ctrl; - uint16_t sqid; - uint16_t cqid; - uint32_t head; - uint32_t tail; - uint32_t size; - uint64_t dma_addr; - QEMUTimer *timer; - NvmeRequest *io_req; - QTAILQ_HEAD(, NvmeRequest) req_list; - QTAILQ_HEAD(, NvmeRequest) out_req_list; - QTAILQ_ENTRY(NvmeSQueue) entry; -} NvmeSQueue; - -typedef struct NvmeCQueue { - struct NvmeCtrl *ctrl; - uint8_t phase; - uint16_t cqid; - uint16_t irq_enabled; - uint32_t head; - uint32_t tail; - uint32_t vector; - uint32_t size; - uint64_t dma_addr; - QEMUTimer *timer; - QTAILQ_HEAD(, NvmeSQueue) sq_list; - QTAILQ_HEAD(, NvmeRequest) req_list; -} NvmeCQueue; - -#define TYPE_NVME_BUS "nvme-bus" -#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS) - -typedef struct NvmeBus { - BusState parent_bus; -} NvmeBus; - -#define TYPE_NVME "nvme" -#define NVME(obj) \ - OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) - -typedef struct NvmeFeatureVal { - struct { - uint16_t temp_thresh_hi; - uint16_t temp_thresh_low; - }; - uint32_t async_config; -} NvmeFeatureVal; - -typedef struct NvmeCtrl { - PCIDevice parent_obj; - MemoryRegion bar0; - MemoryRegion iomem; - 
NvmeBar bar; - NvmeParams params; - NvmeBus bus; - - uint16_t cntlid; - bool qs_created; - uint32_t page_size; - uint16_t page_bits; - uint16_t max_prp_ents; - uint16_t cqe_size; - uint16_t sqe_size; - uint32_t reg_size; - uint32_t num_namespaces; - uint32_t max_q_ents; - uint8_t outstanding_aers; - uint32_t irq_status; - uint64_t host_timestamp; /* Timestamp sent by the host */ - uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ - uint64_t starttime_ms; - uint16_t temperature; - uint8_t smart_critical_warning; - - struct { - MemoryRegion mem; - uint8_t *buf; - bool cmse; - hwaddr cba; - } cmb; - - struct { - HostMemoryBackend *dev; - bool cmse; - hwaddr cba; - } pmr; - - uint8_t aer_mask; - NvmeRequest **aer_reqs; - QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue; - int aer_queued; - - uint32_t dmrsl; - - /* Namespace ID is started with 1 so bitmap should be 1-based */ -#define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1) - DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE); - - NvmeSubsystem *subsys; - - NvmeNamespace namespace; - /* - * Attached namespaces to this controller. If subsys is not given, all - * namespaces in this list will always be attached. - */ - NvmeNamespace *namespaces[NVME_MAX_NAMESPACES]; - NvmeSQueue **sq; - NvmeCQueue **cq; - NvmeSQueue admin_sq; - NvmeCQueue admin_cq; - NvmeIdCtrl id_ctrl; - NvmeFeatureVal features; -} NvmeCtrl; - -static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid) -{ - if (!nsid || nsid > n->num_namespaces) { - return NULL; - } - - return n->namespaces[nsid - 1]; -} - -static inline NvmeCQueue *nvme_cq(NvmeRequest *req) -{ - NvmeSQueue *sq = req->sq; - NvmeCtrl *n = sq->ctrl; - - return n->cq[sq->cqid]; -} - -static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req) -{ - NvmeSQueue *sq = req->sq; - return sq->ctrl; -} - -static inline uint16_t nvme_cid(NvmeRequest *req) -{ - if (!req) { - return 0xffff; - } - - return le16_to_cpu(req->cqe.cid); -} - -typedef enum NvmeTxDirection { - NVME_TX_DIRECTION_TO_DEVICE = 0, - NVME_TX_DIRECTION_FROM_DEVICE = 1, -} NvmeTxDirection; - -void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns); -uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir, NvmeRequest *req); -uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir, NvmeRequest *req); -void nvme_rw_complete_cb(void *opaque, int ret); -uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, - NvmeCmd *cmd); - -#endif /* HW_NVME_H */ diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c index 25c053693ce..02c514fb6e0 100644 --- a/hw/block/pflash_cfi02.c +++ b/hw/block/pflash_cfi02.c @@ -173,7 +173,6 @@ static void pflash_setup_mappings(PFlashCFI02 *pfl) "pflash-alias", &pfl->orig_mem, 0, size); memory_region_add_subregion(&pfl->mem, i * size, &pfl->mem_mappings[i]); } - pfl->rom_mode = true; } static void pflash_reset_state_machine(PFlashCFI02 *pfl) @@ -917,8 +916,13 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp) /* Allocate memory for a bitmap for sectors being erased. 
*/ pfl->sector_erase_map = bitmap_new(pfl->total_sectors); - pflash_setup_mappings(pfl); - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); + pfl->rom_mode = true; + if (pfl->mappings > 1) { + pflash_setup_mappings(pfl); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); + } else { + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->orig_mem); + } timer_init_ns(&pfl->timer, QEMU_CLOCK_VIRTUAL, pflash_timer, pfl); pfl->status = 0; diff --git a/hw/block/swim.c b/hw/block/swim.c index 509c2f49003..333da08ce09 100644 --- a/hw/block/swim.c +++ b/hw/block/swim.c @@ -421,8 +421,7 @@ static void sysbus_swim_realize(DeviceState *dev, Error **errp) Swim *sys = SWIM(dev); SWIMCtrl *swimctrl = &sys->ctrl; - qbus_create_inplace(&swimctrl->bus, sizeof(SWIMBus), TYPE_SWIM_BUS, dev, - NULL); + qbus_init(&swimctrl->bus, sizeof(SWIMBus), TYPE_SWIM_BUS, dev, NULL); swimctrl->bus.ctrl = swimctrl; } diff --git a/hw/block/trace-events b/hw/block/trace-events index fa12e3a67a7..d86b53520cc 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -1,9 +1,12 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # fdc.c fdc_ioport_read(uint8_t reg, uint8_t value) "read reg 0x%02x val 0x%02x" fdc_ioport_write(uint8_t reg, uint8_t value) "write reg 0x%02x val 0x%02x" +# fdc-sysbus.c +fdctrl_tc_pulse(int level) "TC pulse: %u" + # pflash_cfi01.c # pflash_cfi02.c pflash_chip_erase_invalid(const char *name, uint64_t offset) "%s: chip erase: invalid address 0x%" PRIx64 @@ -49,212 +52,6 @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint6 hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d" hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d" -# nvme.c -# nvme traces for successful events -pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" -pci_nvme_irq_pin(void) "pulsing IRQ pin" -pci_nvme_irq_masked(void) "IRQ is masked" -pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" -pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" -pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" -pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d" -pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64"" -pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" -pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" -pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" -pci_nvme_format(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" -pci_nvme_format_ns(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" -pci_nvme_format_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" -pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" 
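/*
 * Illustrative sketch, not part of the patch: each entry in trace-events
 * (see docs/devel/tracing.rst) declares a trace point, and the build
 * generates a matching trace_<name>() helper taking the declared
 * arguments.  For the fdctrl_tc_pulse event added above, the call site
 * in fdc-sysbus.c would look roughly like this (handler name and body
 * are assumptions for the example):
 */
#include "qemu/osdep.h"
#include "trace.h"                    /* generated per-directory trace header */

static void demo_fdctrl_tc(void *opaque, int irq, int level)
{
    trace_fdctrl_tc_pulse(level);     /* logs "TC pulse: <level>" when enabled */
    /* ... terminal-count handling would follow here ... */
}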
-pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" -pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_misc_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw(uint8_t pract, uint8_t prinfo) "pract 0x%"PRIx8" prinfo 0x%"PRIx8"" -pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw_mdata_out_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw_check_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_pract_generate_dif(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_check(uint8_t prinfo, uint16_t chksum_len) "prinfo 0x%"PRIx8" chksum_len %"PRIu16"" -pci_nvme_dif_prchk_disabled(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_prchk_guard(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16"" -pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "apptag 0x%"PRIx16" elbat 0x%"PRIx16" elbatm 0x%"PRIx16"" -pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32"" -pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8"" -pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" -pci_nvme_copy_in_complete(uint16_t cid) "cid %"PRIu16"" -pci_nvme_copy_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_verify(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" -pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_rw_complete_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d" -pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32"" -pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32"" -pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32"" -pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" -pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64"" -pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_create_sq(uint64_t 
addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" -pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" -pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" -pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16"" -pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid %"PRIu16" cns 0x%"PRIx8" ctrlid %"PRIu16" csi 0x%"PRIx8"" -pci_nvme_identify_ctrl(void) "identify controller" -pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8"" -pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_identify_ns_attached_list(uint16_t cntid) "cntid=%"PRIu16"" -pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8"" -pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8"" -pci_nvme_identify_cmd_set(void) "identify i/o command set" -pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64"" -pci_nvme_getfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t sel, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" sel 0x%"PRIx8" cdw11 0x%"PRIx32"" -pci_nvme_setfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t save, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" save 0x%"PRIx8" cdw11 0x%"PRIx32"" -pci_nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" -pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d" -pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" -pci_nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" -pci_nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" -pci_nvme_process_aers(int queued) "queued %d" -pci_nvme_aer(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aer_aerl_exceeded(void) "aerl exceeded" -pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8"" -pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" -pci_nvme_ns_attachment(uint16_t cid, uint8_t sel) "cid %"PRIu16", sel=0x%"PRIx8"" -pci_nvme_ns_attachment_attach(uint16_t cntlid, uint32_t nsid) "cntlid=0x%"PRIx16", nsid=0x%"PRIx32"" -pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" -pci_nvme_enqueue_event_noqueue(int queued) "queued %d" -pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" -pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" -pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16"" -pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d" -pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d" -pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16"" 
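/*
 * Illustrative sketch, not part of the patch: how the doorbell offsets
 * behind the two traces above map to queues.  With a doorbell stride of
 * 4 bytes, each queue pair owns an 8-byte slot starting at offset
 * 0x1000: the submission queue tail doorbell first, the completion
 * queue head doorbell 4 bytes later.  nvme_process_db() decodes this as
 * follows (simplified, helper name invented):
 */
#include <stdbool.h>
#include <stdint.h>

static void demo_nvme_decode_doorbell(uint64_t addr, bool *is_cq, uint32_t *qid)
{
    uint64_t off = addr - 0x1000;   /* doorbells follow the register block */

    *is_cq = (off >> 2) & 1;        /* odd 4-byte slot => CQ head doorbell */
    *qid   = off >> 3;              /* 8 bytes (SQ tail + CQ head) per queue pair */
}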
-pci_nvme_mmio_doorbell_sq(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16" new_tail %"PRIu16"" -pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" -pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" -pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" -pci_nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" -pci_nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" -pci_nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" -pci_nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" -pci_nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" -pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded" -pci_nvme_mmio_stopped(void) "cleared controller enable bit" -pci_nvme_mmio_shutdown_set(void) "shutdown bit set" -pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" -pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32"" -pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32"" -pci_nvme_clear_ns_close(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Closed state" -pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Empty state" - -# nvme traces for error conditions -pci_nvme_err_mdts(size_t len) "len %zu" -pci_nvme_err_zasl(size_t len) "len %zu" -pci_nvme_err_req_status(uint16_t cid, uint32_t nsid, uint16_t status, uint8_t opc) "cid %"PRIu16" nsid %"PRIu32" status 0x%"PRIx16" opc 0x%"PRIx8"" -pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64"" -pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64"" -pci_nvme_err_cfs(void) "controller fatal status" -pci_nvme_err_aio(uint16_t cid, const char *errname, uint16_t status) "cid %"PRIu16" err '%s' status 0x%"PRIx16"" -pci_nvme_err_copy_invalid_format(uint8_t format) "format 0x%"PRIx8"" -pci_nvme_err_invalid_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" -pci_nvme_err_invalid_num_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" -pci_nvme_err_invalid_sgl_excess_length(uint32_t residual) "residual %"PRIu32"" -pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" -pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is not page aligned: 0x%"PRIx64"" -pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" -pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 
0x%"PRIx8"" -pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" -pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" -pci_nvme_err_invalid_log_page_offset(uint64_t ofs, uint64_t size) "must be <= %"PRIu64", got %"PRIu64"" -pci_nvme_err_cmb_invalid_cba(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" -pci_nvme_err_cmb_not_enabled(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" -pci_nvme_err_unaligned_zone_cmd(uint8_t action, uint64_t slba, uint64_t zslba) "unaligned zone op 0x%"PRIx32", got slba=%"PRIu64", zslba=%"PRIu64"" -pci_nvme_err_invalid_zone_state_transition(uint8_t action, uint64_t slba, uint8_t attrs) "action=0x%"PRIx8", slba=%"PRIu64", attrs=0x%"PRIx32"" -pci_nvme_err_write_not_at_wp(uint64_t slba, uint64_t zone, uint64_t wp) "writing at slba=%"PRIu64", zone=%"PRIu64", but wp=%"PRIu64"" -pci_nvme_err_append_not_at_start(uint64_t slba, uint64_t zone) "appending at slba=%"PRIu64", but zone=%"PRIu64"" -pci_nvme_err_zone_is_full(uint64_t zslba) "zslba 0x%"PRIx64"" -pci_nvme_err_zone_is_read_only(uint64_t zslba) "zslba 0x%"PRIx64"" -pci_nvme_err_zone_is_offline(uint64_t zslba) "zslba 0x%"PRIx64"" -pci_nvme_err_zone_boundary(uint64_t slba, uint32_t nlb, uint64_t zcap) "lba 0x%"PRIx64" nlb %"PRIu32" zcap 0x%"PRIx64"" -pci_nvme_err_zone_invalid_write(uint64_t slba, uint64_t wp) "lba 0x%"PRIx64" wp 0x%"PRIx64"" -pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" -pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" -pci_nvme_err_insuff_active_res(uint32_t max_active) "max_active=%"PRIu32" zone limit exceeded" -pci_nvme_err_insuff_open_res(uint32_t max_open) "max_open=%"PRIu32" zone limit exceeded" -pci_nvme_err_zd_extension_map_error(uint32_t zone_idx) "can't map descriptor extension for zone_idx=%"PRIu32"" -pci_nvme_err_invalid_iocsci(uint32_t idx) "unsupported command set combination index %"PRIu32"" -pci_nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" -pci_nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" -pci_nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" -pci_nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" -pci_nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" -pci_nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" -pci_nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" -pci_nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" -pci_nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" -pci_nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" -pci_nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" -pci_nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" -pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" -pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid 
cns=0x%"PRIx16"" -pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" -pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" -pci_nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16"" -pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" -pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" -pci_nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" -pci_nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" -pci_nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" -pci_nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" -pci_nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" -pci_nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" -pci_nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" -pci_nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" -pci_nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" -pci_nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" -pci_nvme_err_startfail_css(uint8_t css) "nvme_start_ctrl failed because invalid command set selected:%u" -pci_nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" -pci_nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" -pci_nvme_err_startfail_zasl_too_small(uint32_t zasl, uint32_t pagesz) "nvme_start_ctrl failed because zone append size limit %"PRIu32" is too small, needs to be >= %"PRIu32"" -pci_nvme_err_startfail(void) "setting controller enable bit failed" -pci_nvme_err_invalid_mgmt_action(uint8_t action) "action=0x%"PRIx8"" - -# Traces for undefined behavior -pci_nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" -pci_nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" -pci_nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" -pci_nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" -pci_nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" -pci_nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" -pci_nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" -pci_nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" 
-pci_nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" -pci_nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" -pci_nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" -pci_nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" -pci_nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" -pci_nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" -pci_nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" -pci_nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" -pci_nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" -pci_nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" -pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" -pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" -pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" -pci_nvme_ub_unknown_css_value(void) "unknown value in cc.css field" - # xen-block.c xen_block_realize(const char *type, uint32_t disk, uint32_t partition) "%s d%up%u" xen_block_connect(const char *type, uint32_t disk, uint32_t partition) "%s d%up%u" diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 0b5b9d44cdb..ba13cb87e52 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -31,6 +31,8 @@ #include "sysemu/sysemu.h" #include "sysemu/runstate.h" +#define REALIZE_CONNECTION_RETRIES 3 + static const int user_feature_bits[] = { VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_SEG_MAX, @@ -47,9 +49,13 @@ static const int user_feature_bits[] = { VIRTIO_RING_F_INDIRECT_DESC, VIRTIO_RING_F_EVENT_IDX, VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_F_RING_PACKED, + VIRTIO_F_IOMMU_PLATFORM, VHOST_INVALID_FEATURE_BIT }; +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event); + static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config) { VHostUserBlk *s = VHOST_USER_BLK(vdev); @@ -87,11 +93,13 @@ static int vhost_user_blk_handle_config_change(struct vhost_dev *dev) int ret; struct virtio_blk_config blkcfg; VHostUserBlk *s = VHOST_USER_BLK(dev->vdev); + Error *local_err = NULL; ret = vhost_dev_get_config(dev, (uint8_t *)&blkcfg, - sizeof(struct virtio_blk_config)); + sizeof(struct virtio_blk_config), + &local_err); if (ret < 0) { - error_report("get config space failed"); + error_report_err(local_err); return -1; } @@ -109,7 +117,7 @@ const VhostDevConfigOps blk_ops = { .vhost_dev_config_notifier = vhost_user_blk_handle_config_change, }; -static int vhost_user_blk_start(VirtIODevice *vdev) +static int vhost_user_blk_start(VirtIODevice *vdev, Error **errp) { VHostUserBlk *s = VHOST_USER_BLK(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -117,19 +125,19 @@ static int vhost_user_blk_start(VirtIODevice *vdev) int i, ret; if (!k->set_guest_notifiers) { - error_report("binding does not support guest notifiers"); + error_setg(errp, "binding does not support guest notifiers"); 
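/*
 * Illustrative sketch, not part of the patch: the error-handling pattern
 * this hunk converts vhost-user-blk to.  The callee fills an Error
 * through error_setg()/error_setg_errno() and returns a negative errno;
 * the caller either propagates the Error or reports it with added
 * context.  Function names below are invented for the example.
 */
#include "qemu/osdep.h"
#include "qapi/error.h"

static int demo_start(Error **errp)
{
    int ret = -ENOSYS;      /* stand-in for a failed backend call */

    if (ret < 0) {
        error_setg_errno(errp, -ret, "Error starting vhost");
        return ret;
    }
    return 0;
}

static void demo_caller(void)
{
    Error *local_err = NULL;

    if (demo_start(&local_err) < 0) {
        /* prepends the context string, prints the error and frees it */
        error_reportf_err(local_err, "vhost-user-blk: vhost start failed: ");
    }
}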
return -ENOSYS; } ret = vhost_dev_enable_notifiers(&s->dev, vdev); if (ret < 0) { - error_report("Error enabling host notifiers: %d", -ret); + error_setg_errno(errp, -ret, "Error enabling host notifiers"); return ret; } ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); if (ret < 0) { - error_report("Error binding guest notifier: %d", -ret); + error_setg_errno(errp, -ret, "Error binding guest notifier"); goto err_host_notifiers; } @@ -137,27 +145,27 @@ static int vhost_user_blk_start(VirtIODevice *vdev) ret = vhost_dev_prepare_inflight(&s->dev, vdev); if (ret < 0) { - error_report("Error set inflight format: %d", -ret); + error_setg_errno(errp, -ret, "Error setting inflight format"); goto err_guest_notifiers; } if (!s->inflight->addr) { ret = vhost_dev_get_inflight(&s->dev, s->queue_size, s->inflight); if (ret < 0) { - error_report("Error get inflight: %d", -ret); + error_setg_errno(errp, -ret, "Error getting inflight"); goto err_guest_notifiers; } } ret = vhost_dev_set_inflight(&s->dev, s->inflight); if (ret < 0) { - error_report("Error set inflight: %d", -ret); + error_setg_errno(errp, -ret, "Error setting inflight"); goto err_guest_notifiers; } ret = vhost_dev_start(&s->dev, vdev); if (ret < 0) { - error_report("Error starting vhost: %d", -ret); + error_setg_errno(errp, -ret, "Error starting vhost"); goto err_guest_notifiers; } s->started_vu = true; @@ -210,6 +218,7 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserBlk *s = VHOST_USER_BLK(vdev); bool should_start = virtio_device_started(vdev, status); + Error *local_err = NULL; int ret; if (!vdev->vm_running) { @@ -225,10 +234,9 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) } if (should_start) { - ret = vhost_user_blk_start(vdev); + ret = vhost_user_blk_start(vdev, &local_err); if (ret < 0) { - error_report("vhost-user-blk: vhost start failed: %s", - strerror(-ret)); + error_reportf_err(local_err, "vhost-user-blk: vhost start failed: "); qemu_chr_fe_disconnect(&s->chardev); } } else { @@ -266,6 +274,7 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) { VHostUserBlk *s = VHOST_USER_BLK(vdev); + Error *local_err = NULL; int i, ret; if (!vdev->start_on_kick) { @@ -283,10 +292,9 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start * vhost here instead of waiting for .set_status(). 
*/ - ret = vhost_user_blk_start(vdev); + ret = vhost_user_blk_start(vdev, &local_err); if (ret < 0) { - error_report("vhost-user-blk: vhost start failed: %s", - strerror(-ret)); + error_reportf_err(local_err, "vhost-user-blk: vhost start failed: "); qemu_chr_fe_disconnect(&s->chardev); return; } @@ -309,7 +317,7 @@ static void vhost_user_blk_reset(VirtIODevice *vdev) vhost_dev_free_inflight(s->inflight); } -static int vhost_user_blk_connect(DeviceState *dev) +static int vhost_user_blk_connect(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBlk *s = VHOST_USER_BLK(vdev); @@ -320,6 +328,7 @@ static int vhost_user_blk_connect(DeviceState *dev) } s->connected = true; + s->dev.num_queues = s->num_queues; s->dev.nvqs = s->num_queues; s->dev.vqs = s->vhost_vqs; s->dev.vq_index = 0; @@ -327,19 +336,16 @@ static int vhost_user_blk_connect(DeviceState *dev) vhost_dev_set_config_notifier(&s->dev, &blk_ops); - ret = vhost_dev_init(&s->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); + ret = vhost_dev_init(&s->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0, + errp); if (ret < 0) { - error_report("vhost-user-blk: vhost initialization failed: %s", - strerror(-ret)); return ret; } /* restore vhost state */ if (virtio_device_started(vdev, vdev->status)) { - ret = vhost_user_blk_start(vdev); + ret = vhost_user_blk_start(vdev, errp); if (ret < 0) { - error_report("vhost-user-blk: vhost start failed: %s", - strerror(-ret)); return ret; } } @@ -362,19 +368,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev) vhost_dev_cleanup(&s->dev); } -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, - bool realized); - -static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event) -{ - vhost_user_blk_event(opaque, event, false); -} - -static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event) -{ - vhost_user_blk_event(opaque, event, true); -} - static void vhost_user_blk_chr_closed_bh(void *opaque) { DeviceState *dev = opaque; @@ -382,36 +375,27 @@ static void vhost_user_blk_chr_closed_bh(void *opaque) VHostUserBlk *s = VHOST_USER_BLK(vdev); vhost_user_blk_disconnect(dev); - qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, - vhost_user_blk_event_oper, NULL, opaque, NULL, true); + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, + NULL, opaque, NULL, true); } -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, - bool realized) +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) { DeviceState *dev = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBlk *s = VHOST_USER_BLK(vdev); + Error *local_err = NULL; switch (event) { case CHR_EVENT_OPENED: - if (vhost_user_blk_connect(dev) < 0) { + if (vhost_user_blk_connect(dev, &local_err) < 0) { + error_report_err(local_err); qemu_chr_fe_disconnect(&s->chardev); return; } break; case CHR_EVENT_CLOSED: - /* - * Closing the connection should happen differently on device - * initialization and operation stages. - * On initalization, we want to re-start vhost_dev initialization - * from the very beginning right away when the connection is closed, - * so we clean up vhost_dev on each connection closing. - * On operation, we want to postpone vhost_dev cleanup to let the - * other code perform its own cleanup sequence using vhost_dev data - * (e.g. vhost_dev_set_log). 
- */ - if (realized && !runstate_check(RUN_STATE_SHUTDOWN)) { + if (!runstate_check(RUN_STATE_SHUTDOWN)) { /* * A close event may happen during a read/write, but vhost * code assumes the vhost_dev remains setup, so delay the @@ -431,8 +415,6 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, * knowing its type (in this case vhost-user). */ s->dev.started = false; - } else { - vhost_user_blk_disconnect(dev); } break; case CHR_EVENT_BREAK: @@ -443,15 +425,46 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, } } +static int vhost_user_blk_realize_connect(VHostUserBlk *s, Error **errp) +{ + DeviceState *dev = &s->parent_obj.parent_obj; + int ret; + + s->connected = false; + + ret = qemu_chr_fe_wait_connected(&s->chardev, errp); + if (ret < 0) { + return ret; + } + + ret = vhost_user_blk_connect(dev, errp); + if (ret < 0) { + qemu_chr_fe_disconnect(&s->chardev); + return ret; + } + assert(s->connected); + + ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, + sizeof(struct virtio_blk_config), errp); + if (ret < 0) { + qemu_chr_fe_disconnect(&s->chardev); + vhost_dev_cleanup(&s->dev); + return ret; + } + + return 0; +} + static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) { + ERRP_GUARD(); VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBlk *s = VHOST_USER_BLK(vdev); - Error *err = NULL; + int retries; int i, ret; if (!s->chardev.chr) { - error_setg(errp, "vhost-user-blk: chardev is mandatory"); + error_setg(errp, "chardev is mandatory"); return; } @@ -459,12 +472,17 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) s->num_queues = 1; } if (!s->num_queues || s->num_queues > VIRTIO_QUEUE_MAX) { - error_setg(errp, "vhost-user-blk: invalid number of IO queues"); + error_setg(errp, "invalid number of IO queues"); return; } if (!s->queue_size) { - error_setg(errp, "vhost-user-blk: queue size must be non-zero"); + error_setg(errp, "queue size must be non-zero"); + return; + } + if (s->queue_size > VIRTQUEUE_MAX_SIZE) { + error_setg(errp, "queue size must not exceed %d", + VIRTQUEUE_MAX_SIZE); return; } @@ -483,33 +501,25 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) s->inflight = g_new0(struct vhost_inflight, 1); s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues); - s->connected = false; - qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, - vhost_user_blk_event_realize, NULL, (void *)dev, - NULL, true); - -reconnect: - if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { - error_report_err(err); - goto virtio_err; - } - - /* check whether vhost_user_blk_connect() failed or not */ - if (!s->connected) { - goto reconnect; - } + retries = REALIZE_CONNECTION_RETRIES; + assert(!*errp); + do { + if (*errp) { + error_prepend(errp, "Reconnecting after error: "); + error_report_err(*errp); + *errp = NULL; + } + ret = vhost_user_blk_realize_connect(s, errp); + } while (ret == -EPROTO && retries--); - ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, - sizeof(struct virtio_blk_config)); if (ret < 0) { - error_report("vhost-user-blk: get block config failed"); - goto reconnect; + goto virtio_err; } - /* we're fully initialized, now we can operate, so change the handler */ + /* we're fully initialized, now we can operate, so add the handler */ qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, - vhost_user_blk_event_oper, NULL, (void *)dev, + vhost_user_blk_event, NULL, (void *)dev, NULL, true); return; diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c 
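/*
 * Illustrative sketch, not part of the patch: the bounded reconnect loop
 * that vhost_user_blk_device_realize() gains above.  Only protocol-level
 * failures (-EPROTO) are retried; the Error from a failed attempt is
 * reported and cleared before the next try, so only the final failure
 * reaches the caller.  In the real code ERRP_GUARD() guarantees *errp
 * may be inspected; names below are invented for the example.
 */
#include "qemu/osdep.h"
#include "qapi/error.h"

#define DEMO_CONNECTION_RETRIES 3

static int demo_connect(Error **errp); /* stand-in for vhost_user_blk_realize_connect() */

static int demo_realize_connect_loop(Error **errp)
{
    int retries = DEMO_CONNECTION_RETRIES;
    int ret;

    do {
        if (*errp) {
            /* report the previous attempt's failure, then retry cleanly */
            error_prepend(errp, "Reconnecting after error: ");
            error_report_err(*errp);
            *errp = NULL;
        }
        ret = demo_connect(errp);
    } while (ret == -EPROTO && retries--);

    return ret;
}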
index d28979efb8d..f139cd7cc9c 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -40,7 +40,7 @@ * Starting from the discard feature, we can use this array to properly * set the config size depending on the features enabled. */ -static VirtIOFeature feature_sizes[] = { +static const VirtIOFeature feature_sizes[] = { {.flags = 1ULL << VIRTIO_BLK_F_DISCARD, .end = endof(struct virtio_blk_config, discard_sector_alignment)}, {.flags = 1ULL << VIRTIO_BLK_F_WRITE_ZEROES, diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index 83754a43448..674953f1ade 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -728,6 +728,8 @@ static XenBlockDrive *xen_block_drive_create(const char *id, XenBlockDrive *drive = NULL; QDict *file_layer; QDict *driver_layer; + struct stat st; + int rc; if (params) { char **v = g_strsplit(params, ":", 2); @@ -761,7 +763,17 @@ static XenBlockDrive *xen_block_drive_create(const char *id, file_layer = qdict_new(); driver_layer = qdict_new(); - qdict_put_str(file_layer, "driver", "file"); + rc = stat(filename, &st); + if (rc) { + error_setg_errno(errp, errno, "Could not stat file '%s'", filename); + goto done; + } + if (S_ISBLK(st.st_mode)) { + qdict_put_str(file_layer, "driver", "host_device"); + } else { + qdict_put_str(file_layer, "driver", "file"); + } + qdict_put_str(file_layer, "filename", filename); g_free(filename); diff --git a/hw/char/Kconfig b/hw/char/Kconfig index 4cf36ac637b..6b6cf2fc1df 100644 --- a/hw/char/Kconfig +++ b/hw/char/Kconfig @@ -61,9 +61,13 @@ config AVR_USART config MCHP_PFSOC_MMUART bool + select SERIAL config SIFIVE_UART bool config GOLDFISH_TTY bool + +config SHAKTI_UART + bool diff --git a/hw/char/cadence_uart.c b/hw/char/cadence_uart.c index ceb677bc5a8..c069a30842e 100644 --- a/hw/char/cadence_uart.c +++ b/hw/char/cadence_uart.c @@ -235,8 +235,18 @@ static void uart_parameters_setup(CadenceUARTState *s) static int uart_can_receive(void *opaque) { CadenceUARTState *s = opaque; - int ret = MAX(CADENCE_UART_RX_FIFO_SIZE, CADENCE_UART_TX_FIFO_SIZE); - uint32_t ch_mode = s->r[R_MR] & UART_MR_CHMODE; + int ret; + uint32_t ch_mode; + + /* ignore characters when unclocked or in reset */ + if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: uart is unclocked or in reset\n", + __func__); + return 0; + } + + ret = MAX(CADENCE_UART_RX_FIFO_SIZE, CADENCE_UART_TX_FIFO_SIZE); + ch_mode = s->r[R_MR] & UART_MR_CHMODE; if (ch_mode == NORMAL_MODE || ch_mode == ECHO_MODE) { ret = MIN(ret, CADENCE_UART_RX_FIFO_SIZE - s->rx_count); @@ -288,7 +298,7 @@ static void uart_write_rx_fifo(void *opaque, const uint8_t *buf, int size) uart_update_status(s); } -static gboolean cadence_uart_xmit(GIOChannel *chan, GIOCondition cond, +static gboolean cadence_uart_xmit(void *do_not_use, GIOCondition cond, void *opaque) { CadenceUARTState *s = opaque; @@ -353,11 +363,6 @@ static void uart_receive(void *opaque, const uint8_t *buf, int size) CadenceUARTState *s = opaque; uint32_t ch_mode = s->r[R_MR] & UART_MR_CHMODE; - /* ignore characters when unclocked or in reset */ - if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) { - return; - } - if (ch_mode == NORMAL_MODE || ch_mode == ECHO_MODE) { uart_write_rx_fifo(opaque, buf, size); } @@ -373,6 +378,8 @@ static void uart_event(void *opaque, QEMUChrEvent event) /* ignore characters when unclocked or in reset */ if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: uart is unclocked or 
in reset\n", + __func__); return; } @@ -403,15 +410,22 @@ static void uart_read_rx_fifo(CadenceUARTState *s, uint32_t *c) uart_update_status(s); } -static void uart_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) +static MemTxResult uart_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size, MemTxAttrs attrs) { CadenceUARTState *s = opaque; + /* ignore access when unclocked or in reset */ + if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: uart is unclocked or in reset\n", + __func__); + return MEMTX_ERROR; + } + DB_PRINT(" offset:%x data:%08x\n", (unsigned)offset, (unsigned)value); offset >>= 2; if (offset >= CADENCE_UART_R_MAX) { - return; + return MEMTX_DECODE_ERROR; } switch (offset) { case R_IER: /* ier (wts imr) */ @@ -458,30 +472,41 @@ static void uart_write(void *opaque, hwaddr offset, break; } uart_update_status(s); + + return MEMTX_OK; } -static uint64_t uart_read(void *opaque, hwaddr offset, - unsigned size) +static MemTxResult uart_read(void *opaque, hwaddr offset, + uint64_t *value, unsigned size, MemTxAttrs attrs) { CadenceUARTState *s = opaque; uint32_t c = 0; + /* ignore access when unclocked or in reset */ + if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: uart is unclocked or in reset\n", + __func__); + return MEMTX_ERROR; + } + offset >>= 2; if (offset >= CADENCE_UART_R_MAX) { - c = 0; - } else if (offset == R_TX_RX) { + return MEMTX_DECODE_ERROR; + } + if (offset == R_TX_RX) { uart_read_rx_fifo(s, &c); } else { - c = s->r[offset]; + c = s->r[offset]; } DB_PRINT(" offset:%x data:%08x\n", (unsigned)(offset << 2), (unsigned)c); - return c; + *value = c; + return MEMTX_OK; } static const MemoryRegionOps uart_ops = { - .read = uart_read, - .write = uart_write, + .read_with_attrs = uart_read, + .write_with_attrs = uart_write, .endianness = DEVICE_NATIVE_ENDIAN, }; diff --git a/hw/char/cmsdk-apb-uart.c b/hw/char/cmsdk-apb-uart.c index ba2cbbee3d8..f8dc89ee3dc 100644 --- a/hw/char/cmsdk-apb-uart.c +++ b/hw/char/cmsdk-apb-uart.c @@ -191,7 +191,7 @@ static uint64_t uart_read(void *opaque, hwaddr offset, unsigned size) /* Try to send tx data, and arrange to be called back later if * we can't (ie the char backend is busy/blocking). 
*/ -static gboolean uart_transmit(GIOChannel *chan, GIOCondition cond, void *opaque) +static gboolean uart_transmit(void *do_not_use, GIOCondition cond, void *opaque) { CMSDKAPBUART *s = CMSDK_APB_UART(opaque); int ret; diff --git a/hw/char/escc.c b/hw/char/escc.c index 52e79782872..8755d8d34f3 100644 --- a/hw/char/escc.c +++ b/hw/char/escc.c @@ -86,12 +86,15 @@ #define W_INTR 1 #define INTR_INTALL 0x01 #define INTR_TXINT 0x02 +#define INTR_PAR_SPEC 0x04 #define INTR_RXMODEMSK 0x18 #define INTR_RXINT1ST 0x08 #define INTR_RXINTALL 0x10 +#define INTR_WTRQ_TXRX 0x20 #define W_IVEC 2 #define W_RXCTRL 3 #define RXCTRL_RXEN 0x01 +#define RXCTRL_HUNT 0x10 #define W_TXCTRL1 4 #define TXCTRL1_PAREN 0x01 #define TXCTRL1_PAREV 0x02 @@ -105,6 +108,7 @@ #define TXCTRL1_CLK64X 0xc0 #define TXCTRL1_CLKMSK 0xc0 #define W_TXCTRL2 5 +#define TXCTRL2_TXCRC 0x01 #define TXCTRL2_TXEN 0x08 #define TXCTRL2_BITMSK 0x60 #define TXCTRL2_5BITS 0x00 @@ -115,18 +119,27 @@ #define W_SYNC2 7 #define W_TXBUF 8 #define W_MINTR 9 +#define MINTR_VIS 0x01 +#define MINTR_NV 0x02 #define MINTR_STATUSHI 0x10 +#define MINTR_SOFTIACK 0x20 #define MINTR_RST_MASK 0xc0 #define MINTR_RST_B 0x40 #define MINTR_RST_A 0x80 #define MINTR_RST_ALL 0xc0 #define W_MISC1 10 +#define MISC1_ENC_MASK 0x60 #define W_CLOCK 11 #define CLOCK_TRXC 0x08 #define W_BRGLO 12 #define W_BRGHI 13 #define W_MISC2 14 -#define MISC2_PLLDIS 0x30 +#define MISC2_BRG_EN 0x01 +#define MISC2_BRG_SRC 0x02 +#define MISC2_LCL_LOOP 0x10 +#define MISC2_PLLCMD0 0x20 +#define MISC2_PLLCMD1 0x40 +#define MISC2_PLLCMD2 0x80 #define W_EXTINT 15 #define EXTINT_DCD 0x08 #define EXTINT_SYNCINT 0x10 @@ -170,6 +183,7 @@ #define R_RXBUF 8 #define R_RXCTRL 9 #define R_MISC 10 +#define MISC_2CLKMISS 0x40 #define R_MISC1 11 #define R_BRGLO 12 #define R_BRGHI 13 @@ -230,20 +244,23 @@ static uint32_t get_queue(void *opaque) q->count--; } trace_escc_get_queue(CHN_C(s), val); - if (q->count > 0) + if (q->count > 0) { serial_receive_byte(s, 0); + } return val; } static int escc_update_irq_chn(ESCCChannelState *s) { if ((((s->wregs[W_INTR] & INTR_TXINT) && (s->txint == 1)) || - // tx ints enabled, pending - ((((s->wregs[W_INTR] & INTR_RXMODEMSK) == INTR_RXINT1ST) || - ((s->wregs[W_INTR] & INTR_RXMODEMSK) == INTR_RXINTALL)) && - s->rxint == 1) || // rx ints enabled, pending - ((s->wregs[W_EXTINT] & EXTINT_BRKINT) && - (s->rregs[R_STATUS] & STATUS_BRK)))) { // break int e&p + /* tx ints enabled, pending */ + ((((s->wregs[W_INTR] & INTR_RXMODEMSK) == INTR_RXINT1ST) || + ((s->wregs[W_INTR] & INTR_RXMODEMSK) == INTR_RXINTALL)) && + s->rxint == 1) || + /* rx ints enabled, pending */ + ((s->wregs[W_EXTINT] & EXTINT_BRKINT) && + (s->rregs[R_STATUS] & STATUS_BRK)))) { + /* break int e&p */ return 1; } return 0; @@ -262,26 +279,7 @@ static void escc_update_irq(ESCCChannelState *s) static void escc_reset_chn(ESCCChannelState *s) { - int i; - s->reg = 0; - for (i = 0; i < ESCC_SERIAL_REGS; i++) { - s->rregs[i] = 0; - s->wregs[i] = 0; - } - s->wregs[W_TXCTRL1] = TXCTRL1_1STOP; // 1X divisor, 1 stop bit, no parity - s->wregs[W_MINTR] = MINTR_RST_ALL; - s->wregs[W_CLOCK] = CLOCK_TRXC; // Synch mode tx clock = TRxC - s->wregs[W_MISC2] = MISC2_PLLDIS; // PLL disabled - s->wregs[W_EXTINT] = EXTINT_DCD | EXTINT_SYNCINT | EXTINT_CTSINT | - EXTINT_TXUNDRN | EXTINT_BRKINT; // Enable most interrupts - if (s->disabled) - s->rregs[R_STATUS] = STATUS_TXEMPTY | STATUS_DCD | STATUS_SYNC | - STATUS_CTS | STATUS_TXUNDRN; - else - s->rregs[R_STATUS] = STATUS_TXEMPTY | STATUS_TXUNDRN; - s->rregs[R_SPEC] = SPEC_BITS8 | 
SPEC_ALLSENT; - s->rx = s->tx = 0; s->rxint = s->txint = 0; s->rxint_under_svc = s->txint_under_svc = 0; @@ -289,32 +287,110 @@ static void escc_reset_chn(ESCCChannelState *s) clear_queue(s); } +static void escc_soft_reset_chn(ESCCChannelState *s) +{ + escc_reset_chn(s); + + s->wregs[W_CMD] = 0; + s->wregs[W_INTR] &= INTR_PAR_SPEC | INTR_WTRQ_TXRX; + s->wregs[W_RXCTRL] &= ~RXCTRL_RXEN; + /* 1 stop bit */ + s->wregs[W_TXCTRL1] |= TXCTRL1_1STOP; + s->wregs[W_TXCTRL2] &= TXCTRL2_TXCRC | TXCTRL2_8BITS; + s->wregs[W_MINTR] &= ~MINTR_SOFTIACK; + s->wregs[W_MISC1] &= MISC1_ENC_MASK; + /* PLL disabled */ + s->wregs[W_MISC2] &= MISC2_BRG_EN | MISC2_BRG_SRC | + MISC2_PLLCMD1 | MISC2_PLLCMD2; + s->wregs[W_MISC2] |= MISC2_PLLCMD0; + /* Enable most interrupts */ + s->wregs[W_EXTINT] = EXTINT_DCD | EXTINT_SYNCINT | EXTINT_CTSINT | + EXTINT_TXUNDRN | EXTINT_BRKINT; + + s->rregs[R_STATUS] &= STATUS_DCD | STATUS_SYNC | STATUS_CTS | STATUS_BRK; + s->rregs[R_STATUS] |= STATUS_TXEMPTY | STATUS_TXUNDRN; + if (s->disabled) { + s->rregs[R_STATUS] |= STATUS_DCD | STATUS_SYNC | STATUS_CTS; + } + s->rregs[R_SPEC] &= SPEC_ALLSENT; + s->rregs[R_SPEC] |= SPEC_BITS8; + s->rregs[R_INTR] = 0; + s->rregs[R_MISC] &= MISC_2CLKMISS; +} + +static void escc_hard_reset_chn(ESCCChannelState *s) +{ + escc_soft_reset_chn(s); + + /* + * Hard reset is almost identical to soft reset above, except that the + * values of WR9 (W_MINTR), WR10 (W_MISC1), WR11 (W_CLOCK) and WR14 + * (W_MISC2) have extra bits forced to 0/1 + */ + s->wregs[W_MINTR] &= MINTR_VIS | MINTR_NV; + s->wregs[W_MINTR] |= MINTR_RST_B | MINTR_RST_A; + s->wregs[W_MISC1] = 0; + s->wregs[W_CLOCK] = CLOCK_TRXC; + s->wregs[W_MISC2] &= MISC2_PLLCMD1 | MISC2_PLLCMD2; + s->wregs[W_MISC2] |= MISC2_LCL_LOOP | MISC2_PLLCMD0; +} + static void escc_reset(DeviceState *d) { ESCCState *s = ESCC(d); + int i, j; + + for (i = 0; i < 2; i++) { + ESCCChannelState *cs = &s->chn[i]; - escc_reset_chn(&s->chn[0]); - escc_reset_chn(&s->chn[1]); + /* + * According to the ESCC datasheet "Miscellaneous Questions" section + * on page 384, the values of the ESCC registers are not guaranteed on + * power-on until an explicit hardware or software reset has been + * issued. For now we zero the registers so that a device reset always + * returns the emulated device to a fixed state. + */ + for (j = 0; j < ESCC_SERIAL_REGS; j++) { + cs->rregs[j] = 0; + cs->wregs[j] = 0; + } + + /* + * ...but there is an exception. The "Transmit Interrupts and Transmit + * Buffer Empty Bit" section on page 50 of the ESCC datasheet says of + * the STATUS_TXEMPTY bit in R_STATUS: "After a hardware reset + * (including a hardware reset by software), or a channel reset, this + * bit is set to 1". The Sun PROM checks this bit early on startup and + * gets stuck in an infinite loop if it is not set. 
+ */ + cs->rregs[R_STATUS] |= STATUS_TXEMPTY; + + escc_reset_chn(cs); + } } static inline void set_rxint(ESCCChannelState *s) { s->rxint = 1; - /* XXX: missing daisy chainnig: escc_chn_b rx should have a lower priority - than chn_a rx/tx/special_condition service*/ + /* + * XXX: missing daisy chaining: escc_chn_b rx should have a lower priority + * than chn_a rx/tx/special_condition service + */ s->rxint_under_svc = 1; if (s->chn == escc_chn_a) { s->rregs[R_INTR] |= INTR_RXINTA; - if (s->wregs[W_MINTR] & MINTR_STATUSHI) + if (s->wregs[W_MINTR] & MINTR_STATUSHI) { s->otherchn->rregs[R_IVEC] = IVEC_HIRXINTA; - else + } else { s->otherchn->rregs[R_IVEC] = IVEC_LORXINTA; + } } else { s->otherchn->rregs[R_INTR] |= INTR_RXINTB; - if (s->wregs[W_MINTR] & MINTR_STATUSHI) + if (s->wregs[W_MINTR] & MINTR_STATUSHI) { s->rregs[R_IVEC] = IVEC_HIRXINTB; - else + } else { s->rregs[R_IVEC] = IVEC_LORXINTB; + } } escc_update_irq(s); } @@ -328,17 +404,18 @@ static inline void set_txint(ESCCChannelState *s) if (s->wregs[W_INTR] & INTR_TXINT) { s->rregs[R_INTR] |= INTR_TXINTA; } - if (s->wregs[W_MINTR] & MINTR_STATUSHI) + if (s->wregs[W_MINTR] & MINTR_STATUSHI) { s->otherchn->rregs[R_IVEC] = IVEC_HITXINTA; - else + } else { s->otherchn->rregs[R_IVEC] = IVEC_LOTXINTA; + } } else { s->rregs[R_IVEC] = IVEC_TXINTB; if (s->wregs[W_INTR] & INTR_TXINT) { s->otherchn->rregs[R_INTR] |= INTR_TXINTB; } } - escc_update_irq(s); + escc_update_irq(s); } } @@ -347,20 +424,23 @@ static inline void clr_rxint(ESCCChannelState *s) s->rxint = 0; s->rxint_under_svc = 0; if (s->chn == escc_chn_a) { - if (s->wregs[W_MINTR] & MINTR_STATUSHI) + if (s->wregs[W_MINTR] & MINTR_STATUSHI) { s->otherchn->rregs[R_IVEC] = IVEC_HINOINT; - else + } else { s->otherchn->rregs[R_IVEC] = IVEC_LONOINT; + } s->rregs[R_INTR] &= ~INTR_RXINTA; } else { - if (s->wregs[W_MINTR] & MINTR_STATUSHI) + if (s->wregs[W_MINTR] & MINTR_STATUSHI) { s->rregs[R_IVEC] = IVEC_HINOINT; - else + } else { s->rregs[R_IVEC] = IVEC_LONOINT; + } s->otherchn->rregs[R_INTR] &= ~INTR_RXINTB; } - if (s->txint) + if (s->txint) { set_txint(s); + } escc_update_irq(s); } @@ -369,21 +449,24 @@ static inline void clr_txint(ESCCChannelState *s) s->txint = 0; s->txint_under_svc = 0; if (s->chn == escc_chn_a) { - if (s->wregs[W_MINTR] & MINTR_STATUSHI) + if (s->wregs[W_MINTR] & MINTR_STATUSHI) { s->otherchn->rregs[R_IVEC] = IVEC_HINOINT; - else + } else { s->otherchn->rregs[R_IVEC] = IVEC_LONOINT; + } s->rregs[R_INTR] &= ~INTR_TXINTA; } else { s->otherchn->rregs[R_INTR] &= ~INTR_TXINTB; - if (s->wregs[W_MINTR] & MINTR_STATUSHI) + if (s->wregs[W_MINTR] & MINTR_STATUSHI) { s->rregs[R_IVEC] = IVEC_HINOINT; - else + } else { s->rregs[R_IVEC] = IVEC_LONOINT; + } s->otherchn->rregs[R_INTR] &= ~INTR_TXINTB; } - if (s->rxint) + if (s->rxint) { set_rxint(s); + } escc_update_irq(s); } @@ -392,21 +475,24 @@ static void escc_update_parameters(ESCCChannelState *s) int speed, parity, data_bits, stop_bits; QEMUSerialSetParams ssp; - if (!qemu_chr_fe_backend_connected(&s->chr) || s->type != escc_serial) + if (!qemu_chr_fe_backend_connected(&s->chr) || s->type != escc_serial) { return; + } if (s->wregs[W_TXCTRL1] & TXCTRL1_PAREN) { - if (s->wregs[W_TXCTRL1] & TXCTRL1_PAREV) + if (s->wregs[W_TXCTRL1] & TXCTRL1_PAREV) { parity = 'E'; - else + } else { parity = 'O'; + } } else { parity = 'N'; } - if ((s->wregs[W_TXCTRL1] & TXCTRL1_STPMSK) == TXCTRL1_2STOP) + if ((s->wregs[W_TXCTRL1] & TXCTRL1_STPMSK) == TXCTRL1_2STOP) { stop_bits = 2; - else + } else { stop_bits = 1; + } switch (s->wregs[W_TXCTRL2] & 
TXCTRL2_BITMSK) { case TXCTRL2_5BITS: data_bits = 5; @@ -487,13 +573,33 @@ static void escc_mem_write(void *opaque, hwaddr addr, break; } break; - case W_INTR ... W_RXCTRL: + case W_RXCTRL: + s->wregs[s->reg] = val; + if (val & RXCTRL_HUNT) { + s->rregs[R_STATUS] |= STATUS_SYNC; + } + break; + case W_INTR ... W_IVEC: case W_SYNC1 ... W_TXBUF: case W_MISC1 ... W_CLOCK: case W_MISC2 ... W_EXTINT: s->wregs[s->reg] = val; break; case W_TXCTRL1: + s->wregs[s->reg] = val; + /* + * The ESCC datasheet states that SPEC_ALLSENT is always set in + * sync mode, and set in async mode when all characters have + * cleared the transmitter. Since writes to SERIAL_DATA use the + * blocking qemu_chr_fe_write_all() function to write each + * character, the guest can never see the state when async data + * is in the process of being transmitted so we can set this bit + * unconditionally regardless of the state of the W_TXCTRL1 mode + * bits. + */ + s->rregs[R_SPEC] |= SPEC_ALLSENT; + escc_update_parameters(s); + break; case W_TXCTRL2: s->wregs[s->reg] = val; escc_update_parameters(s); @@ -510,23 +616,28 @@ static void escc_mem_write(void *opaque, hwaddr addr, default: break; case MINTR_RST_B: - escc_reset_chn(&serial->chn[0]); + trace_escc_soft_reset_chn(CHN_C(&serial->chn[0])); + escc_soft_reset_chn(&serial->chn[0]); return; case MINTR_RST_A: - escc_reset_chn(&serial->chn[1]); + trace_escc_soft_reset_chn(CHN_C(&serial->chn[1])); + escc_soft_reset_chn(&serial->chn[1]); return; case MINTR_RST_ALL: - escc_reset(DEVICE(serial)); + trace_escc_hard_reset(); + escc_hard_reset_chn(&serial->chn[0]); + escc_hard_reset_chn(&serial->chn[1]); return; } break; default: break; } - if (s->reg == 0) + if (s->reg == 0) { s->reg = newreg; - else + } else { s->reg = 0; + } break; case SERIAL_DATA: trace_escc_mem_writeb_data(CHN_C(s), val); @@ -538,17 +649,19 @@ static void escc_mem_write(void *opaque, hwaddr addr, s->txint = 0; escc_update_irq(s); s->tx = val; - if (s->wregs[W_TXCTRL2] & TXCTRL2_TXEN) { // tx enabled + if (s->wregs[W_TXCTRL2] & TXCTRL2_TXEN) { /* tx enabled */ if (qemu_chr_fe_backend_connected(&s->chr)) { - /* XXX this blocks entire thread. Rewrite to use - * qemu_chr_fe_write and background I/O callbacks */ + /* + * XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks + */ qemu_chr_fe_write_all(&s->chr, &s->tx, 1); } else if (s->type == escc_kbd && !s->disabled) { handle_kbd_command(s, val); } } - s->rregs[R_STATUS] |= STATUS_TXEMPTY; // Tx buffer empty - s->rregs[R_SPEC] |= SPEC_ALLSENT; // All sent + s->rregs[R_STATUS] |= STATUS_TXEMPTY; /* Tx buffer empty */ + s->rregs[R_SPEC] |= SPEC_ALLSENT; /* All sent */ set_txint(s); break; default: @@ -606,12 +719,13 @@ static int serial_can_receive(void *opaque) ESCCChannelState *s = opaque; int ret; - if (((s->wregs[W_RXCTRL] & RXCTRL_RXEN) == 0) // Rx not enabled - || ((s->rregs[R_STATUS] & STATUS_RXAV) == STATUS_RXAV)) - // char already available + if (((s->wregs[W_RXCTRL] & RXCTRL_RXEN) == 0) /* Rx not enabled */ + || ((s->rregs[R_STATUS] & STATUS_RXAV) == STATUS_RXAV)) { + /* char already available */ ret = 0; - else + } else { ret = 1; + } return ret; } @@ -638,12 +752,13 @@ static void serial_receive1(void *opaque, const uint8_t *buf, int size) static void serial_event(void *opaque, QEMUChrEvent event) { ESCCChannelState *s = opaque; - if (event == CHR_EVENT_BREAK) + if (event == CHR_EVENT_BREAK) { serial_receive_break(s); + } } static const VMStateDescription vmstate_escc_chn = { - .name ="escc_chn", + .name = "escc_chn", .version_id = 2, .minimum_version_id = 1, .fields = (VMStateField[]) { @@ -662,7 +777,7 @@ static const VMStateDescription vmstate_escc_chn = { }; static const VMStateDescription vmstate_escc = { - .name ="escc", + .name = "escc", .version_id = 2, .minimum_version_id = 1, .fields = (VMStateField[]) { @@ -734,21 +849,21 @@ static QemuInputHandler sunkbd_handler = { static void handle_kbd_command(ESCCChannelState *s, int val) { trace_escc_kbd_command(val); - if (s->led_mode) { // Ignore led byte + if (s->led_mode) { /* Ignore led byte */ s->led_mode = 0; return; } switch (val) { - case 1: // Reset, return type code + case 1: /* Reset, return type code */ clear_queue(s); put_queue(s, 0xff); - put_queue(s, 4); // Type 4 + put_queue(s, 4); /* Type 4 */ put_queue(s, 0x7f); break; - case 0xe: // Set leds + case 0xe: /* Set leds */ s->led_mode = 1; break; - case 7: // Query layout + case 7: /* Query layout */ case 0xf: clear_queue(s); put_queue(s, 0xfe); @@ -768,34 +883,39 @@ static void sunmouse_event(void *opaque, trace_escc_sunmouse_event(dx, dy, buttons_state); ch = 0x80 | 0x7; /* protocol start byte, no buttons pressed */ - if (buttons_state & MOUSE_EVENT_LBUTTON) + if (buttons_state & MOUSE_EVENT_LBUTTON) { ch ^= 0x4; - if (buttons_state & MOUSE_EVENT_MBUTTON) + } + if (buttons_state & MOUSE_EVENT_MBUTTON) { ch ^= 0x2; - if (buttons_state & MOUSE_EVENT_RBUTTON) + } + if (buttons_state & MOUSE_EVENT_RBUTTON) { ch ^= 0x1; + } put_queue(s, ch); ch = dx; - if (ch > 127) + if (ch > 127) { ch = 127; - else if (ch < -127) + } else if (ch < -127) { ch = -127; + } put_queue(s, ch & 0xff); ch = -dy; - if (ch > 127) + if (ch > 127) { ch = 127; - else if (ch < -127) + } else if (ch < -127) { ch = -127; + } put_queue(s, ch & 0xff); - // MSC protocol specify two extra motion bytes + /* MSC protocol specifies two extra motion bytes */ put_queue(s, 0); put_queue(s, 0); diff --git a/hw/char/goldfish_tty.c b/hw/char/goldfish_tty.c index 8365a187614..20b77885c18 100644 --- a/hw/char/goldfish_tty.c +++ b/hw/char/goldfish_tty.c @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * Goldfish TTY * diff --git a/hw/char/ibex_uart.c b/hw/char/ibex_uart.c index 73b8f2e45be..e58181fcf42 
100644 --- a/hw/char/ibex_uart.c +++ b/hw/char/ibex_uart.c @@ -35,6 +35,44 @@ #include "qemu/log.h" #include "qemu/module.h" +REG32(INTR_STATE, 0x00) + FIELD(INTR_STATE, TX_WATERMARK, 0, 1) + FIELD(INTR_STATE, RX_WATERMARK, 1, 1) + FIELD(INTR_STATE, TX_EMPTY, 2, 1) + FIELD(INTR_STATE, RX_OVERFLOW, 3, 1) +REG32(INTR_ENABLE, 0x04) +REG32(INTR_TEST, 0x08) +REG32(ALERT_TEST, 0x0C) +REG32(CTRL, 0x10) + FIELD(CTRL, TX_ENABLE, 0, 1) + FIELD(CTRL, RX_ENABLE, 1, 1) + FIELD(CTRL, NF, 2, 1) + FIELD(CTRL, SLPBK, 4, 1) + FIELD(CTRL, LLPBK, 5, 1) + FIELD(CTRL, PARITY_EN, 6, 1) + FIELD(CTRL, PARITY_ODD, 7, 1) + FIELD(CTRL, RXBLVL, 8, 2) + FIELD(CTRL, NCO, 16, 16) +REG32(STATUS, 0x14) + FIELD(STATUS, TXFULL, 0, 1) + FIELD(STATUS, RXFULL, 1, 1) + FIELD(STATUS, TXEMPTY, 2, 1) + FIELD(STATUS, RXIDLE, 4, 1) + FIELD(STATUS, RXEMPTY, 5, 1) +REG32(RDATA, 0x18) +REG32(WDATA, 0x1C) +REG32(FIFO_CTRL, 0x20) + FIELD(FIFO_CTRL, RXRST, 0, 1) + FIELD(FIFO_CTRL, TXRST, 1, 1) + FIELD(FIFO_CTRL, RXILVL, 2, 3) + FIELD(FIFO_CTRL, TXILVL, 5, 2) +REG32(FIFO_STATUS, 0x24) + FIELD(FIFO_STATUS, TXLVL, 0, 5) + FIELD(FIFO_STATUS, RXLVL, 16, 5) +REG32(OVRD, 0x28) +REG32(VAL, 0x2C) +REG32(TIMEOUT_CTRL, 0x30) + static void ibex_uart_update_irqs(IbexUartState *s) { if (s->uart_intr_state & s->uart_intr_enable & R_INTR_STATE_TX_WATERMARK_MASK) { @@ -97,7 +135,7 @@ static void ibex_uart_receive(void *opaque, const uint8_t *buf, int size) ibex_uart_update_irqs(s); } -static gboolean ibex_uart_xmit(GIOChannel *chan, GIOCondition cond, +static gboolean ibex_uart_xmit(void *do_not_use, GIOCondition cond, void *opaque) { IbexUartState *s = opaque; @@ -512,6 +550,7 @@ static void ibex_uart_class_init(ObjectClass *klass, void *data) dc->realize = ibex_uart_realize; dc->vmsd = &vmstate_ibex_uart; device_class_set_props(dc, ibex_uart_properties); + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); } static const TypeInfo ibex_uart_info = { diff --git a/hw/char/lm32_juart.c b/hw/char/lm32_juart.c deleted file mode 100644 index ce302796506..00000000000 --- a/hw/char/lm32_juart.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * LatticeMico32 JTAG UART model. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- */ - -#include "qemu/osdep.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "qemu/module.h" -#include "trace.h" -#include "chardev/char-fe.h" - -#include "hw/char/lm32_juart.h" -#include "hw/qdev-properties.h" -#include "hw/qdev-properties-system.h" -#include "qom/object.h" - -enum { - LM32_JUART_MIN_SAVE_VERSION = 0, - LM32_JUART_CURRENT_SAVE_VERSION = 0, - LM32_JUART_MAX_SAVE_VERSION = 0, -}; - -enum { - JTX_FULL = (1<<8), -}; - -enum { - JRX_FULL = (1<<8), -}; - -OBJECT_DECLARE_SIMPLE_TYPE(LM32JuartState, LM32_JUART) - -struct LM32JuartState { - SysBusDevice parent_obj; - - CharBackend chr; - - uint32_t jtx; - uint32_t jrx; -}; - -uint32_t lm32_juart_get_jtx(DeviceState *d) -{ - LM32JuartState *s = LM32_JUART(d); - - trace_lm32_juart_get_jtx(s->jtx); - return s->jtx; -} - -uint32_t lm32_juart_get_jrx(DeviceState *d) -{ - LM32JuartState *s = LM32_JUART(d); - - trace_lm32_juart_get_jrx(s->jrx); - return s->jrx; -} - -void lm32_juart_set_jtx(DeviceState *d, uint32_t jtx) -{ - LM32JuartState *s = LM32_JUART(d); - unsigned char ch = jtx & 0xff; - - trace_lm32_juart_set_jtx(s->jtx); - - s->jtx = jtx; - /* XXX this blocks entire thread. Rewrite to use - * qemu_chr_fe_write and background I/O callbacks */ - qemu_chr_fe_write_all(&s->chr, &ch, 1); -} - -void lm32_juart_set_jrx(DeviceState *d, uint32_t jtx) -{ - LM32JuartState *s = LM32_JUART(d); - - trace_lm32_juart_set_jrx(s->jrx); - s->jrx &= ~JRX_FULL; -} - -static void juart_rx(void *opaque, const uint8_t *buf, int size) -{ - LM32JuartState *s = opaque; - - s->jrx = *buf | JRX_FULL; -} - -static int juart_can_rx(void *opaque) -{ - LM32JuartState *s = opaque; - - return !(s->jrx & JRX_FULL); -} - -static void juart_event(void *opaque, QEMUChrEvent event) -{ -} - -static void juart_reset(DeviceState *d) -{ - LM32JuartState *s = LM32_JUART(d); - - s->jtx = 0; - s->jrx = 0; -} - -static void lm32_juart_realize(DeviceState *dev, Error **errp) -{ - LM32JuartState *s = LM32_JUART(dev); - - qemu_chr_fe_set_handlers(&s->chr, juart_can_rx, juart_rx, - juart_event, NULL, s, NULL, true); -} - -static const VMStateDescription vmstate_lm32_juart = { - .name = "lm32-juart", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32(jtx, LM32JuartState), - VMSTATE_UINT32(jrx, LM32JuartState), - VMSTATE_END_OF_LIST() - } -}; - -static Property lm32_juart_properties[] = { - DEFINE_PROP_CHR("chardev", LM32JuartState, chr), - DEFINE_PROP_END_OF_LIST(), -}; - -static void lm32_juart_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->reset = juart_reset; - dc->vmsd = &vmstate_lm32_juart; - device_class_set_props(dc, lm32_juart_properties); - dc->realize = lm32_juart_realize; -} - -static const TypeInfo lm32_juart_info = { - .name = TYPE_LM32_JUART, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(LM32JuartState), - .class_init = lm32_juart_class_init, -}; - -static void lm32_juart_register_types(void) -{ - type_register_static(&lm32_juart_info); -} - -type_init(lm32_juart_register_types) diff --git a/hw/char/lm32_uart.c b/hw/char/lm32_uart.c deleted file mode 100644 index d8e03313111..00000000000 --- a/hw/char/lm32_uart.c +++ /dev/null @@ -1,314 +0,0 @@ -/* - * QEMU model of the LatticeMico32 UART block. 
- * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * http://www.latticesemi.com/documents/mico32uart.pdf - */ - - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/qdev-properties.h" -#include "hw/qdev-properties-system.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "chardev/char-fe.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -enum { - R_RXTX = 0, - R_IER, - R_IIR, - R_LCR, - R_MCR, - R_LSR, - R_MSR, - R_DIV, - R_MAX -}; - -enum { - IER_RBRI = (1<<0), - IER_THRI = (1<<1), - IER_RLSI = (1<<2), - IER_MSI = (1<<3), -}; - -enum { - IIR_STAT = (1<<0), - IIR_ID0 = (1<<1), - IIR_ID1 = (1<<2), -}; - -enum { - LCR_WLS0 = (1<<0), - LCR_WLS1 = (1<<1), - LCR_STB = (1<<2), - LCR_PEN = (1<<3), - LCR_EPS = (1<<4), - LCR_SP = (1<<5), - LCR_SB = (1<<6), -}; - -enum { - MCR_DTR = (1<<0), - MCR_RTS = (1<<1), -}; - -enum { - LSR_DR = (1<<0), - LSR_OE = (1<<1), - LSR_PE = (1<<2), - LSR_FE = (1<<3), - LSR_BI = (1<<4), - LSR_THRE = (1<<5), - LSR_TEMT = (1<<6), -}; - -enum { - MSR_DCTS = (1<<0), - MSR_DDSR = (1<<1), - MSR_TERI = (1<<2), - MSR_DDCD = (1<<3), - MSR_CTS = (1<<4), - MSR_DSR = (1<<5), - MSR_RI = (1<<6), - MSR_DCD = (1<<7), -}; - -#define TYPE_LM32_UART "lm32-uart" -OBJECT_DECLARE_SIMPLE_TYPE(LM32UartState, LM32_UART) - -struct LM32UartState { - SysBusDevice parent_obj; - - MemoryRegion iomem; - CharBackend chr; - qemu_irq irq; - - uint32_t regs[R_MAX]; -}; - -static void uart_update_irq(LM32UartState *s) -{ - unsigned int irq; - - if ((s->regs[R_LSR] & (LSR_OE | LSR_PE | LSR_FE | LSR_BI)) - && (s->regs[R_IER] & IER_RLSI)) { - irq = 1; - s->regs[R_IIR] = IIR_ID1 | IIR_ID0; - } else if ((s->regs[R_LSR] & LSR_DR) && (s->regs[R_IER] & IER_RBRI)) { - irq = 1; - s->regs[R_IIR] = IIR_ID1; - } else if ((s->regs[R_LSR] & LSR_THRE) && (s->regs[R_IER] & IER_THRI)) { - irq = 1; - s->regs[R_IIR] = IIR_ID0; - } else if ((s->regs[R_MSR] & 0x0f) && (s->regs[R_IER] & IER_MSI)) { - irq = 1; - s->regs[R_IIR] = 0; - } else { - irq = 0; - s->regs[R_IIR] = IIR_STAT; - } - - trace_lm32_uart_irq_state(irq); - qemu_set_irq(s->irq, irq); -} - -static uint64_t uart_read(void *opaque, hwaddr addr, - unsigned size) -{ - LM32UartState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_RXTX: - r = s->regs[R_RXTX]; - s->regs[R_LSR] &= ~LSR_DR; - uart_update_irq(s); - qemu_chr_fe_accept_input(&s->chr); - break; - case R_IIR: - case R_LSR: - case R_MSR: - r = s->regs[addr]; - break; - case R_IER: - case R_LCR: - case R_MCR: - case R_DIV: - error_report("lm32_uart: read access to write only register 0x" - TARGET_FMT_plx, addr << 2); - break; - default: - error_report("lm32_uart: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_lm32_uart_memory_read(addr << 2, r); - return r; -} - -static void uart_write(void 
*opaque, hwaddr addr, - uint64_t value, unsigned size) -{ - LM32UartState *s = opaque; - unsigned char ch = value; - - trace_lm32_uart_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_RXTX: - /* XXX this blocks entire thread. Rewrite to use - * qemu_chr_fe_write and background I/O callbacks */ - qemu_chr_fe_write_all(&s->chr, &ch, 1); - break; - case R_IER: - case R_LCR: - case R_MCR: - case R_DIV: - s->regs[addr] = value; - break; - case R_IIR: - case R_LSR: - case R_MSR: - error_report("lm32_uart: write access to read only register 0x" - TARGET_FMT_plx, addr << 2); - break; - default: - error_report("lm32_uart: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - uart_update_irq(s); -} - -static const MemoryRegionOps uart_ops = { - .read = uart_read, - .write = uart_write, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, -}; - -static void uart_rx(void *opaque, const uint8_t *buf, int size) -{ - LM32UartState *s = opaque; - - if (s->regs[R_LSR] & LSR_DR) { - s->regs[R_LSR] |= LSR_OE; - } - - s->regs[R_LSR] |= LSR_DR; - s->regs[R_RXTX] = *buf; - - uart_update_irq(s); -} - -static int uart_can_rx(void *opaque) -{ - LM32UartState *s = opaque; - - return !(s->regs[R_LSR] & LSR_DR); -} - -static void uart_event(void *opaque, QEMUChrEvent event) -{ -} - -static void uart_reset(DeviceState *d) -{ - LM32UartState *s = LM32_UART(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - - /* defaults */ - s->regs[R_LSR] = LSR_THRE | LSR_TEMT; -} - -static void lm32_uart_init(Object *obj) -{ - LM32UartState *s = LM32_UART(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); - - sysbus_init_irq(dev, &s->irq); - - memory_region_init_io(&s->iomem, obj, &uart_ops, s, - "uart", R_MAX * 4); - sysbus_init_mmio(dev, &s->iomem); -} - -static void lm32_uart_realize(DeviceState *dev, Error **errp) -{ - LM32UartState *s = LM32_UART(dev); - - qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, - uart_event, NULL, s, NULL, true); -} - -static const VMStateDescription vmstate_lm32_uart = { - .name = "lm32-uart", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, LM32UartState, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static Property lm32_uart_properties[] = { - DEFINE_PROP_CHR("chardev", LM32UartState, chr), - DEFINE_PROP_END_OF_LIST(), -}; - -static void lm32_uart_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->reset = uart_reset; - dc->vmsd = &vmstate_lm32_uart; - device_class_set_props(dc, lm32_uart_properties); - dc->realize = lm32_uart_realize; -} - -static const TypeInfo lm32_uart_info = { - .name = TYPE_LM32_UART, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(LM32UartState), - .instance_init = lm32_uart_init, - .class_init = lm32_uart_class_init, -}; - -static void lm32_uart_register_types(void) -{ - type_register_static(&lm32_uart_info); -} - -type_init(lm32_uart_register_types) diff --git a/hw/char/mchp_pfsoc_mmuart.c b/hw/char/mchp_pfsoc_mmuart.c index 8a002b0a19f..22f3e78eb9e 100644 --- a/hw/char/mchp_pfsoc_mmuart.c +++ b/hw/char/mchp_pfsoc_mmuart.c @@ -22,21 +22,25 @@ #include "qemu/osdep.h" #include "qemu/log.h" -#include "chardev/char.h" -#include "exec/address-spaces.h" +#include "qapi/error.h" +#include "migration/vmstate.h" #include "hw/char/mchp_pfsoc_mmuart.h" +#include "hw/qdev-properties.h" + +#define REGS_OFFSET 0x20 static uint64_t mchp_pfsoc_mmuart_read(void 
*opaque, hwaddr addr, unsigned size) { MchpPfSoCMMUartState *s = opaque; - if (addr >= MCHP_PFSOC_MMUART_REG_SIZE) { + addr >>= 2; + if (addr >= MCHP_PFSOC_MMUART_REG_COUNT) { qemu_log_mask(LOG_GUEST_ERROR, "%s: read: addr=0x%" HWADDR_PRIx "\n", - __func__, addr); + __func__, addr << 2); return 0; } - return s->reg[addr / sizeof(uint32_t)]; + return s->reg[addr]; } static void mchp_pfsoc_mmuart_write(void *opaque, hwaddr addr, @@ -45,13 +49,14 @@ static void mchp_pfsoc_mmuart_write(void *opaque, hwaddr addr, MchpPfSoCMMUartState *s = opaque; uint32_t val32 = (uint32_t)value; - if (addr >= MCHP_PFSOC_MMUART_REG_SIZE) { + addr >>= 2; + if (addr >= MCHP_PFSOC_MMUART_REG_COUNT) { qemu_log_mask(LOG_GUEST_ERROR, "%s: bad write: addr=0x%" HWADDR_PRIx - " v=0x%x\n", __func__, addr, val32); + " v=0x%x\n", __func__, addr << 2, val32); return; } - s->reg[addr / sizeof(uint32_t)] = val32; + s->reg[addr] = val32; } static const MemoryRegionOps mchp_pfsoc_mmuart_ops = { @@ -64,23 +69,95 @@ static const MemoryRegionOps mchp_pfsoc_mmuart_ops = { }, }; -MchpPfSoCMMUartState *mchp_pfsoc_mmuart_create(MemoryRegion *sysmem, - hwaddr base, qemu_irq irq, Chardev *chr) +static void mchp_pfsoc_mmuart_reset(DeviceState *dev) +{ + MchpPfSoCMMUartState *s = MCHP_PFSOC_UART(dev); + + memset(s->reg, 0, sizeof(s->reg)); + device_cold_reset(DEVICE(&s->serial_mm)); +} + +static void mchp_pfsoc_mmuart_init(Object *obj) +{ + MchpPfSoCMMUartState *s = MCHP_PFSOC_UART(obj); + + object_initialize_child(obj, "serial-mm", &s->serial_mm, TYPE_SERIAL_MM); + object_property_add_alias(obj, "chardev", OBJECT(&s->serial_mm), "chardev"); +} + +static void mchp_pfsoc_mmuart_realize(DeviceState *dev, Error **errp) { - MchpPfSoCMMUartState *s; + MchpPfSoCMMUartState *s = MCHP_PFSOC_UART(dev); + + qdev_prop_set_uint8(DEVICE(&s->serial_mm), "regshift", 2); + qdev_prop_set_uint32(DEVICE(&s->serial_mm), "baudbase", 399193); + qdev_prop_set_uint8(DEVICE(&s->serial_mm), "endianness", + DEVICE_LITTLE_ENDIAN); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->serial_mm), errp)) { + return; + } + + sysbus_pass_irq(SYS_BUS_DEVICE(dev), SYS_BUS_DEVICE(&s->serial_mm)); - s = g_new0(MchpPfSoCMMUartState, 1); + memory_region_init(&s->container, OBJECT(s), "mchp.pfsoc.mmuart", 0x1000); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container); - memory_region_init_io(&s->iomem, NULL, &mchp_pfsoc_mmuart_ops, s, - "mchp.pfsoc.mmuart", 0x1000); + memory_region_add_subregion(&s->container, 0, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->serial_mm), 0)); + + memory_region_init_io(&s->iomem, OBJECT(s), &mchp_pfsoc_mmuart_ops, s, + "mchp.pfsoc.mmuart.regs", 0x1000 - REGS_OFFSET); + memory_region_add_subregion(&s->container, REGS_OFFSET, &s->iomem); +} - s->base = base; - s->irq = irq; +static const VMStateDescription mchp_pfsoc_mmuart_vmstate = { + .name = "mchp.pfsoc.uart", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(reg, MchpPfSoCMMUartState, + MCHP_PFSOC_MMUART_REG_COUNT), + VMSTATE_END_OF_LIST() + } +}; + +static void mchp_pfsoc_mmuart_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = mchp_pfsoc_mmuart_realize; + dc->reset = mchp_pfsoc_mmuart_reset; + dc->vmsd = &mchp_pfsoc_mmuart_vmstate; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); +} + +static const TypeInfo mchp_pfsoc_mmuart_info = { + .name = TYPE_MCHP_PFSOC_UART, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(MchpPfSoCMMUartState), + .instance_init = mchp_pfsoc_mmuart_init, + .class_init = 
mchp_pfsoc_mmuart_class_init, +}; + +static void mchp_pfsoc_mmuart_register_types(void) +{ + type_register_static(&mchp_pfsoc_mmuart_info); +} + +type_init(mchp_pfsoc_mmuart_register_types) + +MchpPfSoCMMUartState *mchp_pfsoc_mmuart_create(MemoryRegion *sysmem, + hwaddr base, + qemu_irq irq, Chardev *chr) +{ + DeviceState *dev = qdev_new(TYPE_MCHP_PFSOC_UART); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - s->serial = serial_mm_init(sysmem, base, 2, irq, 399193, chr, - DEVICE_LITTLE_ENDIAN); + qdev_prop_set_chr(dev, "chardev", chr); + sysbus_realize(sbd, &error_fatal); - memory_region_add_subregion(sysmem, base + 0x20, &s->iomem); + memory_region_add_subregion(sysmem, base, sysbus_mmio_get_region(sbd, 0)); + sysbus_connect_irq(sbd, 0, irq); - return s; + return MCHP_PFSOC_UART(dev); } diff --git a/hw/char/meson.build b/hw/char/meson.build index da5bb8b762e..7b594f51b86 100644 --- a/hw/char/meson.build +++ b/hw/char/meson.build @@ -8,9 +8,6 @@ softmmu_ss.add(when: 'CONFIG_IMX', if_true: files('imx_serial.c')) softmmu_ss.add(when: 'CONFIG_IPACK', if_true: files('ipoctal232.c')) softmmu_ss.add(when: 'CONFIG_ISA_BUS', if_true: files('parallel-isa.c')) softmmu_ss.add(when: 'CONFIG_ISA_DEBUG', if_true: files('debugcon.c')) -softmmu_ss.add(when: 'CONFIG_LM32_DEVICES', if_true: files('lm32_juart.c')) -softmmu_ss.add(when: 'CONFIG_LM32_DEVICES', if_true: files('lm32_uart.c')) -softmmu_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-uart.c')) softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_uart.c')) softmmu_ss.add(when: 'CONFIG_PARALLEL', if_true: files('parallel.c')) softmmu_ss.add(when: 'CONFIG_PL011', if_true: files('pl011.c')) @@ -19,6 +16,7 @@ softmmu_ss.add(when: 'CONFIG_SERIAL', if_true: files('serial.c')) softmmu_ss.add(when: 'CONFIG_SERIAL_ISA', if_true: files('serial-isa.c')) softmmu_ss.add(when: 'CONFIG_SERIAL_PCI', if_true: files('serial-pci.c')) softmmu_ss.add(when: 'CONFIG_SERIAL_PCI_MULTI', if_true: files('serial-pci-multi.c')) +softmmu_ss.add(when: 'CONFIG_SHAKTI_UART', if_true: files('shakti_uart.c')) softmmu_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-console.c')) softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen_console.c')) softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_uartlite.c')) diff --git a/hw/char/milkymist-uart.c b/hw/char/milkymist-uart.c deleted file mode 100644 index cb1b3470ad5..00000000000 --- a/hw/char/milkymist-uart.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * QEMU model of the Milkymist UART block. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/uart.pdf - */ - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/qdev-properties.h" -#include "hw/qdev-properties-system.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "chardev/char-fe.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -enum { - R_RXTX = 0, - R_DIV, - R_STAT, - R_CTRL, - R_DBG, - R_MAX -}; - -enum { - STAT_THRE = (1<<0), - STAT_RX_EVT = (1<<1), - STAT_TX_EVT = (1<<2), -}; - -enum { - CTRL_RX_IRQ_EN = (1<<0), - CTRL_TX_IRQ_EN = (1<<1), - CTRL_THRU_EN = (1<<2), -}; - -enum { - DBG_BREAK_EN = (1<<0), -}; - -#define TYPE_MILKYMIST_UART "milkymist-uart" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistUartState, MILKYMIST_UART) - -struct MilkymistUartState { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - CharBackend chr; - qemu_irq irq; - - uint32_t regs[R_MAX]; -}; - -static void uart_update_irq(MilkymistUartState *s) -{ - int rx_event = s->regs[R_STAT] & STAT_RX_EVT; - int tx_event = s->regs[R_STAT] & STAT_TX_EVT; - int rx_irq_en = s->regs[R_CTRL] & CTRL_RX_IRQ_EN; - int tx_irq_en = s->regs[R_CTRL] & CTRL_TX_IRQ_EN; - - if ((rx_irq_en && rx_event) || (tx_irq_en && tx_event)) { - trace_milkymist_uart_raise_irq(); - qemu_irq_raise(s->irq); - } else { - trace_milkymist_uart_lower_irq(); - qemu_irq_lower(s->irq); - } -} - -static uint64_t uart_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistUartState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_RXTX: - r = s->regs[addr]; - break; - case R_DIV: - case R_STAT: - case R_CTRL: - case R_DBG: - r = s->regs[addr]; - break; - - default: - error_report("milkymist_uart: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_uart_memory_read(addr << 2, r); - - return r; -} - -static void uart_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistUartState *s = opaque; - unsigned char ch = value; - - trace_milkymist_uart_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_RXTX: - qemu_chr_fe_write_all(&s->chr, &ch, 1); - s->regs[R_STAT] |= STAT_TX_EVT; - break; - case R_DIV: - case R_CTRL: - case R_DBG: - s->regs[addr] = value; - break; - - case R_STAT: - /* write one to clear bits */ - s->regs[addr] &= ~(value & (STAT_RX_EVT | STAT_TX_EVT)); - qemu_chr_fe_accept_input(&s->chr); - break; - - default: - error_report("milkymist_uart: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - uart_update_irq(s); -} - -static const MemoryRegionOps uart_mmio_ops = { - .read = uart_read, - .write = uart_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void uart_rx(void *opaque, const uint8_t *buf, int size) -{ - MilkymistUartState *s = opaque; - - assert(!(s->regs[R_STAT] & STAT_RX_EVT)); - - s->regs[R_STAT] |= STAT_RX_EVT; - s->regs[R_RXTX] = *buf; - - uart_update_irq(s); -} - -static int uart_can_rx(void *opaque) -{ - MilkymistUartState *s = opaque; - - return !(s->regs[R_STAT] & STAT_RX_EVT); -} - -static void uart_event(void *opaque, QEMUChrEvent event) -{ -} - -static void milkymist_uart_reset(DeviceState *d) -{ - MilkymistUartState *s = MILKYMIST_UART(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - - /* THRE is always set */ - s->regs[R_STAT] = STAT_THRE; -} - -static void milkymist_uart_realize(DeviceState *dev, Error **errp) 
-{ - MilkymistUartState *s = MILKYMIST_UART(dev); - - qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, - uart_event, NULL, s, NULL, true); -} - -static void milkymist_uart_init(Object *obj) -{ - SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - MilkymistUartState *s = MILKYMIST_UART(obj); - - sysbus_init_irq(sbd, &s->irq); - - memory_region_init_io(&s->regs_region, OBJECT(s), &uart_mmio_ops, s, - "milkymist-uart", R_MAX * 4); - sysbus_init_mmio(sbd, &s->regs_region); -} - -static const VMStateDescription vmstate_milkymist_uart = { - .name = "milkymist-uart", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistUartState, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static Property milkymist_uart_properties[] = { - DEFINE_PROP_CHR("chardev", MilkymistUartState, chr), - DEFINE_PROP_END_OF_LIST(), -}; - -static void milkymist_uart_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_uart_realize; - dc->reset = milkymist_uart_reset; - dc->vmsd = &vmstate_milkymist_uart; - device_class_set_props(dc, milkymist_uart_properties); -} - -static const TypeInfo milkymist_uart_info = { - .name = TYPE_MILKYMIST_UART, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistUartState), - .instance_init = milkymist_uart_init, - .class_init = milkymist_uart_class_init, -}; - -static void milkymist_uart_register_types(void) -{ - type_register_static(&milkymist_uart_info); -} - -type_init(milkymist_uart_register_types) diff --git a/hw/char/nrf51_uart.c b/hw/char/nrf51_uart.c index 045ca5fa40d..3c6f982de97 100644 --- a/hw/char/nrf51_uart.c +++ b/hw/char/nrf51_uart.c @@ -75,7 +75,7 @@ static uint64_t uart_read(void *opaque, hwaddr addr, unsigned int size) return r; } -static gboolean uart_transmit(GIOChannel *chan, GIOCondition cond, void *opaque) +static gboolean uart_transmit(void *do_not_use, GIOCondition cond, void *opaque) { NRF51UARTState *s = NRF51_UART(opaque); int r; diff --git a/hw/char/pl011.c b/hw/char/pl011.c index dc85527a5f9..6e2d7f75095 100644 --- a/hw/char/pl011.c +++ b/hw/char/pl011.c @@ -26,6 +26,7 @@ #include "hw/qdev-properties-system.h" #include "migration/vmstate.h" #include "chardev/char-fe.h" +#include "chardev/char-serial.h" #include "qemu/log.h" #include "qemu/module.h" #include "trace.h" @@ -231,6 +232,11 @@ static void pl011_write(void *opaque, hwaddr offset, s->read_count = 0; s->read_pos = 0; } + if ((s->lcr ^ value) & 0x1) { + int break_enable = value & 0x1; + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_SET_BREAK, + &break_enable); + } s->lcr = value; pl011_set_read_trigger(s); break; diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c index ba1af1cfc45..ddae738d563 100644 --- a/hw/char/riscv_htif.c +++ b/hw/char/riscv_htif.c @@ -23,7 +23,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/log.h" -#include "hw/sysbus.h" #include "hw/char/riscv_htif.h" #include "hw/char/serial.h" #include "chardev/char.h" diff --git a/hw/char/serial.c b/hw/char/serial.c index bc2e3229704..7061aacbce9 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -220,7 +220,7 @@ static void serial_update_msl(SerialState *s) } } -static gboolean serial_watch_cb(GIOChannel *chan, GIOCondition cond, +static gboolean serial_watch_cb(void *do_not_use, GIOCondition cond, void *opaque) { SerialState *s = opaque; diff --git a/hw/char/sh_serial.c b/hw/char/sh_serial.c index 167f4d8cb90..355886ee3a1 100644 --- a/hw/char/sh_serial.c +++ b/hw/char/sh_serial.c @@ 
-26,13 +26,17 @@ */ #include "qemu/osdep.h" +#include "hw/sysbus.h" #include "hw/irq.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" #include "hw/sh4/sh.h" #include "chardev/char-fe.h" #include "qapi/error.h" #include "qemu/timer.h" - -//#define DEBUG_SERIAL +#include "qemu/log.h" +#include "trace.h" #define SH_SERIAL_FLAG_TEND (1 << 0) #define SH_SERIAL_FLAG_TDE (1 << 1) @@ -42,10 +46,10 @@ #define SH_RX_FIFO_LENGTH (16) -typedef struct { - MemoryRegion iomem; - MemoryRegion iomem_p4; - MemoryRegion iomem_a7; +OBJECT_DECLARE_SIMPLE_TYPE(SHSerialState, SH_SERIAL) + +struct SHSerialState { + SysBusDevice parent; uint8_t smr; uint8_t brr; uint8_t scr; @@ -59,13 +63,12 @@ typedef struct { uint8_t rx_tail; uint8_t rx_head; - int freq; - int feat; + uint8_t feat; int flags; int rtrg; CharBackend chr; - QEMUTimer *fifo_timeout_timer; + QEMUTimer fifo_timeout_timer; uint64_t etu; /* Elementary Time Unit (ns) */ qemu_irq eri; @@ -73,9 +76,13 @@ typedef struct { qemu_irq txi; qemu_irq tei; qemu_irq bri; -} sh_serial_state; +}; + +typedef struct {} SHSerialStateClass; -static void sh_serial_clear_fifo(sh_serial_state * s) +OBJECT_DEFINE_TYPE(SHSerialState, sh_serial, SH_SERIAL, SYS_BUS_DEVICE) + +static void sh_serial_clear_fifo(SHSerialState *s) { memset(s->rx_fifo, 0, SH_RX_FIFO_LENGTH); s->rx_cnt = 0; @@ -86,14 +93,12 @@ static void sh_serial_clear_fifo(sh_serial_state * s) static void sh_serial_write(void *opaque, hwaddr offs, uint64_t val, unsigned size) { - sh_serial_state *s = opaque; + SHSerialState *s = opaque; + DeviceState *d = DEVICE(s); unsigned char ch; -#ifdef DEBUG_SERIAL - printf("sh_serial: write offs=0x%02x val=0x%02x\n", - offs, val); -#endif - switch(offs) { + trace_sh_serial_write(d->id, size, offs, val); + switch (offs) { case 0x00: /* SMR */ s->smr = val & ((s->feat & SH_SERIAL_FEAT_SCIF) ? 0x7b : 0xff); return; @@ -103,8 +108,9 @@ static void sh_serial_write(void *opaque, hwaddr offs, case 0x08: /* SCR */ /* TODO : For SH7751, SCIF mask should be 0xfb. */ s->scr = val & ((s->feat & SH_SERIAL_FEAT_SCIF) ? 0xfa : 0xff); - if (!(val & (1 << 5))) + if (!(val & (1 << 5))) { s->flags |= SH_SERIAL_FLAG_TEND; + } if ((s->feat & SH_SERIAL_FEAT_SCIF) && s->txi) { qemu_set_irq(s->txi, val & (1 << 7)); } @@ -115,8 +121,10 @@ static void sh_serial_write(void *opaque, hwaddr offs, case 0x0c: /* FTDR / TDR */ if (qemu_chr_fe_backend_connected(&s->chr)) { ch = val; - /* XXX this blocks entire thread. Rewrite to use - * qemu_chr_fe_write and background I/O callbacks */ + /* + * XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks + */ qemu_chr_fe_write_all(&s->chr, &ch, 1); } s->dr = val; @@ -129,18 +137,23 @@ static void sh_serial_write(void *opaque, hwaddr offs, #endif } if (s->feat & SH_SERIAL_FEAT_SCIF) { - switch(offs) { + switch (offs) { case 0x10: /* FSR */ - if (!(val & (1 << 6))) + if (!(val & (1 << 6))) { s->flags &= ~SH_SERIAL_FLAG_TEND; - if (!(val & (1 << 5))) + } + if (!(val & (1 << 5))) { s->flags &= ~SH_SERIAL_FLAG_TDE; - if (!(val & (1 << 4))) + } + if (!(val & (1 << 4))) { s->flags &= ~SH_SERIAL_FLAG_BRK; - if (!(val & (1 << 1))) + } + if (!(val & (1 << 1))) { s->flags &= ~SH_SERIAL_FLAG_RDF; - if (!(val & (1 << 0))) + } + if (!(val & (1 << 0))) { s->flags &= ~SH_SERIAL_FLAG_DR; + } if (!(val & (1 << 1)) || !(val & (1 << 0))) { if (s->rxi) { @@ -176,9 +189,8 @@ static void sh_serial_write(void *opaque, hwaddr offs, case 0x24: /* LSR */ return; } - } - else { - switch(offs) { + } else { + switch (offs) { #if 0 case 0x0c: ret = s->dr; @@ -192,20 +204,20 @@ static void sh_serial_write(void *opaque, hwaddr offs, return; } } - - fprintf(stderr, "sh_serial: unsupported write to 0x%02" - HWADDR_PRIx "\n", offs); - abort(); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: unsupported write to 0x%02" HWADDR_PRIx "\n", + __func__, offs); } static uint64_t sh_serial_read(void *opaque, hwaddr offs, unsigned size) { - sh_serial_state *s = opaque; - uint32_t ret = ~0; + SHSerialState *s = opaque; + DeviceState *d = DEVICE(s); + uint32_t ret = UINT32_MAX; #if 0 - switch(offs) { + switch (offs) { case 0x00: ret = s->smr; break; @@ -221,7 +233,7 @@ static uint64_t sh_serial_read(void *opaque, hwaddr offs, } #endif if (s->feat & SH_SERIAL_FEAT_SCIF) { - switch(offs) { + switch (offs) { case 0x00: /* SMR */ ret = s->smr; break; @@ -230,29 +242,37 @@ static uint64_t sh_serial_read(void *opaque, hwaddr offs, break; case 0x10: /* FSR */ ret = 0; - if (s->flags & SH_SERIAL_FLAG_TEND) + if (s->flags & SH_SERIAL_FLAG_TEND) { ret |= (1 << 6); - if (s->flags & SH_SERIAL_FLAG_TDE) + } + if (s->flags & SH_SERIAL_FLAG_TDE) { ret |= (1 << 5); - if (s->flags & SH_SERIAL_FLAG_BRK) + } + if (s->flags & SH_SERIAL_FLAG_BRK) { ret |= (1 << 4); - if (s->flags & SH_SERIAL_FLAG_RDF) + } + if (s->flags & SH_SERIAL_FLAG_RDF) { ret |= (1 << 1); - if (s->flags & SH_SERIAL_FLAG_DR) + } + if (s->flags & SH_SERIAL_FLAG_DR) { ret |= (1 << 0); + } - if (s->scr & (1 << 5)) + if (s->scr & (1 << 5)) { s->flags |= SH_SERIAL_FLAG_TDE | SH_SERIAL_FLAG_TEND; + } break; case 0x14: if (s->rx_cnt > 0) { ret = s->rx_fifo[s->rx_tail++]; s->rx_cnt--; - if (s->rx_tail == SH_RX_FIFO_LENGTH) + if (s->rx_tail == SH_RX_FIFO_LENGTH) { s->rx_tail = 0; - if (s->rx_cnt < s->rtrg) + } + if (s->rx_cnt < s->rtrg) { s->flags &= ~SH_SERIAL_FLAG_RDF; + } } break; case 0x18: @@ -268,9 +288,8 @@ static uint64_t sh_serial_read(void *opaque, hwaddr offs, ret = 0; break; } - } - else { - switch(offs) { + } else { + switch (offs) { #if 0 case 0x0c: ret = s->dr; @@ -287,40 +306,39 @@ static uint64_t sh_serial_read(void *opaque, hwaddr offs, break; } } -#ifdef DEBUG_SERIAL - printf("sh_serial: read offs=0x%02x val=0x%x\n", - offs, ret); -#endif + trace_sh_serial_read(d->id, size, offs, ret); - if (ret & ~((1 << 16) - 1)) { - fprintf(stderr, "sh_serial: unsupported read from 0x%02" - HWADDR_PRIx "\n", offs); - abort(); + if (ret > UINT16_MAX) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: unsupported read from 0x%02" HWADDR_PRIx "\n", + __func__, offs); + ret = 0; } return ret; } -static int sh_serial_can_receive(sh_serial_state 
*s) +static int sh_serial_can_receive(SHSerialState *s) { return s->scr & (1 << 4); } -static void sh_serial_receive_break(sh_serial_state *s) +static void sh_serial_receive_break(SHSerialState *s) { - if (s->feat & SH_SERIAL_FEAT_SCIF) + if (s->feat & SH_SERIAL_FEAT_SCIF) { s->sr |= (1 << 4); + } } static int sh_serial_can_receive1(void *opaque) { - sh_serial_state *s = opaque; + SHSerialState *s = opaque; return sh_serial_can_receive(s); } static void sh_serial_timeout_int(void *opaque) { - sh_serial_state *s = opaque; + SHSerialState *s = opaque; s->flags |= SH_SERIAL_FLAG_RDF; if (s->scr & (1 << 6) && s->rxi) { @@ -330,7 +348,7 @@ static void sh_serial_timeout_int(void *opaque) static void sh_serial_receive1(void *opaque, const uint8_t *buf, int size) { - sh_serial_state *s = opaque; + SHSerialState *s = opaque; if (s->feat & SH_SERIAL_FEAT_SCIF) { int i; @@ -344,11 +362,11 @@ static void sh_serial_receive1(void *opaque, const uint8_t *buf, int size) if (s->rx_cnt >= s->rtrg) { s->flags |= SH_SERIAL_FLAG_RDF; if (s->scr & (1 << 6) && s->rxi) { - timer_del(s->fifo_timeout_timer); + timer_del(&s->fifo_timeout_timer); qemu_set_irq(s->rxi, 1); } } else { - timer_mod(s->fifo_timeout_timer, + timer_mod(&s->fifo_timeout_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 15 * s->etu); } } @@ -360,9 +378,10 @@ static void sh_serial_receive1(void *opaque, const uint8_t *buf, int size) static void sh_serial_event(void *opaque, QEMUChrEvent event) { - sh_serial_state *s = opaque; - if (event == CHR_EVENT_BREAK) + SHSerialState *s = opaque; + if (event == CHR_EVENT_BREAK) { sh_serial_receive_break(s); + } } static const MemoryRegionOps sh_serial_ops = { @@ -371,20 +390,10 @@ static const MemoryRegionOps sh_serial_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -void sh_serial_init(MemoryRegion *sysmem, - hwaddr base, int feat, - uint32_t freq, Chardev *chr, - qemu_irq eri_source, - qemu_irq rxi_source, - qemu_irq txi_source, - qemu_irq tei_source, - qemu_irq bri_source) +static void sh_serial_reset(DeviceState *dev) { - sh_serial_state *s; + SHSerialState *s = SH_SERIAL(dev); - s = g_malloc0(sizeof(sh_serial_state)); - - s->feat = feat; s->flags = SH_SERIAL_FLAG_TEND | SH_SERIAL_FLAG_TDE; s->rtrg = 1; @@ -393,39 +402,64 @@ void sh_serial_init(MemoryRegion *sysmem, s->scr = 1 << 5; /* pretend that TX is enabled so early printk works */ s->sptr = 0; - if (feat & SH_SERIAL_FEAT_SCIF) { + if (s->feat & SH_SERIAL_FEAT_SCIF) { s->fcr = 0; - } - else { + } else { s->dr = 0xff; } sh_serial_clear_fifo(s); +} - memory_region_init_io(&s->iomem, NULL, &sh_serial_ops, s, - "serial", 0x100000000ULL); - - memory_region_init_alias(&s->iomem_p4, NULL, "serial-p4", &s->iomem, - 0, 0x28); - memory_region_add_subregion(sysmem, P4ADDR(base), &s->iomem_p4); - - memory_region_init_alias(&s->iomem_a7, NULL, "serial-a7", &s->iomem, - 0, 0x28); - memory_region_add_subregion(sysmem, A7ADDR(base), &s->iomem_a7); - - if (chr) { - qemu_chr_fe_init(&s->chr, chr, &error_abort); +static void sh_serial_realize(DeviceState *d, Error **errp) +{ + SHSerialState *s = SH_SERIAL(d); + MemoryRegion *iomem = g_malloc(sizeof(*iomem)); + + assert(d->id); + memory_region_init_io(iomem, OBJECT(d), &sh_serial_ops, s, d->id, 0x28); + sysbus_init_mmio(SYS_BUS_DEVICE(d), iomem); + qdev_init_gpio_out_named(d, &s->eri, "eri", 1); + qdev_init_gpio_out_named(d, &s->rxi, "rxi", 1); + qdev_init_gpio_out_named(d, &s->txi, "txi", 1); + qdev_init_gpio_out_named(d, &s->tei, "tei", 1); + qdev_init_gpio_out_named(d, &s->bri, "bri", 1); + + if 
(qemu_chr_fe_backend_connected(&s->chr)) { qemu_chr_fe_set_handlers(&s->chr, sh_serial_can_receive1, sh_serial_receive1, sh_serial_event, NULL, s, NULL, true); } - s->fifo_timeout_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, - sh_serial_timeout_int, s); + timer_init_ns(&s->fifo_timeout_timer, QEMU_CLOCK_VIRTUAL, + sh_serial_timeout_int, s); s->etu = NANOSECONDS_PER_SECOND / 9600; - s->eri = eri_source; - s->rxi = rxi_source; - s->txi = txi_source; - s->tei = tei_source; - s->bri = bri_source; +} + +static void sh_serial_finalize(Object *obj) +{ + SHSerialState *s = SH_SERIAL(obj); + + timer_del(&s->fifo_timeout_timer); +} + +static void sh_serial_init(Object *obj) +{ +} + +static Property sh_serial_properties[] = { + DEFINE_PROP_CHR("chardev", SHSerialState, chr), + DEFINE_PROP_UINT8("features", SHSerialState, feat, 0), + DEFINE_PROP_END_OF_LIST() +}; + +static void sh_serial_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + device_class_set_props(dc, sh_serial_properties); + dc->realize = sh_serial_realize; + dc->reset = sh_serial_reset; + /* Reason: part of SuperH CPU/SoC, needs to be wired up */ + dc->user_creatable = false; } diff --git a/hw/char/shakti_uart.c b/hw/char/shakti_uart.c new file mode 100644 index 00000000000..98b142c7df8 --- /dev/null +++ b/hw/char/shakti_uart.c @@ -0,0 +1,186 @@ +/* + * SHAKTI UART + * + * Copyright (c) 2021 Vijai Kumar K + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "hw/char/shakti_uart.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "qemu/log.h" + +static uint64_t shakti_uart_read(void *opaque, hwaddr addr, unsigned size) +{ + ShaktiUartState *s = opaque; + + switch (addr) { + case SHAKTI_UART_BAUD: + return s->uart_baud; + case SHAKTI_UART_RX: + qemu_chr_fe_accept_input(&s->chr); + s->uart_status &= ~SHAKTI_UART_STATUS_RX_NOT_EMPTY; + return s->uart_rx; + case SHAKTI_UART_STATUS: + return s->uart_status; + case SHAKTI_UART_DELAY: + return s->uart_delay; + case SHAKTI_UART_CONTROL: + return s->uart_control; + case SHAKTI_UART_INT_EN: + return s->uart_interrupt; + case SHAKTI_UART_IQ_CYCLES: + return s->uart_iq_cycles; + case SHAKTI_UART_RX_THRES: + return s->uart_rx_threshold; + default: + /* Also handles TX REG which is write only */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%"HWADDR_PRIx"\n", __func__, addr); + } + + return 0; +} + +static void shakti_uart_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + ShaktiUartState *s = opaque; + uint32_t value = data; + uint8_t ch; + + switch (addr) { + case SHAKTI_UART_BAUD: + s->uart_baud = value; + break; + case SHAKTI_UART_TX: + ch = value; + qemu_chr_fe_write_all(&s->chr, &ch, 1); + s->uart_status &= ~SHAKTI_UART_STATUS_TX_FULL; + break; + case SHAKTI_UART_STATUS: + s->uart_status = value; + break; + case SHAKTI_UART_DELAY: + s->uart_delay = value; + break; + case SHAKTI_UART_CONTROL: + s->uart_control = value; + break; + case SHAKTI_UART_INT_EN: + s->uart_interrupt = value; + break; + case SHAKTI_UART_IQ_CYCLES: + s->uart_iq_cycles = value; + break; + case SHAKTI_UART_RX_THRES: + s->uart_rx_threshold = value; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%"HWADDR_PRIx"\n", __func__, addr); + } +} + +static const MemoryRegionOps shakti_uart_ops = { + .read = shakti_uart_read, + .write = shakti_uart_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = {.min_access_size = 1, .max_access_size = 4}, + .valid = {.min_access_size = 1, .max_access_size = 4}, +}; + +static void shakti_uart_reset(DeviceState *dev) +{ + ShaktiUartState *s = SHAKTI_UART(dev); + + s->uart_baud = SHAKTI_UART_BAUD_DEFAULT; + s->uart_tx = 0x0; + s->uart_rx = 0x0; + s->uart_status = 0x0000; + s->uart_delay = 0x0000; + s->uart_control = SHAKTI_UART_CONTROL_DEFAULT; + s->uart_interrupt = 0x0000; + s->uart_iq_cycles = 0x00; + s->uart_rx_threshold = 0x00; +} + +static int shakti_uart_can_receive(void *opaque) +{ + ShaktiUartState *s = opaque; + + return !(s->uart_status & SHAKTI_UART_STATUS_RX_NOT_EMPTY); +} + +static void shakti_uart_receive(void *opaque, const uint8_t *buf, int size) +{ + ShaktiUartState *s = opaque; + + s->uart_rx = *buf; + s->uart_status |= SHAKTI_UART_STATUS_RX_NOT_EMPTY; +} + +static void shakti_uart_realize(DeviceState *dev, Error **errp) +{ + ShaktiUartState *sus = SHAKTI_UART(dev); + qemu_chr_fe_set_handlers(&sus->chr, shakti_uart_can_receive, + shakti_uart_receive, NULL, NULL, sus, NULL, true); +} + +static void shakti_uart_instance_init(Object *obj) +{ + ShaktiUartState *sus = SHAKTI_UART(obj); + memory_region_init_io(&sus->mmio, + obj, + &shakti_uart_ops, + sus, + TYPE_SHAKTI_UART, + 0x1000); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &sus->mmio); +} + +static Property shakti_uart_properties[] = { + DEFINE_PROP_CHR("chardev", ShaktiUartState, chr), + DEFINE_PROP_END_OF_LIST(), +}; + +static void shakti_uart_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = 
DEVICE_CLASS(klass); + dc->reset = shakti_uart_reset; + dc->realize = shakti_uart_realize; + device_class_set_props(dc, shakti_uart_properties); + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); +} + +static const TypeInfo shakti_uart_info = { + .name = TYPE_SHAKTI_UART, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(ShaktiUartState), + .class_init = shakti_uart_class_init, + .instance_init = shakti_uart_instance_init, +}; + +static void shakti_uart_register_types(void) +{ + type_register_static(&shakti_uart_info); +} +type_init(shakti_uart_register_types) diff --git a/hw/char/sifive_uart.c b/hw/char/sifive_uart.c index 3a00ba7f006..1c75f792b35 100644 --- a/hw/char/sifive_uart.c +++ b/hw/char/sifive_uart.c @@ -19,12 +19,12 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/log.h" -#include "hw/sysbus.h" +#include "migration/vmstate.h" #include "chardev/char.h" #include "chardev/char-fe.h" -#include "hw/hw.h" #include "hw/irq.h" #include "hw/char/sifive_uart.h" +#include "hw/qdev-properties-system.h" /* * Not yet implemented: @@ -33,7 +33,7 @@ */ /* Returns the state of the IP (interrupt pending) register */ -static uint64_t uart_ip(SiFiveUARTState *s) +static uint64_t sifive_uart_ip(SiFiveUARTState *s) { uint64_t ret = 0; @@ -50,7 +50,7 @@ static uint64_t uart_ip(SiFiveUARTState *s) return ret; } -static void update_irq(SiFiveUARTState *s) +static void sifive_uart_update_irq(SiFiveUARTState *s) { int cond = 0; if ((s->ie & SIFIVE_UART_IE_TXWM) || @@ -65,7 +65,7 @@ static void update_irq(SiFiveUARTState *s) } static uint64_t -uart_read(void *opaque, hwaddr addr, unsigned int size) +sifive_uart_read(void *opaque, hwaddr addr, unsigned int size) { SiFiveUARTState *s = opaque; unsigned char r; @@ -76,7 +76,7 @@ uart_read(void *opaque, hwaddr addr, unsigned int size) memmove(s->rx_fifo, s->rx_fifo + 1, s->rx_fifo_len - 1); s->rx_fifo_len--; qemu_chr_fe_accept_input(&s->chr); - update_irq(s); + sifive_uart_update_irq(s); return r; } return 0x80000000; @@ -86,7 +86,7 @@ uart_read(void *opaque, hwaddr addr, unsigned int size) case SIFIVE_UART_IE: return s->ie; case SIFIVE_UART_IP: - return uart_ip(s); + return sifive_uart_ip(s); case SIFIVE_UART_TXCTRL: return s->txctrl; case SIFIVE_UART_RXCTRL: @@ -101,8 +101,8 @@ uart_read(void *opaque, hwaddr addr, unsigned int size) } static void -uart_write(void *opaque, hwaddr addr, - uint64_t val64, unsigned int size) +sifive_uart_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) { SiFiveUARTState *s = opaque; uint32_t value = val64; @@ -111,11 +111,11 @@ uart_write(void *opaque, hwaddr addr, switch (addr) { case SIFIVE_UART_TXFIFO: qemu_chr_fe_write(&s->chr, &ch, 1); - update_irq(s); + sifive_uart_update_irq(s); return; case SIFIVE_UART_IE: s->ie = val64; - update_irq(s); + sifive_uart_update_irq(s); return; case SIFIVE_UART_TXCTRL: s->txctrl = val64; @@ -131,9 +131,9 @@ uart_write(void *opaque, hwaddr addr, __func__, (int)addr, (int)value); } -static const MemoryRegionOps uart_ops = { - .read = uart_read, - .write = uart_write, +static const MemoryRegionOps sifive_uart_ops = { + .read = sifive_uart_read, + .write = sifive_uart_write, .endianness = DEVICE_NATIVE_ENDIAN, .valid = { .min_access_size = 4, @@ -141,7 +141,7 @@ static const MemoryRegionOps uart_ops = { } }; -static void uart_rx(void *opaque, const uint8_t *buf, int size) +static void sifive_uart_rx(void *opaque, const uint8_t *buf, int size) { SiFiveUARTState *s = opaque; @@ -152,43 +152,138 @@ static void uart_rx(void *opaque, const uint8_t *buf, 
int size) } s->rx_fifo[s->rx_fifo_len++] = *buf; - update_irq(s); + sifive_uart_update_irq(s); } -static int uart_can_rx(void *opaque) +static int sifive_uart_can_rx(void *opaque) { SiFiveUARTState *s = opaque; return s->rx_fifo_len < sizeof(s->rx_fifo); } -static void uart_event(void *opaque, QEMUChrEvent event) +static void sifive_uart_event(void *opaque, QEMUChrEvent event) { } -static int uart_be_change(void *opaque) +static int sifive_uart_be_change(void *opaque) { SiFiveUARTState *s = opaque; - qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, uart_event, - uart_be_change, s, NULL, true); + qemu_chr_fe_set_handlers(&s->chr, sifive_uart_can_rx, sifive_uart_rx, + sifive_uart_event, sifive_uart_be_change, s, + NULL, true); return 0; } +static Property sifive_uart_properties[] = { + DEFINE_PROP_CHR("chardev", SiFiveUARTState, chr), + DEFINE_PROP_END_OF_LIST(), +}; + +static void sifive_uart_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + SiFiveUARTState *s = SIFIVE_UART(obj); + + memory_region_init_io(&s->mmio, OBJECT(s), &sifive_uart_ops, s, + TYPE_SIFIVE_UART, SIFIVE_UART_MAX); + sysbus_init_mmio(sbd, &s->mmio); + sysbus_init_irq(sbd, &s->irq); +} + +static void sifive_uart_realize(DeviceState *dev, Error **errp) +{ + SiFiveUARTState *s = SIFIVE_UART(dev); + + qemu_chr_fe_set_handlers(&s->chr, sifive_uart_can_rx, sifive_uart_rx, + sifive_uart_event, sifive_uart_be_change, s, + NULL, true); + +} + +static void sifive_uart_reset_enter(Object *obj, ResetType type) +{ + SiFiveUARTState *s = SIFIVE_UART(obj); + s->ie = 0; + s->ip = 0; + s->txctrl = 0; + s->rxctrl = 0; + s->div = 0; + s->rx_fifo_len = 0; +} + +static void sifive_uart_reset_hold(Object *obj) +{ + SiFiveUARTState *s = SIFIVE_UART(obj); + qemu_irq_lower(s->irq); +} + +static const VMStateDescription vmstate_sifive_uart = { + .name = TYPE_SIFIVE_UART, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(rx_fifo, SiFiveUARTState, + SIFIVE_UART_RX_FIFO_SIZE), + VMSTATE_UINT8(rx_fifo_len, SiFiveUARTState), + VMSTATE_UINT32(ie, SiFiveUARTState), + VMSTATE_UINT32(ip, SiFiveUARTState), + VMSTATE_UINT32(txctrl, SiFiveUARTState), + VMSTATE_UINT32(rxctrl, SiFiveUARTState), + VMSTATE_UINT32(div, SiFiveUARTState), + VMSTATE_END_OF_LIST() + }, +}; + + +static void sifive_uart_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + ResettableClass *rc = RESETTABLE_CLASS(oc); + + dc->realize = sifive_uart_realize; + dc->vmsd = &vmstate_sifive_uart; + rc->phases.enter = sifive_uart_reset_enter; + rc->phases.hold = sifive_uart_reset_hold; + device_class_set_props(dc, sifive_uart_properties); + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); +} + +static const TypeInfo sifive_uart_info = { + .name = TYPE_SIFIVE_UART, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SiFiveUARTState), + .instance_init = sifive_uart_init, + .class_init = sifive_uart_class_init, +}; + +static void sifive_uart_register_types(void) +{ + type_register_static(&sifive_uart_info); +} + +type_init(sifive_uart_register_types) + /* * Create UART device. 
*/ SiFiveUARTState *sifive_uart_create(MemoryRegion *address_space, hwaddr base, Chardev *chr, qemu_irq irq) { - SiFiveUARTState *s = g_malloc0(sizeof(SiFiveUARTState)); - s->irq = irq; - qemu_chr_fe_init(&s->chr, chr, &error_abort); - qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, uart_event, - uart_be_change, s, NULL, true); - memory_region_init_io(&s->mmio, NULL, &uart_ops, s, - TYPE_SIFIVE_UART, SIFIVE_UART_MAX); - memory_region_add_subregion(address_space, base, &s->mmio); - return s; + DeviceState *dev; + SysBusDevice *s; + SiFiveUARTState *r; + + dev = qdev_new("riscv.sifive.uart"); + s = SYS_BUS_DEVICE(dev); + qdev_prop_set_chr(dev, "chardev", chr); + sysbus_realize_and_unref(s, &error_fatal); + memory_region_add_subregion(address_space, base, + sysbus_mmio_get_region(s, 0)); + sysbus_connect_irq(s, 0, irq); + + r = SIFIVE_UART(dev); + return r; } diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c index 79eaa2fa523..91eae1a5988 100644 --- a/hw/char/spapr_vty.c +++ b/hw/char/spapr_vty.c @@ -2,7 +2,6 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "qapi/error.h" -#include "cpu.h" #include "migration/vmstate.h" #include "chardev/char-fe.h" #include "hw/ppc/spapr.h" diff --git a/hw/char/trace-events b/hw/char/trace-events index 76d52938ead..2ecb36232e9 100644 --- a/hw/char/trace-events +++ b/hw/char/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # parallel.c parallel_ioport_read(const char *desc, uint16_t addr, uint8_t value) "read [%s] addr 0x%02x val 0x%02x" @@ -35,24 +35,9 @@ grlib_apbuart_event(int event) "event:%d" grlib_apbuart_writel_unknown(uint64_t addr, uint32_t value) "addr 0x%"PRIx64" value 0x%x" grlib_apbuart_readl_unknown(uint64_t addr) "addr 0x%"PRIx64 -# lm32_juart.c -lm32_juart_get_jtx(uint32_t value) "jtx 0x%08x" -lm32_juart_set_jtx(uint32_t value) "jtx 0x%08x" -lm32_juart_get_jrx(uint32_t value) "jrx 0x%08x" -lm32_juart_set_jrx(uint32_t value) "jrx 0x%08x" - -# lm32_uart.c -lm32_uart_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -lm32_uart_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -lm32_uart_irq_state(int level) "irq state %d" - -# milkymist-uart.c -milkymist_uart_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_uart_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_uart_raise_irq(void) "Raise IRQ" -milkymist_uart_lower_irq(void) "Lower IRQ" - # escc.c +escc_hard_reset(void) "hard reset" +escc_soft_reset_chn(char channel) "soft reset channel %c" escc_put_queue(char channel, int b) "channel %c put: 0x%02x" escc_get_queue(char channel, int val) "channel %c get 0x%02x" escc_update_irq(int irq) "IRQ = %d" @@ -90,6 +75,10 @@ cmsdk_apb_uart_set_params(int speed) "CMSDK APB UART: params set to %d 8N1" nrf51_uart_read(uint64_t addr, uint64_t r, unsigned int size) "addr 0x%" PRIx64 " value 0x%" PRIx64 " size %u" nrf51_uart_write(uint64_t addr, uint64_t value, unsigned int size) "addr 0x%" PRIx64 " value 0x%" PRIx64 " size %u" +# shakti_uart.c +shakti_uart_read(uint64_t addr, uint16_t r, unsigned int size) "addr 0x%" PRIx64 " value 0x%" PRIx16 " size %u" +shakti_uart_write(uint64_t addr, uint64_t value, unsigned int size) "addr 0x%" PRIx64 " value 0x%" PRIx64 " size %u" + # exynos4210_uart.c exynos_uart_dmabusy(uint32_t channel) "UART%d: DMA busy (Rx buffer empty)" exynos_uart_dmaready(uint32_t channel) "UART%d: DMA ready" @@ -112,3 
+101,7 @@ exynos_uart_rx_timeout(uint32_t channel, uint32_t stat, uint32_t intsp) "UART%d: # cadence_uart.c cadence_uart_baudrate(unsigned baudrate) "baudrate %u" + +# sh_serial.c +sh_serial_read(char *id, unsigned size, uint64_t offs, uint64_t val) " %s size %d offs 0x%02" PRIx64 " -> 0x%02" PRIx64 +sh_serial_write(char *id, unsigned size, uint64_t offs, uint64_t val) "%s size %d offs 0x%02" PRIx64 " <- 0x%02" PRIx64 diff --git a/hw/char/virtio-console.c b/hw/char/virtio-console.c index 6b132caa29d..dd5a02e3392 100644 --- a/hw/char/virtio-console.c +++ b/hw/char/virtio-console.c @@ -38,7 +38,7 @@ struct VirtConsole { * Callback function that's called from chardevs when backend becomes * writable. */ -static gboolean chr_write_unblocked(GIOChannel *chan, GIOCondition cond, +static gboolean chr_write_unblocked(void *do_not_use, GIOCondition cond, void *opaque) { VirtConsole *vcon = opaque; diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index b20038991a6..f01ec2137c9 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -28,7 +28,6 @@ #include "qemu/error-report.h" #include "qemu/queue.h" #include "hw/qdev-properties.h" -#include "hw/sysbus.h" #include "trace.h" #include "hw/virtio/virtio-serial.h" #include "hw/virtio/virtio-access.h" @@ -1049,8 +1048,8 @@ static void virtio_serial_device_realize(DeviceState *dev, Error **errp) config_size); /* Spawn a new virtio-serial bus on which the ports will ride as devices */ - qbus_create_inplace(&vser->bus, sizeof(vser->bus), TYPE_VIRTIO_SERIAL_BUS, - dev, vdev->bus_name); + qbus_init(&vser->bus, sizeof(vser->bus), TYPE_VIRTIO_SERIAL_BUS, + dev, vdev->bus_name); qbus_set_hotplug_handler(BUS(&vser->bus), OBJECT(vser)); vser->bus.vser = vser; QTAILQ_INIT(&vser->ports); diff --git a/hw/core/bus.c b/hw/core/bus.c index 9cfbc3a6877..c7831b5293b 100644 --- a/hw/core/bus.c +++ b/hw/core/bus.c @@ -99,7 +99,8 @@ static void bus_reset_child_foreach(Object *obj, ResettableChildCallback cb, } } -static void qbus_init(BusState *bus, DeviceState *parent, const char *name) +static void qbus_init_internal(BusState *bus, DeviceState *parent, + const char *name) { const char *typename = object_get_typename(OBJECT(bus)); BusClass *bc; @@ -151,19 +152,19 @@ static void bus_unparent(Object *obj) bus->parent = NULL; } -void qbus_create_inplace(void *bus, size_t size, const char *typename, - DeviceState *parent, const char *name) +void qbus_init(void *bus, size_t size, const char *typename, + DeviceState *parent, const char *name) { object_initialize(bus, size, typename); - qbus_init(bus, parent, name); + qbus_init_internal(bus, parent, name); } -BusState *qbus_create(const char *typename, DeviceState *parent, const char *name) +BusState *qbus_new(const char *typename, DeviceState *parent, const char *name) { BusState *bus; bus = BUS(object_new(typename)); - qbus_init(bus, parent, name); + qbus_init_internal(bus, parent, name); return bus; } diff --git a/hw/core/clock-vmstate.c b/hw/core/clock-vmstate.c index 260b13fc2c8..9d9174ffbd7 100644 --- a/hw/core/clock-vmstate.c +++ b/hw/core/clock-vmstate.c @@ -14,12 +14,50 @@ #include "migration/vmstate.h" #include "hw/clock.h" +static bool muldiv_needed(void *opaque) +{ + Clock *clk = opaque; + + return clk->multiplier != 1 || clk->divider != 1; +} + +static int clock_pre_load(void *opaque) +{ + Clock *clk = opaque; + /* + * The initial out-of-reset settings of the Clock might have been + * configured by the device to be different from what we set + * in clock_initfn(), so we must 
here set the default values to
+     * be used if they are not in the inbound migration state.
+     */
+    clk->multiplier = 1;
+    clk->divider = 1;
+
+    return 0;
+}
+
+const VMStateDescription vmstate_muldiv = {
+    .name = "clock/muldiv",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = muldiv_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(multiplier, Clock),
+        VMSTATE_UINT32(divider, Clock),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 const VMStateDescription vmstate_clock = {
     .name = "clock",
     .version_id = 0,
     .minimum_version_id = 0,
+    .pre_load = clock_pre_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(period, Clock),
         VMSTATE_END_OF_LIST()
-    }
+    },
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_muldiv,
+        NULL
+    },
 };
diff --git a/hw/core/clock.c b/hw/core/clock.c
index fc5a99683f8..916875e07a2 100644
--- a/hw/core/clock.c
+++ b/hw/core/clock.c
@@ -64,6 +64,15 @@ bool clock_set(Clock *clk, uint64_t period)
     return true;
 }
 
+static uint64_t clock_get_child_period(Clock *clk)
+{
+    /*
+     * Return the period to be used for child clocks, which is the parent
+     * clock period adjusted for multiplier and divider effects.
+     */
+    return muldiv64(clk->period, clk->multiplier, clk->divider);
+}
+
 static void clock_call_callback(Clock *clk, ClockEvent event)
 {
     /*
@@ -78,15 +87,16 @@ static void clock_call_callback(Clock *clk, ClockEvent event)
 static void clock_propagate_period(Clock *clk, bool call_callbacks)
 {
     Clock *child;
+    uint64_t child_period = clock_get_child_period(clk);
 
     QLIST_FOREACH(child, &clk->children, sibling) {
-        if (child->period != clk->period) {
+        if (child->period != child_period) {
             if (call_callbacks) {
                 clock_call_callback(child, ClockPreUpdate);
             }
-            child->period = clk->period;
+            child->period = child_period;
             trace_clock_update(CLOCK_PATH(child), CLOCK_PATH(clk),
-                               CLOCK_PERIOD_TO_HZ(clk->period),
+                               CLOCK_PERIOD_TO_HZ(child->period),
                                call_callbacks);
             if (call_callbacks) {
                 clock_call_callback(child, ClockUpdate);
@@ -110,7 +120,7 @@ void clock_set_source(Clock *clk, Clock *src)
 
     trace_clock_set_source(CLOCK_PATH(clk), CLOCK_PATH(src));
 
-    clk->period = src->period;
+    clk->period = clock_get_child_period(src);
     QLIST_INSERT_HEAD(&src->children, clk, sibling);
     clk->source = src;
     clock_propagate_period(clk, false);
@@ -133,10 +143,23 @@ char *clock_display_freq(Clock *clk)
     return freq_to_str(clock_get_hz(clk));
 }
 
+void clock_set_mul_div(Clock *clk, uint32_t multiplier, uint32_t divider)
+{
+    assert(divider != 0);
+
+    trace_clock_set_mul_div(CLOCK_PATH(clk), clk->multiplier, multiplier,
+                            clk->divider, divider);
+    clk->multiplier = multiplier;
+    clk->divider = divider;
+}
+
 static void clock_initfn(Object *obj)
 {
     Clock *clk = CLOCK(obj);
 
+    clk->multiplier = 1;
+    clk->divider = 1;
+
     QLIST_INIT(&clk->children);
 }
 
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
new file mode 100644
index 00000000000..fbea86d3303
--- /dev/null
+++ b/hw/core/cpu-common.c
@@ -0,0 +1,307 @@
+/*
+ * QEMU CPU model
+ *
+ * Copyright (c) 2012-2014 SUSE LINUX Products GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/core/cpu.h" +#include "sysemu/hw_accel.h" +#include "qemu/notify.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "exec/log.h" +#include "exec/cpu-common.h" +#include "qemu/error-report.h" +#include "qemu/qemu-print.h" +#include "sysemu/tcg.h" +#include "hw/boards.h" +#include "hw/qdev-properties.h" +#include "trace/trace-root.h" +#include "qemu/plugin.h" + +CPUState *cpu_by_arch_id(int64_t id) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->get_arch_id(cpu) == id) { + return cpu; + } + } + return NULL; +} + +bool cpu_exists(int64_t id) +{ + return !!cpu_by_arch_id(id); +} + +CPUState *cpu_create(const char *typename) +{ + Error *err = NULL; + CPUState *cpu = CPU(object_new(typename)); + if (!qdev_realize(DEVICE(cpu), NULL, &err)) { + error_report_err(err); + object_unref(OBJECT(cpu)); + exit(EXIT_FAILURE); + } + return cpu; +} + +/* Resetting the IRQ comes from across the code base so we take the + * BQL here if we need to. cpu_interrupt assumes it is held.*/ +void cpu_reset_interrupt(CPUState *cpu, int mask) +{ + bool need_lock = !qemu_mutex_iothread_locked(); + + if (need_lock) { + qemu_mutex_lock_iothread(); + } + cpu->interrupt_request &= ~mask; + if (need_lock) { + qemu_mutex_unlock_iothread(); + } +} + +void cpu_exit(CPUState *cpu) +{ + qatomic_set(&cpu->exit_request, 1); + /* Ensure cpu_exec will see the exit request after TCG has exited. */ + smp_wmb(); + qatomic_set(&cpu->icount_decr_ptr->u16.high, -1); +} + +static int cpu_common_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg) +{ + return 0; +} + +static int cpu_common_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg) +{ + return 0; +} + +void cpu_dump_state(CPUState *cpu, FILE *f, int flags) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->dump_state) { + cpu_synchronize_state(cpu); + cc->dump_state(cpu, f, flags); + } +} + +void cpu_reset(CPUState *cpu) +{ + device_cold_reset(DEVICE(cpu)); + + trace_guest_cpu_reset(cpu); +} + +static void cpu_common_reset(DeviceState *dev) +{ + CPUState *cpu = CPU(dev); + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (qemu_loglevel_mask(CPU_LOG_RESET)) { + qemu_log("CPU Reset (CPU %d)\n", cpu->cpu_index); + log_cpu_state(cpu, cc->reset_dump_flags); + } + + cpu->interrupt_request = 0; + cpu->halted = cpu->start_powered_off; + cpu->mem_io_pc = 0; + cpu->icount_extra = 0; + qatomic_set(&cpu->icount_decr_ptr->u32, 0); + cpu->can_do_io = 1; + cpu->exception_index = -1; + cpu->crash_occurred = false; + cpu->cflags_next_tb = -1; + + if (tcg_enabled()) { + cpu_tb_jmp_cache_clear(cpu); + + tcg_flush_softmmu_tlb(cpu); + } +} + +static bool cpu_common_has_work(CPUState *cs) +{ + return false; +} + +ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model) +{ + CPUClass *cc = CPU_CLASS(object_class_by_name(typename)); + + assert(cpu_model && cc->class_by_name); + return cc->class_by_name(cpu_model); +} + +static void cpu_common_parse_features(const char *typename, char *features, + Error **errp) +{ + char *val; + static bool cpu_globals_initialized; + /* Single "key=value" string being parsed */ + char *featurestr = features ? 
strtok(features, ",") : NULL; + + /* should be called only once, catch invalid users */ + assert(!cpu_globals_initialized); + cpu_globals_initialized = true; + + while (featurestr) { + val = strchr(featurestr, '='); + if (val) { + GlobalProperty *prop = g_new0(typeof(*prop), 1); + *val = 0; + val++; + prop->driver = typename; + prop->property = g_strdup(featurestr); + prop->value = g_strdup(val); + qdev_prop_register_global(prop); + } else { + error_setg(errp, "Expected key=value format, found %s.", + featurestr); + return; + } + featurestr = strtok(NULL, ","); + } +} + +static void cpu_common_realizefn(DeviceState *dev, Error **errp) +{ + CPUState *cpu = CPU(dev); + Object *machine = qdev_get_machine(); + + /* qdev_get_machine() can return something that's not TYPE_MACHINE + * if this is one of the user-only emulators; in that case there's + * no need to check the ignore_memory_transaction_failures board flag. + */ + if (object_dynamic_cast(machine, TYPE_MACHINE)) { + ObjectClass *oc = object_get_class(machine); + MachineClass *mc = MACHINE_CLASS(oc); + + if (mc) { + cpu->ignore_memory_transaction_failures = + mc->ignore_memory_transaction_failures; + } + } + + if (dev->hotplugged) { + cpu_synchronize_post_init(cpu); + cpu_resume(cpu); + } + + /* NOTE: latest generic point where the cpu is fully realized */ + trace_init_vcpu(cpu); +} + +static void cpu_common_unrealizefn(DeviceState *dev) +{ + CPUState *cpu = CPU(dev); + + /* NOTE: latest generic point before the cpu is fully unrealized */ + trace_fini_vcpu(cpu); + cpu_exec_unrealizefn(cpu); +} + +static void cpu_common_initfn(Object *obj) +{ + CPUState *cpu = CPU(obj); + CPUClass *cc = CPU_GET_CLASS(obj); + + cpu->cpu_index = UNASSIGNED_CPU_INDEX; + cpu->cluster_index = UNASSIGNED_CLUSTER_INDEX; + cpu->gdb_num_regs = cpu->gdb_num_g_regs = cc->gdb_num_core_regs; + /* *-user doesn't have configurable SMP topology */ + /* the default value is changed by qemu_init_vcpu() for softmmu */ + cpu->nr_cores = 1; + cpu->nr_threads = 1; + + qemu_mutex_init(&cpu->work_mutex); + QSIMPLEQ_INIT(&cpu->work_list); + QTAILQ_INIT(&cpu->breakpoints); + QTAILQ_INIT(&cpu->watchpoints); + + cpu_exec_initfn(cpu); +} + +static void cpu_common_post_initfn(Object *obj) +{ + /* Now that cpu->env_ptr has been initialized set up instruction logging. */ +#ifdef CONFIG_TCG_LOG_INSTR + qemu_log_instr_init(CPU(obj)); +#endif +} + +static void cpu_common_finalize(Object *obj) +{ + CPUState *cpu = CPU(obj); + + qemu_mutex_destroy(&cpu->work_mutex); +} + +static int64_t cpu_common_get_arch_id(CPUState *cpu) +{ + return cpu->cpu_index; +} + +static void cpu_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + CPUClass *k = CPU_CLASS(klass); + + k->parse_features = cpu_common_parse_features; + k->get_arch_id = cpu_common_get_arch_id; + k->has_work = cpu_common_has_work; + k->gdb_read_register = cpu_common_gdb_read_register; + k->gdb_write_register = cpu_common_gdb_write_register; + set_bit(DEVICE_CATEGORY_CPU, dc->categories); + dc->realize = cpu_common_realizefn; + dc->unrealize = cpu_common_unrealizefn; + dc->reset = cpu_common_reset; + cpu_class_init_props(dc); + /* + * Reason: CPUs still need special care by board code: wiring up + * IRQs, adding reset handlers, halting non-first CPUs, ... 
+ */ + dc->user_creatable = false; +} + +static const TypeInfo cpu_type_info = { + .name = TYPE_CPU, + .parent = TYPE_DEVICE, + .instance_size = sizeof(CPUState), + .instance_init = cpu_common_initfn, + .instance_post_init = cpu_common_post_initfn, + .instance_finalize = cpu_common_finalize, + .abstract = true, + .class_size = sizeof(CPUClass), + .class_init = cpu_class_init, +}; + +static void cpu_register_types(void) +{ + type_register_static(&cpu_type_info); +} + +type_init(cpu_register_types) diff --git a/hw/core/cpu-sysemu.c b/hw/core/cpu-sysemu.c new file mode 100644 index 00000000000..00253f89293 --- /dev/null +++ b/hw/core/cpu-sysemu.c @@ -0,0 +1,145 @@ +/* + * QEMU CPU model (system emulation specific) + * + * Copyright (c) 2012-2014 SUSE LINUX Products GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/core/cpu.h" +#include "hw/core/sysemu-cpu-ops.h" + +bool cpu_paging_enabled(const CPUState *cpu) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->sysemu_ops->get_paging_enabled) { + return cc->sysemu_ops->get_paging_enabled(cpu); + } + + return false; +} + +void cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, + Error **errp) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->sysemu_ops->get_memory_mapping) { + cc->sysemu_ops->get_memory_mapping(cpu, list, errp); + return; + } + + error_setg(errp, "Obtaining memory mappings is unsupported on this CPU."); +} + +hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr, + MemTxAttrs *attrs) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->sysemu_ops->get_phys_page_attrs_debug) { + return cc->sysemu_ops->get_phys_page_attrs_debug(cpu, addr, attrs); + } + /* Fallback for CPUs which don't implement the _attrs_ hook */ + *attrs = MEMTXATTRS_UNSPECIFIED; + return cc->sysemu_ops->get_phys_page_debug(cpu, addr); +} + +hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr) +{ + MemTxAttrs attrs = {}; + + return cpu_get_phys_page_attrs_debug(cpu, addr, &attrs); +} + +int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + int ret = 0; + + if (cc->sysemu_ops->asidx_from_attrs) { + ret = cc->sysemu_ops->asidx_from_attrs(cpu, attrs); + assert(ret < cpu->num_ases && ret >= 0); + } + return ret; +} + +int cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, + void *opaque) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (!cc->sysemu_ops->write_elf32_qemunote) { + return 0; + } + return (*cc->sysemu_ops->write_elf32_qemunote)(f, cpu, opaque); +} + +int cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cpu, + int cpuid, void *opaque) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (!cc->sysemu_ops->write_elf32_note) { + return -1; + } + return (*cc->sysemu_ops->write_elf32_note)(f, cpu, cpuid, opaque); +} + +int cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu, + void *opaque) +{ + CPUClass *cc = 
CPU_GET_CLASS(cpu); + + if (!cc->sysemu_ops->write_elf64_qemunote) { + return 0; + } + return (*cc->sysemu_ops->write_elf64_qemunote)(f, cpu, opaque); +} + +int cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu, + int cpuid, void *opaque) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (!cc->sysemu_ops->write_elf64_note) { + return -1; + } + return (*cc->sysemu_ops->write_elf64_note)(f, cpu, cpuid, opaque); +} + +bool cpu_virtio_is_big_endian(CPUState *cpu) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->sysemu_ops->virtio_is_big_endian) { + return cc->sysemu_ops->virtio_is_big_endian(cpu); + } + return target_words_bigendian(); +} + +GuestPanicInformation *cpu_get_crash_info(CPUState *cpu) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + GuestPanicInformation *res = NULL; + + if (cc->sysemu_ops->get_crash_info) { + res = cc->sysemu_ops->get_crash_info(cpu); + } + return res; +} diff --git a/hw/core/cpu.c b/hw/core/cpu.c deleted file mode 100644 index 9d46cea876a..00000000000 --- a/hw/core/cpu.c +++ /dev/null @@ -1,447 +0,0 @@ -/* - * QEMU CPU model - * - * Copyright (c) 2012-2014 SUSE LINUX Products GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see - * - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "hw/core/cpu.h" -#include "sysemu/hw_accel.h" -#include "qemu/notify.h" -#include "qemu/log.h" -#include "qemu/main-loop.h" -#include "exec/log.h" -#include "exec/cpu-common.h" -#include "qemu/error-report.h" -#include "qemu/qemu-print.h" -#include "sysemu/tcg.h" -#include "hw/boards.h" -#include "hw/qdev-properties.h" -#include "trace/trace-root.h" -#include "qemu/plugin.h" -#include "sysemu/hw_accel.h" - -CPUState *cpu_by_arch_id(int64_t id) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - CPUClass *cc = CPU_GET_CLASS(cpu); - - if (cc->get_arch_id(cpu) == id) { - return cpu; - } - } - return NULL; -} - -bool cpu_exists(int64_t id) -{ - return !!cpu_by_arch_id(id); -} - -CPUState *cpu_create(const char *typename) -{ - Error *err = NULL; - CPUState *cpu = CPU(object_new(typename)); - if (!qdev_realize(DEVICE(cpu), NULL, &err)) { - error_report_err(err); - object_unref(OBJECT(cpu)); - exit(EXIT_FAILURE); - } - return cpu; -} - -bool cpu_paging_enabled(const CPUState *cpu) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - return cc->get_paging_enabled(cpu); -} - -static bool cpu_common_get_paging_enabled(const CPUState *cpu) -{ - return false; -} - -void cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, - Error **errp) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - cc->get_memory_mapping(cpu, list, errp); -} - -static void cpu_common_get_memory_mapping(CPUState *cpu, - MemoryMappingList *list, - Error **errp) -{ - error_setg(errp, "Obtaining memory mappings is unsupported on this CPU."); -} - -/* Resetting the IRQ comes from across the code base so we take the - * BQL here if we need to. 
cpu_interrupt assumes it is held.*/ -void cpu_reset_interrupt(CPUState *cpu, int mask) -{ - bool need_lock = !qemu_mutex_iothread_locked(); - - if (need_lock) { - qemu_mutex_lock_iothread(); - } - cpu->interrupt_request &= ~mask; - if (need_lock) { - qemu_mutex_unlock_iothread(); - } -} - -void cpu_exit(CPUState *cpu) -{ - qatomic_set(&cpu->exit_request, 1); - /* Ensure cpu_exec will see the exit request after TCG has exited. */ - smp_wmb(); - qatomic_set(&cpu->icount_decr_ptr->u16.high, -1); -} - -int cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, - void *opaque) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - return (*cc->write_elf32_qemunote)(f, cpu, opaque); -} - -static int cpu_common_write_elf32_qemunote(WriteCoreDumpFunction f, - CPUState *cpu, void *opaque) -{ - return 0; -} - -int cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cpu, - int cpuid, void *opaque) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - return (*cc->write_elf32_note)(f, cpu, cpuid, opaque); -} - -static int cpu_common_write_elf32_note(WriteCoreDumpFunction f, - CPUState *cpu, int cpuid, - void *opaque) -{ - return -1; -} - -int cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu, - void *opaque) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - return (*cc->write_elf64_qemunote)(f, cpu, opaque); -} - -static int cpu_common_write_elf64_qemunote(WriteCoreDumpFunction f, - CPUState *cpu, void *opaque) -{ - return 0; -} - -int cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu, - int cpuid, void *opaque) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - return (*cc->write_elf64_note)(f, cpu, cpuid, opaque); -} - -static int cpu_common_write_elf64_note(WriteCoreDumpFunction f, - CPUState *cpu, int cpuid, - void *opaque) -{ - return -1; -} - - -static int cpu_common_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg) -{ - return 0; -} - -static int cpu_common_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg) -{ - return 0; -} - -static bool cpu_common_virtio_is_big_endian(CPUState *cpu) -{ - return target_words_bigendian(); -} - -/* - * XXX the following #if is always true because this is a common_ss - * module, so target CONFIG_* is never defined. 
- */ -#if !defined(CONFIG_USER_ONLY) -GuestPanicInformation *cpu_get_crash_info(CPUState *cpu) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - GuestPanicInformation *res = NULL; - - if (cc->get_crash_info) { - res = cc->get_crash_info(cpu); - } - return res; -} -#endif - -void cpu_dump_state(CPUState *cpu, FILE *f, int flags) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - if (cc->dump_state) { - cpu_synchronize_state(cpu); - cc->dump_state(cpu, f, flags); - } -} - -void cpu_dump_statistics(CPUState *cpu, int flags) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - if (cc->dump_statistics) { - cc->dump_statistics(cpu, flags); - } -} - -void cpu_reset(CPUState *cpu) -{ - device_cold_reset(DEVICE(cpu)); - - trace_guest_cpu_reset(cpu); -} - -static void cpu_common_reset(DeviceState *dev) -{ - CPUState *cpu = CPU(dev); - CPUClass *cc = CPU_GET_CLASS(cpu); - - if (qemu_loglevel_mask(CPU_LOG_RESET)) { - qemu_log("CPU Reset (CPU %d)\n", cpu->cpu_index); - log_cpu_state(cpu, cc->reset_dump_flags); - } - - cpu->interrupt_request = 0; - cpu->halted = cpu->start_powered_off; - cpu->mem_io_pc = 0; - cpu->icount_extra = 0; - qatomic_set(&cpu->icount_decr_ptr->u32, 0); - cpu->can_do_io = 1; - cpu->exception_index = -1; - cpu->crash_occurred = false; - cpu->cflags_next_tb = -1; - - if (tcg_enabled()) { - cpu_tb_jmp_cache_clear(cpu); - - tcg_flush_softmmu_tlb(cpu); - } -} - -static bool cpu_common_has_work(CPUState *cs) -{ - return false; -} - -ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model) -{ - CPUClass *cc = CPU_CLASS(object_class_by_name(typename)); - - assert(cpu_model && cc->class_by_name); - return cc->class_by_name(cpu_model); -} - -static void cpu_common_parse_features(const char *typename, char *features, - Error **errp) -{ - char *val; - static bool cpu_globals_initialized; - /* Single "key=value" string being parsed */ - char *featurestr = features ? strtok(features, ",") : NULL; - - /* should be called only once, catch invalid users */ - assert(!cpu_globals_initialized); - cpu_globals_initialized = true; - - while (featurestr) { - val = strchr(featurestr, '='); - if (val) { - GlobalProperty *prop = g_new0(typeof(*prop), 1); - *val = 0; - val++; - prop->driver = typename; - prop->property = g_strdup(featurestr); - prop->value = g_strdup(val); - qdev_prop_register_global(prop); - } else { - error_setg(errp, "Expected key=value format, found %s.", - featurestr); - return; - } - featurestr = strtok(NULL, ","); - } -} - -static void cpu_common_realizefn(DeviceState *dev, Error **errp) -{ - CPUState *cpu = CPU(dev); - Object *machine = qdev_get_machine(); - - /* qdev_get_machine() can return something that's not TYPE_MACHINE - * if this is one of the user-only emulators; in that case there's - * no need to check the ignore_memory_transaction_failures board flag. 
- */ - if (object_dynamic_cast(machine, TYPE_MACHINE)) { - ObjectClass *oc = object_get_class(machine); - MachineClass *mc = MACHINE_CLASS(oc); - - if (mc) { - cpu->ignore_memory_transaction_failures = - mc->ignore_memory_transaction_failures; - } - } - - if (dev->hotplugged) { - cpu_synchronize_post_init(cpu); - cpu_resume(cpu); - } - - /* NOTE: latest generic point where the cpu is fully realized */ - trace_init_vcpu(cpu); -} - -static void cpu_common_unrealizefn(DeviceState *dev) -{ - CPUState *cpu = CPU(dev); - - /* NOTE: latest generic point before the cpu is fully unrealized */ - trace_fini_vcpu(cpu); - cpu_exec_unrealizefn(cpu); -} - -static void cpu_common_initfn(Object *obj) -{ - CPUState *cpu = CPU(obj); - CPUClass *cc = CPU_GET_CLASS(obj); - - cpu->cpu_index = UNASSIGNED_CPU_INDEX; - cpu->cluster_index = UNASSIGNED_CLUSTER_INDEX; - cpu->gdb_num_regs = cpu->gdb_num_g_regs = cc->gdb_num_core_regs; - /* *-user doesn't have configurable SMP topology */ - /* the default value is changed by qemu_init_vcpu() for softmmu */ - cpu->nr_cores = 1; - cpu->nr_threads = 1; - - qemu_mutex_init(&cpu->work_mutex); - QSIMPLEQ_INIT(&cpu->work_list); - QTAILQ_INIT(&cpu->breakpoints); - QTAILQ_INIT(&cpu->watchpoints); - - cpu_exec_initfn(cpu); -} - -static void cpu_common_post_initfn(Object *obj) -{ - /* Now that cpu->env_ptr has been initialized set up instruction logging. */ -#ifdef CONFIG_TCG_LOG_INSTR - qemu_log_instr_init(CPU(obj)); -#endif -} - -static void cpu_common_finalize(Object *obj) -{ - CPUState *cpu = CPU(obj); - - qemu_mutex_destroy(&cpu->work_mutex); -} - -static int64_t cpu_common_get_arch_id(CPUState *cpu) -{ - return cpu->cpu_index; -} - -static Property cpu_common_props[] = { -#ifndef CONFIG_USER_ONLY - /* Create a memory property for softmmu CPU object, - * so users can wire up its memory. (This can't go in hw/core/cpu.c - * because that file is compiled only once for both user-mode - * and system builds.) The default if no link is set up is to use - * the system address space. - */ - DEFINE_PROP_LINK("memory", CPUState, memory, TYPE_MEMORY_REGION, - MemoryRegion *), -#endif - DEFINE_PROP_BOOL("start-powered-off", CPUState, start_powered_off, false), - DEFINE_PROP_END_OF_LIST(), -}; - -static void cpu_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - CPUClass *k = CPU_CLASS(klass); - - k->parse_features = cpu_common_parse_features; - k->get_arch_id = cpu_common_get_arch_id; - k->has_work = cpu_common_has_work; - k->get_paging_enabled = cpu_common_get_paging_enabled; - k->get_memory_mapping = cpu_common_get_memory_mapping; - k->write_elf32_qemunote = cpu_common_write_elf32_qemunote; - k->write_elf32_note = cpu_common_write_elf32_note; - k->write_elf64_qemunote = cpu_common_write_elf64_qemunote; - k->write_elf64_note = cpu_common_write_elf64_note; - k->gdb_read_register = cpu_common_gdb_read_register; - k->gdb_write_register = cpu_common_gdb_write_register; - k->virtio_is_big_endian = cpu_common_virtio_is_big_endian; - set_bit(DEVICE_CATEGORY_CPU, dc->categories); - dc->realize = cpu_common_realizefn; - dc->unrealize = cpu_common_unrealizefn; - dc->reset = cpu_common_reset; - device_class_set_props(dc, cpu_common_props); - /* - * Reason: CPUs still need special care by board code: wiring up - * IRQs, adding reset handlers, halting non-first CPUs, ... 
- */ - dc->user_creatable = false; -} - -static const TypeInfo cpu_type_info = { - .name = TYPE_CPU, - .parent = TYPE_DEVICE, - .instance_size = sizeof(CPUState), - .instance_init = cpu_common_initfn, - .instance_post_init = cpu_common_post_initfn, - .instance_finalize = cpu_common_finalize, - .abstract = true, - .class_size = sizeof(CPUClass), - .class_init = cpu_class_init, -}; - -static void cpu_register_types(void) -{ - type_register_static(&cpu_type_info); -} - -type_init(cpu_register_types) diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c index 2b2a7b5e9aa..d14f932eea2 100644 --- a/hw/core/generic-loader.c +++ b/hw/core/generic-loader.c @@ -32,7 +32,6 @@ #include "qemu/osdep.h" #include "hw/core/cpu.h" -#include "hw/sysbus.h" #include "sysemu/dma.h" #include "sysemu/reset.h" #include "hw/boards.h" diff --git a/hw/core/gpio.c b/hw/core/gpio.c new file mode 100644 index 00000000000..8e6b4f5edf3 --- /dev/null +++ b/hw/core/gpio.c @@ -0,0 +1,197 @@ +/* + * qdev GPIO helpers + * + * Copyright (c) 2009 CodeSourcery + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/qdev-core.h" +#include "hw/irq.h" +#include "qapi/error.h" + +static NamedGPIOList *qdev_get_named_gpio_list(DeviceState *dev, + const char *name) +{ + NamedGPIOList *ngl; + + QLIST_FOREACH(ngl, &dev->gpios, node) { + /* NULL is a valid and matchable name. 
*/ + if (g_strcmp0(name, ngl->name) == 0) { + return ngl; + } + } + + ngl = g_malloc0(sizeof(*ngl)); + ngl->name = g_strdup(name); + QLIST_INSERT_HEAD(&dev->gpios, ngl, node); + return ngl; +} + +void qdev_init_gpio_in_named_with_opaque(DeviceState *dev, + qemu_irq_handler handler, + void *opaque, + const char *name, int n) +{ + int i; + NamedGPIOList *gpio_list = qdev_get_named_gpio_list(dev, name); + + assert(gpio_list->num_out == 0 || !name); + gpio_list->in = qemu_extend_irqs(gpio_list->in, gpio_list->num_in, handler, + opaque, n); + + if (!name) { + name = "unnamed-gpio-in"; + } + for (i = gpio_list->num_in; i < gpio_list->num_in + n; i++) { + gchar *propname = g_strdup_printf("%s[%u]", name, i); + + object_property_add_child(OBJECT(dev), propname, + OBJECT(gpio_list->in[i])); + g_free(propname); + } + + gpio_list->num_in += n; +} + +void qdev_init_gpio_in(DeviceState *dev, qemu_irq_handler handler, int n) +{ + qdev_init_gpio_in_named(dev, handler, NULL, n); +} + +void qdev_init_gpio_out_named(DeviceState *dev, qemu_irq *pins, + const char *name, int n) +{ + int i; + NamedGPIOList *gpio_list = qdev_get_named_gpio_list(dev, name); + + assert(gpio_list->num_in == 0 || !name); + + if (!name) { + name = "unnamed-gpio-out"; + } + memset(pins, 0, sizeof(*pins) * n); + for (i = 0; i < n; ++i) { + gchar *propname = g_strdup_printf("%s[%u]", name, + gpio_list->num_out + i); + + object_property_add_link(OBJECT(dev), propname, TYPE_IRQ, + (Object **)&pins[i], + object_property_allow_set_link, + OBJ_PROP_LINK_STRONG); + g_free(propname); + } + gpio_list->num_out += n; +} + +void qdev_init_gpio_out(DeviceState *dev, qemu_irq *pins, int n) +{ + qdev_init_gpio_out_named(dev, pins, NULL, n); +} + +qemu_irq qdev_get_gpio_in_named(DeviceState *dev, const char *name, int n) +{ + NamedGPIOList *gpio_list = qdev_get_named_gpio_list(dev, name); + + assert(n >= 0 && n < gpio_list->num_in); + return gpio_list->in[n]; +} + +qemu_irq qdev_get_gpio_in(DeviceState *dev, int n) +{ + return qdev_get_gpio_in_named(dev, NULL, n); +} + +void qdev_connect_gpio_out_named(DeviceState *dev, const char *name, int n, + qemu_irq pin) +{ + char *propname = g_strdup_printf("%s[%d]", + name ? name : "unnamed-gpio-out", n); + if (pin && !OBJECT(pin)->parent) { + /* We need a name for object_property_set_link to work */ + object_property_add_child(container_get(qdev_get_machine(), + "/unattached"), + "non-qdev-gpio[*]", OBJECT(pin)); + } + object_property_set_link(OBJECT(dev), propname, OBJECT(pin), &error_abort); + g_free(propname); +} + +qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n) +{ + g_autofree char *propname = g_strdup_printf("%s[%d]", + name ? name : "unnamed-gpio-out", n); + + qemu_irq ret = (qemu_irq)object_property_get_link(OBJECT(dev), propname, + NULL); + + return ret; +} + +/* disconnect a GPIO output, returning the disconnected input (if any) */ + +static qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, + const char *name, int n) +{ + char *propname = g_strdup_printf("%s[%d]", + name ? 
name : "unnamed-gpio-out", n); + + qemu_irq ret = (qemu_irq)object_property_get_link(OBJECT(dev), propname, + NULL); + if (ret) { + object_property_set_link(OBJECT(dev), propname, NULL, NULL); + } + g_free(propname); + return ret; +} + +qemu_irq qdev_intercept_gpio_out(DeviceState *dev, qemu_irq icpt, + const char *name, int n) +{ + qemu_irq disconnected = qdev_disconnect_gpio_out_named(dev, name, n); + qdev_connect_gpio_out_named(dev, name, n, icpt); + return disconnected; +} + +void qdev_connect_gpio_out(DeviceState *dev, int n, qemu_irq pin) +{ + qdev_connect_gpio_out_named(dev, NULL, n, pin); +} + +void qdev_pass_gpios(DeviceState *dev, DeviceState *container, + const char *name) +{ + int i; + NamedGPIOList *ngl = qdev_get_named_gpio_list(dev, name); + + for (i = 0; i < ngl->num_in; i++) { + const char *nm = ngl->name ? ngl->name : "unnamed-gpio-in"; + char *propname = g_strdup_printf("%s[%d]", nm, i); + + object_property_add_alias(OBJECT(container), propname, + OBJECT(dev), propname); + g_free(propname); + } + for (i = 0; i < ngl->num_out; i++) { + const char *nm = ngl->name ? ngl->name : "unnamed-gpio-out"; + char *propname = g_strdup_printf("%s[%d]", nm, i); + + object_property_add_alias(OBJECT(container), propname, + OBJECT(dev), propname); + g_free(propname); + } + QLIST_REMOVE(ngl, node); + QLIST_INSERT_HEAD(&container->gpios, ngl, node); +} diff --git a/hw/core/guest-loader.c b/hw/core/guest-loader.c index bde44e27b43..d3f9d1a06eb 100644 --- a/hw/core/guest-loader.c +++ b/hw/core/guest-loader.c @@ -26,7 +26,6 @@ #include "qemu/osdep.h" #include "hw/core/cpu.h" -#include "hw/sysbus.h" #include "sysemu/dma.h" #include "hw/loader.h" #include "hw/qdev-properties.h" diff --git a/hw/core/hotplug-stubs.c b/hw/core/hotplug-stubs.c new file mode 100644 index 00000000000..7aadaa29bd5 --- /dev/null +++ b/hw/core/hotplug-stubs.c @@ -0,0 +1,34 @@ +/* + * Hotplug handler stubs + * + * Copyright (c) Red Hat + * + * Authors: + * Philippe Mathieu-Daudé , + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include "hw/qdev-core.h" + +HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev) +{ + return NULL; +} + +void hotplug_handler_pre_plug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, + Error **errp) +{ + g_assert_not_reached(); +} + +void hotplug_handler_plug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, + Error **errp) +{ + g_assert_not_reached(); +} diff --git a/hw/core/loader.c b/hw/core/loader.c index d3e5f3b423f..052a0fd7198 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -46,6 +46,8 @@ #include "qemu-common.h" #include "qemu/datadir.h" #include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/type-helpers.h" #include "trace.h" #include "hw/hw.h" #include "disas/disas.h" @@ -57,7 +59,6 @@ #include "hw/loader.h" #include "hw/nvram/fw_cfg.h" #include "exec/memory.h" -#include "exec/address-spaces.h" #include "hw/boards.h" #include "qemu/cutils.h" #include "sysemu/runstate.h" @@ -327,7 +328,7 @@ static void *load_at(int fd, off_t offset, size_t size) #define SZ 64 #include "hw/elf_ops.h" -const char *load_elf_strerror(int error) +const char *load_elf_strerror(ssize_t error) { switch (error) { case 0: @@ -403,12 +404,12 @@ void load_elf_hdr(const char *filename, void *hdr, bool *is64, Error **errp) } /* return < 0 if error, otherwise the number of bytes loaded in memory */ -int load_elf(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, - uint64_t *highaddr, uint32_t *pflags, int big_endian, - int elf_machine, int clear_lsb, int data_swab) +ssize_t load_elf(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, uint32_t *pflags, int big_endian, + int elf_machine, int clear_lsb, int data_swab) { return load_elf_as(filename, elf_note_fn, translate_fn, translate_opaque, pentry, lowaddr, highaddr, pflags, big_endian, @@ -416,12 +417,13 @@ int load_elf(const char *filename, } /* return < 0 if error, otherwise the number of bytes loaded in memory */ -int load_elf_as(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, - uint64_t *highaddr, uint32_t *pflags, int big_endian, - int elf_machine, int clear_lsb, int data_swab, AddressSpace *as) +ssize_t load_elf_as(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, uint32_t *pflags, int big_endian, + int elf_machine, int clear_lsb, int data_swab, + AddressSpace *as) { return load_elf_ram(filename, elf_note_fn, translate_fn, translate_opaque, pentry, lowaddr, highaddr, pflags, big_endian, @@ -429,13 +431,13 @@ int load_elf_as(const char *filename, } /* return < 0 if error, otherwise the number of bytes loaded in memory */ -int load_elf_ram(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, - uint64_t *highaddr, uint32_t *pflags, int big_endian, - int elf_machine, int clear_lsb, int data_swab, - AddressSpace *as, bool load_rom) +ssize_t load_elf_ram(const char *filename, + uint64_t 
(*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, + uint64_t *lowaddr, uint64_t *highaddr, uint32_t *pflags, + int big_endian, int elf_machine, int clear_lsb, + int data_swab, AddressSpace *as, bool load_rom) { return load_elf_ram_sym(filename, elf_note_fn, translate_fn, translate_opaque, @@ -445,16 +447,17 @@ int load_elf_ram(const char *filename, } /* return < 0 if error, otherwise the number of bytes loaded in memory */ -int load_elf_ram_sym(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, - uint64_t *lowaddr, uint64_t *highaddr, uint32_t *pflags, - int big_endian, int elf_machine, - int clear_lsb, int data_swab, - AddressSpace *as, bool load_rom, symbol_fn_t sym_cb) +ssize_t load_elf_ram_sym(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, + uint64_t *lowaddr, uint64_t *highaddr, + uint32_t *pflags, int big_endian, int elf_machine, + int clear_lsb, int data_swab, + AddressSpace *as, bool load_rom, symbol_fn_t sym_cb) { - int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED; + int fd, data_order, target_data_order, must_swab; + ssize_t ret = ELF_LOAD_FAILED; uint8_t e_ident[EI_NIDENT]; fd = open(filename, O_RDONLY | O_BINARY); @@ -556,24 +559,35 @@ ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen) /* skip header */ i = 10; + if (srclen < 4) { + goto toosmall; + } flags = src[3]; if (src[2] != DEFLATED || (flags & RESERVED) != 0) { puts ("Error: Bad gzipped data\n"); return -1; } - if ((flags & EXTRA_FIELD) != 0) + if ((flags & EXTRA_FIELD) != 0) { + if (srclen < 12) { + goto toosmall; + } i = 12 + src[10] + (src[11] << 8); - if ((flags & ORIG_NAME) != 0) - while (src[i++] != 0) - ; - if ((flags & COMMENT) != 0) - while (src[i++] != 0) - ; - if ((flags & HEAD_CRC) != 0) + } + if ((flags & ORIG_NAME) != 0) { + while (i < srclen && src[i++] != 0) { + /* do nothing */ + } + } + if ((flags & COMMENT) != 0) { + while (i < srclen && src[i++] != 0) { + /* do nothing */ + } + } + if ((flags & HEAD_CRC) != 0) { i += 2; + } if (i >= srclen) { - puts ("Error: gunzip out of data in header\n"); - return -1; + goto toosmall; } s.zalloc = zalloc; @@ -597,6 +611,10 @@ ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen) inflateEnd(&s); return dstbytes; + +toosmall: + puts("Error: gunzip out of data in header\n"); + return -1; } /* Load a U-Boot image. */ @@ -1458,32 +1476,35 @@ void *rom_ptr_for_as(AddressSpace *as, hwaddr addr, size_t size) return cbdata.rom; } -void hmp_info_roms(Monitor *mon, const QDict *qdict) +HumanReadableText *qmp_x_query_roms(Error **errp) { Rom *rom; + g_autoptr(GString) buf = g_string_new(""); QTAILQ_FOREACH(rom, &roms, next) { if (rom->mr) { - monitor_printf(mon, "%s" - " size=0x%06zx name=\"%s\"\n", - memory_region_name(rom->mr), - rom->romsize, - rom->name); + g_string_append_printf(buf, "%s" + " size=0x%06zx name=\"%s\"\n", + memory_region_name(rom->mr), + rom->romsize, + rom->name); } else if (!rom->fw_file) { - monitor_printf(mon, "addr=" TARGET_FMT_plx - " size=0x%06zx mem=%s name=\"%s\"\n", - rom->addr, rom->romsize, - rom->isrom ? "rom" : "ram", - rom->name); + g_string_append_printf(buf, "addr=" TARGET_FMT_plx + " size=0x%06zx mem=%s name=\"%s\"\n", + rom->addr, rom->romsize, + rom->isrom ? 
"rom" : "ram", + rom->name); } else { - monitor_printf(mon, "fw=%s/%s" - " size=0x%06zx name=\"%s\"\n", - rom->fw_dir, - rom->fw_file, - rom->romsize, - rom->name); + g_string_append_printf(buf, "fw=%s/%s" + " size=0x%06zx name=\"%s\"\n", + rom->fw_dir, + rom->fw_file, + rom->romsize, + rom->name); } } + + return human_readable_text_from_str(buf); } typedef enum HexRecord HexRecord; diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c index 58248cffa37..4e2f319aebd 100644 --- a/hw/core/machine-hmp-cmds.c +++ b/hw/core/machine-hmp-cmds.c @@ -53,8 +53,7 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict) HotpluggableCPUList *saved = l; CpuInstanceProperties *c; - if (err != NULL) { - hmp_handle_error(mon, err); + if (hmp_handle_error(mon, err)) { return; } @@ -110,6 +109,12 @@ void hmp_info_memdev(Monitor *mon, const QDict *qdict) m->value->dump ? "true" : "false"); monitor_printf(mon, " prealloc: %s\n", m->value->prealloc ? "true" : "false"); + monitor_printf(mon, " share: %s\n", + m->value->share ? "true" : "false"); + if (m->value->has_reserve) { + monitor_printf(mon, " reserve: %s\n", + m->value->reserve ? "true" : "false"); + } monitor_printf(mon, " policy: %s\n", HostMemPolicy_str(m->value->policy)); visit_complete(v, &str); @@ -125,38 +130,3 @@ void hmp_info_memdev(Monitor *mon, const QDict *qdict) qapi_free_MemdevList(memdev_list); hmp_handle_error(mon, err); } - -void hmp_info_numa(Monitor *mon, const QDict *qdict) -{ - int i, nb_numa_nodes; - NumaNodeMem *node_mem; - CpuInfoFastList *cpu_list, *cpu; - MachineState *ms = MACHINE(qdev_get_machine()); - - nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0; - monitor_printf(mon, "%d nodes\n", nb_numa_nodes); - if (!nb_numa_nodes) { - return; - } - - cpu_list = qmp_query_cpus_fast(&error_abort); - node_mem = g_new0(NumaNodeMem, nb_numa_nodes); - - query_numa_node_mem(node_mem, ms); - for (i = 0; i < nb_numa_nodes; i++) { - monitor_printf(mon, "node %d cpus:", i); - for (cpu = cpu_list; cpu; cpu = cpu->next) { - if (cpu->value->has_props && cpu->value->props->has_node_id && - cpu->value->props->node_id == i) { - monitor_printf(mon, " %" PRIi64, cpu->value->cpu_index); - } - } - monitor_printf(mon, "\n"); - monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i, - node_mem[i].node_mem >> 20); - monitor_printf(mon, "node %d plugged: %" PRId64 " MB\n", i, - node_mem[i].node_plugged_mem >> 20); - } - qapi_free_CpuInfoFastList(cpu_list); - g_free(node_mem); -} diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c index 68a942595a2..4f4ab30f8c3 100644 --- a/hw/core/machine-qmp-cmds.c +++ b/hw/core/machine-qmp-cmds.c @@ -8,7 +8,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "hw/boards.h" #include "qapi/error.h" #include "qapi/qapi-builtin-visit.h" @@ -16,13 +15,13 @@ #include "qapi/qmp/qerror.h" #include "qapi/qmp/qobject.h" #include "qapi/qobject-input-visitor.h" +#include "qapi/type-helpers.h" #include "qemu/main-loop.h" #include "qom/qom-qobject.h" #include "sysemu/hostmem.h" #include "sysemu/hw_accel.h" #include "sysemu/numa.h" #include "sysemu/runstate.h" -#include "sysemu/sysemu.h" static void cpustate_to_cpuinfo_s390(CpuInfoS390 *info, const CPUState *cpu) { @@ -159,6 +158,7 @@ void qmp_set_numa_node(NumaOptions *cmd, Error **errp) static int query_memdev(Object *obj, void *opaque) { + Error *err = NULL; MemdevList **list = opaque; Memdev *m; QObject *host_nodes; @@ -174,6 +174,13 @@ static int query_memdev(Object *obj, void *opaque) m->merge = 
object_property_get_bool(obj, "merge", &error_abort); m->dump = object_property_get_bool(obj, "dump", &error_abort); m->prealloc = object_property_get_bool(obj, "prealloc", &error_abort); + m->share = object_property_get_bool(obj, "share", &error_abort); + m->reserve = object_property_get_bool(obj, "reserve", &err); + if (err) { + error_free_or_abort(&err); + } else { + m->has_reserve = true; + } m->policy = object_property_get_enum(obj, "policy", "HostMemPolicy", &error_abort); host_nodes = object_property_get_qobject(obj, @@ -198,3 +205,42 @@ MemdevList *qmp_query_memdev(Error **errp) object_child_foreach(obj, query_memdev, &list); return list; } + +HumanReadableText *qmp_x_query_numa(Error **errp) +{ + g_autoptr(GString) buf = g_string_new(""); + int i, nb_numa_nodes; + NumaNodeMem *node_mem; + CpuInfoFastList *cpu_list, *cpu; + MachineState *ms = MACHINE(qdev_get_machine()); + + nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0; + g_string_append_printf(buf, "%d nodes\n", nb_numa_nodes); + if (!nb_numa_nodes) { + goto done; + } + + cpu_list = qmp_query_cpus_fast(&error_abort); + node_mem = g_new0(NumaNodeMem, nb_numa_nodes); + + query_numa_node_mem(node_mem, ms); + for (i = 0; i < nb_numa_nodes; i++) { + g_string_append_printf(buf, "node %d cpus:", i); + for (cpu = cpu_list; cpu; cpu = cpu->next) { + if (cpu->value->has_props && cpu->value->props->has_node_id && + cpu->value->props->node_id == i) { + g_string_append_printf(buf, " %" PRIi64, cpu->value->cpu_index); + } + } + g_string_append_printf(buf, "\n"); + g_string_append_printf(buf, "node %d size: %" PRId64 " MB\n", i, + node_mem[i].node_mem >> 20); + g_string_append_printf(buf, "node %d plugged: %" PRId64 " MB\n", i, + node_mem[i].node_plugged_mem >> 20); + } + qapi_free_CpuInfoFastList(cpu_list); + g_free(node_mem); + + done: + return human_readable_text_from_str(buf); +} diff --git a/hw/core/machine-smp.c b/hw/core/machine-smp.c new file mode 100644 index 00000000000..116a0cbbfab --- /dev/null +++ b/hw/core/machine-smp.c @@ -0,0 +1,181 @@ +/* + * QEMU Machine core (related to -smp parsing) + * + * Copyright (c) 2021 Huawei Technologies Co., Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/boards.h" +#include "qapi/error.h" + +/* + * Report information of a machine's supported CPU topology hierarchy. + * Topology members will be ordered from the largest to the smallest + * in the string. 
+ */ +static char *cpu_hierarchy_to_string(MachineState *ms) +{ + MachineClass *mc = MACHINE_GET_CLASS(ms); + GString *s = g_string_new(NULL); + + g_string_append_printf(s, "sockets (%u)", ms->smp.sockets); + + if (mc->smp_props.dies_supported) { + g_string_append_printf(s, " * dies (%u)", ms->smp.dies); + } + + g_string_append_printf(s, " * cores (%u)", ms->smp.cores); + g_string_append_printf(s, " * threads (%u)", ms->smp.threads); + + return g_string_free(s, false); +} + +/* + * smp_parse - Generic function used to parse the given SMP configuration + * + * Any missing parameter in "cpus/maxcpus/sockets/cores/threads" will be + * automatically computed based on the provided ones. + * + * In the calculation of omitted sockets/cores/threads: we prefer sockets + * over cores over threads before 6.2, while preferring cores over sockets + * over threads since 6.2. + * + * In the calculation of cpus/maxcpus: When both maxcpus and cpus are omitted, + * maxcpus will be computed from the given parameters and cpus will be set + * equal to maxcpus. When only one of maxcpus and cpus is given then the + * omitted one will be set to its given counterpart's value. Both maxcpus and + * cpus may be specified, but maxcpus must be equal to or greater than cpus. + * + * For compatibility, apart from the parameters that will be computed, newly + * introduced topology members which are likely to be target specific should + * be directly set as 1 if they are omitted (e.g. dies for PC since 4.1). + */ +void smp_parse(MachineState *ms, SMPConfiguration *config, Error **errp) +{ + MachineClass *mc = MACHINE_GET_CLASS(ms); + unsigned cpus = config->has_cpus ? config->cpus : 0; + unsigned sockets = config->has_sockets ? config->sockets : 0; + unsigned dies = config->has_dies ? config->dies : 0; + unsigned cores = config->has_cores ? config->cores : 0; + unsigned threads = config->has_threads ? config->threads : 0; + unsigned maxcpus = config->has_maxcpus ? config->maxcpus : 0; + + /* + * Specified CPU topology parameters must be greater than zero, + * explicit configuration like "cpus=0" is not allowed. + */ + if ((config->has_cpus && config->cpus == 0) || + (config->has_sockets && config->sockets == 0) || + (config->has_dies && config->dies == 0) || + (config->has_cores && config->cores == 0) || + (config->has_threads && config->threads == 0) || + (config->has_maxcpus && config->maxcpus == 0)) { + warn_report("Deprecated CPU topology (considered invalid): " + "CPU topology parameters must be greater than zero"); + } + + /* + * If not supported by the machine, a topology parameter must be + * omitted or specified equal to 1. + */ + if (!mc->smp_props.dies_supported && dies > 1) { + error_setg(errp, "dies not supported by this machine's CPU topology"); + return; + } + + dies = dies > 0 ? dies : 1; + + /* compute missing values based on the provided ones */ + if (cpus == 0 && maxcpus == 0) { + sockets = sockets > 0 ? sockets : 1; + cores = cores > 0 ? cores : 1; + threads = threads > 0 ? threads : 1; + } else { + maxcpus = maxcpus > 0 ? maxcpus : cpus; + + if (mc->smp_props.prefer_sockets) { + /* prefer sockets over cores before 6.2 */ + if (sockets == 0) { + cores = cores > 0 ? cores : 1; + threads = threads > 0 ? threads : 1; + sockets = maxcpus / (dies * cores * threads); + } else if (cores == 0) { + threads = threads > 0 ? threads : 1; + cores = maxcpus / (sockets * dies * threads); + } + } else { + /* prefer cores over sockets since 6.2 */ + if (cores == 0) { + sockets = sockets > 0 ? 
sockets : 1; + threads = threads > 0 ? threads : 1; + cores = maxcpus / (sockets * dies * threads); + } else if (sockets == 0) { + threads = threads > 0 ? threads : 1; + sockets = maxcpus / (dies * cores * threads); + } + } + + /* try to calculate omitted threads at last */ + if (threads == 0) { + threads = maxcpus / (sockets * dies * cores); + } + } + + maxcpus = maxcpus > 0 ? maxcpus : sockets * dies * cores * threads; + cpus = cpus > 0 ? cpus : maxcpus; + + ms->smp.cpus = cpus; + ms->smp.sockets = sockets; + ms->smp.dies = dies; + ms->smp.cores = cores; + ms->smp.threads = threads; + ms->smp.max_cpus = maxcpus; + + /* sanity-check of the computed topology */ + if (sockets * dies * cores * threads != maxcpus) { + g_autofree char *topo_msg = cpu_hierarchy_to_string(ms); + error_setg(errp, "Invalid CPU topology: " + "product of the hierarchy must match maxcpus: " + "%s != maxcpus (%u)", + topo_msg, maxcpus); + return; + } + + if (maxcpus < cpus) { + g_autofree char *topo_msg = cpu_hierarchy_to_string(ms); + error_setg(errp, "Invalid CPU topology: " + "maxcpus must be equal to or greater than smp: " + "%s == maxcpus (%u) < smp_cpus (%u)", + topo_msg, maxcpus, cpus); + return; + } + + if (ms->smp.cpus < mc->min_cpus) { + error_setg(errp, "Invalid SMP CPUs %d. The min CPUs " + "supported by machine '%s' is %d", + ms->smp.cpus, + mc->name, mc->min_cpus); + return; + } + + if (ms->smp.max_cpus > mc->max_cpus) { + error_setg(errp, "Invalid SMP CPUs %d. The max CPUs " + "supported by machine '%s' is %d", + ms->smp.max_cpus, + mc->name, mc->max_cpus); + return; + } +} diff --git a/hw/core/machine.c b/hw/core/machine.c index 40def78183a..53a99abc560 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -19,6 +19,7 @@ #include "hw/loader.h" #include "qapi/error.h" #include "qapi/qapi-visit-common.h" +#include "qapi/qapi-visit-machine.h" #include "qapi/visitor.h" #include "hw/sysbus.h" #include "sysemu/cpus.h" @@ -36,11 +37,27 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-pci.h" +GlobalProperty hw_compat_6_1[] = { + { "vhost-user-vsock-device", "seqpacket", "off" }, + { "nvme-ns", "shared", "off" }, +}; +const size_t hw_compat_6_1_len = G_N_ELEMENTS(hw_compat_6_1); + +GlobalProperty hw_compat_6_0[] = { + { "gpex-pcihost", "allow-unmapped-accesses", "false" }, + { "i8042", "extended-state", "false"}, + { "nvme-ns", "eui64-default", "off"}, + { "e1000", "init-vet", "off" }, + { "e1000e", "init-vet", "off" }, + { "vhost-vsock-device", "seqpacket", "off" }, +}; +const size_t hw_compat_6_0_len = G_N_ELEMENTS(hw_compat_6_0); + GlobalProperty hw_compat_5_2[] = { { "ICH9-LPC", "smm-compat", "on"}, { "PIIX4_PM", "smm-compat", "on"}, { "virtio-blk-device", "report-discard-granularity", "off" }, - { "virtio-net-pci", "vectors", "3"}, + { "virtio-net-pci-base", "vectors", "3"}, }; const size_t hw_compat_5_2_len = G_N_ELEMENTS(hw_compat_5_2); @@ -532,35 +549,30 @@ void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type) bool device_is_dynamic_sysbus(MachineClass *mc, DeviceState *dev) { - bool allowed = false; - strList *wl; Object *obj = OBJECT(dev); if (!object_dynamic_cast(obj, TYPE_SYS_BUS_DEVICE)) { return false; } + return device_type_is_dynamic_sysbus(mc, object_get_typename(obj)); +} + +bool device_type_is_dynamic_sysbus(MachineClass *mc, const char *type) +{ + bool allowed = false; + strList *wl; + ObjectClass *klass = object_class_by_name(type); + for (wl = mc->allowed_dynamic_sysbus_devices; !allowed && wl; wl = wl->next) { - allowed |= 
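As a worked example of the defaulting rules documented in smp_parse() above, the standalone program below (plain C, not QEMU code; the "-smp 8,maxcpus=16" scenario is illustrative) reproduces the post-6.2 branch that prefers cores over sockets on a machine without dies support:

#include <stdio.h>

int main(void)
{
    /* user gave -smp 8,maxcpus=16; everything else is left unspecified */
    unsigned cpus = 8, maxcpus = 16;
    unsigned sockets = 0, dies = 1, cores = 0, threads = 0;

    maxcpus = maxcpus ? maxcpus : cpus;
    if (cores == 0) {                      /* prefer cores over sockets */
        sockets = sockets ? sockets : 1;
        threads = threads ? threads : 1;
        cores = maxcpus / (sockets * dies * threads);
    }
    if (threads == 0) {                    /* derive threads last */
        threads = maxcpus / (sockets * dies * cores);
    }

    /* prints: sockets=1 dies=1 cores=16 threads=1 cpus=8 maxcpus=16 */
    printf("sockets=%u dies=%u cores=%u threads=%u cpus=%u maxcpus=%u\n",
           sockets, dies, cores, threads, cpus, maxcpus);
    return 0;
}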
!!object_dynamic_cast(obj, wl->value); + allowed |= !!object_class_dynamic_cast(klass, wl->value); } return allowed; } -static void validate_sysbus_device(SysBusDevice *sbdev, void *opaque) -{ - MachineState *machine = opaque; - MachineClass *mc = MACHINE_GET_CLASS(machine); - - if (!device_is_dynamic_sysbus(mc, DEVICE(sbdev))) { - error_report("Option '-device %s' cannot be handled by this machine", - object_class_get_name(object_get_class(OBJECT(sbdev)))); - exit(1); - } -} - static char *machine_get_memdev(Object *obj, Error **errp) { MachineState *ms = MACHINE(obj); @@ -576,18 +588,6 @@ static void machine_set_memdev(Object *obj, const char *value, Error **errp) ms->ram_memdev_id = g_strdup(value); } - -static void machine_init_notify(Notifier *notifier, void *data) -{ - MachineState *machine = MACHINE(qdev_get_machine()); - - /* - * Loop through all dynamically created sysbus devices and check if they are - * all allowed. If a device is not allowed, error out. - */ - foreach_dynamic_sysbus_device(validate_sysbus_device, machine); -} - HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine) { int i; @@ -720,7 +720,8 @@ void machine_set_cpu_numa_node(MachineState *machine, if ((numa_info[props->node_id].initiator < MAX_NODES) && (props->node_id != numa_info[props->node_id].initiator)) { error_setg(errp, "The initiator of CPU NUMA node %" PRId64 - " should be itself", props->node_id); + " should be itself (got %" PRIu16 ")", + props->node_id, numa_info[props->node_id].initiator); return; } numa_info[props->node_id].has_cpu = true; @@ -733,69 +734,34 @@ void machine_set_cpu_numa_node(MachineState *machine, } } -static void smp_parse(MachineState *ms, QemuOpts *opts) +static void machine_get_smp(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) { - if (opts) { - unsigned cpus = qemu_opt_get_number(opts, "cpus", 0); - unsigned sockets = qemu_opt_get_number(opts, "sockets", 0); - unsigned cores = qemu_opt_get_number(opts, "cores", 0); - unsigned threads = qemu_opt_get_number(opts, "threads", 0); - - /* compute missing values, prefer sockets over cores over threads */ - if (cpus == 0 || sockets == 0) { - cores = cores > 0 ? cores : 1; - threads = threads > 0 ? threads : 1; - if (cpus == 0) { - sockets = sockets > 0 ? sockets : 1; - cpus = cores * threads * sockets; - } else { - ms->smp.max_cpus = - qemu_opt_get_number(opts, "maxcpus", cpus); - sockets = ms->smp.max_cpus / (cores * threads); - } - } else if (cores == 0) { - threads = threads > 0 ? threads : 1; - cores = cpus / (sockets * threads); - cores = cores > 0 ? cores : 1; - } else if (threads == 0) { - threads = cpus / (cores * sockets); - threads = threads > 0 ? 
threads : 1; - } else if (sockets * cores * threads < cpus) { - error_report("cpu topology: " - "sockets (%u) * cores (%u) * threads (%u) < " - "smp_cpus (%u)", - sockets, cores, threads, cpus); - exit(1); - } - - ms->smp.max_cpus = - qemu_opt_get_number(opts, "maxcpus", cpus); - - if (ms->smp.max_cpus < cpus) { - error_report("maxcpus must be equal to or greater than smp"); - exit(1); - } + MachineState *ms = MACHINE(obj); + SMPConfiguration *config = &(SMPConfiguration){ + .has_cpus = true, .cpus = ms->smp.cpus, + .has_sockets = true, .sockets = ms->smp.sockets, + .has_dies = true, .dies = ms->smp.dies, + .has_cores = true, .cores = ms->smp.cores, + .has_threads = true, .threads = ms->smp.threads, + .has_maxcpus = true, .maxcpus = ms->smp.max_cpus, + }; + if (!visit_type_SMPConfiguration(v, name, &config, &error_abort)) { + return; + } +} - if (sockets * cores * threads != ms->smp.max_cpus) { - error_report("Invalid CPU topology: " - "sockets (%u) * cores (%u) * threads (%u) " - "!= maxcpus (%u)", - sockets, cores, threads, - ms->smp.max_cpus); - exit(1); - } +static void machine_set_smp(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + MachineState *ms = MACHINE(obj); + g_autoptr(SMPConfiguration) config = NULL; - ms->smp.cpus = cpus; - ms->smp.cores = cores; - ms->smp.threads = threads; - ms->smp.sockets = sockets; + if (!visit_type_SMPConfiguration(v, name, &config, errp)) { + return; } - if (ms->smp.cpus > 1) { - Error *blocker = NULL; - error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp"); - replay_add_blocker(blocker); - } + smp_parse(ms, config, errp); } static void machine_class_init(ObjectClass *oc, void *data) @@ -805,7 +771,6 @@ static void machine_class_init(ObjectClass *oc, void *data) /* Default 128 MB as guest ram size */ mc->default_ram_size = 128 * MiB; mc->rom_file_has_mr = true; - mc->smp_parse = smp_parse; /* numa node memory size aligned on 8MB by default. 
* On Linux, each node's border has to be 8MB aligned @@ -837,6 +802,12 @@ static void machine_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, "dumpdtb", "Dump current dtb to a file and quit"); + object_class_property_add(oc, "smp", "SMPConfiguration", + machine_get_smp, machine_set_smp, + NULL, NULL); + object_class_property_set_description(oc, "smp", + "CPU topology"); + object_class_property_add(oc, "phandle-start", "int", machine_get_phandle_start, machine_set_phandle_start, NULL, NULL); @@ -956,16 +927,13 @@ static void machine_initfn(Object *obj) "Table (HMAT)"); } - /* Register notifier when init is done for sysbus sanity checks */ - ms->sysbus_notifier.notify = machine_init_notify; - qemu_add_machine_init_done_notifier(&ms->sysbus_notifier); - /* default to mc->default_cpus */ ms->smp.cpus = mc->default_cpus; ms->smp.max_cpus = mc->default_cpus; + ms->smp.sockets = 1; + ms->smp.dies = 1; ms->smp.cores = 1; ms->smp.threads = 1; - ms->smp.sockets = 1; } static void machine_finalize(Object *obj) @@ -1011,6 +979,9 @@ static char *cpu_slot_to_string(const CPUArchId *cpu) g_string_append_printf(s, "socket-id: %"PRId64, cpu->props.socket_id); } if (cpu->props.has_die_id) { + if (s->len) { + g_string_append_printf(s, ", "); + } g_string_append_printf(s, "die-id: %"PRId64, cpu->props.die_id); } if (cpu->props.has_core_id) { @@ -1124,29 +1095,6 @@ MemoryRegion *machine_consume_memdev(MachineState *machine, return ret; } -bool machine_smp_parse(MachineState *ms, QemuOpts *opts, Error **errp) -{ - MachineClass *mc = MACHINE_GET_CLASS(ms); - - mc->smp_parse(ms, opts); - - /* sanity-check smp_cpus and max_cpus against mc */ - if (ms->smp.cpus < mc->min_cpus) { - error_setg(errp, "Invalid SMP CPUs %d. The min CPUs " - "supported by machine '%s' is %d", - ms->smp.cpus, - mc->name, mc->min_cpus); - return false; - } else if (ms->smp.max_cpus > mc->max_cpus) { - error_setg(errp, "Invalid SMP CPUs %d. 
The max CPUs " - "supported by machine '%s' is %d", - current_machine->smp.max_cpus, - mc->name, mc->max_cpus); - return false; - } - return true; -} - void machine_run_board_init(MachineState *machine) { MachineClass *machine_class = MACHINE_GET_CLASS(machine); @@ -1229,6 +1177,7 @@ void machine_run_board_init(MachineState *machine) "on", false); } + accel_init_interfaces(ACCEL_GET_CLASS(machine->accelerator)); machine_class->init(machine); phase_advance(PHASE_MACHINE_INITIALIZED); } diff --git a/hw/core/meson.build b/hw/core/meson.build index 59f1605bb07..0f884d6fd4d 100644 --- a/hw/core/meson.build +++ b/hw/core/meson.build @@ -1,7 +1,6 @@ # core qdev-related obj files, also used by *-user and unit tests -hwcore_files = files( +hwcore_ss.add(files( 'bus.c', - 'hotplug.c', 'qdev-properties.c', 'qdev.c', 'reset.c', @@ -11,21 +10,34 @@ hwcore_files = files( 'irq.c', 'clock.c', 'qdev-clock.c', -) +)) +if have_system + hwcore_ss.add(files( + 'hotplug.c', + 'qdev-hotplug.c', + )) +else + hwcore_ss.add(files( + 'hotplug-stubs.c', + )) +endif -common_ss.add(files('cpu.c')) -common_ss.add(when: 'CONFIG_FITLOADER', if_true: files('loader-fit.c')) -common_ss.add(when: 'CONFIG_GENERIC_LOADER', if_true: files('generic-loader.c')) -common_ss.add(when: ['CONFIG_GUEST_LOADER', fdt], if_true: files('guest-loader.c')) -common_ss.add(when: 'CONFIG_OR_IRQ', if_true: files('or-irq.c')) -common_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('platform-bus.c')) -common_ss.add(when: 'CONFIG_PTIMER', if_true: files('ptimer.c')) -common_ss.add(when: 'CONFIG_REGISTER', if_true: files('register.c')) -common_ss.add(when: 'CONFIG_SPLIT_IRQ', if_true: files('split-irq.c')) -common_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('stream.c')) +common_ss.add(files('cpu-common.c')) +common_ss.add(files('machine-smp.c')) +softmmu_ss.add(when: 'CONFIG_FITLOADER', if_true: files('loader-fit.c')) +softmmu_ss.add(when: 'CONFIG_GENERIC_LOADER', if_true: files('generic-loader.c')) +softmmu_ss.add(when: ['CONFIG_GUEST_LOADER', fdt], if_true: files('guest-loader.c')) +softmmu_ss.add(when: 'CONFIG_OR_IRQ', if_true: files('or-irq.c')) +softmmu_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('platform-bus.c')) +softmmu_ss.add(when: 'CONFIG_PTIMER', if_true: files('ptimer.c')) +softmmu_ss.add(when: 'CONFIG_REGISTER', if_true: files('register.c')) +softmmu_ss.add(when: 'CONFIG_SPLIT_IRQ', if_true: files('split-irq.c')) +softmmu_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('stream.c')) softmmu_ss.add(files( + 'cpu-sysemu.c', 'fw-path-provider.c', + 'gpio.c', 'loader.c', 'machine-hmp-cmds.c', 'machine.c', diff --git a/hw/core/null-machine.c b/hw/core/null-machine.c index 7e693523d75..f586a4bef54 100644 --- a/hw/core/null-machine.c +++ b/hw/core/null-machine.c @@ -14,7 +14,6 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" #include "hw/boards.h" -#include "sysemu/sysemu.h" #include "exec/address-spaces.h" #include "hw/core/cpu.h" diff --git a/hw/core/numa.c b/hw/core/numa.c index 68cee65f614..e6050b22739 100644 --- a/hw/core/numa.c +++ b/hw/core/numa.c @@ -26,7 +26,6 @@ #include "qemu/units.h" #include "sysemu/hostmem.h" #include "sysemu/numa.h" -#include "sysemu/sysemu.h" #include "exec/cpu-common.h" #include "exec/ramlist.h" #include "qemu/bitmap.h" @@ -89,6 +88,29 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, return; } + /* + * If not set the initiator, set it to MAX_NODES. And if + * HMAT is enabled and this node has no cpus, QEMU will raise error. 
+ */ + numa_info[nodenr].initiator = MAX_NODES; + if (node->has_initiator) { + if (!ms->numa_state->hmat_enabled) { + error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " + "(HMAT) is disabled, enable it with -machine hmat=on " + "before using any of hmat specific options"); + return; + } + + if (node->initiator >= MAX_NODES) { + error_report("The initiator id %" PRIu16 " expects an integer " + "between 0 and %d", node->initiator, + MAX_NODES - 1); + return; + } + + numa_info[nodenr].initiator = node->initiator; + } + for (cpus = node->cpus; cpus; cpus = cpus->next) { CpuInstanceProperties props; if (cpus->value >= max_cpus) { @@ -143,28 +165,6 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); } - /* - * If not set the initiator, set it to MAX_NODES. And if - * HMAT is enabled and this node has no cpus, QEMU will raise error. - */ - numa_info[nodenr].initiator = MAX_NODES; - if (node->has_initiator) { - if (!ms->numa_state->hmat_enabled) { - error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " - "(HMAT) is disabled, enable it with -machine hmat=on " - "before using any of hmat specific options"); - return; - } - - if (node->initiator >= MAX_NODES) { - error_report("The initiator id %" PRIu16 " expects an integer " - "between 0 and %d", node->initiator, - MAX_NODES - 1); - return; - } - - numa_info[nodenr].initiator = node->initiator; - } numa_info[nodenr].present = true; max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1); ms->numa_state->num_nodes++; @@ -756,6 +756,7 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) PCDIMMDeviceInfo *pcdimm_info; VirtioPMEMDeviceInfo *vpi; VirtioMEMDeviceInfo *vmi; + SgxEPCDeviceInfo *se; for (info = info_list; info; info = info->next) { MemoryDeviceInfo *value = info->value; @@ -781,6 +782,12 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) node_mem[vmi->node].node_mem += vmi->size; node_mem[vmi->node].node_plugged_mem += vmi->size; break; + case MEMORY_DEVICE_INFO_KIND_SGX_EPC: + se = value->u.sgx_epc.data; + /* TODO: once we support numa, assign to right node */ + node_mem[0].node_mem += se->size; + node_mem[0].node_plugged_mem += se->size; + break; default: g_assert_not_reached(); } @@ -803,9 +810,27 @@ void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms) } } +static int ram_block_notify_add_single(RAMBlock *rb, void *opaque) +{ + const ram_addr_t max_size = qemu_ram_get_max_length(rb); + const ram_addr_t size = qemu_ram_get_used_length(rb); + void *host = qemu_ram_get_host_addr(rb); + RAMBlockNotifier *notifier = opaque; + + if (host) { + notifier->ram_block_added(notifier, host, size, max_size); + } + return 0; +} + void ram_block_notifier_add(RAMBlockNotifier *n) { QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next); + + /* Notify about all existing ram blocks. 
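With this change ram_block_notifier_add() also replays ram_block_added for RAM blocks that already existed at registration time, and, as the following hunks show, the individual callbacks become optional and gain a max_size argument. A minimal sketch of a notifier that only implements ram_block_added is shown below; the function and variable names are hypothetical and the header path is an assumption:

#include "qemu/osdep.h"
#include "exec/ramlist.h"   /* assumed location of RAMBlockNotifier */

/* Sketch, not part of this patch: with this series the callback also
 * fires for RAM blocks created before the notifier was registered. */
static void my_ram_block_added(RAMBlockNotifier *n, void *host,
                               size_t size, size_t max_size)
{
    /* e.g. pre-register [host, host + max_size) with a hypervisor */
}

static RAMBlockNotifier my_notifier = {
    .ram_block_added = my_ram_block_added,
};

static void my_subsystem_init(void)
{
    ram_block_notifier_add(&my_notifier);
}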
*/ + if (n->ram_block_added) { + qemu_ram_foreach_block(ram_block_notify_add_single, n); + } } void ram_block_notifier_remove(RAMBlockNotifier *n) @@ -813,20 +838,35 @@ void ram_block_notifier_remove(RAMBlockNotifier *n) QLIST_REMOVE(n, next); } -void ram_block_notify_add(void *host, size_t size) +void ram_block_notify_add(void *host, size_t size, size_t max_size) +{ + RAMBlockNotifier *notifier; + + QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { + if (notifier->ram_block_added) { + notifier->ram_block_added(notifier, host, size, max_size); + } + } +} + +void ram_block_notify_remove(void *host, size_t size, size_t max_size) { RAMBlockNotifier *notifier; QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { - notifier->ram_block_added(notifier, host, size); + if (notifier->ram_block_removed) { + notifier->ram_block_removed(notifier, host, size, max_size); + } } } -void ram_block_notify_remove(void *host, size_t size) +void ram_block_notify_resize(void *host, size_t old_size, size_t new_size) { RAMBlockNotifier *notifier; QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { - notifier->ram_block_removed(notifier, host, size); + if (notifier->ram_block_resized) { + notifier->ram_block_resized(notifier, host, old_size, new_size); + } } } diff --git a/hw/core/qdev-hotplug.c b/hw/core/qdev-hotplug.c new file mode 100644 index 00000000000..d495d0e9c70 --- /dev/null +++ b/hw/core/qdev-hotplug.c @@ -0,0 +1,73 @@ +/* + * QDev Hotplug handlers + * + * Copyright (c) Red Hat + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/qdev-core.h" +#include "hw/boards.h" + +HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev) +{ + MachineState *machine; + MachineClass *mc; + Object *m_obj = qdev_get_machine(); + + if (object_dynamic_cast(m_obj, TYPE_MACHINE)) { + machine = MACHINE(m_obj); + mc = MACHINE_GET_CLASS(machine); + if (mc->get_hotplug_handler) { + return mc->get_hotplug_handler(machine, dev); + } + } + + return NULL; +} + +bool qdev_hotplug_allowed(DeviceState *dev, Error **errp) +{ + MachineState *machine; + MachineClass *mc; + Object *m_obj = qdev_get_machine(); + + if (object_dynamic_cast(m_obj, TYPE_MACHINE)) { + machine = MACHINE(m_obj); + mc = MACHINE_GET_CLASS(machine); + if (mc->hotplug_allowed) { + return mc->hotplug_allowed(machine, dev, errp); + } + } + + return true; +} + +HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev) +{ + if (dev->parent_bus) { + return dev->parent_bus->hotplug_handler; + } + return NULL; +} + +HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev) +{ + HotplugHandler *hotplug_ctrl = qdev_get_machine_hotplug_handler(dev); + + if (hotplug_ctrl == NULL && dev->parent_bus) { + hotplug_ctrl = qdev_get_bus_hotplug_handler(dev); + } + return hotplug_ctrl; +} + +/* can be used as ->unplug() callback for the simple cases */ +void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + qdev_unrealize(dev); +} diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 2760c21f111..a91f60567aa 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -36,11 +36,11 @@ static bool check_prop_still_unset(Object *obj, const char *name, const void *old_val, const char *new_val, - Error **errp) + bool allow_override, Error **errp) { const 
GlobalProperty *prop = qdev_find_global_prop(obj, name); - if (!old_val) { + if (!old_val || (!prop && allow_override)) { return true; } @@ -93,16 +93,34 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, BlockBackend *blk; bool blk_created = false; int ret; + BlockDriverState *bs; + AioContext *ctx; if (!visit_type_str(v, name, &str, errp)) { return; } - /* - * TODO Should this really be an error? If no, the old value - * needs to be released before we store the new one. - */ - if (!check_prop_still_unset(obj, name, *ptr, str, errp)) { + if (!check_prop_still_unset(obj, name, *ptr, str, true, errp)) { + return; + } + + if (*ptr) { + /* BlockBackend alread exists. So, we want to change attached node */ + blk = *ptr; + ctx = blk_get_aio_context(blk); + bs = bdrv_lookup_bs(NULL, str, errp); + if (!bs) { + return; + } + + if (ctx != bdrv_get_aio_context(bs)) { + error_setg(errp, "Different aio context is not supported for new " + "node"); + } + + aio_context_acquire(ctx); + blk_replace_bs(blk, bs, errp); + aio_context_release(ctx); return; } @@ -114,7 +132,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, blk = blk_by_name(str); if (!blk) { - BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); + bs = bdrv_lookup_bs(NULL, str, NULL); if (bs) { /* * If the device supports iothreads, it will make sure to move the @@ -123,8 +141,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, * aware of iothreads require their BlockBackends to be in the main * AioContext. */ - AioContext *ctx = iothread ? bdrv_get_aio_context(bs) : - qemu_get_aio_context(); + ctx = iothread ? bdrv_get_aio_context(bs) : qemu_get_aio_context(); blk = blk_new(ctx, 0, BLK_PERM_ALL); blk_created = true; @@ -196,6 +213,7 @@ static void release_drive(Object *obj, const char *name, void *opaque) const PropertyInfo qdev_prop_drive = { .name = "str", .description = "Node name or ID of a block device to use as a backend", + .realized_set_allowed = true, .get = get_drive, .set = set_drive, .release = release_drive, @@ -204,6 +222,7 @@ const PropertyInfo qdev_prop_drive = { const PropertyInfo qdev_prop_drive_iothread = { .name = "str", .description = "Node name or ID of a block device to use as a backend", + .realized_set_allowed = true, .get = get_drive, .set = set_drive_iothread, .release = release_drive, @@ -238,7 +257,7 @@ static void set_chr(Object *obj, Visitor *v, const char *name, void *opaque, * TODO Should this really be an error? If no, the old value * needs to be released before we store the new one. */ - if (!check_prop_still_unset(obj, name, be->chr, str, errp)) { + if (!check_prop_still_unset(obj, name, be->chr, str, false, errp)) { return; } @@ -408,10 +427,16 @@ static void set_netdev(Object *obj, Visitor *v, const char *name, * TODO Should this really be an error? If no, the old value * needs to be released before we store the new one. 
*/ - if (!check_prop_still_unset(obj, name, ncs[i], str, errp)) { + if (!check_prop_still_unset(obj, name, ncs[i], str, false, errp)) { goto out; } + if (peers[i]->info->check_peer_type) { + if (!peers[i]->info->check_peer_type(peers[i], obj->class, errp)) { + goto out; + } + } + ncs[i] = peers[i]; ncs[i]->queue_index = i; } diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index 50f40949f52..c34aac6ebc9 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -26,11 +26,11 @@ void qdev_prop_set_after_realize(DeviceState *dev, const char *name, /* returns: true if property is allowed to be set, false otherwise */ static bool qdev_prop_allow_set(Object *obj, const char *name, - Error **errp) + const PropertyInfo *info, Error **errp) { DeviceState *dev = DEVICE(obj); - if (dev->realized) { + if (dev->realized && !info->realized_set_allowed) { qdev_prop_set_after_realize(dev, name, errp); return false; } @@ -79,7 +79,7 @@ static void field_prop_set(Object *obj, Visitor *v, const char *name, { Property *prop = opaque; - if (!qdev_prop_allow_set(obj, name, errp)) { + if (!qdev_prop_allow_set(obj, name, prop->info, errp)) { return; } diff --git a/hw/core/qdev.c b/hw/core/qdev.c index cefc5eaa0a9..84f3019440f 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -28,11 +28,11 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qapi/qapi-events-qdev.h" +#include "qapi/qmp/qdict.h" #include "qapi/qmp/qerror.h" #include "qapi/visitor.h" #include "qemu/error-report.h" #include "qemu/option.h" -#include "hw/hotplug.h" #include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/boards.h" @@ -211,14 +211,17 @@ void device_listener_unregister(DeviceListener *listener) QTAILQ_REMOVE(&device_listeners, listener, link); } -bool qdev_should_hide_device(QemuOpts *opts) +bool qdev_should_hide_device(const QDict *opts, bool from_json, Error **errp) { + ERRP_GUARD(); DeviceListener *listener; QTAILQ_FOREACH(listener, &device_listeners, link) { if (listener->hide_device) { - if (listener->hide_device(listener, opts)) { + if (listener->hide_device(listener, opts, from_json, errp)) { return true; + } else if (*errp) { + return false; } } } @@ -234,58 +237,6 @@ void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id, dev->alias_required_for_version = required_for_version; } -HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev) -{ - MachineState *machine; - MachineClass *mc; - Object *m_obj = qdev_get_machine(); - - if (object_dynamic_cast(m_obj, TYPE_MACHINE)) { - machine = MACHINE(m_obj); - mc = MACHINE_GET_CLASS(machine); - if (mc->get_hotplug_handler) { - return mc->get_hotplug_handler(machine, dev); - } - } - - return NULL; -} - -bool qdev_hotplug_allowed(DeviceState *dev, Error **errp) -{ - MachineState *machine; - MachineClass *mc; - Object *m_obj = qdev_get_machine(); - - if (object_dynamic_cast(m_obj, TYPE_MACHINE)) { - machine = MACHINE(m_obj); - mc = MACHINE_GET_CLASS(machine); - if (mc->hotplug_allowed) { - return mc->hotplug_allowed(machine, dev, errp); - } - } - - return true; -} - -HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev) -{ - if (dev->parent_bus) { - return dev->parent_bus->hotplug_handler; - } - return NULL; -} - -HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev) -{ - HotplugHandler *hotplug_ctrl = qdev_get_machine_hotplug_handler(dev); - - if (hotplug_ctrl == NULL && dev->parent_bus) { - hotplug_ctrl = qdev_get_bus_hotplug_handler(dev); - } - return hotplug_ctrl; -} - static int 
qdev_prereset(DeviceState *dev, void *opaque) { trace_qdev_reset_tree(dev, object_get_typename(OBJECT(dev))); @@ -367,13 +318,6 @@ static void device_reset_child_foreach(Object *obj, ResettableChildCallback cb, } } -/* can be used as ->unplug() callback for the simple cases */ -void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - qdev_unrealize(dev); -} - bool qdev_realize(DeviceState *dev, BusState *bus, Error **errp) { assert(!dev->realized && !dev->parent_bus); @@ -432,180 +376,6 @@ BusState *qdev_get_parent_bus(DeviceState *dev) return dev->parent_bus; } -static NamedGPIOList *qdev_get_named_gpio_list(DeviceState *dev, - const char *name) -{ - NamedGPIOList *ngl; - - QLIST_FOREACH(ngl, &dev->gpios, node) { - /* NULL is a valid and matchable name. */ - if (g_strcmp0(name, ngl->name) == 0) { - return ngl; - } - } - - ngl = g_malloc0(sizeof(*ngl)); - ngl->name = g_strdup(name); - QLIST_INSERT_HEAD(&dev->gpios, ngl, node); - return ngl; -} - -void qdev_init_gpio_in_named_with_opaque(DeviceState *dev, - qemu_irq_handler handler, - void *opaque, - const char *name, int n) -{ - int i; - NamedGPIOList *gpio_list = qdev_get_named_gpio_list(dev, name); - - assert(gpio_list->num_out == 0 || !name); - gpio_list->in = qemu_extend_irqs(gpio_list->in, gpio_list->num_in, handler, - opaque, n); - - if (!name) { - name = "unnamed-gpio-in"; - } - for (i = gpio_list->num_in; i < gpio_list->num_in + n; i++) { - gchar *propname = g_strdup_printf("%s[%u]", name, i); - - object_property_add_child(OBJECT(dev), propname, - OBJECT(gpio_list->in[i])); - g_free(propname); - } - - gpio_list->num_in += n; -} - -void qdev_init_gpio_in(DeviceState *dev, qemu_irq_handler handler, int n) -{ - qdev_init_gpio_in_named(dev, handler, NULL, n); -} - -void qdev_init_gpio_out_named(DeviceState *dev, qemu_irq *pins, - const char *name, int n) -{ - int i; - NamedGPIOList *gpio_list = qdev_get_named_gpio_list(dev, name); - - assert(gpio_list->num_in == 0 || !name); - - if (!name) { - name = "unnamed-gpio-out"; - } - memset(pins, 0, sizeof(*pins) * n); - for (i = 0; i < n; ++i) { - gchar *propname = g_strdup_printf("%s[%u]", name, - gpio_list->num_out + i); - - object_property_add_link(OBJECT(dev), propname, TYPE_IRQ, - (Object **)&pins[i], - object_property_allow_set_link, - OBJ_PROP_LINK_STRONG); - g_free(propname); - } - gpio_list->num_out += n; -} - -void qdev_init_gpio_out(DeviceState *dev, qemu_irq *pins, int n) -{ - qdev_init_gpio_out_named(dev, pins, NULL, n); -} - -qemu_irq qdev_get_gpio_in_named(DeviceState *dev, const char *name, int n) -{ - NamedGPIOList *gpio_list = qdev_get_named_gpio_list(dev, name); - - assert(n >= 0 && n < gpio_list->num_in); - return gpio_list->in[n]; -} - -qemu_irq qdev_get_gpio_in(DeviceState *dev, int n) -{ - return qdev_get_gpio_in_named(dev, NULL, n); -} - -void qdev_connect_gpio_out_named(DeviceState *dev, const char *name, int n, - qemu_irq pin) -{ - char *propname = g_strdup_printf("%s[%d]", - name ? name : "unnamed-gpio-out", n); - if (pin && !OBJECT(pin)->parent) { - /* We need a name for object_property_set_link to work */ - object_property_add_child(container_get(qdev_get_machine(), - "/unattached"), - "non-qdev-gpio[*]", OBJECT(pin)); - } - object_property_set_link(OBJECT(dev), propname, OBJECT(pin), &error_abort); - g_free(propname); -} - -qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n) -{ - g_autofree char *propname = g_strdup_printf("%s[%d]", - name ? 
name : "unnamed-gpio-out", n); - - qemu_irq ret = (qemu_irq)object_property_get_link(OBJECT(dev), propname, - NULL); - - return ret; -} - -/* disconnect a GPIO output, returning the disconnected input (if any) */ - -static qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, - const char *name, int n) -{ - char *propname = g_strdup_printf("%s[%d]", - name ? name : "unnamed-gpio-out", n); - - qemu_irq ret = (qemu_irq)object_property_get_link(OBJECT(dev), propname, - NULL); - if (ret) { - object_property_set_link(OBJECT(dev), propname, NULL, NULL); - } - g_free(propname); - return ret; -} - -qemu_irq qdev_intercept_gpio_out(DeviceState *dev, qemu_irq icpt, - const char *name, int n) -{ - qemu_irq disconnected = qdev_disconnect_gpio_out_named(dev, name, n); - qdev_connect_gpio_out_named(dev, name, n, icpt); - return disconnected; -} - -void qdev_connect_gpio_out(DeviceState * dev, int n, qemu_irq pin) -{ - qdev_connect_gpio_out_named(dev, NULL, n, pin); -} - -void qdev_pass_gpios(DeviceState *dev, DeviceState *container, - const char *name) -{ - int i; - NamedGPIOList *ngl = qdev_get_named_gpio_list(dev, name); - - for (i = 0; i < ngl->num_in; i++) { - const char *nm = ngl->name ? ngl->name : "unnamed-gpio-in"; - char *propname = g_strdup_printf("%s[%d]", nm, i); - - object_property_add_alias(OBJECT(container), propname, - OBJECT(dev), propname); - g_free(propname); - } - for (i = 0; i < ngl->num_out; i++) { - const char *nm = ngl->name ? ngl->name : "unnamed-gpio-out"; - char *propname = g_strdup_printf("%s[%d]", nm, i); - - object_property_add_alias(OBJECT(container), propname, - OBJECT(dev), propname); - g_free(propname); - } - QLIST_REMOVE(ngl, node); - QLIST_INSERT_HEAD(&container->gpios, ngl, node); -} - BusState *qdev_get_child_bus(DeviceState *dev, const char *name) { BusState *bus; @@ -955,7 +725,8 @@ static void device_finalize(Object *obj) dev->canonical_path = NULL; } - qemu_opts_del(dev->opts); + qobject_unref(dev->opts); + g_free(dev->id); } static void device_class_base_init(ObjectClass *class, void *data) diff --git a/hw/core/register.c b/hw/core/register.c index d6f8c208161..95b0150c0aa 100644 --- a/hw/core/register.c +++ b/hw/core/register.c @@ -300,6 +300,18 @@ RegisterInfoArray *register_init_block32(DeviceState *owner, data, ops, debug_enabled, memory_size, 32); } +RegisterInfoArray *register_init_block64(DeviceState *owner, + const RegisterAccessInfo *rae, + int num, RegisterInfo *ri, + uint64_t *data, + const MemoryRegionOps *ops, + bool debug_enabled, + uint64_t memory_size) +{ + return register_init_block(owner, rae, num, ri, (void *) + data, ops, debug_enabled, memory_size, 64); +} + void register_finalize_block(RegisterInfoArray *r_array) { object_unparent(OBJECT(&r_array->mem)); diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c index aaae8e23cc4..05c1da3d311 100644 --- a/hw/core/sysbus.c +++ b/hw/core/sysbus.c @@ -340,11 +340,13 @@ static BusState *main_system_bus; static void main_system_bus_create(void) { - /* assign main_system_bus before qbus_create_inplace() - * in order to make "if (bus != sysbus_get_default())" work */ + /* + * assign main_system_bus before qbus_init() + * in order to make "if (bus != sysbus_get_default())" work + */ main_system_bus = g_malloc0(system_bus_info.instance_size); - qbus_create_inplace(main_system_bus, system_bus_info.instance_size, - TYPE_SYSTEM_BUS, NULL, "main-system-bus"); + qbus_init(main_system_bus, system_bus_info.instance_size, + TYPE_SYSTEM_BUS, NULL, "main-system-bus"); OBJECT(main_system_bus)->free = g_free; } diff 
--git a/hw/core/trace-events b/hw/core/trace-events index 360ddeb2c87..9b3ecce3b2f 100644 --- a/hw/core/trace-events +++ b/hw/core/trace-events @@ -34,3 +34,4 @@ clock_disconnect(const char *clk) "'%s'" clock_set(const char *clk, uint64_t old, uint64_t new) "'%s', %"PRIu64"Hz->%"PRIu64"Hz" clock_propagate(const char *clk) "'%s'" clock_update(const char *clk, const char *src, uint64_t hz, int cb) "'%s', src='%s', val=%"PRIu64"Hz cb=%d" +clock_set_mul_div(const char *clk, uint32_t oldmul, uint32_t mul, uint32_t olddiv, uint32_t div) "'%s', mul: %u -> %u, div: %u -> %u" diff --git a/hw/cris/axis_dev88.c b/hw/cris/axis_dev88.c index af5a0e35173..d82050d927d 100644 --- a/hw/cris/axis_dev88.c +++ b/hw/cris/axis_dev88.c @@ -34,7 +34,6 @@ #include "hw/loader.h" #include "elf.h" #include "boot.h" -#include "exec/address-spaces.h" #include "sysemu/qtest.h" #include "sysemu/sysemu.h" diff --git a/hw/display/Kconfig b/hw/display/Kconfig index ca46b5830e7..a2306b67d87 100644 --- a/hw/display/Kconfig +++ b/hw/display/Kconfig @@ -72,10 +72,6 @@ config BLIZZARD config FRAMEBUFFER bool -config MILKYMIST_TMU2 - bool - depends on OPENGL && X11 - config SM501 bool select I2C diff --git a/hw/display/artist.c b/hw/display/artist.c index aa7bd594aac..21b7fd1b440 100644 --- a/hw/display/artist.c +++ b/hw/display/artist.c @@ -1170,8 +1170,8 @@ static void artist_vram_write(void *opaque, hwaddr addr, uint64_t val, } buf = vram_write_buffer(s); - posy = ADDR_TO_Y(addr); - posx = ADDR_TO_X(addr); + posy = ADDR_TO_Y(addr >> 2); + posx = ADDR_TO_X(addr >> 2); if (!buf->size) { return; @@ -1232,8 +1232,8 @@ static uint64_t artist_vram_read(void *opaque, hwaddr addr, unsigned size) return 0; } - posy = ADDR_TO_Y(addr); - posx = ADDR_TO_X(addr); + posy = ADDR_TO_Y(addr >> 2); + posx = ADDR_TO_X(addr >> 2); if (posy > buf->height || posx > buf->width) { return 0; diff --git a/hw/display/ati.c b/hw/display/ati.c index 4c3ad8f47b0..31f22754dce 100644 --- a/hw/display/ati.c +++ b/hw/display/ati.c @@ -968,7 +968,7 @@ static void ati_vga_realize(PCIDevice *dev, Error **errp) I2CBus *i2cbus = i2c_init_bus(DEVICE(s), "ati-vga.ddc"); bitbang_i2c_init(&s->bbi2c, i2cbus); I2CSlave *i2cddc = I2C_SLAVE(qdev_new(TYPE_I2CDDC)); - i2c_set_slave_address(i2cddc, 0x50); + i2c_slave_set_address(i2cddc, 0x50); qdev_realize_and_unref(DEVICE(i2cddc), BUS(i2cbus), &error_abort); /* mmio register space */ diff --git a/hw/display/edid-generate.c b/hw/display/edid-generate.c index a1bea9a3aa3..f2b874d5e35 100644 --- a/hw/display/edid-generate.c +++ b/hw/display/edid-generate.c @@ -45,6 +45,35 @@ static const struct edid_mode { { .xres = 640, .yres = 480, .byte = 35, .bit = 5 }, }; +typedef struct Timings { + uint32_t xfront; + uint32_t xsync; + uint32_t xblank; + + uint32_t yfront; + uint32_t ysync; + uint32_t yblank; + + uint64_t clock; +} Timings; + +static void generate_timings(Timings *timings, uint32_t refresh_rate, + uint32_t xres, uint32_t yres) +{ + /* pull some realistic looking timings out of thin air */ + timings->xfront = xres * 25 / 100; + timings->xsync = xres * 3 / 100; + timings->xblank = xres * 35 / 100; + + timings->yfront = yres * 5 / 1000; + timings->ysync = yres * 5 / 1000; + timings->yblank = yres * 35 / 1000; + + timings->clock = ((uint64_t)refresh_rate * + (xres + timings->xblank) * + (yres + timings->yblank)) / 10000000; +} + static void edid_ext_dta(uint8_t *dta) { dta[0] = 0x02; @@ -130,20 +159,39 @@ static void edid_fill_modes(uint8_t *edid, uint8_t *xtra3, uint8_t *dta, } } -static void edid_checksum(uint8_t *edid) 
+static void edid_checksum(uint8_t *edid, size_t len) { uint32_t sum = 0; int i; - for (i = 0; i < 127; i++) { + for (i = 0; i < len; i++) { sum += edid[i]; } sum &= 0xff; if (sum) { - edid[127] = 0x100 - sum; + edid[len] = 0x100 - sum; } } +static uint8_t *edid_desc_next(uint8_t *edid, uint8_t *dta, uint8_t *desc) +{ + if (desc == NULL) { + return NULL; + } + if (desc + 18 + 18 < edid + 127) { + return desc + 18; + } + if (dta) { + if (desc < edid + 127) { + return dta + dta[2]; + } + if (desc + 18 + 18 < dta + 127) { + return desc + 18; + } + } + return NULL; +} + static void edid_desc_type(uint8_t *desc, uint8_t type) { desc[0] = 0; @@ -181,8 +229,8 @@ static void edid_desc_ranges(uint8_t *desc) desc[7] = 30; desc[8] = 160; - /* max dot clock (1200 MHz) */ - desc[9] = 1200 / 10; + /* max dot clock (2550 MHz) */ + desc[9] = 2550 / 10; /* no extended timing information */ desc[10] = 0x01; @@ -204,42 +252,33 @@ static void edid_desc_dummy(uint8_t *desc) edid_desc_type(desc, 0x10); } -static void edid_desc_timing(uint8_t *desc, +static void edid_desc_timing(uint8_t *desc, uint32_t refresh_rate, uint32_t xres, uint32_t yres, uint32_t xmm, uint32_t ymm) { - /* pull some realistic looking timings out of thin air */ - uint32_t xfront = xres * 25 / 100; - uint32_t xsync = xres * 3 / 100; - uint32_t xblank = xres * 35 / 100; - - uint32_t yfront = yres * 5 / 1000; - uint32_t ysync = yres * 5 / 1000; - uint32_t yblank = yres * 35 / 1000; - - uint32_t clock = 75 * (xres + xblank) * (yres + yblank); - - stl_le_p(desc, clock / 10000); + Timings timings; + generate_timings(&timings, refresh_rate, xres, yres); + stl_le_p(desc, timings.clock); desc[2] = xres & 0xff; - desc[3] = xblank & 0xff; + desc[3] = timings.xblank & 0xff; desc[4] = (((xres & 0xf00) >> 4) | - ((xblank & 0xf00) >> 8)); + ((timings.xblank & 0xf00) >> 8)); desc[5] = yres & 0xff; - desc[6] = yblank & 0xff; + desc[6] = timings.yblank & 0xff; desc[7] = (((yres & 0xf00) >> 4) | - ((yblank & 0xf00) >> 8)); + ((timings.yblank & 0xf00) >> 8)); - desc[8] = xfront & 0xff; - desc[9] = xsync & 0xff; + desc[8] = timings.xfront & 0xff; + desc[9] = timings.xsync & 0xff; - desc[10] = (((yfront & 0x00f) << 4) | - ((ysync & 0x00f) << 0)); - desc[11] = (((xfront & 0x300) >> 2) | - ((xsync & 0x300) >> 4) | - ((yfront & 0x030) >> 2) | - ((ysync & 0x030) >> 4)); + desc[10] = (((timings.yfront & 0x00f) << 4) | + ((timings.ysync & 0x00f) << 0)); + desc[11] = (((timings.xfront & 0x300) >> 2) | + ((timings.xsync & 0x300) >> 4) | + ((timings.yfront & 0x030) >> 2) | + ((timings.ysync & 0x030) >> 4)); desc[12] = xmm & 0xff; desc[13] = ymm & 0xff; @@ -297,14 +336,61 @@ uint32_t qemu_edid_dpi_to_mm(uint32_t dpi, uint32_t res) return res * 254 / 10 / dpi; } +static void init_displayid(uint8_t *did) +{ + did[0] = 0x70; /* display id extension */ + did[1] = 0x13; /* version 1.3 */ + did[2] = 4; /* length */ + did[3] = 0x03; /* product type (0x03 == standalone display device) */ + edid_checksum(did + 1, did[2] + 4); +} + +static void qemu_displayid_generate(uint8_t *did, uint32_t refresh_rate, + uint32_t xres, uint32_t yres, + uint32_t xmm, uint32_t ymm) +{ + Timings timings; + generate_timings(&timings, refresh_rate, xres, yres); + + did[0] = 0x70; /* display id extension */ + did[1] = 0x13; /* version 1.3 */ + did[2] = 23; /* length */ + did[3] = 0x03; /* product type (0x03 == standalone display device) */ + + did[5] = 0x03; /* Detailed Timings Data Block */ + did[6] = 0x00; /* revision */ + did[7] = 0x14; /* block length */ + + did[8] = timings.clock & 0xff; + did[9] 
= (timings.clock & 0xff00) >> 8; + did[10] = (timings.clock & 0xff0000) >> 16; + + did[11] = 0x88; /* leave aspect ratio undefined */ + + stw_le_p(did + 12, 0xffff & (xres - 1)); + stw_le_p(did + 14, 0xffff & (timings.xblank - 1)); + stw_le_p(did + 16, 0xffff & (timings.xfront - 1)); + stw_le_p(did + 18, 0xffff & (timings.xsync - 1)); + + stw_le_p(did + 20, 0xffff & (yres - 1)); + stw_le_p(did + 22, 0xffff & (timings.yblank - 1)); + stw_le_p(did + 24, 0xffff & (timings.yfront - 1)); + stw_le_p(did + 26, 0xffff & (timings.ysync - 1)); + + edid_checksum(did + 1, did[2] + 4); +} + void qemu_edid_generate(uint8_t *edid, size_t size, qemu_edid_info *info) { - uint32_t desc = 54; + uint8_t *desc = edid + 54; uint8_t *xtra3 = NULL; uint8_t *dta = NULL; + uint8_t *did = NULL; uint32_t width_mm, height_mm; + uint32_t refresh_rate = info->refresh_rate ? info->refresh_rate : 75000; uint32_t dpi = 100; /* if no width_mm/height_mm */ + uint32_t large_screen = 0; /* =============== set defaults =============== */ @@ -320,6 +406,9 @@ void qemu_edid_generate(uint8_t *edid, size_t size, if (!info->prefy) { info->prefy = 768; } + if (info->prefx >= 4096 || info->prefy >= 4096) { + large_screen = 1; + } if (info->width_mm && info->height_mm) { width_mm = info->width_mm; height_mm = info->height_mm; @@ -337,6 +426,12 @@ void qemu_edid_generate(uint8_t *edid, size_t size, edid_ext_dta(dta); } + if (size >= 384 && large_screen) { + did = edid + 256; + edid[126]++; + init_displayid(did); + } + /* =============== header information =============== */ /* fixed */ @@ -401,40 +496,55 @@ void qemu_edid_generate(uint8_t *edid, size_t size, /* =============== descriptor blocks =============== */ - edid_desc_timing(edid + desc, info->prefx, info->prefy, - width_mm, height_mm); - desc += 18; + if (!large_screen) { + /* The DTD section has only 12 bits to store the resolution */ + edid_desc_timing(desc, refresh_rate, info->prefx, info->prefy, + width_mm, height_mm); + desc = edid_desc_next(edid, dta, desc); + } - edid_desc_ranges(edid + desc); - desc += 18; + xtra3 = desc; + edid_desc_xtra3_std(xtra3); + desc = edid_desc_next(edid, dta, desc); + edid_fill_modes(edid, xtra3, dta, info->maxx, info->maxy); + /* + * dta video data block is finished at thus point, + * so dta descriptor offsets don't move any more. 
+ */ + + edid_desc_ranges(desc); + desc = edid_desc_next(edid, dta, desc); - if (info->name) { - edid_desc_text(edid + desc, 0xfc, info->name); - desc += 18; + if (desc && info->name) { + edid_desc_text(desc, 0xfc, info->name); + desc = edid_desc_next(edid, dta, desc); } - if (info->serial) { - edid_desc_text(edid + desc, 0xff, info->serial); - desc += 18; + if (desc && info->serial) { + edid_desc_text(desc, 0xff, info->serial); + desc = edid_desc_next(edid, dta, desc); } - if (desc < 126) { - xtra3 = edid + desc; - edid_desc_xtra3_std(xtra3); - desc += 18; + while (desc) { + edid_desc_dummy(desc); + desc = edid_desc_next(edid, dta, desc); } - while (desc < 126) { - edid_desc_dummy(edid + desc); - desc += 18; + /* =============== display id extensions =============== */ + + if (did && large_screen) { + qemu_displayid_generate(did, refresh_rate, info->prefx, info->prefy, + width_mm, height_mm); } /* =============== finish up =============== */ - edid_fill_modes(edid, xtra3, dta, info->maxx, info->maxy); - edid_checksum(edid); + edid_checksum(edid, 127); if (dta) { - edid_checksum(dta); + edid_checksum(dta, 127); + } + if (did) { + edid_checksum(did, 127); } } diff --git a/hw/display/g364fb.c b/hw/display/g364fb.c index 8f1725432cd..caca86d7738 100644 --- a/hw/display/g364fb.c +++ b/hw/display/g364fb.c @@ -22,6 +22,7 @@ #include "hw/hw.h" #include "hw/irq.h" #include "hw/qdev-properties.h" +#include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/module.h" #include "ui/console.h" @@ -33,7 +34,6 @@ typedef struct G364State { /* hardware */ - uint8_t *vram; uint32_t vram_size; qemu_irq irq; MemoryRegion mem_vram; @@ -125,7 +125,7 @@ static void g364fb_draw_graphic8(G364State *s) xcursor = ycursor = -65; } - vram = s->vram + s->top_of_screen; + vram = memory_region_get_ram_ptr(&s->mem_vram) + s->top_of_screen; /* XXX: out of range in vram? 
*/ data_display = dd = surface_data(surface); snap = memory_region_snapshot_and_clear_dirty(&s->mem_vram, 0, s->vram_size, @@ -274,6 +274,8 @@ static inline void g364fb_invalidate_display(void *opaque) static void g364fb_reset(G364State *s) { + uint8_t *vram = memory_region_get_ram_ptr(&s->mem_vram); + qemu_irq_lower(s->irq); memset(s->color_palette, 0, sizeof(s->color_palette)); @@ -283,7 +285,7 @@ static void g364fb_reset(G364State *s) s->ctla = 0; s->top_of_screen = 0; s->width = s->height = 0; - memset(s->vram, 0, s->vram_size); + memset(vram, 0, s->vram_size); g364fb_invalidate_display(s); } @@ -450,11 +452,10 @@ static int g364fb_post_load(void *opaque, int version_id) static const VMStateDescription vmstate_g364fb = { .name = "g364fb", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, .post_load = g364fb_post_load, .fields = (VMStateField[]) { - VMSTATE_VBUFFER_UINT32(vram, G364State, 1, NULL, vram_size), VMSTATE_BUFFER_UNSAFE(color_palette, G364State, 0, 256 * 3), VMSTATE_BUFFER_UNSAFE(cursor_palette, G364State, 0, 9), VMSTATE_UINT16_ARRAY(cursor, G364State, 512), @@ -474,15 +475,12 @@ static const GraphicHwOps g364fb_ops = { static void g364fb_init(DeviceState *dev, G364State *s) { - s->vram = g_malloc0(s->vram_size); - s->con = graphic_console_init(dev, 0, &g364fb_ops, s); memory_region_init_io(&s->mem_ctrl, OBJECT(dev), &g364fb_ctrl_ops, s, "ctrl", 0x180000); - memory_region_init_ram_ptr(&s->mem_vram, NULL, "vram", - s->vram_size, s->vram); - vmstate_register_ram(&s->mem_vram, dev); + memory_region_init_ram(&s->mem_vram, NULL, "g364fb.vram", s->vram_size, + &error_fatal); memory_region_set_log(&s->mem_vram, true, DIRTY_MEMORY_VGA); } @@ -519,6 +517,16 @@ static Property g364fb_sysbus_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +static const VMStateDescription vmstate_g364fb_sysbus = { + .name = "g364fb-sysbus", + .version_id = 2, + .minimum_version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(g364, G364SysBusState, 2, vmstate_g364fb, G364State), + VMSTATE_END_OF_LIST() + } +}; + static void g364fb_sysbus_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -527,7 +535,7 @@ static void g364fb_sysbus_class_init(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories); dc->desc = "G364 framebuffer"; dc->reset = g364fb_sysbus_reset; - dc->vmsd = &vmstate_g364fb; + dc->vmsd = &vmstate_g364fb_sysbus; device_class_set_props(dc, g364fb_sysbus_properties); } diff --git a/hw/display/macfb.c b/hw/display/macfb.c index ff8bdb846b9..277d3e66333 100644 --- a/hw/display/macfb.c +++ b/hw/display/macfb.c @@ -20,16 +20,102 @@ #include "qapi/error.h" #include "hw/qdev-properties.h" #include "migration/vmstate.h" +#include "trace.h" -#define VIDEO_BASE 0x00001000 +#define VIDEO_BASE 0x0 #define DAFB_BASE 0x00800000 #define MACFB_PAGE_SIZE 4096 #define MACFB_VRAM_SIZE (4 * MiB) -#define DAFB_RESET 0x200 -#define DAFB_LUT 0x213 +#define DAFB_MODE_VADDR1 0x0 +#define DAFB_MODE_VADDR2 0x4 +#define DAFB_MODE_CTRL1 0x8 +#define DAFB_MODE_CTRL2 0xc +#define DAFB_MODE_SENSE 0x1c +#define DAFB_INTR_MASK 0x104 +#define DAFB_INTR_STAT 0x108 +#define DAFB_INTR_CLEAR 0x10c +#define DAFB_RESET 0x200 +#define DAFB_LUT 0x213 +#define DAFB_INTR_VBL 0x4 + +/* Vertical Blank period (60.15Hz) */ +#define DAFB_INTR_VBL_PERIOD_NS 16625800 + +/* + * Quadra sense codes taken from Apple Technical Note HW26: + * "Macintosh Quadra Built-In Video". 
The sense codes and + * extended sense codes have different meanings: + * + * Sense: + * bit 2: SENSE2 (pin 10) + * bit 1: SENSE1 (pin 7) + * bit 0: SENSE0 (pin 4) + * + * 0 = pin tied to ground + * 1 = pin unconnected + * + * Extended Sense: + * bit 2: pins 4-10 + * bit 1: pins 10-7 + * bit 0: pins 7-4 + * + * 0 = pins tied together + * 1 = pins unconnected + * + * Reads from the sense register appear to be active low, i.e. a 1 indicates + * that the pin is tied to ground, a 0 indicates the pin is disconnected. + * + * Writes to the sense register appear to activate pulldowns i.e. a 1 enables + * a pulldown on a particular pin. + * + * The MacOS toolbox appears to use a series of reads and writes to first + * determine if extended sense is to be used, and then check which pins are + * tied together in order to determine the display type. + */ + +typedef struct MacFbSense { + uint8_t type; + uint8_t sense; + uint8_t ext_sense; +} MacFbSense; + +static MacFbSense macfb_sense_table[] = { + { MACFB_DISPLAY_APPLE_21_COLOR, 0x0, 0 }, + { MACFB_DISPLAY_APPLE_PORTRAIT, 0x1, 0 }, + { MACFB_DISPLAY_APPLE_12_RGB, 0x2, 0 }, + { MACFB_DISPLAY_APPLE_2PAGE_MONO, 0x3, 0 }, + { MACFB_DISPLAY_NTSC_UNDERSCAN, 0x4, 0 }, + { MACFB_DISPLAY_NTSC_OVERSCAN, 0x4, 0 }, + { MACFB_DISPLAY_APPLE_12_MONO, 0x6, 0 }, + { MACFB_DISPLAY_APPLE_13_RGB, 0x6, 0 }, + { MACFB_DISPLAY_16_COLOR, 0x7, 0x3 }, + { MACFB_DISPLAY_PAL1_UNDERSCAN, 0x7, 0x0 }, + { MACFB_DISPLAY_PAL1_OVERSCAN, 0x7, 0x0 }, + { MACFB_DISPLAY_PAL2_UNDERSCAN, 0x7, 0x6 }, + { MACFB_DISPLAY_PAL2_OVERSCAN, 0x7, 0x6 }, + { MACFB_DISPLAY_VGA, 0x7, 0x5 }, + { MACFB_DISPLAY_SVGA, 0x7, 0x5 }, +}; + +static MacFbMode macfb_mode_table[] = { + { MACFB_DISPLAY_VGA, 1, 0x100, 0x71e, 640, 480, 0x400, 0x1000 }, + { MACFB_DISPLAY_VGA, 2, 0x100, 0x70e, 640, 480, 0x400, 0x1000 }, + { MACFB_DISPLAY_VGA, 4, 0x100, 0x706, 640, 480, 0x400, 0x1000 }, + { MACFB_DISPLAY_VGA, 8, 0x100, 0x702, 640, 480, 0x400, 0x1000 }, + { MACFB_DISPLAY_VGA, 24, 0x100, 0x7ff, 640, 480, 0x1000, 0x1000 }, + { MACFB_DISPLAY_VGA, 1, 0xd0 , 0x70e, 800, 600, 0x340, 0xe00 }, + { MACFB_DISPLAY_VGA, 2, 0xd0 , 0x706, 800, 600, 0x340, 0xe00 }, + { MACFB_DISPLAY_VGA, 4, 0xd0 , 0x702, 800, 600, 0x340, 0xe00 }, + { MACFB_DISPLAY_VGA, 8, 0xd0, 0x700, 800, 600, 0x340, 0xe00 }, + { MACFB_DISPLAY_VGA, 24, 0x340, 0x100, 800, 600, 0xd00, 0xe00 }, + { MACFB_DISPLAY_APPLE_21_COLOR, 1, 0x90, 0x506, 1152, 870, 0x240, 0x80 }, + { MACFB_DISPLAY_APPLE_21_COLOR, 2, 0x90, 0x502, 1152, 870, 0x240, 0x80 }, + { MACFB_DISPLAY_APPLE_21_COLOR, 4, 0x90, 0x500, 1152, 870, 0x240, 0x80 }, + { MACFB_DISPLAY_APPLE_21_COLOR, 8, 0x120, 0x5ff, 1152, 870, 0x480, 0x80 }, +}; typedef void macfb_draw_line_func(MacfbState *s, uint8_t *d, uint32_t addr, int width); @@ -49,7 +135,9 @@ static void macfb_draw_line1(MacfbState *s, uint8_t *d, uint32_t addr, for (x = 0; x < width; x++) { int bit = x & 7; int idx = (macfb_read_byte(s, addr) >> (7 - bit)) & 1; - r = g = b = ((1 - idx) << 7); + r = s->color_palette[idx * 3]; + g = s->color_palette[idx * 3 + 1]; + b = s->color_palette[idx * 3 + 2]; addr += (bit == 7); *(uint32_t *)d = rgb_to_pixel32(r, g, b); @@ -143,10 +231,10 @@ static void macfb_draw_line24(MacfbState *s, uint8_t *d, uint32_t addr, int x; for (x = 0; x < width; x++) { - r = macfb_read_byte(s, addr); - g = macfb_read_byte(s, addr + 1); - b = macfb_read_byte(s, addr + 2); - addr += 3; + r = macfb_read_byte(s, addr + 1); + g = macfb_read_byte(s, addr + 2); + b = macfb_read_byte(s, addr + 3); + addr += 4; *(uint32_t *)d = rgb_to_pixel32(r, g, 
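The comment block above describes how the Quadra video sense lines are probed. As a purely illustrative sketch (not the device model added by this patch), a plain, non-extended sense read just reports the monitor's three sense pins active low: for the hypothetical case of an Apple 13" RGB display (sense code 0x6 in macfb_sense_table, i.e. SENSE0 grounded, SENSE1/SENSE2 unconnected) such a read would return 0x1.

/* Illustration only: raw (non-extended) sense read, active low as described
 * in the comment above. A grounded pin reads back as 1, an unconnected pin
 * as 0, so the value is simply the inverted sense code. */
static uint8_t raw_sense_read(uint8_t monitor_sense_code)
{
    return ~monitor_sense_code & 0x7;
}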
b); d += 4; @@ -187,7 +275,7 @@ static void macfb_draw_graphic(MacfbState *s) ram_addr_t page; uint32_t v = 0; int y, ymin; - int macfb_stride = (s->depth * s->width + 7) / 8; + int macfb_stride = s->mode->stride; macfb_draw_line_func *macfb_draw_line; switch (s->depth) { @@ -219,7 +307,7 @@ static void macfb_draw_graphic(MacfbState *s) DIRTY_MEMORY_VGA); ymin = -1; - page = 0; + page = s->mode->offset; for (y = 0; y < s->height; y++, page += macfb_stride) { if (macfb_check_dirty(s, snap, page, macfb_stride)) { uint8_t *data_display; @@ -252,6 +340,121 @@ static void macfb_invalidate_display(void *opaque) memory_region_set_dirty(&s->mem_vram, 0, MACFB_VRAM_SIZE); } +static uint32_t macfb_sense_read(MacfbState *s) +{ + MacFbSense *macfb_sense; + uint8_t sense; + + assert(s->type < ARRAY_SIZE(macfb_sense_table)); + macfb_sense = &macfb_sense_table[s->type]; + if (macfb_sense->sense == 0x7) { + /* Extended sense */ + sense = 0; + if (!(macfb_sense->ext_sense & 1)) { + /* Pins 7-4 together */ + if (~s->regs[DAFB_MODE_SENSE >> 2] & 3) { + sense = (~s->regs[DAFB_MODE_SENSE >> 2] & 7) | 3; + } + } + if (!(macfb_sense->ext_sense & 2)) { + /* Pins 10-7 together */ + if (~s->regs[DAFB_MODE_SENSE >> 2] & 6) { + sense = (~s->regs[DAFB_MODE_SENSE >> 2] & 7) | 6; + } + } + if (!(macfb_sense->ext_sense & 4)) { + /* Pins 4-10 together */ + if (~s->regs[DAFB_MODE_SENSE >> 2] & 5) { + sense = (~s->regs[DAFB_MODE_SENSE >> 2] & 7) | 5; + } + } + } else { + /* Normal sense */ + sense = (~macfb_sense->sense & 7) | + (~s->regs[DAFB_MODE_SENSE >> 2] & 7); + } + + trace_macfb_sense_read(sense); + return sense; +} + +static void macfb_sense_write(MacfbState *s, uint32_t val) +{ + s->regs[DAFB_MODE_SENSE >> 2] = val; + + trace_macfb_sense_write(val); + return; +} + +static void macfb_update_mode(MacfbState *s) +{ + s->width = s->mode->width; + s->height = s->mode->height; + s->depth = s->mode->depth; + + trace_macfb_update_mode(s->width, s->height, s->depth); + macfb_invalidate_display(s); +} + +static void macfb_mode_write(MacfbState *s) +{ + MacFbMode *macfb_mode; + int i; + + for (i = 0; i < ARRAY_SIZE(macfb_mode_table); i++) { + macfb_mode = &macfb_mode_table[i]; + + if (s->type != macfb_mode->type) { + continue; + } + + if ((s->regs[DAFB_MODE_CTRL1 >> 2] & 0xff) == + (macfb_mode->mode_ctrl1 & 0xff) && + (s->regs[DAFB_MODE_CTRL2 >> 2] & 0xff) == + (macfb_mode->mode_ctrl2 & 0xff)) { + s->mode = macfb_mode; + macfb_update_mode(s); + break; + } + } +} + +static MacFbMode *macfb_find_mode(MacfbDisplayType display_type, + uint16_t width, uint16_t height, + uint8_t depth) +{ + MacFbMode *macfb_mode; + int i; + + for (i = 0; i < ARRAY_SIZE(macfb_mode_table); i++) { + macfb_mode = &macfb_mode_table[i]; + + if (display_type == macfb_mode->type && width == macfb_mode->width && + height == macfb_mode->height && depth == macfb_mode->depth) { + return macfb_mode; + } + } + + return NULL; +} + +static gchar *macfb_mode_list(void) +{ + GString *list = g_string_new(""); + MacFbMode *macfb_mode; + int i; + + for (i = 0; i < ARRAY_SIZE(macfb_mode_table); i++) { + macfb_mode = &macfb_mode_table[i]; + + g_string_append_printf(list, " %dx%dx%d\n", macfb_mode->width, + macfb_mode->height, macfb_mode->depth); + } + + return g_string_free(list, FALSE); +} + + static void macfb_update_display(void *opaque) { MacfbState *s = opaque; @@ -271,6 +474,36 @@ static void macfb_update_display(void *opaque) macfb_draw_graphic(s); } +static void macfb_update_irq(MacfbState *s) +{ + uint32_t irq_state = s->irq_state & s->irq_mask; + + if 
(irq_state) { + qemu_irq_raise(s->irq); + } else { + qemu_irq_lower(s->irq); + } +} + +static int64_t macfb_next_vbl(void) +{ + return (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + DAFB_INTR_VBL_PERIOD_NS) / + DAFB_INTR_VBL_PERIOD_NS * DAFB_INTR_VBL_PERIOD_NS; +} + +static void macfb_vbl_timer(void *opaque) +{ + MacfbState *s = opaque; + int64_t next_vbl; + + s->irq_state |= DAFB_INTR_VBL; + macfb_update_irq(s); + + /* 60 Hz irq */ + next_vbl = macfb_next_vbl(); + timer_mod(s->vbl_timer, next_vbl); +} + static void macfb_reset(MacfbState *s) { int i; @@ -289,7 +522,26 @@ static uint64_t macfb_ctrl_read(void *opaque, hwaddr addr, unsigned int size) { - return 0; + MacfbState *s = opaque; + uint64_t val = 0; + + switch (addr) { + case DAFB_MODE_VADDR1: + case DAFB_MODE_VADDR2: + case DAFB_MODE_CTRL1: + case DAFB_MODE_CTRL2: + val = s->regs[addr >> 2]; + break; + case DAFB_INTR_STAT: + val = s->irq_state; + break; + case DAFB_MODE_SENSE: + val = macfb_sense_read(s); + break; + } + + trace_macfb_ctrl_read(addr, val, size); + return val; } static void macfb_ctrl_write(void *opaque, @@ -298,17 +550,52 @@ static void macfb_ctrl_write(void *opaque, unsigned int size) { MacfbState *s = opaque; + int64_t next_vbl; + switch (addr) { + case DAFB_MODE_VADDR1: + case DAFB_MODE_VADDR2: + s->regs[addr >> 2] = val; + break; + case DAFB_MODE_CTRL1 ... DAFB_MODE_CTRL1 + 3: + case DAFB_MODE_CTRL2 ... DAFB_MODE_CTRL2 + 3: + s->regs[addr >> 2] = val; + if (val) { + macfb_mode_write(s); + } + break; + case DAFB_MODE_SENSE: + macfb_sense_write(s, val); + break; + case DAFB_INTR_MASK: + s->irq_mask = val; + if (val & DAFB_INTR_VBL) { + next_vbl = macfb_next_vbl(); + timer_mod(s->vbl_timer, next_vbl); + } else { + timer_del(s->vbl_timer); + } + break; + case DAFB_INTR_CLEAR: + s->irq_state &= ~DAFB_INTR_VBL; + macfb_update_irq(s); + break; case DAFB_RESET: s->palette_current = 0; + s->irq_state &= ~DAFB_INTR_VBL; + macfb_update_irq(s); break; case DAFB_LUT: - s->color_palette[s->palette_current++] = val; + s->color_palette[s->palette_current] = val; + s->palette_current = (s->palette_current + 1) % + ARRAY_SIZE(s->color_palette); if (s->palette_current % 3) { macfb_invalidate_display(s); } break; } + + trace_macfb_ctrl_write(addr, val, size); } static const MemoryRegionOps macfb_ctrl_ops = { @@ -321,7 +608,7 @@ static const MemoryRegionOps macfb_ctrl_ops = { static int macfb_post_load(void *opaque, int version_id) { - macfb_invalidate_display(opaque); + macfb_mode_write(opaque); return 0; } @@ -334,6 +621,7 @@ static const VMStateDescription vmstate_macfb = { .fields = (VMStateField[]) { VMSTATE_UINT8_ARRAY(color_palette, MacfbState, 256 * 3), VMSTATE_UINT32(palette_current, MacfbState), + VMSTATE_UINT32_ARRAY(regs, MacfbState, MACFB_NUM_REGS), VMSTATE_END_OF_LIST() } }; @@ -343,14 +631,20 @@ static const GraphicHwOps macfb_ops = { .gfx_update = macfb_update_display, }; -static void macfb_common_realize(DeviceState *dev, MacfbState *s, Error **errp) +static bool macfb_common_realize(DeviceState *dev, MacfbState *s, Error **errp) { DisplaySurface *surface; - if (s->depth != 1 && s->depth != 2 && s->depth != 4 && s->depth != 8 && - s->depth != 16 && s->depth != 24) { - error_setg(errp, "unknown guest depth %d", s->depth); - return; + s->mode = macfb_find_mode(s->type, s->width, s->height, s->depth); + if (!s->mode) { + gchar *list; + error_setg(errp, "unknown display mode: width %d, height %d, depth %d", + s->width, s->height, s->depth); + list = macfb_mode_list(); + error_append_hint(errp, "Available modes:\n%s", list); 
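macfb_next_vbl() in the hunk above schedules the vertical-blank interrupt by rounding the current QEMU_CLOCK_VIRTUAL time up to the next multiple of DAFB_INTR_VBL_PERIOD_NS (roughly 60.15 Hz). The idiom, shown here as a standalone sketch, relies on truncating integer division; if `now` already sits on a boundary the result still moves one full period forward, which is what the timer wants.

#include <stdint.h>

/* Sketch of the rounding used by macfb_next_vbl(): advance `now` to the
 * next multiple of `period_ns` (integer division truncates). */
static int64_t next_period_boundary(int64_t now, int64_t period_ns)
{
    return (now + period_ns) / period_ns * period_ns;
}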
+ g_free(list); + + return false; } s->con = graphic_console_init(dev, 0, &macfb_ops, s); @@ -359,18 +653,21 @@ static void macfb_common_realize(DeviceState *dev, MacfbState *s, Error **errp) if (surface_bits_per_pixel(surface) != 32) { error_setg(errp, "unknown host depth %d", surface_bits_per_pixel(surface)); - return; + return false; } memory_region_init_io(&s->mem_ctrl, OBJECT(dev), &macfb_ctrl_ops, s, "macfb-ctrl", 0x1000); - memory_region_init_ram_nomigrate(&s->mem_vram, OBJECT(s), "macfb-vram", - MACFB_VRAM_SIZE, errp); + memory_region_init_ram(&s->mem_vram, OBJECT(dev), "macfb-vram", + MACFB_VRAM_SIZE, &error_abort); s->vram = memory_region_get_ram_ptr(&s->mem_vram); s->vram_bit_mask = MACFB_VRAM_SIZE - 1; - vmstate_register_ram(&s->mem_vram, dev); memory_region_set_coalescing(&s->mem_vram); + + s->vbl_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, macfb_vbl_timer, s); + macfb_update_mode(s); + return true; } static void macfb_sysbus_realize(DeviceState *dev, Error **errp) @@ -378,14 +675,23 @@ static void macfb_sysbus_realize(DeviceState *dev, Error **errp) MacfbSysBusState *s = MACFB(dev); MacfbState *ms = &s->macfb; - macfb_common_realize(dev, ms, errp); + if (!macfb_common_realize(dev, ms, errp)) { + return; + } + sysbus_init_mmio(SYS_BUS_DEVICE(s), &ms->mem_ctrl); sysbus_init_mmio(SYS_BUS_DEVICE(s), &ms->mem_vram); + + qdev_init_gpio_out(dev, &ms->irq, 1); } -const uint8_t macfb_rom[] = { - 255, 0, 0, 0, -}; +static void macfb_nubus_set_irq(void *opaque, int n, int level) +{ + MacfbNubusState *s = NUBUS_MACFB(opaque); + NubusDevice *nd = NUBUS_DEVICE(s); + + nubus_set_irq(nd, level); +} static void macfb_nubus_realize(DeviceState *dev, Error **errp) { @@ -395,12 +701,29 @@ static void macfb_nubus_realize(DeviceState *dev, Error **errp) MacfbState *ms = &s->macfb; ndc->parent_realize(dev, errp); + if (*errp) { + return; + } + + if (!macfb_common_realize(dev, ms, errp)) { + return; + } - macfb_common_realize(dev, ms, errp); memory_region_add_subregion(&nd->slot_mem, DAFB_BASE, &ms->mem_ctrl); memory_region_add_subregion(&nd->slot_mem, VIDEO_BASE, &ms->mem_vram); - nubus_register_rom(nd, macfb_rom, sizeof(macfb_rom), 1, 9, 0xf); + ms->irq = qemu_allocate_irq(macfb_nubus_set_irq, s, 0); +} + +static void macfb_nubus_unrealize(DeviceState *dev) +{ + MacfbNubusState *s = NUBUS_MACFB(dev); + MacfbNubusDeviceClass *ndc = NUBUS_MACFB_GET_CLASS(dev); + MacfbState *ms = &s->macfb; + + ndc->parent_unrealize(dev); + + qemu_free_irq(ms->irq); } static void macfb_sysbus_reset(DeviceState *d) @@ -419,6 +742,8 @@ static Property macfb_sysbus_properties[] = { DEFINE_PROP_UINT32("width", MacfbSysBusState, macfb.width, 640), DEFINE_PROP_UINT32("height", MacfbSysBusState, macfb.height, 480), DEFINE_PROP_UINT8("depth", MacfbSysBusState, macfb.depth, 8), + DEFINE_PROP_UINT8("display", MacfbSysBusState, macfb.type, + MACFB_DISPLAY_VGA), DEFINE_PROP_END_OF_LIST(), }; @@ -426,6 +751,8 @@ static Property macfb_nubus_properties[] = { DEFINE_PROP_UINT32("width", MacfbNubusState, macfb.width, 640), DEFINE_PROP_UINT32("height", MacfbNubusState, macfb.height, 480), DEFINE_PROP_UINT8("depth", MacfbNubusState, macfb.depth, 8), + DEFINE_PROP_UINT8("display", MacfbNubusState, macfb.type, + MACFB_DISPLAY_VGA), DEFINE_PROP_END_OF_LIST(), }; @@ -447,9 +774,12 @@ static void macfb_nubus_class_init(ObjectClass *klass, void *data) device_class_set_parent_realize(dc, macfb_nubus_realize, &ndc->parent_realize); + device_class_set_parent_unrealize(dc, macfb_nubus_unrealize, + &ndc->parent_unrealize); dc->desc = "Nubus 
Macintosh framebuffer"; dc->reset = macfb_nubus_reset; dc->vmsd = &vmstate_macfb; + set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories); device_class_set_props(dc, macfb_nubus_properties); } diff --git a/hw/display/meson.build b/hw/display/meson.build index 9d79e3951d9..861c43ff984 100644 --- a/hw/display/meson.build +++ b/hw/display/meson.build @@ -48,7 +48,6 @@ endif softmmu_ss.add(when: 'CONFIG_DPCD', if_true: files('dpcd.c')) softmmu_ss.add(when: 'CONFIG_XLNX_ZYNQMP_ARM', if_true: files('xlnx_dp.c')) -softmmu_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-vgafb.c')) softmmu_ss.add(when: 'CONFIG_ARTIST', if_true: files('artist.c')) softmmu_ss.add(when: [pixman, 'CONFIG_ATI_VGA'], if_true: files('ati.c', 'ati_2d.c', 'ati_dbg.c')) @@ -56,11 +55,16 @@ softmmu_ss.add(when: [pixman, 'CONFIG_ATI_VGA'], if_true: files('ati.c', 'ati_2d if config_all_devices.has_key('CONFIG_VIRTIO_GPU') virtio_gpu_ss = ss.source_set() virtio_gpu_ss.add(when: 'CONFIG_VIRTIO_GPU', - if_true: [files('virtio-gpu-base.c', 'virtio-gpu.c'), pixman, virgl]) - virtio_gpu_ss.add(when: ['CONFIG_VIRTIO_GPU', 'CONFIG_VIRGL'], - if_true: [files('virtio-gpu-3d.c'), pixman, virgl]) + if_true: [files('virtio-gpu-base.c', 'virtio-gpu.c'), pixman]) + virtio_gpu_ss.add(when: 'CONFIG_LINUX', if_true: files('virtio-gpu-udmabuf.c'), + if_false: files('virtio-gpu-udmabuf-stubs.c')) virtio_gpu_ss.add(when: 'CONFIG_VHOST_USER_GPU', if_true: files('vhost-user-gpu.c')) hw_display_modules += {'virtio-gpu': virtio_gpu_ss} + + virtio_gpu_gl_ss = ss.source_set() + virtio_gpu_gl_ss.add(when: ['CONFIG_VIRTIO_GPU', virgl, opengl], + if_true: [files('virtio-gpu-gl.c', 'virtio-gpu-virgl.c'), pixman, virgl]) + hw_display_modules += {'virtio-gpu-gl': virtio_gpu_gl_ss} endif if config_all_devices.has_key('CONFIG_VIRTIO_PCI') @@ -70,6 +74,11 @@ if config_all_devices.has_key('CONFIG_VIRTIO_PCI') virtio_gpu_pci_ss.add(when: ['CONFIG_VHOST_USER_GPU', 'CONFIG_VIRTIO_PCI'], if_true: files('vhost-user-gpu-pci.c')) hw_display_modules += {'virtio-gpu-pci': virtio_gpu_pci_ss} + + virtio_gpu_pci_gl_ss = ss.source_set() + virtio_gpu_pci_gl_ss.add(when: ['CONFIG_VIRTIO_GPU', 'CONFIG_VIRTIO_PCI', virgl, opengl], + if_true: [files('virtio-gpu-pci-gl.c'), pixman]) + hw_display_modules += {'virtio-gpu-pci-gl': virtio_gpu_pci_gl_ss} endif if config_all_devices.has_key('CONFIG_VIRTIO_VGA') @@ -79,9 +88,13 @@ if config_all_devices.has_key('CONFIG_VIRTIO_VGA') virtio_vga_ss.add(when: 'CONFIG_VHOST_USER_VGA', if_true: files('vhost-user-vga.c')) hw_display_modules += {'virtio-vga': virtio_vga_ss} + + virtio_vga_gl_ss = ss.source_set() + virtio_vga_gl_ss.add(when: ['CONFIG_VIRTIO_VGA', virgl, opengl], + if_true: [files('virtio-vga-gl.c'), pixman]) + hw_display_modules += {'virtio-vga-gl': virtio_vga_gl_ss} endif -specific_ss.add(when: [x11, opengl, 'CONFIG_MILKYMIST_TMU2'], if_true: files('milkymist-tmu2.c')) specific_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_lcdc.c')) modules += { 'hw-display': hw_display_modules } diff --git a/hw/display/milkymist-tmu2.c b/hw/display/milkymist-tmu2.c deleted file mode 100644 index 02a28c807b5..00000000000 --- a/hw/display/milkymist-tmu2.c +++ /dev/null @@ -1,551 +0,0 @@ -/* - * QEMU model of the Milkymist texture mapping unit. 
- * - * Copyright (c) 2010 Michael Walle - * Copyright (c) 2010 Sebastien Bourdeauducq - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/tmu2.pdf - * - */ - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "qapi/error.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qapi/error.h" -#include "hw/display/milkymist_tmu2.h" - -#include -#include -#include -#include "qom/object.h" - -enum { - R_CTL = 0, - R_HMESHLAST, - R_VMESHLAST, - R_BRIGHTNESS, - R_CHROMAKEY, - R_VERTICESADDR, - R_TEXFBUF, - R_TEXHRES, - R_TEXVRES, - R_TEXHMASK, - R_TEXVMASK, - R_DSTFBUF, - R_DSTHRES, - R_DSTVRES, - R_DSTHOFFSET, - R_DSTVOFFSET, - R_DSTSQUAREW, - R_DSTSQUAREH, - R_ALPHA, - R_MAX -}; - -enum { - CTL_START_BUSY = (1<<0), - CTL_CHROMAKEY = (1<<1), -}; - -enum { - MAX_BRIGHTNESS = 63, - MAX_ALPHA = 63, -}; - -enum { - MESH_MAXSIZE = 128, -}; - -struct vertex { - int x; - int y; -} QEMU_PACKED; - -#define TYPE_MILKYMIST_TMU2 "milkymist-tmu2" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistTMU2State, MILKYMIST_TMU2) - -struct MilkymistTMU2State { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - Chardev *chr; - qemu_irq irq; - - uint32_t regs[R_MAX]; - - Display *dpy; - GLXFBConfig glx_fb_config; - GLXContext glx_context; -}; - -static const int glx_fbconfig_attr[] = { - GLX_GREEN_SIZE, 5, - GLX_GREEN_SIZE, 6, - GLX_BLUE_SIZE, 5, - None -}; - -static int tmu2_glx_init(MilkymistTMU2State *s) -{ - GLXFBConfig *configs; - int nelements; - - s->dpy = XOpenDisplay(NULL); /* FIXME: call XCloseDisplay() */ - if (s->dpy == NULL) { - return 1; - } - - configs = glXChooseFBConfig(s->dpy, 0, glx_fbconfig_attr, &nelements); - if (configs == NULL) { - return 1; - } - - s->glx_fb_config = *configs; - XFree(configs); - - /* FIXME: call glXDestroyContext() */ - s->glx_context = glXCreateNewContext(s->dpy, s->glx_fb_config, - GLX_RGBA_TYPE, NULL, 1); - if (s->glx_context == NULL) { - return 1; - } - - return 0; -} - -static void tmu2_gl_map(struct vertex *mesh, int texhres, int texvres, - int hmeshlast, int vmeshlast, int ho, int vo, int sw, int sh) -{ - int x, y; - int x0, y0, x1, y1; - int u0, v0, u1, v1, u2, v2, u3, v3; - double xscale = 1.0 / ((double)(64 * texhres)); - double yscale = 1.0 / ((double)(64 * texvres)); - - glLoadIdentity(); - glTranslatef(ho, vo, 0); - glEnable(GL_TEXTURE_2D); - glBegin(GL_QUADS); - - for (y = 0; y < vmeshlast; y++) { - y0 = y * sh; - y1 = y0 + sh; - for (x = 0; x < hmeshlast; x++) { - x0 = x * sw; - x1 = x0 + sw; - - u0 = be32_to_cpu(mesh[MESH_MAXSIZE * y + x].x); - v0 = be32_to_cpu(mesh[MESH_MAXSIZE * y + x].y); - u1 = be32_to_cpu(mesh[MESH_MAXSIZE * y + x + 1].x); - v1 = be32_to_cpu(mesh[MESH_MAXSIZE * y + x + 1].y); - u2 = be32_to_cpu(mesh[MESH_MAXSIZE * (y + 1) + x + 1].x); - v2 = be32_to_cpu(mesh[MESH_MAXSIZE * (y + 
1) + x + 1].y); - u3 = be32_to_cpu(mesh[MESH_MAXSIZE * (y + 1) + x].x); - v3 = be32_to_cpu(mesh[MESH_MAXSIZE * (y + 1) + x].y); - - glTexCoord2d(((double)u0) * xscale, ((double)v0) * yscale); - glVertex3i(x0, y0, 0); - glTexCoord2d(((double)u1) * xscale, ((double)v1) * yscale); - glVertex3i(x1, y0, 0); - glTexCoord2d(((double)u2) * xscale, ((double)v2) * yscale); - glVertex3i(x1, y1, 0); - glTexCoord2d(((double)u3) * xscale, ((double)v3) * yscale); - glVertex3i(x0, y1, 0); - } - } - - glEnd(); -} - -static void tmu2_start(MilkymistTMU2State *s) -{ - int pbuffer_attrib[6] = { - GLX_PBUFFER_WIDTH, - 0, - GLX_PBUFFER_HEIGHT, - 0, - GLX_PRESERVED_CONTENTS, - True - }; - - GLXPbuffer pbuffer; - GLuint texture; - void *fb; - hwaddr fb_len; - void *mesh; - hwaddr mesh_len; - float m; - - trace_milkymist_tmu2_start(); - - /* Create and set up a suitable OpenGL context */ - pbuffer_attrib[1] = s->regs[R_DSTHRES]; - pbuffer_attrib[3] = s->regs[R_DSTVRES]; - pbuffer = glXCreatePbuffer(s->dpy, s->glx_fb_config, pbuffer_attrib); - glXMakeContextCurrent(s->dpy, pbuffer, pbuffer, s->glx_context); - - /* Fixup endianness. TODO: would it work on BE hosts? */ - glPixelStorei(GL_UNPACK_SWAP_BYTES, 1); - glPixelStorei(GL_PACK_SWAP_BYTES, 1); - - /* Row alignment */ - glPixelStorei(GL_UNPACK_ALIGNMENT, 2); - glPixelStorei(GL_PACK_ALIGNMENT, 2); - - /* Read the QEMU source framebuffer into an OpenGL texture */ - glGenTextures(1, &texture); - glBindTexture(GL_TEXTURE_2D, texture); - fb_len = 2ULL * s->regs[R_TEXHRES] * s->regs[R_TEXVRES]; - fb = cpu_physical_memory_map(s->regs[R_TEXFBUF], &fb_len, false); - if (fb == NULL) { - glDeleteTextures(1, &texture); - glXMakeContextCurrent(s->dpy, None, None, NULL); - glXDestroyPbuffer(s->dpy, pbuffer); - return; - } - glTexImage2D(GL_TEXTURE_2D, 0, 3, s->regs[R_TEXHRES], s->regs[R_TEXVRES], - 0, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, fb); - cpu_physical_memory_unmap(fb, fb_len, 0, fb_len); - - /* Set up texturing options */ - /* WARNING: - * Many cases of TMU2 masking are not supported by OpenGL. - * We only implement the most common ones: - * - full bilinear filtering vs. nearest texel - * - texture clamping vs. texture wrapping - */ - if ((s->regs[R_TEXHMASK] & 0x3f) > 0x20) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - } else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - } - if ((s->regs[R_TEXHMASK] >> 6) & s->regs[R_TEXHRES]) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP); - } else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); - } - if ((s->regs[R_TEXVMASK] >> 6) & s->regs[R_TEXVRES]) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP); - } else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); - } - - /* Translucency and decay */ - glEnable(GL_BLEND); - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - m = (float)(s->regs[R_BRIGHTNESS] + 1) / 64.0f; - glColor4f(m, m, m, (float)(s->regs[R_ALPHA] + 1) / 64.0f); - - /* Read the QEMU dest. 
framebuffer into the OpenGL framebuffer */ - fb_len = 2ULL * s->regs[R_DSTHRES] * s->regs[R_DSTVRES]; - fb = cpu_physical_memory_map(s->regs[R_DSTFBUF], &fb_len, false); - if (fb == NULL) { - glDeleteTextures(1, &texture); - glXMakeContextCurrent(s->dpy, None, None, NULL); - glXDestroyPbuffer(s->dpy, pbuffer); - return; - } - - glDrawPixels(s->regs[R_DSTHRES], s->regs[R_DSTVRES], GL_RGB, - GL_UNSIGNED_SHORT_5_6_5, fb); - cpu_physical_memory_unmap(fb, fb_len, 0, fb_len); - glViewport(0, 0, s->regs[R_DSTHRES], s->regs[R_DSTVRES]); - glMatrixMode(GL_PROJECTION); - glLoadIdentity(); - glOrtho(0.0, s->regs[R_DSTHRES], 0.0, s->regs[R_DSTVRES], -1.0, 1.0); - glMatrixMode(GL_MODELVIEW); - - /* Map the texture */ - mesh_len = MESH_MAXSIZE*MESH_MAXSIZE*sizeof(struct vertex); - mesh = cpu_physical_memory_map(s->regs[R_VERTICESADDR], &mesh_len, false); - if (mesh == NULL) { - glDeleteTextures(1, &texture); - glXMakeContextCurrent(s->dpy, None, None, NULL); - glXDestroyPbuffer(s->dpy, pbuffer); - return; - } - - tmu2_gl_map((struct vertex *)mesh, - s->regs[R_TEXHRES], s->regs[R_TEXVRES], - s->regs[R_HMESHLAST], s->regs[R_VMESHLAST], - s->regs[R_DSTHOFFSET], s->regs[R_DSTVOFFSET], - s->regs[R_DSTSQUAREW], s->regs[R_DSTSQUAREH]); - cpu_physical_memory_unmap(mesh, mesh_len, 0, mesh_len); - - /* Write back the OpenGL framebuffer to the QEMU framebuffer */ - fb_len = 2ULL * s->regs[R_DSTHRES] * s->regs[R_DSTVRES]; - fb = cpu_physical_memory_map(s->regs[R_DSTFBUF], &fb_len, true); - if (fb == NULL) { - glDeleteTextures(1, &texture); - glXMakeContextCurrent(s->dpy, None, None, NULL); - glXDestroyPbuffer(s->dpy, pbuffer); - return; - } - - glReadPixels(0, 0, s->regs[R_DSTHRES], s->regs[R_DSTVRES], GL_RGB, - GL_UNSIGNED_SHORT_5_6_5, fb); - cpu_physical_memory_unmap(fb, fb_len, 1, fb_len); - - /* Free OpenGL allocs */ - glDeleteTextures(1, &texture); - glXMakeContextCurrent(s->dpy, None, None, NULL); - glXDestroyPbuffer(s->dpy, pbuffer); - - s->regs[R_CTL] &= ~CTL_START_BUSY; - - trace_milkymist_tmu2_pulse_irq(); - qemu_irq_pulse(s->irq); -} - -static uint64_t tmu2_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistTMU2State *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_CTL: - case R_HMESHLAST: - case R_VMESHLAST: - case R_BRIGHTNESS: - case R_CHROMAKEY: - case R_VERTICESADDR: - case R_TEXFBUF: - case R_TEXHRES: - case R_TEXVRES: - case R_TEXHMASK: - case R_TEXVMASK: - case R_DSTFBUF: - case R_DSTHRES: - case R_DSTVRES: - case R_DSTHOFFSET: - case R_DSTVOFFSET: - case R_DSTSQUAREW: - case R_DSTSQUAREH: - case R_ALPHA: - r = s->regs[addr]; - break; - - default: - error_report("milkymist_tmu2: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_tmu2_memory_read(addr << 2, r); - - return r; -} - -static void tmu2_check_registers(MilkymistTMU2State *s) -{ - if (s->regs[R_BRIGHTNESS] > MAX_BRIGHTNESS) { - error_report("milkymist_tmu2: max brightness is %d", MAX_BRIGHTNESS); - } - - if (s->regs[R_ALPHA] > MAX_ALPHA) { - error_report("milkymist_tmu2: max alpha is %d", MAX_ALPHA); - } - - if (s->regs[R_VERTICESADDR] & 0x07) { - error_report("milkymist_tmu2: vertex mesh address has to be 64-bit " - "aligned"); - } - - if (s->regs[R_TEXFBUF] & 0x01) { - error_report("milkymist_tmu2: texture buffer address has to be " - "16-bit aligned"); - } -} - -static void tmu2_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistTMU2State *s = opaque; - - trace_milkymist_tmu2_memory_write(addr, value); - - addr >>= 2; - 
switch (addr) { - case R_CTL: - s->regs[addr] = value; - if (value & CTL_START_BUSY) { - tmu2_start(s); - } - break; - case R_BRIGHTNESS: - case R_HMESHLAST: - case R_VMESHLAST: - case R_CHROMAKEY: - case R_VERTICESADDR: - case R_TEXFBUF: - case R_TEXHRES: - case R_TEXVRES: - case R_TEXHMASK: - case R_TEXVMASK: - case R_DSTFBUF: - case R_DSTHRES: - case R_DSTVRES: - case R_DSTHOFFSET: - case R_DSTVOFFSET: - case R_DSTSQUAREW: - case R_DSTSQUAREH: - case R_ALPHA: - s->regs[addr] = value; - break; - - default: - error_report("milkymist_tmu2: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - tmu2_check_registers(s); -} - -static const MemoryRegionOps tmu2_mmio_ops = { - .read = tmu2_read, - .write = tmu2_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void milkymist_tmu2_reset(DeviceState *d) -{ - MilkymistTMU2State *s = MILKYMIST_TMU2(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } -} - -static void milkymist_tmu2_init(Object *obj) -{ - MilkymistTMU2State *s = MILKYMIST_TMU2(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); - - sysbus_init_irq(dev, &s->irq); - - memory_region_init_io(&s->regs_region, obj, &tmu2_mmio_ops, s, - "milkymist-tmu2", R_MAX * 4); - sysbus_init_mmio(dev, &s->regs_region); -} - -static void milkymist_tmu2_realize(DeviceState *dev, Error **errp) -{ - MilkymistTMU2State *s = MILKYMIST_TMU2(dev); - - if (tmu2_glx_init(s)) { - error_setg(errp, "tmu2_glx_init failed"); - } -} - -static const VMStateDescription vmstate_milkymist_tmu2 = { - .name = "milkymist-tmu2", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistTMU2State, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static void milkymist_tmu2_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_tmu2_realize; - dc->reset = milkymist_tmu2_reset; - dc->vmsd = &vmstate_milkymist_tmu2; -} - -static const TypeInfo milkymist_tmu2_info = { - .name = TYPE_MILKYMIST_TMU2, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistTMU2State), - .instance_init = milkymist_tmu2_init, - .class_init = milkymist_tmu2_class_init, -}; - -static void milkymist_tmu2_register_types(void) -{ - type_register_static(&milkymist_tmu2_info); -} - -type_init(milkymist_tmu2_register_types) - -DeviceState *milkymist_tmu2_create(hwaddr base, qemu_irq irq) -{ - DeviceState *dev; - Display *d; - GLXFBConfig *configs; - int nelements; - int ver_major, ver_minor; - - /* check that GLX will work */ - d = XOpenDisplay(NULL); - if (d == NULL) { - return NULL; - } - - if (!glXQueryVersion(d, &ver_major, &ver_minor)) { - /* - * Yeah, sometimes getting the GLX version can fail. - * Isn't X beautiful? - */ - XCloseDisplay(d); - return NULL; - } - - if ((ver_major < 1) || ((ver_major == 1) && (ver_minor < 3))) { - printf("Your GLX version is %d.%d," - "but TMU emulation needs at least 1.3. 
TMU disabled.\n", - ver_major, ver_minor); - XCloseDisplay(d); - return NULL; - } - - configs = glXChooseFBConfig(d, 0, glx_fbconfig_attr, &nelements); - if (configs == NULL) { - XCloseDisplay(d); - return NULL; - } - - XFree(configs); - XCloseDisplay(d); - - dev = qdev_new(TYPE_MILKYMIST_TMU2); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq); - - return dev; -} diff --git a/hw/display/milkymist-vgafb.c b/hw/display/milkymist-vgafb.c deleted file mode 100644 index e2c587e2dfc..00000000000 --- a/hw/display/milkymist-vgafb.c +++ /dev/null @@ -1,360 +0,0 @@ - -/* - * QEMU model of the Milkymist VGA framebuffer. - * - * Copyright (c) 2010-2012 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/vgafb.pdf - */ - -#include "qemu/osdep.h" -#include "hw/hw.h" -#include "hw/qdev-properties.h" -#include "hw/sysbus.h" -#include "trace.h" -#include "ui/console.h" -#include "framebuffer.h" -#include "ui/pixel_ops.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -#define BITS 8 -#include "migration/vmstate.h" -#include "milkymist-vgafb_template.h" -#define BITS 15 -#include "milkymist-vgafb_template.h" -#define BITS 16 -#include "milkymist-vgafb_template.h" -#define BITS 24 -#include "milkymist-vgafb_template.h" -#define BITS 32 -#include "milkymist-vgafb_template.h" - -enum { - R_CTRL = 0, - R_HRES, - R_HSYNC_START, - R_HSYNC_END, - R_HSCAN, - R_VRES, - R_VSYNC_START, - R_VSYNC_END, - R_VSCAN, - R_BASEADDRESS, - R_BASEADDRESS_ACT, - R_BURST_COUNT, - R_DDC, - R_SOURCE_CLOCK, - R_MAX -}; - -enum { - CTRL_RESET = (1<<0), -}; - -#define TYPE_MILKYMIST_VGAFB "milkymist-vgafb" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistVgafbState, MILKYMIST_VGAFB) - -struct MilkymistVgafbState { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - MemoryRegionSection fbsection; - QemuConsole *con; - - int invalidate; - uint32_t fb_offset; - uint32_t fb_mask; - - uint32_t regs[R_MAX]; -}; - -static int vgafb_enabled(MilkymistVgafbState *s) -{ - return !(s->regs[R_CTRL] & CTRL_RESET); -} - -static void vgafb_update_display(void *opaque) -{ - MilkymistVgafbState *s = opaque; - SysBusDevice *sbd; - DisplaySurface *surface = qemu_console_surface(s->con); - int src_width; - int first = 0; - int last = 0; - drawfn fn; - - if (!vgafb_enabled(s)) { - return; - } - - sbd = SYS_BUS_DEVICE(s); - int dest_width = s->regs[R_HRES]; - - switch (surface_bits_per_pixel(surface)) { - case 0: - return; - case 8: - fn = draw_line_8; - break; - case 15: - fn = draw_line_15; - dest_width *= 2; - break; - case 16: - fn = draw_line_16; - dest_width *= 2; - break; - case 24: - fn = draw_line_24; - dest_width *= 3; - break; - case 32: - fn = draw_line_32; - dest_width *= 4; - break; - default: - hw_error("milkymist_vgafb: 
bad color depth\n"); - break; - } - - src_width = s->regs[R_HRES] * 2; - if (s->invalidate) { - framebuffer_update_memory_section(&s->fbsection, - sysbus_address_space(sbd), - s->regs[R_BASEADDRESS] + s->fb_offset, - s->regs[R_VRES], src_width); - } - - framebuffer_update_display(surface, &s->fbsection, - s->regs[R_HRES], - s->regs[R_VRES], - src_width, - dest_width, - 0, - s->invalidate, - fn, - NULL, - &first, &last); - - if (first >= 0) { - dpy_gfx_update(s->con, 0, first, s->regs[R_HRES], last - first + 1); - } - s->invalidate = 0; -} - -static void vgafb_invalidate_display(void *opaque) -{ - MilkymistVgafbState *s = opaque; - s->invalidate = 1; -} - -static void vgafb_resize(MilkymistVgafbState *s) -{ - if (!vgafb_enabled(s)) { - return; - } - - qemu_console_resize(s->con, s->regs[R_HRES], s->regs[R_VRES]); - s->invalidate = 1; -} - -static uint64_t vgafb_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistVgafbState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_CTRL: - case R_HRES: - case R_HSYNC_START: - case R_HSYNC_END: - case R_HSCAN: - case R_VRES: - case R_VSYNC_START: - case R_VSYNC_END: - case R_VSCAN: - case R_BASEADDRESS: - case R_BURST_COUNT: - case R_DDC: - case R_SOURCE_CLOCK: - r = s->regs[addr]; - break; - case R_BASEADDRESS_ACT: - r = s->regs[R_BASEADDRESS]; - break; - - default: - error_report("milkymist_vgafb: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_vgafb_memory_read(addr << 2, r); - - return r; -} - -static void vgafb_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistVgafbState *s = opaque; - - trace_milkymist_vgafb_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_CTRL: - s->regs[addr] = value; - vgafb_resize(s); - break; - case R_HSYNC_START: - case R_HSYNC_END: - case R_HSCAN: - case R_VSYNC_START: - case R_VSYNC_END: - case R_VSCAN: - case R_BURST_COUNT: - case R_DDC: - case R_SOURCE_CLOCK: - s->regs[addr] = value; - break; - case R_BASEADDRESS: - if (value & 0x1f) { - error_report("milkymist_vgafb: framebuffer base address have to " - "be 32 byte aligned"); - break; - } - s->regs[addr] = value & s->fb_mask; - s->invalidate = 1; - break; - case R_HRES: - case R_VRES: - s->regs[addr] = value; - vgafb_resize(s); - break; - case R_BASEADDRESS_ACT: - error_report("milkymist_vgafb: write to read-only register 0x" - TARGET_FMT_plx, addr << 2); - break; - - default: - error_report("milkymist_vgafb: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } -} - -static const MemoryRegionOps vgafb_mmio_ops = { - .read = vgafb_read, - .write = vgafb_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void milkymist_vgafb_reset(DeviceState *d) -{ - MilkymistVgafbState *s = MILKYMIST_VGAFB(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - - /* defaults */ - s->regs[R_CTRL] = CTRL_RESET; - s->regs[R_HRES] = 640; - s->regs[R_VRES] = 480; - s->regs[R_BASEADDRESS] = 0; -} - -static const GraphicHwOps vgafb_ops = { - .invalidate = vgafb_invalidate_display, - .gfx_update = vgafb_update_display, -}; - -static void milkymist_vgafb_init(Object *obj) -{ - MilkymistVgafbState *s = MILKYMIST_VGAFB(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); - - memory_region_init_io(&s->regs_region, OBJECT(s), &vgafb_mmio_ops, s, - "milkymist-vgafb", R_MAX * 4); - sysbus_init_mmio(dev, &s->regs_region); -} - -static void 
milkymist_vgafb_realize(DeviceState *dev, Error **errp) -{ - MilkymistVgafbState *s = MILKYMIST_VGAFB(dev); - - s->con = graphic_console_init(dev, 0, &vgafb_ops, s); -} - -static int vgafb_post_load(void *opaque, int version_id) -{ - vgafb_invalidate_display(opaque); - return 0; -} - -static const VMStateDescription vmstate_milkymist_vgafb = { - .name = "milkymist-vgafb", - .version_id = 1, - .minimum_version_id = 1, - .post_load = vgafb_post_load, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistVgafbState, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static Property milkymist_vgafb_properties[] = { - DEFINE_PROP_UINT32("fb_offset", MilkymistVgafbState, fb_offset, 0x0), - DEFINE_PROP_UINT32("fb_mask", MilkymistVgafbState, fb_mask, 0xffffffff), - DEFINE_PROP_END_OF_LIST(), -}; - -static void milkymist_vgafb_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->reset = milkymist_vgafb_reset; - dc->vmsd = &vmstate_milkymist_vgafb; - device_class_set_props(dc, milkymist_vgafb_properties); - dc->realize = milkymist_vgafb_realize; -} - -static const TypeInfo milkymist_vgafb_info = { - .name = TYPE_MILKYMIST_VGAFB, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistVgafbState), - .instance_init = milkymist_vgafb_init, - .class_init = milkymist_vgafb_class_init, -}; - -static void milkymist_vgafb_register_types(void) -{ - type_register_static(&milkymist_vgafb_info); -} - -type_init(milkymist_vgafb_register_types) diff --git a/hw/display/milkymist-vgafb_template.h b/hw/display/milkymist-vgafb_template.h deleted file mode 100644 index 96137f97093..00000000000 --- a/hw/display/milkymist-vgafb_template.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * QEMU model of the Milkymist VGA framebuffer. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- * - */ - -#if BITS == 8 -#define COPY_PIXEL(to, r, g, b) \ - do { \ - *to = rgb_to_pixel8(r, g, b); \ - to += 1; \ - } while (0) -#elif BITS == 15 -#define COPY_PIXEL(to, r, g, b) \ - do { \ - *(uint16_t *)to = rgb_to_pixel15(r, g, b); \ - to += 2; \ - } while (0) -#elif BITS == 16 -#define COPY_PIXEL(to, r, g, b) \ - do { \ - *(uint16_t *)to = rgb_to_pixel16(r, g, b); \ - to += 2; \ - } while (0) -#elif BITS == 24 -#define COPY_PIXEL(to, r, g, b) \ - do { \ - uint32_t tmp = rgb_to_pixel24(r, g, b); \ - *(to++) = tmp & 0xff; \ - *(to++) = (tmp >> 8) & 0xff; \ - *(to++) = (tmp >> 16) & 0xff; \ - } while (0) -#elif BITS == 32 -#define COPY_PIXEL(to, r, g, b) \ - do { \ - *(uint32_t *)to = rgb_to_pixel32(r, g, b); \ - to += 4; \ - } while (0) -#else -#error unknown bit depth -#endif - -static void glue(draw_line_, BITS)(void *opaque, uint8_t *d, const uint8_t *s, - int width, int deststep) -{ - uint16_t rgb565; - uint8_t r, g, b; - - while (width--) { - rgb565 = lduw_be_p(s); - r = ((rgb565 >> 11) & 0x1f) << 3; - g = ((rgb565 >> 5) & 0x3f) << 2; - b = ((rgb565 >> 0) & 0x1f) << 3; - COPY_PIXEL(d, r, g, b); - s += 2; - } -} - -#undef BITS -#undef COPY_PIXEL diff --git a/hw/display/next-fb.c b/hw/display/next-fb.c index e2d895109db..dd6a1aa8aee 100644 --- a/hw/display/next-fb.c +++ b/hw/display/next-fb.c @@ -24,8 +24,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "ui/console.h" -#include "hw/hw.h" -#include "hw/boards.h" #include "hw/loader.h" #include "framebuffer.h" #include "ui/pixel_ops.h" diff --git a/hw/display/qxl.c b/hw/display/qxl.c index 93907e82a33..29c80b4289b 100644 --- a/hw/display/qxl.c +++ b/hw/display/qxl.c @@ -30,7 +30,6 @@ #include "qemu/module.h" #include "hw/qdev-properties.h" #include "sysemu/runstate.h" -#include "migration/blocker.h" #include "migration/vmstate.h" #include "trace.h" @@ -321,7 +320,7 @@ static ram_addr_t qxl_rom_size(void) #define QXL_ROM_SZ 8192 QEMU_BUILD_BUG_ON(QXL_REQUIRED_SZ > QXL_ROM_SZ); - return QXL_ROM_SZ; + return QEMU_ALIGN_UP(QXL_REQUIRED_SZ, qemu_real_host_page_size); } static void init_qxl_rom(PCIQXLDevice *d) @@ -666,30 +665,6 @@ static int interface_get_command(QXLInstance *sin, struct QXLCommandExt *ext) qxl->guest_primary.commands++; qxl_track_command(qxl, ext); qxl_log_command(qxl, "cmd", ext); - { - /* - * Windows 8 drivers place qxl commands in the vram - * (instead of the ram) bar. We can't live migrate such a - * guest, so add a migration blocker in case we detect - * this, to avoid triggering the assert in pre_save(). 
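qxl_rom_size() above now returns QXL_REQUIRED_SZ rounded up to the host page size instead of a fixed 8 KiB. A generic sketch of that align-up arithmetic (QEMU's QEMU_ALIGN_UP macro expands to essentially the same expression):

#include <stddef.h>

/* Sketch: round sz up to the next multiple of align (align > 0). */
static size_t align_up(size_t sz, size_t align)
{
    return (sz + align - 1) / align * align;
}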
- * - * https://cgit.freedesktop.org/spice/win32/qxl-wddm-dod/commit/?id=f6e099db39e7d0787f294d5fd0dce328b5210faa - */ - void *msg = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id); - if (msg != NULL && ( - msg < (void *)qxl->vga.vram_ptr || - msg > ((void *)qxl->vga.vram_ptr + qxl->vga.vram_size))) { - if (!qxl->migration_blocker) { - Error *local_err = NULL; - error_setg(&qxl->migration_blocker, - "qxl: guest bug: command not in ram bar"); - migrate_add_blocker(qxl->migration_blocker, &local_err); - if (local_err) { - error_report_err(local_err); - } - } - } - } trace_qxl_ring_command_get(qxl->id, qxl_mode_to_string(qxl->mode)); return true; default: @@ -1283,12 +1258,6 @@ static void qxl_hard_reset(PCIQXLDevice *d, int loadvm) qemu_spice_create_host_memslot(&d->ssd); qxl_soft_reset(d); - if (d->migration_blocker) { - migrate_del_blocker(d->migration_blocker); - error_free(d->migration_blocker); - d->migration_blocker = NULL; - } - if (startstop) { qemu_spice_display_start(); } @@ -2283,7 +2252,9 @@ static int qxl_pre_save(void *opaque) } else { d->last_release_offset = (uint8_t *)d->last_release - ram_start; } - assert(d->last_release_offset < d->vga.vram_size); + if (d->last_release_offset >= d->vga.vram_size) { + return 1; + } return 0; } @@ -2384,7 +2355,7 @@ static bool qxl_monitors_config_needed(void *opaque) } -static VMStateDescription qxl_memslot = { +static const VMStateDescription qxl_memslot = { .name = "qxl-memslot", .version_id = QXL_SAVE_VERSION, .minimum_version_id = QXL_SAVE_VERSION, @@ -2396,7 +2367,7 @@ static VMStateDescription qxl_memslot = { } }; -static VMStateDescription qxl_surface = { +static const VMStateDescription qxl_surface = { .name = "qxl-surface", .version_id = QXL_SAVE_VERSION, .minimum_version_id = QXL_SAVE_VERSION, @@ -2414,7 +2385,7 @@ static VMStateDescription qxl_surface = { } }; -static VMStateDescription qxl_vmstate_monitors_config = { +static const VMStateDescription qxl_vmstate_monitors_config = { .name = "qxl/monitors-config", .version_id = 1, .minimum_version_id = 1, @@ -2425,7 +2396,7 @@ static VMStateDescription qxl_vmstate_monitors_config = { }, }; -static VMStateDescription qxl_vmstate = { +static const VMStateDescription qxl_vmstate = { .name = "qxl", .version_id = QXL_SAVE_VERSION, .minimum_version_id = QXL_SAVE_VERSION, @@ -2522,6 +2493,7 @@ static const TypeInfo qxl_primary_info = { .parent = TYPE_PCI_QXL, .class_init = qxl_primary_class_init, }; +module_obj("qxl-vga"); static void qxl_secondary_class_init(ObjectClass *klass, void *data) { @@ -2538,6 +2510,7 @@ static const TypeInfo qxl_secondary_info = { .parent = TYPE_PCI_QXL, .class_init = qxl_secondary_class_init, }; +module_obj("qxl"); static void qxl_register_types(void) { @@ -2547,3 +2520,5 @@ static void qxl_register_types(void) } type_init(qxl_register_types) + +module_dep("ui-spice-core"); diff --git a/hw/display/qxl.h b/hw/display/qxl.h index 379d3304abc..30d21f4d0bd 100644 --- a/hw/display/qxl.h +++ b/hw/display/qxl.h @@ -39,7 +39,6 @@ struct PCIQXLDevice { uint32_t cmdlog; uint32_t guest_bug; - Error *migration_blocker; enum qxl_mode mode; uint32_t cmdflags; diff --git a/hw/display/sm501.c b/hw/display/sm501.c index 8789722ef27..663c37e7f28 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -1033,16 +1033,18 @@ static void sm501_i2c_write(void *opaque, hwaddr addr, uint64_t value, case SM501_I2C_CONTROL: if (value & SM501_I2C_CONTROL_ENABLE) { if (value & SM501_I2C_CONTROL_START) { + bool is_recv = s->i2c_addr & 1; int res = i2c_start_transfer(s->i2c_bus, 
s->i2c_addr >> 1, - s->i2c_addr & 1); - s->i2c_status |= (res ? SM501_I2C_STATUS_ERROR : 0); - if (!res) { + is_recv); + if (res) { + s->i2c_status |= SM501_I2C_STATUS_ERROR; + } else { int i; for (i = 0; i <= s->i2c_byte_count; i++) { - res = i2c_send_recv(s->i2c_bus, &s->i2c_data[i], - !(s->i2c_addr & 1)); - if (res) { + if (is_recv) { + s->i2c_data[i] = i2c_recv(s->i2c_bus); + } else if (i2c_send(s->i2c_bus, s->i2c_data[i]) < 0) { s->i2c_status |= SM501_I2C_STATUS_ERROR; return; } @@ -1826,7 +1828,7 @@ static void sm501_init(SM501State *s, DeviceState *dev, s->i2c_bus = i2c_init_bus(dev, "sm501.i2c"); /* ddc */ I2CDDCState *ddc = I2CDDC(qdev_new(TYPE_I2CDDC)); - i2c_set_slave_address(I2C_SLAVE(ddc), 0x50); + i2c_slave_set_address(I2C_SLAVE(ddc), 0x50); qdev_realize_and_unref(DEVICE(ddc), BUS(s->i2c_bus), &error_abort); /* mmio */ diff --git a/hw/display/trace-events b/hw/display/trace-events index 957b8ba9943..3a7a2c957f4 100644 --- a/hw/display/trace-events +++ b/hw/display/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # jazz_led.c jazz_led_read(uint64_t addr, uint8_t val) "read addr=0x%"PRIx64": 0x%x" @@ -13,16 +13,6 @@ xenfb_input_connected(void *xendev, int abs_pointer_wanted) "%p abs %d" g364fb_read(uint64_t addr, uint32_t val) "read addr=0x%"PRIx64": 0x%x" g364fb_write(uint64_t addr, uint32_t new) "write addr=0x%"PRIx64": 0x%x" -# milkymist-tmu2.c -milkymist_tmu2_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_tmu2_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_tmu2_start(void) "Start TMU" -milkymist_tmu2_pulse_irq(void) "Pulse IRQ" - -# milkymist-vgafb.c -milkymist_vgafb_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_vgafb_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" - # vmware_vga.c vmware_value_read(uint32_t index, uint32_t value) "index %d, value 0x%x" vmware_value_write(uint32_t index, uint32_t value) "index %d, value 0x%x" @@ -40,8 +30,10 @@ virtio_gpu_features(bool virgl) "virgl %d" virtio_gpu_cmd_get_display_info(void) "" virtio_gpu_cmd_get_edid(uint32_t scanout) "scanout %d" virtio_gpu_cmd_set_scanout(uint32_t id, uint32_t res, uint32_t w, uint32_t h, uint32_t x, uint32_t y) "id %d, res 0x%x, w %d, h %d, x %d, y %d" +virtio_gpu_cmd_set_scanout_blob(uint32_t id, uint32_t res, uint32_t w, uint32_t h, uint32_t x, uint32_t y) "id %d, res 0x%x, w %d, h %d, x %d, y %d" virtio_gpu_cmd_res_create_2d(uint32_t res, uint32_t fmt, uint32_t w, uint32_t h) "res 0x%x, fmt 0x%x, w %d, h %d" virtio_gpu_cmd_res_create_3d(uint32_t res, uint32_t fmt, uint32_t w, uint32_t h, uint32_t d) "res 0x%x, fmt 0x%x, w %d, h %d, d %d" +virtio_gpu_cmd_res_create_blob(uint32_t res, uint64_t size) "res 0x%x, size %" PRId64 virtio_gpu_cmd_res_unref(uint32_t res) "res 0x%x" virtio_gpu_cmd_res_back_attach(uint32_t res) "res 0x%x" virtio_gpu_cmd_res_back_detach(uint32_t res) "res 0x%x" @@ -175,3 +167,10 @@ sm501_disp_ctrl_read(uint32_t addr, uint32_t val) "addr=0x%x, val=0x%x" sm501_disp_ctrl_write(uint32_t addr, uint32_t val) "addr=0x%x, val=0x%x" sm501_2d_engine_read(uint32_t addr, uint32_t val) "addr=0x%x, val=0x%x" sm501_2d_engine_write(uint32_t addr, uint32_t val) "addr=0x%x, val=0x%x" + +# macfb.c +macfb_ctrl_read(uint64_t addr, uint64_t value, unsigned int size) "addr 0x%"PRIx64 " value 0x%"PRIx64 " size %u" +macfb_ctrl_write(uint64_t addr, uint64_t value, unsigned int size) "addr 
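The sm501.c hunk above replaces i2c_send_recv() with explicit i2c_recv()/i2c_send() calls and derives the transfer direction from the guest-programmed address register: bit 0 selects receive, bits 7:1 hold the 7-bit target address. A small sketch of that split (a hypothetical helper for illustration, not a QEMU API):

#include <stdbool.h>
#include <stdint.h>

/* Sketch: decode the SM501 I2C address register as the hunk above does. */
static void sm501_i2c_decode_addr(uint8_t addr_reg, uint8_t *addr7, bool *is_recv)
{
    *is_recv = addr_reg & 1;   /* bit 0: 1 = read from device, 0 = write */
    *addr7 = addr_reg >> 1;    /* bits 7:1: 7-bit I2C target address */
}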
0x%"PRIx64 " value 0x%"PRIx64 " size %u" +macfb_sense_read(uint32_t value) "video sense: 0x%"PRIx32 +macfb_sense_write(uint32_t value) "video sense: 0x%"PRIx32 +macfb_update_mode(uint32_t width, uint32_t height, uint8_t depth) "setting mode to width %"PRId32 " height %"PRId32 " size %d" diff --git a/hw/display/vga-pci.c b/hw/display/vga-pci.c index 48d29630ab7..62fb5c38c1f 100644 --- a/hw/display/vga-pci.c +++ b/hw/display/vga-pci.c @@ -49,7 +49,7 @@ struct PCIVGAState { qemu_edid_info edid_info; MemoryRegion mmio; MemoryRegion mrs[4]; - uint8_t edid[256]; + uint8_t edid[384]; }; #define TYPE_PCI_VGA "pci-vga" diff --git a/hw/display/vga.c b/hw/display/vga.c index 836ad50c7b6..9d1f66af402 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -39,6 +39,8 @@ //#define DEBUG_VGA_MEM //#define DEBUG_VGA_REG +bool have_vga = true; + /* 16 state changes per vertical frame @60 Hz */ #define VGA_TEXT_CURSOR_PERIOD_MS (1000 * 2 * 16 / 60) @@ -750,7 +752,8 @@ void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val) val == VBE_DISPI_ID1 || val == VBE_DISPI_ID2 || val == VBE_DISPI_ID3 || - val == VBE_DISPI_ID4) { + val == VBE_DISPI_ID4 || + val == VBE_DISPI_ID5) { s->vbe_regs[s->vbe_index] = val; } break; diff --git a/hw/display/vhost-user-gpu-pci.c b/hw/display/vhost-user-gpu-pci.c index a02b23ecaf1..daefcf71015 100644 --- a/hw/display/vhost-user-gpu-pci.c +++ b/hw/display/vhost-user-gpu-pci.c @@ -43,6 +43,7 @@ static const VirtioPCIDeviceTypeInfo vhost_user_gpu_pci_info = { .instance_size = sizeof(VhostUserGPUPCI), .instance_init = vhost_user_gpu_pci_initfn, }; +module_obj(TYPE_VHOST_USER_GPU_PCI); static void vhost_user_gpu_pci_register_types(void) { diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 6cdaa1c73b9..49df56cd14e 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -415,14 +415,16 @@ vhost_user_gpu_get_config(VirtIODevice *vdev, uint8_t *config_data) VirtIOGPUBase *b = VIRTIO_GPU_BASE(vdev); struct virtio_gpu_config *vgconfig = (struct virtio_gpu_config *)config_data; + Error *local_err = NULL; int ret; memset(config_data, 0, sizeof(struct virtio_gpu_config)); ret = vhost_dev_get_config(&g->vhost->dev, - config_data, sizeof(struct virtio_gpu_config)); + config_data, sizeof(struct virtio_gpu_config), + &local_err); if (ret) { - error_report("vhost-user-gpu: get device config space failed"); + error_report_err(local_err); return; } @@ -596,6 +598,7 @@ static const TypeInfo vhost_user_gpu_info = { .instance_finalize = vhost_user_gpu_instance_finalize, .class_init = vhost_user_gpu_class_init, }; +module_obj(TYPE_VHOST_USER_GPU); static void vhost_user_gpu_register_types(void) { diff --git a/hw/display/vhost-user-vga.c b/hw/display/vhost-user-vga.c index a34a99856d7..072c9c65bc7 100644 --- a/hw/display/vhost-user-vga.c +++ b/hw/display/vhost-user-vga.c @@ -44,6 +44,7 @@ static const VirtioPCIDeviceTypeInfo vhost_user_vga_info = { .instance_size = sizeof(VhostUserVGA), .instance_init = vhost_user_vga_inst_initfn, }; +module_obj(TYPE_VHOST_USER_VGA); static void vhost_user_vga_register_types(void) { diff --git a/hw/display/virtio-gpu-3d.c b/hw/display/virtio-gpu-3d.c deleted file mode 100644 index d98964858e1..00000000000 --- a/hw/display/virtio-gpu-3d.c +++ /dev/null @@ -1,628 +0,0 @@ -/* - * Virtio GPU Device - * - * Copyright Red Hat, Inc. 2013-2014 - * - * Authors: - * Dave Airlie - * Gerd Hoffmann - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. 
- * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "qemu/iov.h" -#include "trace.h" -#include "hw/virtio/virtio.h" -#include "hw/virtio/virtio-gpu.h" - -#include - -static struct virgl_renderer_callbacks virtio_gpu_3d_cbs; - -static void virgl_cmd_create_resource_2d(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_resource_create_2d c2d; - struct virgl_renderer_resource_create_args args; - - VIRTIO_GPU_FILL_CMD(c2d); - trace_virtio_gpu_cmd_res_create_2d(c2d.resource_id, c2d.format, - c2d.width, c2d.height); - - args.handle = c2d.resource_id; - args.target = 2; - args.format = c2d.format; - args.bind = (1 << 1); - args.width = c2d.width; - args.height = c2d.height; - args.depth = 1; - args.array_size = 1; - args.last_level = 0; - args.nr_samples = 0; - args.flags = VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP; - virgl_renderer_resource_create(&args, NULL, 0); -} - -static void virgl_cmd_create_resource_3d(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_resource_create_3d c3d; - struct virgl_renderer_resource_create_args args; - - VIRTIO_GPU_FILL_CMD(c3d); - trace_virtio_gpu_cmd_res_create_3d(c3d.resource_id, c3d.format, - c3d.width, c3d.height, c3d.depth); - - args.handle = c3d.resource_id; - args.target = c3d.target; - args.format = c3d.format; - args.bind = c3d.bind; - args.width = c3d.width; - args.height = c3d.height; - args.depth = c3d.depth; - args.array_size = c3d.array_size; - args.last_level = c3d.last_level; - args.nr_samples = c3d.nr_samples; - args.flags = c3d.flags; - virgl_renderer_resource_create(&args, NULL, 0); -} - -static void virgl_cmd_resource_unref(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_resource_unref unref; - struct iovec *res_iovs = NULL; - int num_iovs = 0; - - VIRTIO_GPU_FILL_CMD(unref); - trace_virtio_gpu_cmd_res_unref(unref.resource_id); - - virgl_renderer_resource_detach_iov(unref.resource_id, - &res_iovs, - &num_iovs); - if (res_iovs != NULL && num_iovs != 0) { - virtio_gpu_cleanup_mapping_iov(g, res_iovs, num_iovs); - } - virgl_renderer_resource_unref(unref.resource_id); -} - -static void virgl_cmd_context_create(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_ctx_create cc; - - VIRTIO_GPU_FILL_CMD(cc); - trace_virtio_gpu_cmd_ctx_create(cc.hdr.ctx_id, - cc.debug_name); - - virgl_renderer_context_create(cc.hdr.ctx_id, cc.nlen, - cc.debug_name); -} - -static void virgl_cmd_context_destroy(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_ctx_destroy cd; - - VIRTIO_GPU_FILL_CMD(cd); - trace_virtio_gpu_cmd_ctx_destroy(cd.hdr.ctx_id); - - virgl_renderer_context_destroy(cd.hdr.ctx_id); -} - -static void virtio_gpu_rect_update(VirtIOGPU *g, int idx, int x, int y, - int width, int height) -{ - if (!g->parent_obj.scanout[idx].con) { - return; - } - - dpy_gl_update(g->parent_obj.scanout[idx].con, x, y, width, height); -} - -static void virgl_cmd_resource_flush(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_resource_flush rf; - int i; - - VIRTIO_GPU_FILL_CMD(rf); - trace_virtio_gpu_cmd_res_flush(rf.resource_id, - rf.r.width, rf.r.height, rf.r.x, rf.r.y); - - for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { - if (g->parent_obj.scanout[i].resource_id != rf.resource_id) { - continue; - } - virtio_gpu_rect_update(g, i, rf.r.x, rf.r.y, rf.r.width, rf.r.height); - } -} - -static void virgl_cmd_set_scanout(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - 
struct virtio_gpu_set_scanout ss; - struct virgl_renderer_resource_info info; - int ret; - - VIRTIO_GPU_FILL_CMD(ss); - trace_virtio_gpu_cmd_set_scanout(ss.scanout_id, ss.resource_id, - ss.r.width, ss.r.height, ss.r.x, ss.r.y); - - if (ss.scanout_id >= g->parent_obj.conf.max_outputs) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout id specified %d", - __func__, ss.scanout_id); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; - return; - } - g->parent_obj.enable = 1; - - memset(&info, 0, sizeof(info)); - - if (ss.resource_id && ss.r.width && ss.r.height) { - ret = virgl_renderer_resource_get_info(ss.resource_id, &info); - if (ret == -1) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: illegal resource specified %d\n", - __func__, ss.resource_id); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; - return; - } - qemu_console_resize(g->parent_obj.scanout[ss.scanout_id].con, - ss.r.width, ss.r.height); - virgl_renderer_force_ctx_0(); - dpy_gl_scanout_texture( - g->parent_obj.scanout[ss.scanout_id].con, info.tex_id, - info.flags & 1 /* FIXME: Y_0_TOP */, - info.width, info.height, - ss.r.x, ss.r.y, ss.r.width, ss.r.height); - } else { - dpy_gfx_replace_surface( - g->parent_obj.scanout[ss.scanout_id].con, NULL); - dpy_gl_scanout_disable(g->parent_obj.scanout[ss.scanout_id].con); - } - g->parent_obj.scanout[ss.scanout_id].resource_id = ss.resource_id; -} - -static void virgl_cmd_submit_3d(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_cmd_submit cs; - void *buf; - size_t s; - - VIRTIO_GPU_FILL_CMD(cs); - trace_virtio_gpu_cmd_ctx_submit(cs.hdr.ctx_id, cs.size); - - buf = g_malloc(cs.size); - s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, - sizeof(cs), buf, cs.size); - if (s != cs.size) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: size mismatch (%zd/%d)", - __func__, s, cs.size); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; - goto out; - } - - if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { - g->stats.req_3d++; - g->stats.bytes_3d += cs.size; - } - - virgl_renderer_submit_cmd(buf, cs.hdr.ctx_id, cs.size / 4); - -out: - g_free(buf); -} - -static void virgl_cmd_transfer_to_host_2d(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_transfer_to_host_2d t2d; - struct virtio_gpu_box box; - - VIRTIO_GPU_FILL_CMD(t2d); - trace_virtio_gpu_cmd_res_xfer_toh_2d(t2d.resource_id); - - box.x = t2d.r.x; - box.y = t2d.r.y; - box.z = 0; - box.w = t2d.r.width; - box.h = t2d.r.height; - box.d = 1; - - virgl_renderer_transfer_write_iov(t2d.resource_id, - 0, - 0, - 0, - 0, - (struct virgl_box *)&box, - t2d.offset, NULL, 0); -} - -static void virgl_cmd_transfer_to_host_3d(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_transfer_host_3d t3d; - - VIRTIO_GPU_FILL_CMD(t3d); - trace_virtio_gpu_cmd_res_xfer_toh_3d(t3d.resource_id); - - virgl_renderer_transfer_write_iov(t3d.resource_id, - t3d.hdr.ctx_id, - t3d.level, - t3d.stride, - t3d.layer_stride, - (struct virgl_box *)&t3d.box, - t3d.offset, NULL, 0); -} - -static void -virgl_cmd_transfer_from_host_3d(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_transfer_host_3d tf3d; - - VIRTIO_GPU_FILL_CMD(tf3d); - trace_virtio_gpu_cmd_res_xfer_fromh_3d(tf3d.resource_id); - - virgl_renderer_transfer_read_iov(tf3d.resource_id, - tf3d.hdr.ctx_id, - tf3d.level, - tf3d.stride, - tf3d.layer_stride, - (struct virgl_box *)&tf3d.box, - tf3d.offset, NULL, 0); -} - - -static void virgl_resource_attach_backing(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) 
-{ - struct virtio_gpu_resource_attach_backing att_rb; - struct iovec *res_iovs; - int ret; - - VIRTIO_GPU_FILL_CMD(att_rb); - trace_virtio_gpu_cmd_res_back_attach(att_rb.resource_id); - - ret = virtio_gpu_create_mapping_iov(g, &att_rb, cmd, NULL, &res_iovs); - if (ret != 0) { - cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; - return; - } - - ret = virgl_renderer_resource_attach_iov(att_rb.resource_id, - res_iovs, att_rb.nr_entries); - - if (ret != 0) - virtio_gpu_cleanup_mapping_iov(g, res_iovs, att_rb.nr_entries); -} - -static void virgl_resource_detach_backing(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_resource_detach_backing detach_rb; - struct iovec *res_iovs = NULL; - int num_iovs = 0; - - VIRTIO_GPU_FILL_CMD(detach_rb); - trace_virtio_gpu_cmd_res_back_detach(detach_rb.resource_id); - - virgl_renderer_resource_detach_iov(detach_rb.resource_id, - &res_iovs, - &num_iovs); - if (res_iovs == NULL || num_iovs == 0) { - return; - } - virtio_gpu_cleanup_mapping_iov(g, res_iovs, num_iovs); -} - - -static void virgl_cmd_ctx_attach_resource(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_ctx_resource att_res; - - VIRTIO_GPU_FILL_CMD(att_res); - trace_virtio_gpu_cmd_ctx_res_attach(att_res.hdr.ctx_id, - att_res.resource_id); - - virgl_renderer_ctx_attach_resource(att_res.hdr.ctx_id, att_res.resource_id); -} - -static void virgl_cmd_ctx_detach_resource(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_ctx_resource det_res; - - VIRTIO_GPU_FILL_CMD(det_res); - trace_virtio_gpu_cmd_ctx_res_detach(det_res.hdr.ctx_id, - det_res.resource_id); - - virgl_renderer_ctx_detach_resource(det_res.hdr.ctx_id, det_res.resource_id); -} - -static void virgl_cmd_get_capset_info(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_get_capset_info info; - struct virtio_gpu_resp_capset_info resp; - - VIRTIO_GPU_FILL_CMD(info); - - memset(&resp, 0, sizeof(resp)); - if (info.capset_index == 0) { - resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL; - virgl_renderer_get_cap_set(resp.capset_id, - &resp.capset_max_version, - &resp.capset_max_size); - } else if (info.capset_index == 1) { - resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL2; - virgl_renderer_get_cap_set(resp.capset_id, - &resp.capset_max_version, - &resp.capset_max_size); - } else { - resp.capset_max_version = 0; - resp.capset_max_size = 0; - } - resp.hdr.type = VIRTIO_GPU_RESP_OK_CAPSET_INFO; - virtio_gpu_ctrl_response(g, cmd, &resp.hdr, sizeof(resp)); -} - -static void virgl_cmd_get_capset(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_get_capset gc; - struct virtio_gpu_resp_capset *resp; - uint32_t max_ver, max_size; - VIRTIO_GPU_FILL_CMD(gc); - - virgl_renderer_get_cap_set(gc.capset_id, &max_ver, - &max_size); - if (!max_size) { - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; - return; - } - - resp = g_malloc0(sizeof(*resp) + max_size); - resp->hdr.type = VIRTIO_GPU_RESP_OK_CAPSET; - virgl_renderer_fill_caps(gc.capset_id, - gc.capset_version, - (void *)resp->capset_data); - virtio_gpu_ctrl_response(g, cmd, &resp->hdr, sizeof(*resp) + max_size); - g_free(resp); -} - -void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - VIRTIO_GPU_FILL_CMD(cmd->cmd_hdr); - - virgl_renderer_force_ctx_0(); - switch (cmd->cmd_hdr.type) { - case VIRTIO_GPU_CMD_CTX_CREATE: - virgl_cmd_context_create(g, cmd); - break; - case VIRTIO_GPU_CMD_CTX_DESTROY: - virgl_cmd_context_destroy(g, cmd); - break; - case 
VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: - virgl_cmd_create_resource_2d(g, cmd); - break; - case VIRTIO_GPU_CMD_RESOURCE_CREATE_3D: - virgl_cmd_create_resource_3d(g, cmd); - break; - case VIRTIO_GPU_CMD_SUBMIT_3D: - virgl_cmd_submit_3d(g, cmd); - break; - case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: - virgl_cmd_transfer_to_host_2d(g, cmd); - break; - case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D: - virgl_cmd_transfer_to_host_3d(g, cmd); - break; - case VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D: - virgl_cmd_transfer_from_host_3d(g, cmd); - break; - case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: - virgl_resource_attach_backing(g, cmd); - break; - case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: - virgl_resource_detach_backing(g, cmd); - break; - case VIRTIO_GPU_CMD_SET_SCANOUT: - virgl_cmd_set_scanout(g, cmd); - break; - case VIRTIO_GPU_CMD_RESOURCE_FLUSH: - virgl_cmd_resource_flush(g, cmd); - break; - case VIRTIO_GPU_CMD_RESOURCE_UNREF: - virgl_cmd_resource_unref(g, cmd); - break; - case VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE: - /* TODO add security */ - virgl_cmd_ctx_attach_resource(g, cmd); - break; - case VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE: - /* TODO add security */ - virgl_cmd_ctx_detach_resource(g, cmd); - break; - case VIRTIO_GPU_CMD_GET_CAPSET_INFO: - virgl_cmd_get_capset_info(g, cmd); - break; - case VIRTIO_GPU_CMD_GET_CAPSET: - virgl_cmd_get_capset(g, cmd); - break; - case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: - virtio_gpu_get_display_info(g, cmd); - break; - case VIRTIO_GPU_CMD_GET_EDID: - virtio_gpu_get_edid(g, cmd); - break; - default: - cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; - break; - } - - if (cmd->finished) { - return; - } - if (cmd->error) { - fprintf(stderr, "%s: ctrl 0x%x, error 0x%x\n", __func__, - cmd->cmd_hdr.type, cmd->error); - virtio_gpu_ctrl_response_nodata(g, cmd, cmd->error); - return; - } - if (!(cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE)) { - virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); - return; - } - - trace_virtio_gpu_fence_ctrl(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); - virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); -} - -static void virgl_write_fence(void *opaque, uint32_t fence) -{ - VirtIOGPU *g = opaque; - struct virtio_gpu_ctrl_command *cmd, *tmp; - - QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { - /* - * the guest can end up emitting fences out of order - * so we should check all fenced cmds not just the first one. 
- */ - if (cmd->cmd_hdr.fence_id > fence) { - continue; - } - trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id); - virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); - QTAILQ_REMOVE(&g->fenceq, cmd, next); - g_free(cmd); - g->inflight--; - if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { - fprintf(stderr, "inflight: %3d (-)\r", g->inflight); - } - } -} - -static virgl_renderer_gl_context -virgl_create_context(void *opaque, int scanout_idx, - struct virgl_renderer_gl_ctx_param *params) -{ - VirtIOGPU *g = opaque; - QEMUGLContext ctx; - QEMUGLParams qparams; - - qparams.major_ver = params->major_ver; - qparams.minor_ver = params->minor_ver; - - ctx = dpy_gl_ctx_create(g->parent_obj.scanout[scanout_idx].con, &qparams); - return (virgl_renderer_gl_context)ctx; -} - -static void virgl_destroy_context(void *opaque, virgl_renderer_gl_context ctx) -{ - VirtIOGPU *g = opaque; - QEMUGLContext qctx = (QEMUGLContext)ctx; - - dpy_gl_ctx_destroy(g->parent_obj.scanout[0].con, qctx); -} - -static int virgl_make_context_current(void *opaque, int scanout_idx, - virgl_renderer_gl_context ctx) -{ - VirtIOGPU *g = opaque; - QEMUGLContext qctx = (QEMUGLContext)ctx; - - return dpy_gl_ctx_make_current(g->parent_obj.scanout[scanout_idx].con, - qctx); -} - -static struct virgl_renderer_callbacks virtio_gpu_3d_cbs = { - .version = 1, - .write_fence = virgl_write_fence, - .create_gl_context = virgl_create_context, - .destroy_gl_context = virgl_destroy_context, - .make_current = virgl_make_context_current, -}; - -static void virtio_gpu_print_stats(void *opaque) -{ - VirtIOGPU *g = opaque; - - if (g->stats.requests) { - fprintf(stderr, "stats: vq req %4d, %3d -- 3D %4d (%5d)\n", - g->stats.requests, - g->stats.max_inflight, - g->stats.req_3d, - g->stats.bytes_3d); - g->stats.requests = 0; - g->stats.max_inflight = 0; - g->stats.req_3d = 0; - g->stats.bytes_3d = 0; - } else { - fprintf(stderr, "stats: idle\r"); - } - timer_mod(g->print_stats, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000); -} - -static void virtio_gpu_fence_poll(void *opaque) -{ - VirtIOGPU *g = opaque; - - virgl_renderer_poll(); - virtio_gpu_process_cmdq(g); - if (!QTAILQ_EMPTY(&g->cmdq) || !QTAILQ_EMPTY(&g->fenceq)) { - timer_mod(g->fence_poll, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 10); - } -} - -void virtio_gpu_virgl_fence_poll(VirtIOGPU *g) -{ - virtio_gpu_fence_poll(g); -} - -void virtio_gpu_virgl_reset(VirtIOGPU *g) -{ - int i; - - virgl_renderer_reset(); - for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { - dpy_gfx_replace_surface(g->parent_obj.scanout[i].con, NULL); - dpy_gl_scanout_disable(g->parent_obj.scanout[i].con); - } -} - -int virtio_gpu_virgl_init(VirtIOGPU *g) -{ - int ret; - - ret = virgl_renderer_init(g, 0, &virtio_gpu_3d_cbs); - if (ret != 0) { - return ret; - } - - g->fence_poll = timer_new_ms(QEMU_CLOCK_VIRTUAL, - virtio_gpu_fence_poll, g); - - if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { - g->print_stats = timer_new_ms(QEMU_CLOCK_VIRTUAL, - virtio_gpu_print_stats, g); - timer_mod(g->print_stats, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000); - } - return 0; -} - -int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g) -{ - uint32_t capset2_max_ver, capset2_max_size; - virgl_renderer_get_cap_set(VIRTIO_GPU_CAPSET_VIRGL2, - &capset2_max_ver, - &capset2_max_size); - - return capset2_max_ver ? 
2 : 1; -} diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 25f8920fdb6..c8da4806e0b 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -25,7 +25,6 @@ virtio_gpu_base_reset(VirtIOGPUBase *g) int i; g->enable = 0; - g->use_virgl_renderer = false; for (i = 0; i < g->conf.max_outputs; i++) { g->scanout[i].resource_id = 0; @@ -162,7 +161,6 @@ virtio_gpu_base_device_realize(DeviceState *qdev, return false; } - g->use_virgl_renderer = false; if (virtio_gpu_virgl_enabled(g->conf)) { error_setg(&g->migration_blocker, "virgl is not yet migratable"); if (migrate_add_blocker(g->migration_blocker, errp) < 0) { @@ -210,6 +208,9 @@ virtio_gpu_base_get_features(VirtIODevice *vdev, uint64_t features, if (virtio_gpu_edid_enabled(g->conf)) { features |= (1 << VIRTIO_GPU_F_EDID); } + if (virtio_gpu_blob_enabled(g->conf)) { + features |= (1 << VIRTIO_GPU_F_RESOURCE_BLOB); + } return features; } @@ -218,10 +219,8 @@ static void virtio_gpu_base_set_features(VirtIODevice *vdev, uint64_t features) { static const uint32_t virgl = (1 << VIRTIO_GPU_F_VIRGL); - VirtIOGPUBase *g = VIRTIO_GPU_BASE(vdev); - g->use_virgl_renderer = ((features & virgl) == virgl); - trace_virtio_gpu_features(g->use_virgl_renderer); + trace_virtio_gpu_features(((features & virgl) == virgl)); } static void @@ -257,6 +256,7 @@ static const TypeInfo virtio_gpu_base_info = { .class_init = virtio_gpu_base_class_init, .abstract = true }; +module_obj(TYPE_VIRTIO_GPU_BASE); static void virtio_register_types(void) diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c new file mode 100644 index 00000000000..6cc4313b1af --- /dev/null +++ b/hw/display/virtio-gpu-gl.c @@ -0,0 +1,171 @@ +/* + * Virtio GPU Device + * + * Copyright Red Hat, Inc. 2013-2014 + * + * Authors: + * Dave Airlie + * Gerd Hoffmann + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
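The virtio-gpu-base.c hunk above starts offering VIRTIO_GPU_F_RESOURCE_BLOB when blob resources are enabled, and drops the cached use_virgl_renderer flag now that the virgl path lives in its own device; set_features merely traces whether the driver accepted VIRGL. For context, a sketch of how such a flag could be re-derived on demand from the negotiated features instead of being cached (illustrative only, not code from this patch):

    #include "qemu/osdep.h"
    #include "hw/virtio/virtio.h"
    #include "hw/virtio/virtio-gpu.h"

    /* Illustrative only: an offered feature bit can always be re-checked
     * against the negotiated feature word rather than stored in device
     * state. */
    static bool gpu_guest_accepted_virgl(VirtIODevice *vdev)
    {
        return virtio_vdev_has_feature(vdev, VIRTIO_GPU_F_VIRGL);
    }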
+ */ + +#include "qemu/osdep.h" +#include "qemu/iov.h" +#include "qemu/module.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "sysemu/sysemu.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-gpu.h" +#include "hw/virtio/virtio-gpu-bswap.h" +#include "hw/virtio/virtio-gpu-pixman.h" +#include "hw/qdev-properties.h" + +#include + +static void virtio_gpu_gl_update_cursor_data(VirtIOGPU *g, + struct virtio_gpu_scanout *s, + uint32_t resource_id) +{ + uint32_t width, height; + uint32_t pixels, *data; + + data = virgl_renderer_get_cursor_data(resource_id, &width, &height); + if (!data) { + return; + } + + if (width != s->current_cursor->width || + height != s->current_cursor->height) { + free(data); + return; + } + + pixels = s->current_cursor->width * s->current_cursor->height; + memcpy(s->current_cursor->data, data, pixels * sizeof(uint32_t)); + free(data); +} + +static void virtio_gpu_gl_flushed(VirtIOGPUBase *b) +{ + VirtIOGPU *g = VIRTIO_GPU(b); + + virtio_gpu_process_cmdq(g); +} + +static void virtio_gpu_gl_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOGPU *g = VIRTIO_GPU(vdev); + VirtIOGPUGL *gl = VIRTIO_GPU_GL(vdev); + struct virtio_gpu_ctrl_command *cmd; + + if (!virtio_queue_ready(vq)) { + return; + } + + if (!gl->renderer_inited) { + virtio_gpu_virgl_init(g); + gl->renderer_inited = true; + } + if (gl->renderer_reset) { + gl->renderer_reset = false; + virtio_gpu_virgl_reset(g); + } + + cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command)); + while (cmd) { + cmd->vq = vq; + cmd->error = 0; + cmd->finished = false; + QTAILQ_INSERT_TAIL(&g->cmdq, cmd, next); + cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command)); + } + + virtio_gpu_process_cmdq(g); + virtio_gpu_virgl_fence_poll(g); +} + +static void virtio_gpu_gl_reset(VirtIODevice *vdev) +{ + VirtIOGPU *g = VIRTIO_GPU(vdev); + VirtIOGPUGL *gl = VIRTIO_GPU_GL(vdev); + + virtio_gpu_reset(vdev); + + /* + * GL functions must be called with the associated GL context in main + * thread, and when the renderer is unblocked. 
+ */ + if (gl->renderer_inited && !gl->renderer_reset) { + virtio_gpu_virgl_reset_scanout(g); + gl->renderer_reset = true; + } +} + +static void virtio_gpu_gl_device_realize(DeviceState *qdev, Error **errp) +{ + VirtIOGPU *g = VIRTIO_GPU(qdev); + +#if defined(HOST_WORDS_BIGENDIAN) + error_setg(errp, "virgl is not supported on bigendian platforms"); + return; +#endif + + if (!object_resolve_path_type("", TYPE_VIRTIO_GPU_GL, NULL)) { + error_setg(errp, "at most one %s device is permitted", TYPE_VIRTIO_GPU_GL); + return; + } + + if (!display_opengl) { + error_setg(errp, "opengl is not available"); + return; + } + + g->parent_obj.conf.flags |= (1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED); + VIRTIO_GPU_BASE(g)->virtio_config.num_capsets = + virtio_gpu_virgl_get_num_capsets(g); + + virtio_gpu_device_realize(qdev, errp); +} + +static Property virtio_gpu_gl_properties[] = { + DEFINE_PROP_BIT("stats", VirtIOGPU, parent_obj.conf.flags, + VIRTIO_GPU_FLAG_STATS_ENABLED, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_gpu_gl_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + VirtIOGPUBaseClass *vbc = VIRTIO_GPU_BASE_CLASS(klass); + VirtIOGPUClass *vgc = VIRTIO_GPU_CLASS(klass); + + vbc->gl_flushed = virtio_gpu_gl_flushed; + vgc->handle_ctrl = virtio_gpu_gl_handle_ctrl; + vgc->process_cmd = virtio_gpu_virgl_process_cmd; + vgc->update_cursor_data = virtio_gpu_gl_update_cursor_data; + + vdc->realize = virtio_gpu_gl_device_realize; + vdc->reset = virtio_gpu_gl_reset; + device_class_set_props(dc, virtio_gpu_gl_properties); +} + +static const TypeInfo virtio_gpu_gl_info = { + .name = TYPE_VIRTIO_GPU_GL, + .parent = TYPE_VIRTIO_GPU, + .instance_size = sizeof(VirtIOGPUGL), + .class_init = virtio_gpu_gl_class_init, +}; +module_obj(TYPE_VIRTIO_GPU_GL); + +static void virtio_register_types(void) +{ + type_register_static(&virtio_gpu_gl_info); +} + +type_init(virtio_register_types) + +module_dep("hw-display-virtio-gpu"); diff --git a/hw/display/virtio-gpu-pci-gl.c b/hw/display/virtio-gpu-pci-gl.c new file mode 100644 index 00000000000..99b14a07185 --- /dev/null +++ b/hw/display/virtio-gpu-pci-gl.c @@ -0,0 +1,58 @@ +/* + * Virtio video device + * + * Copyright Red Hat + * + * Authors: + * Dave Airlie + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-gpu-pci.h" +#include "qom/object.h" + +#define TYPE_VIRTIO_GPU_GL_PCI "virtio-gpu-gl-pci" +typedef struct VirtIOGPUGLPCI VirtIOGPUGLPCI; +DECLARE_INSTANCE_CHECKER(VirtIOGPUGLPCI, VIRTIO_GPU_GL_PCI, + TYPE_VIRTIO_GPU_GL_PCI) + +struct VirtIOGPUGLPCI { + VirtIOGPUPCIBase parent_obj; + VirtIOGPUGL vdev; +}; + +static void virtio_gpu_gl_initfn(Object *obj) +{ + VirtIOGPUGLPCI *dev = VIRTIO_GPU_GL_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_GPU_GL); + VIRTIO_GPU_PCI_BASE(obj)->vgpu = VIRTIO_GPU_BASE(&dev->vdev); +} + +static const VirtioPCIDeviceTypeInfo virtio_gpu_gl_pci_info = { + .generic_name = TYPE_VIRTIO_GPU_GL_PCI, + .parent = TYPE_VIRTIO_GPU_PCI_BASE, + .instance_size = sizeof(VirtIOGPUGLPCI), + .instance_init = virtio_gpu_gl_initfn, +}; +module_obj(TYPE_VIRTIO_GPU_GL_PCI); + +static void virtio_gpu_gl_pci_register_types(void) +{ + virtio_pci_types_register(&virtio_gpu_gl_pci_info); +} + +type_init(virtio_gpu_gl_pci_register_types) + +module_dep("hw-display-virtio-gpu-pci"); diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c index d742a30aecf..e36eee0c409 100644 --- a/hw/display/virtio-gpu-pci.c +++ b/hw/display/virtio-gpu-pci.c @@ -64,6 +64,7 @@ static const TypeInfo virtio_gpu_pci_base_info = { .class_init = virtio_gpu_pci_base_class_init, .abstract = true }; +module_obj(TYPE_VIRTIO_GPU_PCI_BASE); #define TYPE_VIRTIO_GPU_PCI "virtio-gpu-pci" typedef struct VirtIOGPUPCI VirtIOGPUPCI; @@ -90,6 +91,7 @@ static const VirtioPCIDeviceTypeInfo virtio_gpu_pci_info = { .instance_size = sizeof(VirtIOGPUPCI), .instance_init = virtio_gpu_initfn, }; +module_obj(TYPE_VIRTIO_GPU_PCI); static void virtio_gpu_pci_register_types(void) { diff --git a/hw/display/virtio-gpu-udmabuf-stubs.c b/hw/display/virtio-gpu-udmabuf-stubs.c new file mode 100644 index 00000000000..f692e135103 --- /dev/null +++ b/hw/display/virtio-gpu-udmabuf-stubs.c @@ -0,0 +1,28 @@ +#include "qemu/osdep.h" +#include "hw/virtio/virtio-gpu.h" + +bool virtio_gpu_have_udmabuf(void) +{ + /* nothing (stub) */ + return false; +} + +void virtio_gpu_init_udmabuf(struct virtio_gpu_simple_resource *res) +{ + /* nothing (stub) */ +} + +void virtio_gpu_fini_udmabuf(struct virtio_gpu_simple_resource *res) +{ + /* nothing (stub) */ +} + +int virtio_gpu_update_dmabuf(VirtIOGPU *g, + uint32_t scanout_id, + struct virtio_gpu_simple_resource *res, + struct virtio_gpu_framebuffer *fb, + struct virtio_gpu_rect *r) +{ + /* nothing (stub) */ + return 0; +} diff --git a/hw/display/virtio-gpu-udmabuf.c b/hw/display/virtio-gpu-udmabuf.c new file mode 100644 index 00000000000..1597921c51f --- /dev/null +++ b/hw/display/virtio-gpu-udmabuf.c @@ -0,0 +1,230 @@ +/* + * Virtio GPU Device + * + * Copyright Red Hat, Inc. 2013-2014 + * + * Authors: + * Dave Airlie + * Gerd Hoffmann + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
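virtio-gpu-udmabuf-stubs.c above provides no-op fallbacks for hosts without udmabuf support, with virtio_gpu_have_udmabuf() reporting false. Callers are expected to gate blob functionality on that probe; a hypothetical realize-time guard might look like this (sketch only; the helper name is made up and the real check is not shown in this excerpt):

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "hw/virtio/virtio-gpu.h"

    /* Hypothetical guard: refuse blob resources when only the udmabuf
     * stubs are built in, i.e. virtio_gpu_have_udmabuf() returns false. */
    static bool virtio_gpu_check_blob_prereqs(VirtIOGPU *g, Error **errp)
    {
        if (virtio_gpu_blob_enabled(g->parent_obj.conf) &&
            !virtio_gpu_have_udmabuf()) {
            error_setg(errp, "blob resources require host udmabuf support");
            return false;
        }
        return true;
    }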
+ */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu-common.h" +#include "qemu/iov.h" +#include "ui/console.h" +#include "hw/virtio/virtio-gpu.h" +#include "hw/virtio/virtio-gpu-pixman.h" +#include "trace.h" +#include "exec/ramblock.h" +#include "sysemu/hostmem.h" +#include +#include +#include +#include "qemu/memfd.h" +#include "standard-headers/linux/udmabuf.h" + +static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res) +{ + struct udmabuf_create_list *list; + RAMBlock *rb; + ram_addr_t offset; + int udmabuf, i; + + udmabuf = udmabuf_fd(); + if (udmabuf < 0) { + return; + } + + list = g_malloc0(sizeof(struct udmabuf_create_list) + + sizeof(struct udmabuf_create_item) * res->iov_cnt); + + for (i = 0; i < res->iov_cnt; i++) { + rcu_read_lock(); + rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset); + rcu_read_unlock(); + + if (!rb || rb->fd < 0) { + g_free(list); + return; + } + + list->list[i].memfd = rb->fd; + list->list[i].offset = offset; + list->list[i].size = res->iov[i].iov_len; + } + + list->count = res->iov_cnt; + list->flags = UDMABUF_FLAGS_CLOEXEC; + + res->dmabuf_fd = ioctl(udmabuf, UDMABUF_CREATE_LIST, list); + if (res->dmabuf_fd < 0) { + warn_report("%s: UDMABUF_CREATE_LIST: %s", __func__, + strerror(errno)); + } + g_free(list); +} + +static void virtio_gpu_remap_udmabuf(struct virtio_gpu_simple_resource *res) +{ + res->remapped = mmap(NULL, res->blob_size, PROT_READ, + MAP_SHARED, res->dmabuf_fd, 0); + if (res->remapped == MAP_FAILED) { + warn_report("%s: dmabuf mmap failed: %s", __func__, + strerror(errno)); + res->remapped = NULL; + } +} + +static void virtio_gpu_destroy_udmabuf(struct virtio_gpu_simple_resource *res) +{ + if (res->remapped) { + munmap(res->remapped, res->blob_size); + res->remapped = NULL; + } + if (res->dmabuf_fd >= 0) { + close(res->dmabuf_fd); + res->dmabuf_fd = -1; + } +} + +static int find_memory_backend_type(Object *obj, void *opaque) +{ + bool *memfd_backend = opaque; + int ret; + + if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + RAMBlock *rb = backend->mr.ram_block; + + if (rb && rb->fd > 0) { + ret = fcntl(rb->fd, F_GET_SEALS); + if (ret > 0) { + *memfd_backend = true; + } + } + } + + return 0; +} + +bool virtio_gpu_have_udmabuf(void) +{ + Object *memdev_root; + int udmabuf; + bool memfd_backend = false; + + udmabuf = udmabuf_fd(); + if (udmabuf < 0) { + return false; + } + + memdev_root = object_resolve_path("/objects", NULL); + object_child_foreach(memdev_root, find_memory_backend_type, &memfd_backend); + + return memfd_backend; +} + +void virtio_gpu_init_udmabuf(struct virtio_gpu_simple_resource *res) +{ + void *pdata = NULL; + + res->dmabuf_fd = -1; + if (res->iov_cnt == 1) { + pdata = res->iov[0].iov_base; + } else { + virtio_gpu_create_udmabuf(res); + if (res->dmabuf_fd < 0) { + return; + } + virtio_gpu_remap_udmabuf(res); + if (!res->remapped) { + return; + } + pdata = res->remapped; + } + + res->blob = pdata; +} + +void virtio_gpu_fini_udmabuf(struct virtio_gpu_simple_resource *res) +{ + if (res->remapped) { + virtio_gpu_destroy_udmabuf(res); + } +} + +static void virtio_gpu_free_dmabuf(VirtIOGPU *g, VGPUDMABuf *dmabuf) +{ + struct virtio_gpu_scanout *scanout; + + scanout = &g->parent_obj.scanout[dmabuf->scanout_id]; + dpy_gl_release_dmabuf(scanout->con, &dmabuf->buf); + QTAILQ_REMOVE(&g->dmabuf.bufs, dmabuf, next); + g_free(dmabuf); +} + +static VGPUDMABuf +*virtio_gpu_create_dmabuf(VirtIOGPU *g, + uint32_t scanout_id, + 
struct virtio_gpu_simple_resource *res, + struct virtio_gpu_framebuffer *fb, + struct virtio_gpu_rect *r) +{ + VGPUDMABuf *dmabuf; + + if (res->dmabuf_fd < 0) { + return NULL; + } + + dmabuf = g_new0(VGPUDMABuf, 1); + dmabuf->buf.width = fb->width; + dmabuf->buf.height = fb->height; + dmabuf->buf.stride = fb->stride; + dmabuf->buf.x = r->x; + dmabuf->buf.y = r->y; + dmabuf->buf.scanout_width = r->width; + dmabuf->buf.scanout_height = r->height; + dmabuf->buf.fourcc = qemu_pixman_to_drm_format(fb->format); + dmabuf->buf.fd = res->dmabuf_fd; + dmabuf->buf.allow_fences = true; + dmabuf->buf.draw_submitted = false; + dmabuf->scanout_id = scanout_id; + QTAILQ_INSERT_HEAD(&g->dmabuf.bufs, dmabuf, next); + + return dmabuf; +} + +int virtio_gpu_update_dmabuf(VirtIOGPU *g, + uint32_t scanout_id, + struct virtio_gpu_simple_resource *res, + struct virtio_gpu_framebuffer *fb, + struct virtio_gpu_rect *r) +{ + struct virtio_gpu_scanout *scanout = &g->parent_obj.scanout[scanout_id]; + VGPUDMABuf *new_primary, *old_primary = NULL; + + new_primary = virtio_gpu_create_dmabuf(g, scanout_id, res, fb, r); + if (!new_primary) { + return -EINVAL; + } + + if (g->dmabuf.primary[scanout_id]) { + old_primary = g->dmabuf.primary[scanout_id]; + } + + g->dmabuf.primary[scanout_id] = new_primary; + qemu_console_resize(scanout->con, + new_primary->buf.scanout_width, + new_primary->buf.scanout_height); + dpy_gl_scanout_dmabuf(scanout->con, &new_primary->buf); + + if (old_primary) { + virtio_gpu_free_dmabuf(g, old_primary); + } + + return 0; +} diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c new file mode 100644 index 00000000000..18d054922fe --- /dev/null +++ b/hw/display/virtio-gpu-virgl.c @@ -0,0 +1,634 @@ +/* + * Virtio GPU Device + * + * Copyright Red Hat, Inc. 2013-2014 + * + * Authors: + * Dave Airlie + * Gerd Hoffmann + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
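virtio_gpu_create_udmabuf() above packs every guest-memory iovec of a blob resource into a udmabuf_create_list and asks the host kernel's /dev/udmabuf driver to wrap those memfd-backed pages in a dma-buf, which the device can then mmap (virtio_gpu_remap_udmabuf()) or hand to the display as a scanout. A stripped-down, single-entry version of the same kernel interface, as a standalone sketch rather than QEMU code (udmabuf requires a memfd sealed with F_SEAL_SHRINK and page-aligned offset/size):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>
    #include <linux/udmabuf.h>

    int main(void)
    {
        size_t size = 2 * 1024 * 1024;            /* must be page aligned */
        int memfd = memfd_create("guest-ram", MFD_ALLOW_SEALING);
        int devfd, buffd;

        if (memfd < 0) {
            perror("memfd_create");
            return 1;
        }
        ftruncate(memfd, size);
        fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK); /* required by udmabuf */

        devfd = open("/dev/udmabuf", O_RDWR);
        if (devfd < 0) {
            perror("/dev/udmabuf");
            return 1;
        }

        struct udmabuf_create create = {
            .memfd  = memfd,
            .flags  = UDMABUF_FLAGS_CLOEXEC,
            .offset = 0,
            .size   = size,
        };
        buffd = ioctl(devfd, UDMABUF_CREATE, &create); /* returns a dma-buf fd */
        if (buffd < 0) {
            perror("UDMABUF_CREATE");
            return 1;
        }

        /* the dma-buf can now be mmap'ed, as virtio_gpu_remap_udmabuf() does */
        void *p = mmap(NULL, size, PROT_READ, MAP_SHARED, buffd, 0);
        if (p != MAP_FAILED) {
            munmap(p, size);
        }
        close(buffd);
        close(devfd);
        close(memfd);
        return 0;
    }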
+ */ + +#include "qemu/osdep.h" +#include "qemu/iov.h" +#include "trace.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-gpu.h" + +#include + +static struct virgl_renderer_callbacks virtio_gpu_3d_cbs; + +static void virgl_cmd_create_resource_2d(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_create_2d c2d; + struct virgl_renderer_resource_create_args args; + + VIRTIO_GPU_FILL_CMD(c2d); + trace_virtio_gpu_cmd_res_create_2d(c2d.resource_id, c2d.format, + c2d.width, c2d.height); + + args.handle = c2d.resource_id; + args.target = 2; + args.format = c2d.format; + args.bind = (1 << 1); + args.width = c2d.width; + args.height = c2d.height; + args.depth = 1; + args.array_size = 1; + args.last_level = 0; + args.nr_samples = 0; + args.flags = VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP; + virgl_renderer_resource_create(&args, NULL, 0); +} + +static void virgl_cmd_create_resource_3d(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_create_3d c3d; + struct virgl_renderer_resource_create_args args; + + VIRTIO_GPU_FILL_CMD(c3d); + trace_virtio_gpu_cmd_res_create_3d(c3d.resource_id, c3d.format, + c3d.width, c3d.height, c3d.depth); + + args.handle = c3d.resource_id; + args.target = c3d.target; + args.format = c3d.format; + args.bind = c3d.bind; + args.width = c3d.width; + args.height = c3d.height; + args.depth = c3d.depth; + args.array_size = c3d.array_size; + args.last_level = c3d.last_level; + args.nr_samples = c3d.nr_samples; + args.flags = c3d.flags; + virgl_renderer_resource_create(&args, NULL, 0); +} + +static void virgl_cmd_resource_unref(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_unref unref; + struct iovec *res_iovs = NULL; + int num_iovs = 0; + + VIRTIO_GPU_FILL_CMD(unref); + trace_virtio_gpu_cmd_res_unref(unref.resource_id); + + virgl_renderer_resource_detach_iov(unref.resource_id, + &res_iovs, + &num_iovs); + if (res_iovs != NULL && num_iovs != 0) { + virtio_gpu_cleanup_mapping_iov(g, res_iovs, num_iovs); + } + virgl_renderer_resource_unref(unref.resource_id); +} + +static void virgl_cmd_context_create(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_ctx_create cc; + + VIRTIO_GPU_FILL_CMD(cc); + trace_virtio_gpu_cmd_ctx_create(cc.hdr.ctx_id, + cc.debug_name); + + virgl_renderer_context_create(cc.hdr.ctx_id, cc.nlen, + cc.debug_name); +} + +static void virgl_cmd_context_destroy(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_ctx_destroy cd; + + VIRTIO_GPU_FILL_CMD(cd); + trace_virtio_gpu_cmd_ctx_destroy(cd.hdr.ctx_id); + + virgl_renderer_context_destroy(cd.hdr.ctx_id); +} + +static void virtio_gpu_rect_update(VirtIOGPU *g, int idx, int x, int y, + int width, int height) +{ + if (!g->parent_obj.scanout[idx].con) { + return; + } + + dpy_gl_update(g->parent_obj.scanout[idx].con, x, y, width, height); +} + +static void virgl_cmd_resource_flush(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_flush rf; + int i; + + VIRTIO_GPU_FILL_CMD(rf); + trace_virtio_gpu_cmd_res_flush(rf.resource_id, + rf.r.width, rf.r.height, rf.r.x, rf.r.y); + + for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { + if (g->parent_obj.scanout[i].resource_id != rf.resource_id) { + continue; + } + virtio_gpu_rect_update(g, i, rf.r.x, rf.r.y, rf.r.width, rf.r.height); + } +} + +static void virgl_cmd_set_scanout(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_set_scanout ss; + struct 
virgl_renderer_resource_info info; + int ret; + + VIRTIO_GPU_FILL_CMD(ss); + trace_virtio_gpu_cmd_set_scanout(ss.scanout_id, ss.resource_id, + ss.r.width, ss.r.height, ss.r.x, ss.r.y); + + if (ss.scanout_id >= g->parent_obj.conf.max_outputs) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout id specified %d", + __func__, ss.scanout_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; + return; + } + g->parent_obj.enable = 1; + + memset(&info, 0, sizeof(info)); + + if (ss.resource_id && ss.r.width && ss.r.height) { + ret = virgl_renderer_resource_get_info(ss.resource_id, &info); + if (ret == -1) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: illegal resource specified %d\n", + __func__, ss.resource_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + qemu_console_resize(g->parent_obj.scanout[ss.scanout_id].con, + ss.r.width, ss.r.height); + virgl_renderer_force_ctx_0(); + dpy_gl_scanout_texture( + g->parent_obj.scanout[ss.scanout_id].con, info.tex_id, + info.flags & 1 /* FIXME: Y_0_TOP */, + info.width, info.height, + ss.r.x, ss.r.y, ss.r.width, ss.r.height); + } else { + dpy_gfx_replace_surface( + g->parent_obj.scanout[ss.scanout_id].con, NULL); + dpy_gl_scanout_disable(g->parent_obj.scanout[ss.scanout_id].con); + } + g->parent_obj.scanout[ss.scanout_id].resource_id = ss.resource_id; +} + +static void virgl_cmd_submit_3d(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_cmd_submit cs; + void *buf; + size_t s; + + VIRTIO_GPU_FILL_CMD(cs); + trace_virtio_gpu_cmd_ctx_submit(cs.hdr.ctx_id, cs.size); + + buf = g_malloc(cs.size); + s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, + sizeof(cs), buf, cs.size); + if (s != cs.size) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: size mismatch (%zd/%d)", + __func__, s, cs.size); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + goto out; + } + + if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { + g->stats.req_3d++; + g->stats.bytes_3d += cs.size; + } + + virgl_renderer_submit_cmd(buf, cs.hdr.ctx_id, cs.size / 4); + +out: + g_free(buf); +} + +static void virgl_cmd_transfer_to_host_2d(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_transfer_to_host_2d t2d; + struct virtio_gpu_box box; + + VIRTIO_GPU_FILL_CMD(t2d); + trace_virtio_gpu_cmd_res_xfer_toh_2d(t2d.resource_id); + + box.x = t2d.r.x; + box.y = t2d.r.y; + box.z = 0; + box.w = t2d.r.width; + box.h = t2d.r.height; + box.d = 1; + + virgl_renderer_transfer_write_iov(t2d.resource_id, + 0, + 0, + 0, + 0, + (struct virgl_box *)&box, + t2d.offset, NULL, 0); +} + +static void virgl_cmd_transfer_to_host_3d(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_transfer_host_3d t3d; + + VIRTIO_GPU_FILL_CMD(t3d); + trace_virtio_gpu_cmd_res_xfer_toh_3d(t3d.resource_id); + + virgl_renderer_transfer_write_iov(t3d.resource_id, + t3d.hdr.ctx_id, + t3d.level, + t3d.stride, + t3d.layer_stride, + (struct virgl_box *)&t3d.box, + t3d.offset, NULL, 0); +} + +static void +virgl_cmd_transfer_from_host_3d(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_transfer_host_3d tf3d; + + VIRTIO_GPU_FILL_CMD(tf3d); + trace_virtio_gpu_cmd_res_xfer_fromh_3d(tf3d.resource_id); + + virgl_renderer_transfer_read_iov(tf3d.resource_id, + tf3d.hdr.ctx_id, + tf3d.level, + tf3d.stride, + tf3d.layer_stride, + (struct virgl_box *)&tf3d.box, + tf3d.offset, NULL, 0); +} + + +static void virgl_resource_attach_backing(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct 
virtio_gpu_resource_attach_backing att_rb; + struct iovec *res_iovs; + uint32_t res_niov; + int ret; + + VIRTIO_GPU_FILL_CMD(att_rb); + trace_virtio_gpu_cmd_res_back_attach(att_rb.resource_id); + + ret = virtio_gpu_create_mapping_iov(g, att_rb.nr_entries, sizeof(att_rb), + cmd, NULL, &res_iovs, &res_niov); + if (ret != 0) { + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; + return; + } + + ret = virgl_renderer_resource_attach_iov(att_rb.resource_id, + res_iovs, res_niov); + + if (ret != 0) + virtio_gpu_cleanup_mapping_iov(g, res_iovs, res_niov); +} + +static void virgl_resource_detach_backing(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_detach_backing detach_rb; + struct iovec *res_iovs = NULL; + int num_iovs = 0; + + VIRTIO_GPU_FILL_CMD(detach_rb); + trace_virtio_gpu_cmd_res_back_detach(detach_rb.resource_id); + + virgl_renderer_resource_detach_iov(detach_rb.resource_id, + &res_iovs, + &num_iovs); + if (res_iovs == NULL || num_iovs == 0) { + return; + } + virtio_gpu_cleanup_mapping_iov(g, res_iovs, num_iovs); +} + + +static void virgl_cmd_ctx_attach_resource(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_ctx_resource att_res; + + VIRTIO_GPU_FILL_CMD(att_res); + trace_virtio_gpu_cmd_ctx_res_attach(att_res.hdr.ctx_id, + att_res.resource_id); + + virgl_renderer_ctx_attach_resource(att_res.hdr.ctx_id, att_res.resource_id); +} + +static void virgl_cmd_ctx_detach_resource(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_ctx_resource det_res; + + VIRTIO_GPU_FILL_CMD(det_res); + trace_virtio_gpu_cmd_ctx_res_detach(det_res.hdr.ctx_id, + det_res.resource_id); + + virgl_renderer_ctx_detach_resource(det_res.hdr.ctx_id, det_res.resource_id); +} + +static void virgl_cmd_get_capset_info(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_get_capset_info info; + struct virtio_gpu_resp_capset_info resp; + + VIRTIO_GPU_FILL_CMD(info); + + memset(&resp, 0, sizeof(resp)); + if (info.capset_index == 0) { + resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL; + virgl_renderer_get_cap_set(resp.capset_id, + &resp.capset_max_version, + &resp.capset_max_size); + } else if (info.capset_index == 1) { + resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL2; + virgl_renderer_get_cap_set(resp.capset_id, + &resp.capset_max_version, + &resp.capset_max_size); + } else { + resp.capset_max_version = 0; + resp.capset_max_size = 0; + } + resp.hdr.type = VIRTIO_GPU_RESP_OK_CAPSET_INFO; + virtio_gpu_ctrl_response(g, cmd, &resp.hdr, sizeof(resp)); +} + +static void virgl_cmd_get_capset(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_get_capset gc; + struct virtio_gpu_resp_capset *resp; + uint32_t max_ver, max_size; + VIRTIO_GPU_FILL_CMD(gc); + + virgl_renderer_get_cap_set(gc.capset_id, &max_ver, + &max_size); + if (!max_size) { + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } + + resp = g_malloc0(sizeof(*resp) + max_size); + resp->hdr.type = VIRTIO_GPU_RESP_OK_CAPSET; + virgl_renderer_fill_caps(gc.capset_id, + gc.capset_version, + (void *)resp->capset_data); + virtio_gpu_ctrl_response(g, cmd, &resp->hdr, sizeof(*resp) + max_size); + g_free(resp); +} + +void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + VIRTIO_GPU_FILL_CMD(cmd->cmd_hdr); + + virgl_renderer_force_ctx_0(); + switch (cmd->cmd_hdr.type) { + case VIRTIO_GPU_CMD_CTX_CREATE: + virgl_cmd_context_create(g, cmd); + break; + case VIRTIO_GPU_CMD_CTX_DESTROY: + virgl_cmd_context_destroy(g, cmd); + 
break; + case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: + virgl_cmd_create_resource_2d(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_CREATE_3D: + virgl_cmd_create_resource_3d(g, cmd); + break; + case VIRTIO_GPU_CMD_SUBMIT_3D: + virgl_cmd_submit_3d(g, cmd); + break; + case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: + virgl_cmd_transfer_to_host_2d(g, cmd); + break; + case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D: + virgl_cmd_transfer_to_host_3d(g, cmd); + break; + case VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D: + virgl_cmd_transfer_from_host_3d(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: + virgl_resource_attach_backing(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: + virgl_resource_detach_backing(g, cmd); + break; + case VIRTIO_GPU_CMD_SET_SCANOUT: + virgl_cmd_set_scanout(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_FLUSH: + virgl_cmd_resource_flush(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_UNREF: + virgl_cmd_resource_unref(g, cmd); + break; + case VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE: + /* TODO add security */ + virgl_cmd_ctx_attach_resource(g, cmd); + break; + case VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE: + /* TODO add security */ + virgl_cmd_ctx_detach_resource(g, cmd); + break; + case VIRTIO_GPU_CMD_GET_CAPSET_INFO: + virgl_cmd_get_capset_info(g, cmd); + break; + case VIRTIO_GPU_CMD_GET_CAPSET: + virgl_cmd_get_capset(g, cmd); + break; + case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: + virtio_gpu_get_display_info(g, cmd); + break; + case VIRTIO_GPU_CMD_GET_EDID: + virtio_gpu_get_edid(g, cmd); + break; + default: + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; + break; + } + + if (cmd->finished) { + return; + } + if (cmd->error) { + fprintf(stderr, "%s: ctrl 0x%x, error 0x%x\n", __func__, + cmd->cmd_hdr.type, cmd->error); + virtio_gpu_ctrl_response_nodata(g, cmd, cmd->error); + return; + } + if (!(cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE)) { + virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); + return; + } + + trace_virtio_gpu_fence_ctrl(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); + virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); +} + +static void virgl_write_fence(void *opaque, uint32_t fence) +{ + VirtIOGPU *g = opaque; + struct virtio_gpu_ctrl_command *cmd, *tmp; + + QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { + /* + * the guest can end up emitting fences out of order + * so we should check all fenced cmds not just the first one. 
+ */ + if (cmd->cmd_hdr.fence_id > fence) { + continue; + } + trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id); + virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); + QTAILQ_REMOVE(&g->fenceq, cmd, next); + g_free(cmd); + g->inflight--; + if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { + fprintf(stderr, "inflight: %3d (-)\r", g->inflight); + } + } +} + +static virgl_renderer_gl_context +virgl_create_context(void *opaque, int scanout_idx, + struct virgl_renderer_gl_ctx_param *params) +{ + VirtIOGPU *g = opaque; + QEMUGLContext ctx; + QEMUGLParams qparams; + + qparams.major_ver = params->major_ver; + qparams.minor_ver = params->minor_ver; + + ctx = dpy_gl_ctx_create(g->parent_obj.scanout[scanout_idx].con, &qparams); + return (virgl_renderer_gl_context)ctx; +} + +static void virgl_destroy_context(void *opaque, virgl_renderer_gl_context ctx) +{ + VirtIOGPU *g = opaque; + QEMUGLContext qctx = (QEMUGLContext)ctx; + + dpy_gl_ctx_destroy(g->parent_obj.scanout[0].con, qctx); +} + +static int virgl_make_context_current(void *opaque, int scanout_idx, + virgl_renderer_gl_context ctx) +{ + VirtIOGPU *g = opaque; + QEMUGLContext qctx = (QEMUGLContext)ctx; + + return dpy_gl_ctx_make_current(g->parent_obj.scanout[scanout_idx].con, + qctx); +} + +static struct virgl_renderer_callbacks virtio_gpu_3d_cbs = { + .version = 1, + .write_fence = virgl_write_fence, + .create_gl_context = virgl_create_context, + .destroy_gl_context = virgl_destroy_context, + .make_current = virgl_make_context_current, +}; + +static void virtio_gpu_print_stats(void *opaque) +{ + VirtIOGPU *g = opaque; + + if (g->stats.requests) { + fprintf(stderr, "stats: vq req %4d, %3d -- 3D %4d (%5d)\n", + g->stats.requests, + g->stats.max_inflight, + g->stats.req_3d, + g->stats.bytes_3d); + g->stats.requests = 0; + g->stats.max_inflight = 0; + g->stats.req_3d = 0; + g->stats.bytes_3d = 0; + } else { + fprintf(stderr, "stats: idle\r"); + } + timer_mod(g->print_stats, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000); +} + +static void virtio_gpu_fence_poll(void *opaque) +{ + VirtIOGPU *g = opaque; + + virgl_renderer_poll(); + virtio_gpu_process_cmdq(g); + if (!QTAILQ_EMPTY(&g->cmdq) || !QTAILQ_EMPTY(&g->fenceq)) { + timer_mod(g->fence_poll, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 10); + } +} + +void virtio_gpu_virgl_fence_poll(VirtIOGPU *g) +{ + virtio_gpu_fence_poll(g); +} + +void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g) +{ + int i; + + for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { + dpy_gfx_replace_surface(g->parent_obj.scanout[i].con, NULL); + dpy_gl_scanout_disable(g->parent_obj.scanout[i].con); + } +} + +void virtio_gpu_virgl_reset(VirtIOGPU *g) +{ + virgl_renderer_reset(); +} + +int virtio_gpu_virgl_init(VirtIOGPU *g) +{ + int ret; + + ret = virgl_renderer_init(g, 0, &virtio_gpu_3d_cbs); + if (ret != 0) { + return ret; + } + + g->fence_poll = timer_new_ms(QEMU_CLOCK_VIRTUAL, + virtio_gpu_fence_poll, g); + + if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { + g->print_stats = timer_new_ms(QEMU_CLOCK_VIRTUAL, + virtio_gpu_print_stats, g); + timer_mod(g->print_stats, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000); + } + return 0; +} + +int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g) +{ + uint32_t capset2_max_ver, capset2_max_size; + virgl_renderer_get_cap_set(VIRTIO_GPU_CAPSET_VIRGL2, + &capset2_max_ver, + &capset2_max_size); + + return capset2_max_ver ? 
2 : 1; +} diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index c9f5e36fd07..d78b9700c7d 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -35,80 +35,51 @@ static struct virtio_gpu_simple_resource* virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id); +static struct virtio_gpu_simple_resource * +virtio_gpu_find_check_resource(VirtIOGPU *g, uint32_t resource_id, + bool require_backing, + const char *caller, uint32_t *error); static void virtio_gpu_cleanup_mapping(VirtIOGPU *g, struct virtio_gpu_simple_resource *res); -#ifdef CONFIG_VIRGL -#include -#define VIRGL(_g, _virgl, _simple, ...) \ - do { \ - if (_g->parent_obj.use_virgl_renderer) { \ - _virgl(__VA_ARGS__); \ - } else { \ - _simple(__VA_ARGS__); \ - } \ - } while (0) -#else -#define VIRGL(_g, _virgl, _simple, ...) \ - do { \ - _simple(__VA_ARGS__); \ - } while (0) -#endif - -static void update_cursor_data_simple(VirtIOGPU *g, - struct virtio_gpu_scanout *s, - uint32_t resource_id) +void virtio_gpu_update_cursor_data(VirtIOGPU *g, + struct virtio_gpu_scanout *s, + uint32_t resource_id) { struct virtio_gpu_simple_resource *res; uint32_t pixels; + void *data; - res = virtio_gpu_find_resource(g, resource_id); + res = virtio_gpu_find_check_resource(g, resource_id, false, + __func__, NULL); if (!res) { return; } - if (pixman_image_get_width(res->image) != s->current_cursor->width || - pixman_image_get_height(res->image) != s->current_cursor->height) { - return; + if (res->blob_size) { + if (res->blob_size < (s->current_cursor->width * + s->current_cursor->height * 4)) { + return; + } + data = res->blob; + } else { + if (pixman_image_get_width(res->image) != s->current_cursor->width || + pixman_image_get_height(res->image) != s->current_cursor->height) { + return; + } + data = pixman_image_get_data(res->image); } pixels = s->current_cursor->width * s->current_cursor->height; - memcpy(s->current_cursor->data, - pixman_image_get_data(res->image), + memcpy(s->current_cursor->data, data, pixels * sizeof(uint32_t)); } -#ifdef CONFIG_VIRGL - -static void update_cursor_data_virgl(VirtIOGPU *g, - struct virtio_gpu_scanout *s, - uint32_t resource_id) -{ - uint32_t width, height; - uint32_t pixels, *data; - - data = virgl_renderer_get_cursor_data(resource_id, &width, &height); - if (!data) { - return; - } - - if (width != s->current_cursor->width || - height != s->current_cursor->height) { - free(data); - return; - } - - pixels = s->current_cursor->width * s->current_cursor->height; - memcpy(s->current_cursor->data, data, pixels * sizeof(uint32_t)); - free(data); -} - -#endif - static void update_cursor(VirtIOGPU *g, struct virtio_gpu_update_cursor *cursor) { struct virtio_gpu_scanout *s; + VirtIOGPUClass *vgc = VIRTIO_GPU_GET_CLASS(g); bool move = cursor->hdr.type == VIRTIO_GPU_CMD_MOVE_CURSOR; if (cursor->pos.scanout_id >= g->parent_obj.conf.max_outputs) { @@ -131,8 +102,7 @@ static void update_cursor(VirtIOGPU *g, struct virtio_gpu_update_cursor *cursor) s->current_cursor->hot_y = cursor->hot_y; if (cursor->resource_id > 0) { - VIRGL(g, update_cursor_data_virgl, update_cursor_data_simple, - g, s, cursor->resource_id); + vgc->update_cursor_data(g, s, cursor->resource_id); } dpy_cursor_define(s->con, s->current_cursor); @@ -158,6 +128,37 @@ virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id) return NULL; } +static struct virtio_gpu_simple_resource * +virtio_gpu_find_check_resource(VirtIOGPU *g, uint32_t resource_id, + bool require_backing, + const char *caller, uint32_t *error) +{ + struct 
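The virtio-gpu.c hunk above drops the CONFIG_VIRGL-guarded VIRGL(_g, _virgl, _simple, ...) macro: instead of branching at runtime on use_virgl_renderer, cursor updates (and, further down, command processing) go through VirtIOGPUClass methods that the GL device overrides. A generic, self-contained illustration of that design move, using plain function pointers rather than the real QOM types:

    #include <stdio.h>

    struct gpu;

    /* each device class carries its own hook, overridden by the GL variant */
    struct gpu_class {
        void (*update_cursor_data)(struct gpu *g, unsigned resource_id);
    };

    struct gpu {
        const struct gpu_class *klass;
    };

    static void update_cursor_2d(struct gpu *g, unsigned id)
    {
        (void)g;
        printf("2D cursor path for resource %u\n", id);
    }

    static void update_cursor_virgl(struct gpu *g, unsigned id)
    {
        (void)g;
        printf("virgl cursor path for resource %u\n", id);
    }

    static const struct gpu_class gpu_2d = { .update_cursor_data = update_cursor_2d };
    static const struct gpu_class gpu_gl = { .update_cursor_data = update_cursor_virgl };

    int main(void)
    {
        struct gpu a = { .klass = &gpu_2d }, b = { .klass = &gpu_gl };
        a.klass->update_cursor_data(&a, 1);   /* dispatch without #ifdef or macro */
        b.klass->update_cursor_data(&b, 2);
        return 0;
    }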
virtio_gpu_simple_resource *res; + + res = virtio_gpu_find_resource(g, resource_id); + if (!res) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid resource specified %d\n", + caller, resource_id); + if (error) { + *error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + } + return NULL; + } + + if (require_backing) { + if (!res->iov || (!res->image && !res->blob)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: no backing storage %d\n", + caller, resource_id); + if (error) { + *error = VIRTIO_GPU_RESP_ERR_UNSPEC; + } + return NULL; + } + } + + return res; +} + void virtio_gpu_ctrl_response(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd, struct virtio_gpu_ctrl_hdr *resp, @@ -321,6 +322,56 @@ static void virtio_gpu_resource_create_2d(VirtIOGPU *g, g->hostmem += res->hostmem; } +static void virtio_gpu_resource_create_blob(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_simple_resource *res; + struct virtio_gpu_resource_create_blob cblob; + int ret; + + VIRTIO_GPU_FILL_CMD(cblob); + virtio_gpu_create_blob_bswap(&cblob); + trace_virtio_gpu_cmd_res_create_blob(cblob.resource_id, cblob.size); + + if (cblob.resource_id == 0) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: resource id 0 is not allowed\n", + __func__); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + + if (cblob.blob_mem != VIRTIO_GPU_BLOB_MEM_GUEST && + cblob.blob_flags != VIRTIO_GPU_BLOB_FLAG_USE_SHAREABLE) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid memory type\n", + __func__); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } + + if (virtio_gpu_find_resource(g, cblob.resource_id)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: resource already exists %d\n", + __func__, cblob.resource_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + + res = g_new0(struct virtio_gpu_simple_resource, 1); + res->resource_id = cblob.resource_id; + res->blob_size = cblob.size; + + ret = virtio_gpu_create_mapping_iov(g, cblob.nr_entries, sizeof(cblob), + cmd, &res->addrs, &res->iov, + &res->iov_cnt); + if (ret != 0) { + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; + g_free(res); + return; + } + + virtio_gpu_init_udmabuf(res); + QTAILQ_INSERT_HEAD(&g->reslist, res, next); +} + static void virtio_gpu_disable_scanout(VirtIOGPU *g, int scanout_id) { struct virtio_gpu_scanout *scanout = &g->parent_obj.scanout[scanout_id]; @@ -355,7 +406,7 @@ static void virtio_gpu_resource_destroy(VirtIOGPU *g, } } - pixman_image_unref(res->image); + qemu_pixman_image_unref(res->image); virtio_gpu_cleanup_mapping(g, res); QTAILQ_REMOVE(&g->reslist, res, next); g->hostmem -= res->hostmem; @@ -396,11 +447,9 @@ static void virtio_gpu_transfer_to_host_2d(VirtIOGPU *g, virtio_gpu_t2d_bswap(&t2d); trace_virtio_gpu_cmd_res_xfer_toh_2d(t2d.resource_id); - res = virtio_gpu_find_resource(g, t2d.resource_id); - if (!res || !res->iov) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n", - __func__, t2d.resource_id); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + res = virtio_gpu_find_check_resource(g, t2d.resource_id, true, + __func__, &cmd->error); + if (!res || res->blob) { return; } @@ -446,6 +495,7 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g, { struct virtio_gpu_simple_resource *res; struct virtio_gpu_resource_flush rf; + struct virtio_gpu_scanout *scanout; pixman_region16_t flush_region; int i; @@ -454,20 +504,31 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g, trace_virtio_gpu_cmd_res_flush(rf.resource_id, rf.r.width, rf.r.height, rf.r.x, rf.r.y); - res = 
virtio_gpu_find_resource(g, rf.resource_id); + res = virtio_gpu_find_check_resource(g, rf.resource_id, false, + __func__, &cmd->error); if (!res) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n", - __func__, rf.resource_id); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; return; } - if (rf.r.x > res->width || + if (res->blob) { + for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { + scanout = &g->parent_obj.scanout[i]; + if (scanout->resource_id == res->resource_id && + console_has_gl(scanout->con)) { + dpy_gl_update(scanout->con, 0, 0, scanout->width, + scanout->height); + } + } + return; + } + + if (!res->blob && + (rf.r.x > res->width || rf.r.y > res->height || rf.r.width > res->width || rf.r.height > res->height || rf.r.x + rf.r.width > res->width || - rf.r.y + rf.r.height > res->height) { + rf.r.y + rf.r.height > res->height)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: flush bounds outside resource" " bounds for resource %d: %d %d %d %d vs %d %d\n", __func__, rf.resource_id, rf.r.x, rf.r.y, @@ -479,7 +540,6 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g, pixman_region_init_rect(&flush_region, rf.r.x, rf.r.y, rf.r.width, rf.r.height); for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { - struct virtio_gpu_scanout *scanout; pixman_region16_t region, finalregion; pixman_box16_t *extents; @@ -512,14 +572,109 @@ static void virtio_unref_resource(pixman_image_t *image, void *data) pixman_image_unref(data); } +static void virtio_gpu_update_scanout(VirtIOGPU *g, + uint32_t scanout_id, + struct virtio_gpu_simple_resource *res, + struct virtio_gpu_rect *r) +{ + struct virtio_gpu_simple_resource *ores; + struct virtio_gpu_scanout *scanout; + + scanout = &g->parent_obj.scanout[scanout_id]; + ores = virtio_gpu_find_resource(g, scanout->resource_id); + if (ores) { + ores->scanout_bitmask &= ~(1 << scanout_id); + } + + res->scanout_bitmask |= (1 << scanout_id); + scanout->resource_id = res->resource_id; + scanout->x = r->x; + scanout->y = r->y; + scanout->width = r->width; + scanout->height = r->height; +} + +static void virtio_gpu_do_set_scanout(VirtIOGPU *g, + uint32_t scanout_id, + struct virtio_gpu_framebuffer *fb, + struct virtio_gpu_simple_resource *res, + struct virtio_gpu_rect *r, + uint32_t *error) +{ + struct virtio_gpu_scanout *scanout; + uint8_t *data; + + scanout = &g->parent_obj.scanout[scanout_id]; + + if (r->x > fb->width || + r->y > fb->height || + r->width < 16 || + r->height < 16 || + r->width > fb->width || + r->height > fb->height || + r->x + r->width > fb->width || + r->y + r->height > fb->height) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout %d bounds for" + " resource %d, rect (%d,%d)+%d,%d, fb %d %d\n", + __func__, scanout_id, res->resource_id, + r->x, r->y, r->width, r->height, + fb->width, fb->height); + *error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } + + g->parent_obj.enable = 1; + + if (res->blob) { + if (console_has_gl(scanout->con)) { + if (!virtio_gpu_update_dmabuf(g, scanout_id, res, fb, r)) { + virtio_gpu_update_scanout(g, scanout_id, res, r); + return; + } + } + + data = res->blob; + } else { + data = (uint8_t *)pixman_image_get_data(res->image); + } + + /* create a surface for this scanout */ + if ((res->blob && !console_has_gl(scanout->con)) || + !scanout->ds || + surface_data(scanout->ds) != data + fb->offset || + scanout->width != r->width || + scanout->height != r->height) { + pixman_image_t *rect; + void *ptr = data + fb->offset; + rect = pixman_image_create_bits(fb->format, r->width, r->height, 
+ ptr, fb->stride); + + if (res->image) { + pixman_image_ref(res->image); + pixman_image_set_destroy_function(rect, virtio_unref_resource, + res->image); + } + + /* realloc the surface ptr */ + scanout->ds = qemu_create_displaysurface_pixman(rect); + if (!scanout->ds) { + *error = VIRTIO_GPU_RESP_ERR_UNSPEC; + return; + } + + pixman_image_unref(rect); + dpy_gfx_replace_surface(g->parent_obj.scanout[scanout_id].con, + scanout->ds); + } + + virtio_gpu_update_scanout(g, scanout_id, res, r); +} + static void virtio_gpu_set_scanout(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd) { - struct virtio_gpu_simple_resource *res, *ores; - struct virtio_gpu_scanout *scanout; - pixman_format_code_t format; - uint32_t offset; - int bpp; + struct virtio_gpu_simple_resource *res; + struct virtio_gpu_framebuffer fb = { 0 }; struct virtio_gpu_set_scanout ss; VIRTIO_GPU_FILL_CMD(ss); @@ -534,97 +689,111 @@ static void virtio_gpu_set_scanout(VirtIOGPU *g, return; } - g->parent_obj.enable = 1; if (ss.resource_id == 0) { virtio_gpu_disable_scanout(g, ss.scanout_id); return; } - /* create a surface for this scanout */ - res = virtio_gpu_find_resource(g, ss.resource_id); + res = virtio_gpu_find_check_resource(g, ss.resource_id, true, + __func__, &cmd->error); if (!res) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n", - __func__, ss.resource_id); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; return; } - if (ss.r.x > res->width || - ss.r.y > res->height || - ss.r.width < 16 || - ss.r.height < 16 || - ss.r.width > res->width || - ss.r.height > res->height || - ss.r.x + ss.r.width > res->width || - ss.r.y + ss.r.height > res->height) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout %d bounds for" - " resource %d, (%d,%d)+%d,%d vs %d %d\n", - __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y, - ss.r.width, ss.r.height, res->width, res->height); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + fb.format = pixman_image_get_format(res->image); + fb.bytes_pp = DIV_ROUND_UP(PIXMAN_FORMAT_BPP(fb.format), 8); + fb.width = pixman_image_get_width(res->image); + fb.height = pixman_image_get_height(res->image); + fb.stride = pixman_image_get_stride(res->image); + fb.offset = ss.r.x * fb.bytes_pp + ss.r.y * fb.stride; + + virtio_gpu_do_set_scanout(g, ss.scanout_id, + &fb, res, &ss.r, &cmd->error); +} + +static void virtio_gpu_set_scanout_blob(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_simple_resource *res; + struct virtio_gpu_framebuffer fb = { 0 }; + struct virtio_gpu_set_scanout_blob ss; + uint64_t fbend; + + VIRTIO_GPU_FILL_CMD(ss); + virtio_gpu_scanout_blob_bswap(&ss); + trace_virtio_gpu_cmd_set_scanout_blob(ss.scanout_id, ss.resource_id, + ss.r.width, ss.r.height, ss.r.x, + ss.r.y); + + if (ss.scanout_id >= g->parent_obj.conf.max_outputs) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout id specified %d", + __func__, ss.scanout_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; return; } - scanout = &g->parent_obj.scanout[ss.scanout_id]; + if (ss.resource_id == 0) { + virtio_gpu_disable_scanout(g, ss.scanout_id); + return; + } - format = pixman_image_get_format(res->image); - bpp = DIV_ROUND_UP(PIXMAN_FORMAT_BPP(format), 8); - offset = (ss.r.x * bpp) + ss.r.y * pixman_image_get_stride(res->image); - if (!scanout->ds || surface_data(scanout->ds) - != ((uint8_t *)pixman_image_get_data(res->image) + offset) || - scanout->width != ss.r.width || - scanout->height != ss.r.height) { - pixman_image_t *rect; - void *ptr = (uint8_t 
*)pixman_image_get_data(res->image) + offset; - rect = pixman_image_create_bits(format, ss.r.width, ss.r.height, ptr, - pixman_image_get_stride(res->image)); - pixman_image_ref(res->image); - pixman_image_set_destroy_function(rect, virtio_unref_resource, - res->image); - /* realloc the surface ptr */ - scanout->ds = qemu_create_displaysurface_pixman(rect); - if (!scanout->ds) { - cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; - return; - } - pixman_image_unref(rect); - dpy_gfx_replace_surface(g->parent_obj.scanout[ss.scanout_id].con, - scanout->ds); + res = virtio_gpu_find_check_resource(g, ss.resource_id, true, + __func__, &cmd->error); + if (!res) { + return; } - ores = virtio_gpu_find_resource(g, scanout->resource_id); - if (ores) { - ores->scanout_bitmask &= ~(1 << ss.scanout_id); + fb.format = virtio_gpu_get_pixman_format(ss.format); + if (!fb.format) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: host couldn't handle guest format %d\n", + __func__, ss.format); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; } - res->scanout_bitmask |= (1 << ss.scanout_id); - scanout->resource_id = ss.resource_id; - scanout->x = ss.r.x; - scanout->y = ss.r.y; - scanout->width = ss.r.width; - scanout->height = ss.r.height; + fb.bytes_pp = DIV_ROUND_UP(PIXMAN_FORMAT_BPP(fb.format), 8); + fb.width = ss.width; + fb.height = ss.height; + fb.stride = ss.strides[0]; + fb.offset = ss.offsets[0] + ss.r.x * fb.bytes_pp + ss.r.y * fb.stride; + + fbend = fb.offset; + fbend += fb.stride * (ss.r.height - 1); + fbend += fb.bytes_pp * ss.r.width; + if (fbend > res->blob_size) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: fb end out of range\n", + __func__); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } + + virtio_gpu_do_set_scanout(g, ss.scanout_id, + &fb, res, &ss.r, &cmd->error); } int virtio_gpu_create_mapping_iov(VirtIOGPU *g, - struct virtio_gpu_resource_attach_backing *ab, + uint32_t nr_entries, uint32_t offset, struct virtio_gpu_ctrl_command *cmd, - uint64_t **addr, struct iovec **iov) + uint64_t **addr, struct iovec **iov, + uint32_t *niov) { struct virtio_gpu_mem_entry *ents; size_t esize, s; - int i; + int e, v; - if (ab->nr_entries > 16384) { + if (nr_entries > 16384) { qemu_log_mask(LOG_GUEST_ERROR, "%s: nr_entries is too big (%d > 16384)\n", - __func__, ab->nr_entries); + __func__, nr_entries); return -1; } - esize = sizeof(*ents) * ab->nr_entries; + esize = sizeof(*ents) * nr_entries; ents = g_malloc(esize); s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, - sizeof(*ab), ents, esize); + offset, ents, esize); if (s != esize) { qemu_log_mask(LOG_GUEST_ERROR, "%s: command data size incorrect %zu vs %zu\n", @@ -633,37 +802,52 @@ int virtio_gpu_create_mapping_iov(VirtIOGPU *g, return -1; } - *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); + *iov = NULL; if (addr) { - *addr = g_malloc0(sizeof(uint64_t) * ab->nr_entries); - } - for (i = 0; i < ab->nr_entries; i++) { - uint64_t a = le64_to_cpu(ents[i].addr); - uint32_t l = le32_to_cpu(ents[i].length); - hwaddr len = l; - (*iov)[i].iov_base = dma_memory_map(VIRTIO_DEVICE(g)->dma_as, - a, &len, DMA_DIRECTION_TO_DEVICE); - (*iov)[i].iov_len = len; - if (addr) { - (*addr)[i] = a; - } - if (!(*iov)[i].iov_base || len != l) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to map MMIO memory for" - " resource %d element %d\n", - __func__, ab->resource_id, i); - if ((*iov)[i].iov_base) { - i++; /* cleanup the 'i'th map */ + *addr = NULL; + } + for (e = 0, v = 0; e < nr_entries; e++) { + uint64_t a = le64_to_cpu(ents[e].addr); + uint32_t 
l = le32_to_cpu(ents[e].length); + hwaddr len; + void *map; + + do { + len = l; + map = dma_memory_map(VIRTIO_DEVICE(g)->dma_as, + a, &len, DMA_DIRECTION_TO_DEVICE); + if (!map) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to map MMIO memory for" + " element %d\n", __func__, e); + virtio_gpu_cleanup_mapping_iov(g, *iov, v); + g_free(ents); + *iov = NULL; + if (addr) { + g_free(*addr); + *addr = NULL; + } + return -1; + } + + if (!(v % 16)) { + *iov = g_realloc(*iov, sizeof(struct iovec) * (v + 16)); + if (addr) { + *addr = g_realloc(*addr, sizeof(uint64_t) * (v + 16)); + } } - virtio_gpu_cleanup_mapping_iov(g, *iov, i); - g_free(ents); - *iov = NULL; + (*iov)[v].iov_base = map; + (*iov)[v].iov_len = len; if (addr) { - g_free(*addr); - *addr = NULL; + (*addr)[v] = a; } - return -1; - } + + a += len; + l -= len; + v += 1; + } while (l > 0); } + *niov = v; + g_free(ents); return 0; } @@ -690,6 +874,10 @@ static void virtio_gpu_cleanup_mapping(VirtIOGPU *g, res->iov_cnt = 0; g_free(res->addrs); res->addrs = NULL; + + if (res->blob) { + virtio_gpu_fini_udmabuf(res); + } } static void @@ -717,13 +905,12 @@ virtio_gpu_resource_attach_backing(VirtIOGPU *g, return; } - ret = virtio_gpu_create_mapping_iov(g, &ab, cmd, &res->addrs, &res->iov); + ret = virtio_gpu_create_mapping_iov(g, ab.nr_entries, sizeof(ab), cmd, + &res->addrs, &res->iov, &res->iov_cnt); if (ret != 0) { cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; return; } - - res->iov_cnt = ab.nr_entries; } static void @@ -737,18 +924,16 @@ virtio_gpu_resource_detach_backing(VirtIOGPU *g, virtio_gpu_bswap_32(&detach, sizeof(detach)); trace_virtio_gpu_cmd_res_back_detach(detach.resource_id); - res = virtio_gpu_find_resource(g, detach.resource_id); - if (!res || !res->iov) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n", - __func__, detach.resource_id); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + res = virtio_gpu_find_check_resource(g, detach.resource_id, true, + __func__, &cmd->error); + if (!res) { return; } virtio_gpu_cleanup_mapping(g, res); } -static void virtio_gpu_simple_process_cmd(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) +void virtio_gpu_simple_process_cmd(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) { VIRTIO_GPU_FILL_CMD(cmd->cmd_hdr); virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr); @@ -763,6 +948,13 @@ static void virtio_gpu_simple_process_cmd(VirtIOGPU *g, case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: virtio_gpu_resource_create_2d(g, cmd); break; + case VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB: + if (!virtio_gpu_blob_enabled(g->parent_obj.conf)) { + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + break; + } + virtio_gpu_resource_create_blob(g, cmd); + break; case VIRTIO_GPU_CMD_RESOURCE_UNREF: virtio_gpu_resource_unref(g, cmd); break; @@ -775,6 +967,13 @@ static void virtio_gpu_simple_process_cmd(VirtIOGPU *g, case VIRTIO_GPU_CMD_SET_SCANOUT: virtio_gpu_set_scanout(g, cmd); break; + case VIRTIO_GPU_CMD_SET_SCANOUT_BLOB: + if (!virtio_gpu_blob_enabled(g->parent_obj.conf)) { + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + break; + } + virtio_gpu_set_scanout_blob(g, cmd); + break; case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: virtio_gpu_resource_attach_backing(g, cmd); break; @@ -786,8 +985,10 @@ static void virtio_gpu_simple_process_cmd(VirtIOGPU *g, break; } if (!cmd->finished) { - virtio_gpu_ctrl_response_nodata(g, cmd, cmd->error ? cmd->error : - VIRTIO_GPU_RESP_OK_NODATA); + if (!g->parent_obj.renderer_blocked) { + virtio_gpu_ctrl_response_nodata(g, cmd, cmd->error ? 
cmd->error : + VIRTIO_GPU_RESP_OK_NODATA); + } } } @@ -806,6 +1007,7 @@ static void virtio_gpu_handle_cursor_cb(VirtIODevice *vdev, VirtQueue *vq) void virtio_gpu_process_cmdq(VirtIOGPU *g) { struct virtio_gpu_ctrl_command *cmd; + VirtIOGPUClass *vgc = VIRTIO_GPU_GET_CLASS(g); if (g->processing_cmdq) { return; @@ -819,8 +1021,7 @@ void virtio_gpu_process_cmdq(VirtIOGPU *g) } /* process command */ - VIRGL(g, virtio_gpu_virgl_process_cmd, virtio_gpu_simple_process_cmd, - g, cmd); + vgc->process_cmd(g, cmd); QTAILQ_REMOVE(&g->cmdq, cmd, next); if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { @@ -843,16 +1044,27 @@ void virtio_gpu_process_cmdq(VirtIOGPU *g) g->processing_cmdq = false; } -static void virtio_gpu_gl_flushed(VirtIOGPUBase *b) +static void virtio_gpu_process_fenceq(VirtIOGPU *g) { - VirtIOGPU *g = VIRTIO_GPU(b); + struct virtio_gpu_ctrl_command *cmd, *tmp; -#ifdef CONFIG_VIRGL - if (g->renderer_reset) { - g->renderer_reset = false; - virtio_gpu_virgl_reset(g); + QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { + trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id); + virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); + QTAILQ_REMOVE(&g->fenceq, cmd, next); + g_free(cmd); + g->inflight--; + if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { + fprintf(stderr, "inflight: %3d (-)\r", g->inflight); + } } -#endif +} + +static void virtio_gpu_handle_gl_flushed(VirtIOGPUBase *b) +{ + VirtIOGPU *g = container_of(b, VirtIOGPU, parent_obj); + + virtio_gpu_process_fenceq(g); virtio_gpu_process_cmdq(g); } @@ -865,13 +1077,6 @@ static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) return; } -#ifdef CONFIG_VIRGL - if (!g->renderer_inited && g->parent_obj.use_virgl_renderer) { - virtio_gpu_virgl_init(g); - g->renderer_inited = true; - } -#endif - cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command)); while (cmd) { cmd->vq = vq; @@ -882,18 +1087,14 @@ static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) } virtio_gpu_process_cmdq(g); - -#ifdef CONFIG_VIRGL - if (g->parent_obj.use_virgl_renderer) { - virtio_gpu_virgl_fence_poll(g); - } -#endif } static void virtio_gpu_ctrl_bh(void *opaque) { VirtIOGPU *g = opaque; - virtio_gpu_handle_ctrl(&g->parent_obj.parent_obj, g->ctrl_vq); + VirtIOGPUClass *vgc = VIRTIO_GPU_GET_CLASS(g); + + vgc->handle_ctrl(&g->parent_obj.parent_obj, g->ctrl_vq); } static void virtio_gpu_handle_cursor(VirtIODevice *vdev, VirtQueue *vq) @@ -1105,24 +1306,21 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size, return 0; } -static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) +void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(qdev); VirtIOGPU *g = VIRTIO_GPU(qdev); - bool have_virgl; - -#if !defined(CONFIG_VIRGL) || defined(HOST_WORDS_BIGENDIAN) - have_virgl = false; -#else - have_virgl = display_opengl; -#endif - if (!have_virgl) { - g->parent_obj.conf.flags &= ~(1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED); - } else { -#if defined(CONFIG_VIRGL) - VIRTIO_GPU_BASE(g)->virtio_config.num_capsets = - virtio_gpu_virgl_get_num_capsets(g); -#endif + + if (virtio_gpu_blob_enabled(g->parent_obj.conf)) { + if (!virtio_gpu_have_udmabuf()) { + error_setg(errp, "cannot enable blob resources without udmabuf"); + return; + } + + if (virtio_gpu_virgl_enabled(g->parent_obj.conf)) { + error_setg(errp, "blobs and virgl are not compatible (yet)"); + return; + } } if (!virtio_gpu_base_device_realize(qdev, @@ -1141,18 +1339,12 @@ static void 
virtio_gpu_device_realize(DeviceState *qdev, Error **errp) QTAILQ_INIT(&g->fenceq); } -static void virtio_gpu_reset(VirtIODevice *vdev) +void virtio_gpu_reset(VirtIODevice *vdev) { VirtIOGPU *g = VIRTIO_GPU(vdev); struct virtio_gpu_simple_resource *res, *tmp; struct virtio_gpu_ctrl_command *cmd; -#ifdef CONFIG_VIRGL - if (g->parent_obj.use_virgl_renderer) { - virtio_gpu_virgl_reset(g); - } -#endif - QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { virtio_gpu_resource_destroy(g, res); } @@ -1170,17 +1362,6 @@ static void virtio_gpu_reset(VirtIODevice *vdev) g_free(cmd); } -#ifdef CONFIG_VIRGL - if (g->parent_obj.use_virgl_renderer) { - if (g->parent_obj.renderer_blocked) { - g->renderer_reset = true; - } else { - virtio_gpu_virgl_reset(g); - } - g->parent_obj.use_virgl_renderer = false; - } -#endif - virtio_gpu_base_reset(VIRTIO_GPU_BASE(vdev)); } @@ -1235,12 +1416,8 @@ static Property virtio_gpu_properties[] = { VIRTIO_GPU_BASE_PROPERTIES(VirtIOGPU, parent_obj.conf), DEFINE_PROP_SIZE("max_hostmem", VirtIOGPU, conf_max_hostmem, 256 * MiB), -#ifdef CONFIG_VIRGL - DEFINE_PROP_BIT("virgl", VirtIOGPU, parent_obj.conf.flags, - VIRTIO_GPU_FLAG_VIRGL_ENABLED, true), - DEFINE_PROP_BIT("stats", VirtIOGPU, parent_obj.conf.flags, - VIRTIO_GPU_FLAG_STATS_ENABLED, false), -#endif + DEFINE_PROP_BIT("blob", VirtIOGPU, parent_obj.conf.flags, + VIRTIO_GPU_FLAG_BLOB_ENABLED, false), DEFINE_PROP_END_OF_LIST(), }; @@ -1248,9 +1425,14 @@ static void virtio_gpu_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); - VirtIOGPUBaseClass *vgc = VIRTIO_GPU_BASE_CLASS(klass); + VirtIOGPUClass *vgc = VIRTIO_GPU_CLASS(klass); + VirtIOGPUBaseClass *vgbc = &vgc->parent; + + vgc->handle_ctrl = virtio_gpu_handle_ctrl; + vgc->process_cmd = virtio_gpu_simple_process_cmd; + vgc->update_cursor_data = virtio_gpu_update_cursor_data; + vgbc->gl_flushed = virtio_gpu_handle_gl_flushed; - vgc->gl_flushed = virtio_gpu_gl_flushed; vdc->realize = virtio_gpu_device_realize; vdc->reset = virtio_gpu_reset; vdc->get_config = virtio_gpu_get_config; @@ -1264,8 +1446,10 @@ static const TypeInfo virtio_gpu_info = { .name = TYPE_VIRTIO_GPU, .parent = TYPE_VIRTIO_GPU_BASE, .instance_size = sizeof(VirtIOGPU), + .class_size = sizeof(VirtIOGPUClass), .class_init = virtio_gpu_class_init, }; +module_obj(TYPE_VIRTIO_GPU); static void virtio_register_types(void) { diff --git a/hw/display/virtio-vga-gl.c b/hw/display/virtio-vga-gl.c new file mode 100644 index 00000000000..f22549097c5 --- /dev/null +++ b/hw/display/virtio-vga-gl.c @@ -0,0 +1,50 @@ +#include "qemu/osdep.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio-gpu.h" +#include "hw/display/vga.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "virtio-vga.h" +#include "qom/object.h" + +#define TYPE_VIRTIO_VGA_GL "virtio-vga-gl" + +typedef struct VirtIOVGAGL VirtIOVGAGL; +DECLARE_INSTANCE_CHECKER(VirtIOVGAGL, VIRTIO_VGA_GL, + TYPE_VIRTIO_VGA_GL) + +struct VirtIOVGAGL { + VirtIOVGABase parent_obj; + + VirtIOGPUGL vdev; +}; + +static void virtio_vga_gl_inst_initfn(Object *obj) +{ + VirtIOVGAGL *dev = VIRTIO_VGA_GL(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_GPU_GL); + VIRTIO_VGA_BASE(dev)->vgpu = VIRTIO_GPU_BASE(&dev->vdev); +} + + +static VirtioPCIDeviceTypeInfo virtio_vga_gl_info = { + .generic_name = TYPE_VIRTIO_VGA_GL, + .parent = TYPE_VIRTIO_VGA_BASE, + .instance_size = sizeof(VirtIOVGAGL), + .instance_init = 
virtio_vga_gl_inst_initfn, +}; +module_obj(TYPE_VIRTIO_VGA_GL); + +static void virtio_vga_register_types(void) +{ + if (have_vga) { + virtio_pci_types_register(&virtio_vga_gl_info); + } +} + +type_init(virtio_vga_register_types) + +module_dep("hw-display-virtio-vga"); diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c index d3c64040615..9e57f61e9ed 100644 --- a/hw/display/virtio-vga.c +++ b/hw/display/virtio-vga.c @@ -239,6 +239,7 @@ static TypeInfo virtio_vga_base_info = { .class_init = virtio_vga_base_class_init, .abstract = true, }; +module_obj(TYPE_VIRTIO_VGA_BASE); #define TYPE_VIRTIO_VGA "virtio-vga" @@ -268,6 +269,7 @@ static VirtioPCIDeviceTypeInfo virtio_vga_info = { .instance_size = sizeof(VirtIOVGA), .instance_init = virtio_vga_inst_initfn, }; +module_obj(TYPE_VIRTIO_VGA); static void virtio_vga_register_types(void) { diff --git a/hw/display/xlnx_dp.c b/hw/display/xlnx_dp.c index 4fd6aeb18b5..9bb781e3125 100644 --- a/hw/display/xlnx_dp.c +++ b/hw/display/xlnx_dp.c @@ -714,7 +714,11 @@ static uint64_t xlnx_dp_read(void *opaque, hwaddr offset, unsigned size) break; default: assert(offset <= (0x3AC >> 2)); - ret = s->core_registers[offset]; + if (offset == (0x3A8 >> 2) || offset == (0x3AC >> 2)) { + ret = s->core_registers[DP_INT_MASK]; + } else { + ret = s->core_registers[offset]; + } break; } @@ -1253,7 +1257,7 @@ static void xlnx_dp_init(Object *obj) object_property_add_child(OBJECT(s), "dpcd", OBJECT(s->dpcd)); s->edid = I2CDDC(qdev_new("i2c-ddc")); - i2c_set_slave_address(I2C_SLAVE(s->edid), 0x50); + i2c_slave_set_address(I2C_SLAVE(s->edid), 0x50); object_property_add_child(OBJECT(s), "edid", OBJECT(s->edid)); fifo8_create(&s->rx_fifo, 16); diff --git a/hw/dma/meson.build b/hw/dma/meson.build index 5c78a4e05ff..f3f0661bc3c 100644 --- a/hw/dma/meson.build +++ b/hw/dma/meson.build @@ -1,4 +1,3 @@ -softmmu_ss.add(when: 'CONFIG_PUV3', if_true: files('puv3_dma.c')) softmmu_ss.add(when: 'CONFIG_RC4030', if_true: files('rc4030.c')) softmmu_ss.add(when: 'CONFIG_PL080', if_true: files('pl080.c')) softmmu_ss.add(when: 'CONFIG_PL330', if_true: files('pl330.c')) diff --git a/hw/dma/pl080.c b/hw/dma/pl080.c index f1a586b1d71..2627307cc85 100644 --- a/hw/dma/pl080.c +++ b/hw/dma/pl080.c @@ -10,7 +10,6 @@ #include "qemu/osdep.h" #include "hw/sysbus.h" #include "migration/vmstate.h" -#include "exec/address-spaces.h" #include "qemu/log.h" #include "qemu/module.h" #include "hw/dma/pl080.h" diff --git a/hw/dma/pl330.c b/hw/dma/pl330.c index 944ba296b08..0cb46191c19 100644 --- a/hw/dma/pl330.c +++ b/hw/dma/pl330.c @@ -269,6 +269,9 @@ struct PL330State { uint8_t num_faulting; uint8_t periph_busy[PL330_PERIPH_NUM]; + /* Memory region that DMA operation access */ + MemoryRegion *mem_mr; + AddressSpace *mem_as; }; #define TYPE_PL330 "pl330" @@ -1108,7 +1111,7 @@ static inline const PL330InsnDesc *pl330_fetch_insn(PL330Chan *ch) uint8_t opcode; int i; - dma_memory_read(&address_space_memory, ch->pc, &opcode, 1); + dma_memory_read(ch->parent->mem_as, ch->pc, &opcode, 1); for (i = 0; insn_desc[i].size; i++) { if ((opcode & insn_desc[i].opmask) == insn_desc[i].opcode) { return &insn_desc[i]; @@ -1122,7 +1125,7 @@ static inline void pl330_exec_insn(PL330Chan *ch, const PL330InsnDesc *insn) uint8_t buf[PL330_INSN_MAXSIZE]; assert(insn->size <= PL330_INSN_MAXSIZE); - dma_memory_read(&address_space_memory, ch->pc, buf, insn->size); + dma_memory_read(ch->parent->mem_as, ch->pc, buf, insn->size); insn->exec(ch, buf[0], &buf[1], insn->size - 1); } @@ -1186,7 +1189,7 @@ static int 
pl330_exec_cycle(PL330Chan *channel) if (q != NULL && q->len <= pl330_fifo_num_free(&s->fifo)) { int len = q->len - (q->addr & (q->len - 1)); - dma_memory_read(&address_space_memory, q->addr, buf, len); + dma_memory_read(s->mem_as, q->addr, buf, len); trace_pl330_exec_cycle(q->addr, len); if (trace_event_get_state_backends(TRACE_PL330_HEXDUMP)) { pl330_hexdump(buf, len); @@ -1217,7 +1220,7 @@ static int pl330_exec_cycle(PL330Chan *channel) fifo_res = pl330_fifo_get(&s->fifo, buf, len, q->tag); } if (fifo_res == PL330_FIFO_OK || q->z) { - dma_memory_write(&address_space_memory, q->addr, buf, len); + dma_memory_write(s->mem_as, q->addr, buf, len); trace_pl330_exec_cycle(q->addr, len); if (trace_event_get_state_backends(TRACE_PL330_HEXDUMP)) { pl330_hexdump(buf, len); @@ -1562,6 +1565,18 @@ static void pl330_realize(DeviceState *dev, Error **errp) "dma", PL330_IOMEM_SIZE); sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem); + if (!s->mem_mr) { + error_setg(errp, "'memory' link is not set"); + return; + } else if (s->mem_mr == get_system_memory()) { + /* Avoid creating new AS for system memory. */ + s->mem_as = &address_space_memory; + } else { + s->mem_as = g_new0(AddressSpace, 1); + address_space_init(s->mem_as, s->mem_mr, + memory_region_name(s->mem_mr)); + } + s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pl330_exec_cycle_timer, s); s->cfg[0] = (s->mgr_ns_at_rst ? 0x4 : 0) | @@ -1656,6 +1671,9 @@ static Property pl330_properties[] = { DEFINE_PROP_UINT8("rd_q_dep", PL330State, rd_q_dep, 16), DEFINE_PROP_UINT16("data_buffer_dep", PL330State, data_buffer_dep, 256), + DEFINE_PROP_LINK("memory", PL330State, mem_mr, + TYPE_MEMORY_REGION, MemoryRegion *), + DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/dma/puv3_dma.c b/hw/dma/puv3_dma.c deleted file mode 100644 index cca1e9ec21b..00000000000 --- a/hw/dma/puv3_dma.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * DMA device simulation in PKUnity SoC - * - * Copyright (C) 2010-2012 Guan Xuetao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation, or any later version. - * See the COPYING file in the top-level directory. 
- */ - -#include "qemu/osdep.h" -#include "hw/sysbus.h" -#include "qom/object.h" - -#undef DEBUG_PUV3 -#include "hw/unicore32/puv3.h" -#include "qemu/module.h" -#include "qemu/log.h" - -#define PUV3_DMA_CH_NR (6) -#define PUV3_DMA_CH_MASK (0xff) -#define PUV3_DMA_CH(offset) ((offset) >> 8) - -#define TYPE_PUV3_DMA "puv3_dma" -OBJECT_DECLARE_SIMPLE_TYPE(PUV3DMAState, PUV3_DMA) - -struct PUV3DMAState { - SysBusDevice parent_obj; - - MemoryRegion iomem; - uint32_t reg_CFG[PUV3_DMA_CH_NR]; -}; - -static uint64_t puv3_dma_read(void *opaque, hwaddr offset, - unsigned size) -{ - PUV3DMAState *s = opaque; - uint32_t ret = 0; - - assert(PUV3_DMA_CH(offset) < PUV3_DMA_CH_NR); - - switch (offset & PUV3_DMA_CH_MASK) { - case 0x10: - ret = s->reg_CFG[PUV3_DMA_CH(offset)]; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad read offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } - DPRINTF("offset 0x%x, value 0x%x\n", offset, ret); - - return ret; -} - -static void puv3_dma_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) -{ - PUV3DMAState *s = opaque; - - assert(PUV3_DMA_CH(offset) < PUV3_DMA_CH_NR); - - switch (offset & PUV3_DMA_CH_MASK) { - case 0x10: - s->reg_CFG[PUV3_DMA_CH(offset)] = value; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad write offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } - DPRINTF("offset 0x%x, value 0x%x\n", offset, value); -} - -static const MemoryRegionOps puv3_dma_ops = { - .read = puv3_dma_read, - .write = puv3_dma_write, - .impl = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void puv3_dma_realize(DeviceState *dev, Error **errp) -{ - PUV3DMAState *s = PUV3_DMA(dev); - int i; - - for (i = 0; i < PUV3_DMA_CH_NR; i++) { - s->reg_CFG[i] = 0x0; - } - - memory_region_init_io(&s->iomem, OBJECT(s), &puv3_dma_ops, s, "puv3_dma", - PUV3_REGS_OFFSET); - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem); -} - -static void puv3_dma_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = puv3_dma_realize; -} - -static const TypeInfo puv3_dma_info = { - .name = TYPE_PUV3_DMA, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(PUV3DMAState), - .class_init = puv3_dma_class_init, -}; - -static void puv3_dma_register_type(void) -{ - type_register_static(&puv3_dma_info); -} - -type_init(puv3_dma_register_type) diff --git a/hw/dma/pxa2xx_dma.c b/hw/dma/pxa2xx_dma.c index b3707ff3de2..fa896f7edf7 100644 --- a/hw/dma/pxa2xx_dma.c +++ b/hw/dma/pxa2xx_dma.c @@ -525,7 +525,7 @@ static bool is_version_0(void *opaque, int version_id) return version_id == 0; } -static VMStateDescription vmstate_pxa2xx_dma_chan = { +static const VMStateDescription vmstate_pxa2xx_dma_chan = { .name = "pxa2xx_dma_chan", .version_id = 1, .minimum_version_id = 1, @@ -540,7 +540,7 @@ static VMStateDescription vmstate_pxa2xx_dma_chan = { }, }; -static VMStateDescription vmstate_pxa2xx_dma = { +static const VMStateDescription vmstate_pxa2xx_dma = { .name = "pxa2xx_dma", .version_id = 1, .minimum_version_id = 0, diff --git a/hw/dma/sifive_pdma.c b/hw/dma/sifive_pdma.c index e1f6fedbda7..85fe34f5f31 100644 --- a/hw/dma/sifive_pdma.c +++ b/hw/dma/sifive_pdma.c @@ -24,7 +24,6 @@ #include "qemu/bitops.h" #include "qemu/log.h" #include "qapi/error.h" -#include "hw/hw.h" #include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/sysbus.h" @@ -55,6 +54,13 @@ #define DMA_EXEC_DST 0x110 #define DMA_EXEC_SRC 0x118 +/* + * FU540/FU740 docs are incorrect with 
NextConfig.wsize/rsize reset values. + * The reset values tested on Unleashed/Unmatched boards are 6 instead of 0. + */ +#define CONFIG_WRSZ_DEFAULT 6 +#define CONFIG_RDSZ_DEFAULT 6 + enum dma_chan_state { DMA_CHAN_STATE_IDLE, DMA_CHAN_STATE_STARTED, @@ -68,13 +74,13 @@ static void sifive_pdma_run(SiFivePDMAState *s, int ch) uint64_t dst = s->chan[ch].next_dst; uint64_t src = s->chan[ch].next_src; uint32_t config = s->chan[ch].next_config; - int wsize, rsize, size; + int wsize, rsize, size, remainder; uint8_t buf[64]; int n; /* do nothing if bytes to transfer is zero */ if (!bytes) { - goto error; + goto done; } /* @@ -100,11 +106,7 @@ static void sifive_pdma_run(SiFivePDMAState *s, int ch) size = 6; } size = 1 << size; - - /* the bytes to transfer should be multiple of transaction size */ - if (bytes % size) { - goto error; - } + remainder = bytes % size; /* indicate a DMA transfer is started */ s->chan[ch].state = DMA_CHAN_STATE_STARTED; @@ -125,10 +127,13 @@ static void sifive_pdma_run(SiFivePDMAState *s, int ch) s->chan[ch].exec_bytes -= size; } - /* indicate a DMA transfer is done */ - s->chan[ch].state = DMA_CHAN_STATE_DONE; - s->chan[ch].control &= ~CONTROL_RUN; - s->chan[ch].control |= CONTROL_DONE; + if (remainder) { + cpu_physical_memory_read(s->chan[ch].exec_src, buf, remainder); + cpu_physical_memory_write(s->chan[ch].exec_dst, buf, remainder); + s->chan[ch].exec_src += remainder; + s->chan[ch].exec_dst += remainder; + s->chan[ch].exec_bytes -= remainder; + } /* reload exec_ registers if repeat is required */ if (s->chan[ch].next_config & CONFIG_REPEAT) { @@ -137,6 +142,11 @@ static void sifive_pdma_run(SiFivePDMAState *s, int ch) s->chan[ch].exec_src = src; } +done: + /* indicate a DMA transfer is done */ + s->chan[ch].state = DMA_CHAN_STATE_DONE; + s->chan[ch].control &= ~CONTROL_RUN; + s->chan[ch].control |= CONTROL_DONE; return; error: @@ -222,6 +232,7 @@ static void sifive_pdma_write(void *opaque, hwaddr offset, { SiFivePDMAState *s = opaque; int ch = SIFIVE_PDMA_CHAN_NO(offset); + bool claimed, run; if (ch >= SIFIVE_PDMA_CHANS) { qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid channel no %d\n", @@ -232,8 +243,35 @@ static void sifive_pdma_write(void *opaque, hwaddr offset, offset &= 0xfff; switch (offset) { case DMA_CONTROL: + claimed = !!(s->chan[ch].control & CONTROL_CLAIM); + run = !!(s->chan[ch].control & CONTROL_RUN); + + if (!claimed && (value & CONTROL_CLAIM)) { + /* reset Next* registers */ + s->chan[ch].next_config = (CONFIG_RDSZ_DEFAULT << CONFIG_RDSZ_SHIFT) | + (CONFIG_WRSZ_DEFAULT << CONFIG_WRSZ_SHIFT); + s->chan[ch].next_bytes = 0; + s->chan[ch].next_dst = 0; + s->chan[ch].next_src = 0; + } + + /* claim bit can only be cleared when run is low */ + if (run && !(value & CONTROL_CLAIM)) { + value |= CONTROL_CLAIM; + } + s->chan[ch].control = value; + /* + * If channel was not claimed before run bit is set, + * or if the channel is disclaimed when run was low, + * DMA won't run. + */ + if (!claimed || (!run && !(value & CONTROL_CLAIM))) { + s->chan[ch].control &= ~CONTROL_RUN; + return; + } + if (value & CONTROL_RUN) { sifive_pdma_run(s, ch); } diff --git a/hw/dma/trace-events b/hw/dma/trace-events index 44893995f63..3c47df54e4d 100644 --- a/hw/dma/trace-events +++ b/hw/dma/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
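The reworked DMA_CONTROL handling above enforces a claim-before-run ordering: claiming a channel resets its Next* registers (with wsize/rsize defaulting to 6), the claim bit cannot be dropped while run is high, and setting run on a channel that was not already claimed does nothing. A guest-side programming sketch under those rules; writel() and PDMA_BASE are assumed helpers, and the DMA_NEXT_* offset names are assumptions matching the model's naming style, none of which appear in this hunk:

    /* Illustrative sequence for one SiFive PDMA channel (not part of the patch). */
    writel(PDMA_BASE + DMA_CONTROL, CONTROL_CLAIM);   /* claim; Next* regs reset  */
    writel(PDMA_BASE + DMA_NEXT_BYTES, len);          /* len need no longer be a  */
    writel(PDMA_BASE + DMA_NEXT_DST, dst);            /* multiple of the burst    */
    writel(PDMA_BASE + DMA_NEXT_SRC, src);            /* size                     */
    writel(PDMA_BASE + DMA_CONTROL, CONTROL_CLAIM | CONTROL_RUN);  /* start */
    /* A write that clears CONTROL_CLAIM while CONTROL_RUN is still set is
     * ignored; CONTROL_RUN on an unclaimed channel does not start a transfer. */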
# rc4030.c jazzio_read(uint64_t addr, uint32_t ret) "read reg[0x%"PRIx64"] = 0x%x" diff --git a/hw/dma/xlnx-zdma.c b/hw/dma/xlnx-zdma.c index fa38a556341..a5a92b4ff8c 100644 --- a/hw/dma/xlnx-zdma.c +++ b/hw/dma/xlnx-zdma.c @@ -320,9 +320,9 @@ static bool zdma_load_descriptor(XlnxZDMA *s, uint64_t addr, return false; } - descr->addr = address_space_ldq_le(s->dma_as, addr, s->attr, NULL); - descr->size = address_space_ldl_le(s->dma_as, addr + 8, s->attr, NULL); - descr->attr = address_space_ldl_le(s->dma_as, addr + 12, s->attr, NULL); + descr->addr = address_space_ldq_le(&s->dma_as, addr, s->attr, NULL); + descr->size = address_space_ldl_le(&s->dma_as, addr + 8, s->attr, NULL); + descr->attr = address_space_ldl_le(&s->dma_as, addr + 12, s->attr, NULL); return true; } @@ -354,7 +354,7 @@ static void zdma_update_descr_addr(XlnxZDMA *s, bool type, } else { addr = zdma_get_regaddr64(s, basereg); addr += sizeof(s->dsc_dst); - next = address_space_ldq_le(s->dma_as, addr, s->attr, NULL); + next = address_space_ldq_le(&s->dma_as, addr, s->attr, NULL); } zdma_put_regaddr64(s, basereg, next); @@ -421,7 +421,7 @@ static void zdma_write_dst(XlnxZDMA *s, uint8_t *buf, uint32_t len) } } - address_space_write(s->dma_as, s->dsc_dst.addr, s->attr, buf, dlen); + address_space_write(&s->dma_as, s->dsc_dst.addr, s->attr, buf, dlen); if (burst_type == AXI_BURST_INCR) { s->dsc_dst.addr += dlen; } @@ -497,7 +497,7 @@ static void zdma_process_descr(XlnxZDMA *s) len = s->cfg.bus_width / 8; } } else { - address_space_read(s->dma_as, src_addr, s->attr, s->buf, len); + address_space_read(&s->dma_as, src_addr, s->attr, s->buf, len); if (burst_type == AXI_BURST_INCR) { src_addr += len; } @@ -765,6 +765,12 @@ static void zdma_realize(DeviceState *dev, Error **errp) XlnxZDMA *s = XLNX_ZDMA(dev); unsigned int i; + if (!s->dma_mr) { + error_setg(errp, TYPE_XLNX_ZDMA " 'dma' link not set"); + return; + } + address_space_init(&s->dma_as, s->dma_mr, "zdma-dma"); + for (i = 0; i < ARRAY_SIZE(zdma_regs_info); ++i) { RegisterInfo *r = &s->regs_info[zdma_regs_info[i].addr / 4]; @@ -777,12 +783,6 @@ static void zdma_realize(DeviceState *dev, Error **errp) }; } - if (s->dma_mr) { - s->dma_as = g_malloc0(sizeof(AddressSpace)); - address_space_init(s->dma_as, s->dma_mr, NULL); - } else { - s->dma_as = &address_space_memory; - } s->attr = MEMTXATTRS_UNSPECIFIED; } diff --git a/hw/dma/xlnx_csu_dma.c b/hw/dma/xlnx_csu_dma.c index 98324dadcd4..896bb3574dd 100644 --- a/hw/dma/xlnx_csu_dma.c +++ b/hw/dma/xlnx_csu_dma.c @@ -21,7 +21,6 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "qapi/error.h" -#include "hw/hw.h" #include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/sysbus.h" @@ -202,11 +201,11 @@ static uint32_t xlnx_csu_dma_read(XlnxCSUDMA *s, uint8_t *buf, uint32_t len) for (i = 0; i < len && (result == MEMTX_OK); i += s->width) { uint32_t mlen = MIN(len - i, s->width); - result = address_space_rw(s->dma_as, addr, s->attr, + result = address_space_rw(&s->dma_as, addr, s->attr, buf + i, mlen, false); } } else { - result = address_space_rw(s->dma_as, addr, s->attr, buf, len, false); + result = address_space_rw(&s->dma_as, addr, s->attr, buf, len, false); } if (result == MEMTX_OK) { @@ -233,12 +232,12 @@ static uint32_t xlnx_csu_dma_write(XlnxCSUDMA *s, uint8_t *buf, uint32_t len) for (i = 0; i < len && (result == MEMTX_OK); i += s->width) { uint32_t mlen = MIN(len - i, s->width); - result = address_space_rw(s->dma_as, addr, s->attr, + result = address_space_rw(&s->dma_as, addr, s->attr, buf, mlen, true); buf += mlen; 
} } else { - result = address_space_rw(s->dma_as, addr, s->attr, buf, len, true); + result = address_space_rw(&s->dma_as, addr, s->attr, buf, len, true); } if (result != MEMTX_OK) { @@ -627,6 +626,17 @@ static void xlnx_csu_dma_realize(DeviceState *dev, Error **errp) XlnxCSUDMA *s = XLNX_CSU_DMA(dev); RegisterInfoArray *reg_array; + if (!s->is_dst && !s->tx_dev) { + error_setg(errp, "zynqmp.csu-dma: Stream not connected"); + return; + } + + if (!s->dma_mr) { + error_setg(errp, TYPE_XLNX_CSU_DMA " 'dma' link not set"); + return; + } + address_space_init(&s->dma_as, s->dma_mr, "csu-dma"); + reg_array = register_init_block32(dev, xlnx_csu_dma_regs_info[!!s->is_dst], XLNX_CSU_DMA_R_MAX, @@ -641,21 +651,9 @@ static void xlnx_csu_dma_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem); sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq); - if (!s->is_dst && !s->tx_dev) { - error_setg(errp, "zynqmp.csu-dma: Stream not connected"); - return; - } - s->src_timer = ptimer_init(xlnx_csu_dma_src_timeout_hit, s, PTIMER_POLICY_DEFAULT); - if (s->dma_mr) { - s->dma_as = g_malloc0(sizeof(AddressSpace)); - address_space_init(s->dma_as, s->dma_mr, NULL); - } else { - s->dma_as = &address_space_memory; - } - s->attr = MEMTXATTRS_UNSPECIFIED; s->r_size_last_word = 0; diff --git a/hw/gpio/aspeed_gpio.c b/hw/gpio/aspeed_gpio.c index 985a259e05b..911d21c8cfb 100644 --- a/hw/gpio/aspeed_gpio.c +++ b/hw/gpio/aspeed_gpio.c @@ -10,17 +10,13 @@ #include "qemu/host-utils.h" #include "qemu/log.h" #include "hw/gpio/aspeed_gpio.h" -#include "include/hw/misc/aspeed_scu.h" +#include "hw/misc/aspeed_scu.h" #include "qapi/error.h" #include "qapi/visitor.h" #include "hw/irq.h" #include "migration/vmstate.h" -#define GPIOS_PER_REG 32 -#define GPIOS_PER_SET GPIOS_PER_REG -#define GPIO_PIN_GAP_SIZE 4 #define GPIOS_PER_GROUP 8 -#define GPIO_GROUP_SHIFT 3 /* GPIO Source Types */ #define ASPEED_CMD_SRC_MASK 0x01010101 @@ -164,50 +160,48 @@ #define GPIO_YZAAAB_DIRECTION (0x1E4 >> 2) #define GPIO_AC_DATA_VALUE (0x1E8 >> 2) #define GPIO_AC_DIRECTION (0x1EC >> 2) -#define GPIO_3_6V_MEM_SIZE 0x1F0 -#define GPIO_3_6V_REG_ARRAY_SIZE (GPIO_3_6V_MEM_SIZE >> 2) +#define GPIO_3_3V_MEM_SIZE 0x1F0 +#define GPIO_3_3V_REG_ARRAY_SIZE (GPIO_3_3V_MEM_SIZE >> 2) /* AST2600 only - 1.8V gpios */ /* - * The AST2600 has same 3.6V gpios as the AST2400 (memory offsets 0x0-0x198) - * and addtional 1.8V gpios (memory offsets 0x800-0x9D4). + * The AST2600 two copies of the GPIO controller: the same 3.3V gpios as the + * AST2400 (memory offsets 0x0-0x198) and a second controller with 1.8V gpios + * (memory offsets 0x800-0x9D4). 
*/ -#define GPIO_1_8V_REG_OFFSET 0x800 -#define GPIO_1_8V_ABCD_DATA_VALUE ((0x800 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_DIRECTION ((0x804 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_INT_ENABLE ((0x808 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_INT_SENS_0 ((0x80C - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_INT_SENS_1 ((0x810 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_INT_SENS_2 ((0x814 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_INT_STATUS ((0x818 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_RESET_TOLERANT ((0x81C - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_DATA_VALUE ((0x820 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_DIRECTION ((0x824 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_INT_ENABLE ((0x828 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_INT_SENS_0 ((0x82C - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_INT_SENS_1 ((0x830 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_INT_SENS_2 ((0x834 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_INT_STATUS ((0x838 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_RESET_TOLERANT ((0x83C - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_DEBOUNCE_1 ((0x840 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_DEBOUNCE_2 ((0x844 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_DEBOUNCE_1 ((0x848 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_DEBOUNCE_2 ((0x84C - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_DEBOUNCE_TIME_1 ((0x850 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_DEBOUNCE_TIME_2 ((0x854 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_DEBOUNCE_TIME_3 ((0x858 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_COMMAND_SRC_0 ((0x860 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_COMMAND_SRC_1 ((0x864 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_COMMAND_SRC_0 ((0x868 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_COMMAND_SRC_1 ((0x86C - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_DATA_READ ((0x8C0 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_DATA_READ ((0x8C4 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_INPUT_MASK ((0x9D0 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_INPUT_MASK ((0x9D4 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_MEM_SIZE 0x9D8 -#define GPIO_1_8V_REG_ARRAY_SIZE ((GPIO_1_8V_MEM_SIZE - \ - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_MAX_MEM_SIZE MAX(GPIO_3_6V_MEM_SIZE, GPIO_1_8V_MEM_SIZE) +#define GPIO_1_8V_ABCD_DATA_VALUE (0x000 >> 2) +#define GPIO_1_8V_ABCD_DIRECTION (0x004 >> 2) +#define GPIO_1_8V_ABCD_INT_ENABLE (0x008 >> 2) +#define GPIO_1_8V_ABCD_INT_SENS_0 (0x00C >> 2) +#define GPIO_1_8V_ABCD_INT_SENS_1 (0x010 >> 2) +#define GPIO_1_8V_ABCD_INT_SENS_2 (0x014 >> 2) +#define GPIO_1_8V_ABCD_INT_STATUS (0x018 >> 2) +#define GPIO_1_8V_ABCD_RESET_TOLERANT (0x01C >> 2) +#define GPIO_1_8V_E_DATA_VALUE (0x020 >> 2) +#define GPIO_1_8V_E_DIRECTION (0x024 >> 2) +#define GPIO_1_8V_E_INT_ENABLE (0x028 >> 2) +#define GPIO_1_8V_E_INT_SENS_0 (0x02C >> 2) +#define GPIO_1_8V_E_INT_SENS_1 (0x030 >> 2) +#define GPIO_1_8V_E_INT_SENS_2 (0x034 >> 2) +#define GPIO_1_8V_E_INT_STATUS (0x038 >> 2) +#define GPIO_1_8V_E_RESET_TOLERANT (0x03C >> 2) +#define GPIO_1_8V_ABCD_DEBOUNCE_1 (0x040 >> 2) +#define GPIO_1_8V_ABCD_DEBOUNCE_2 (0x044 >> 2) +#define GPIO_1_8V_E_DEBOUNCE_1 (0x048 >> 2) +#define GPIO_1_8V_E_DEBOUNCE_2 (0x04C >> 2) +#define GPIO_1_8V_DEBOUNCE_TIME_1 (0x050 >> 2) +#define GPIO_1_8V_DEBOUNCE_TIME_2 (0x054 >> 2) +#define GPIO_1_8V_DEBOUNCE_TIME_3 (0x058 >> 2) 
+#define GPIO_1_8V_ABCD_COMMAND_SRC_0 (0x060 >> 2) +#define GPIO_1_8V_ABCD_COMMAND_SRC_1 (0x064 >> 2) +#define GPIO_1_8V_E_COMMAND_SRC_0 (0x068 >> 2) +#define GPIO_1_8V_E_COMMAND_SRC_1 (0x06C >> 2) +#define GPIO_1_8V_ABCD_DATA_READ (0x0C0 >> 2) +#define GPIO_1_8V_E_DATA_READ (0x0C4 >> 2) +#define GPIO_1_8V_ABCD_INPUT_MASK (0x1D0 >> 2) +#define GPIO_1_8V_E_INPUT_MASK (0x1D4 >> 2) +#define GPIO_1_8V_MEM_SIZE 0x1D8 +#define GPIO_1_8V_REG_ARRAY_SIZE (GPIO_1_8V_MEM_SIZE >> 2) static int aspeed_evaluate_irq(GPIOSets *regs, int gpio_prev_high, int gpio) { @@ -261,7 +255,7 @@ static void aspeed_gpio_update(AspeedGPIOState *s, GPIOSets *regs, diff = old ^ new; if (diff) { - for (gpio = 0; gpio < GPIOS_PER_REG; gpio++) { + for (gpio = 0; gpio < ASPEED_GPIOS_PER_SET; gpio++) { uint32_t mask = 1 << gpio; /* If the gpio needs to be updated... */ @@ -285,8 +279,7 @@ static void aspeed_gpio_update(AspeedGPIOState *s, GPIOSets *regs, if (direction & mask) { /* ...trigger the line-state IRQ */ ptrdiff_t set = aspeed_gpio_set_idx(s, regs); - size_t offset = set * GPIOS_PER_SET + gpio; - qemu_set_irq(s->gpios[offset], !!(new & mask)); + qemu_set_irq(s->gpios[set][gpio], !!(new & mask)); } else { /* ...otherwise if we meet the line's current IRQ policy... */ if (aspeed_evaluate_irq(regs, old & mask, gpio)) { @@ -299,21 +292,6 @@ static void aspeed_gpio_update(AspeedGPIOState *s, GPIOSets *regs, qemu_set_irq(s->irq, !!(s->pending)); } -static uint32_t aspeed_adjust_pin(AspeedGPIOState *s, uint32_t pin) -{ - AspeedGPIOClass *agc = ASPEED_GPIO_GET_CLASS(s); - /* - * The 2500 has a 4 pin gap in group AB and the 2400 has a 4 pin - * gap in group Y (and only four pins in AB but this is the last group so - * it doesn't matter). - */ - if (agc->gap && pin >= agc->gap) { - pin += GPIO_PIN_GAP_SIZE; - } - - return pin; -} - static bool aspeed_gpio_get_pin_level(AspeedGPIOState *s, uint32_t set_idx, uint32_t pin) { @@ -369,7 +347,7 @@ static uint32_t update_value_control_source(GPIOSets *regs, uint32_t old_value, uint32_t new_value = 0; /* for each group in set */ - for (i = 0; i < GPIOS_PER_REG; i += GPIOS_PER_GROUP) { + for (i = 0; i < ASPEED_GPIOS_PER_SET; i += GPIOS_PER_GROUP) { cmd_source = extract32(regs->cmd_source_0, i, 1) | (extract32(regs->cmd_source_1, i, 1) << 1); @@ -382,7 +360,7 @@ static uint32_t update_value_control_source(GPIOSets *regs, uint32_t old_value, return new_value; } -static const AspeedGPIOReg aspeed_3_6v_gpios[GPIO_3_6V_REG_ARRAY_SIZE] = { +static const AspeedGPIOReg aspeed_3_3v_gpios[GPIO_3_3V_REG_ARRAY_SIZE] = { /* Set ABCD */ [GPIO_ABCD_DATA_VALUE] = { 0, gpio_reg_data_value }, [GPIO_ABCD_DIRECTION] = { 0, gpio_reg_direction }, @@ -639,7 +617,7 @@ static void aspeed_gpio_write(void *opaque, hwaddr offset, uint64_t data, * bidirectional | 1 | 1 | data * input only | 1 | 0 | 0 * output only | 0 | 1 | 1 - * no pin / gap | 0 | 0 | 0 + * no pin | 0 | 0 | 0 * * which is captured by: * data = ( data | ~input) & output; @@ -781,7 +759,7 @@ static void aspeed_gpio_set_pin(Object *obj, Visitor *v, const char *name, } /****************** Setup functions ******************/ -static const GPIOSetProperties ast2400_set_props[] = { +static const GPIOSetProperties ast2400_set_props[ASPEED_GPIO_MAX_NR_SETS] = { [0] = {0xffffffff, 0xffffffff, {"A", "B", "C", "D"} }, [1] = {0xffffffff, 0xffffffff, {"E", "F", "G", "H"} }, [2] = {0xffffffff, 0xffffffff, {"I", "J", "K", "L"} }, @@ -791,28 +769,28 @@ static const GPIOSetProperties ast2400_set_props[] = { [6] = {0x0000000f, 0x0fffff0f, {"Y", "Z", "AA", "AB"} }, 
}; -static const GPIOSetProperties ast2500_set_props[] = { +static const GPIOSetProperties ast2500_set_props[ASPEED_GPIO_MAX_NR_SETS] = { [0] = {0xffffffff, 0xffffffff, {"A", "B", "C", "D"} }, [1] = {0xffffffff, 0xffffffff, {"E", "F", "G", "H"} }, [2] = {0xffffffff, 0xffffffff, {"I", "J", "K", "L"} }, [3] = {0xffffffff, 0xffffffff, {"M", "N", "O", "P"} }, [4] = {0xffffffff, 0xffffffff, {"Q", "R", "S", "T"} }, [5] = {0xffffffff, 0x0000ffff, {"U", "V", "W", "X"} }, - [6] = {0xffffff0f, 0x0fffff0f, {"Y", "Z", "AA", "AB"} }, + [6] = {0x0fffffff, 0x0fffffff, {"Y", "Z", "AA", "AB"} }, [7] = {0x000000ff, 0x000000ff, {"AC"} }, }; -static GPIOSetProperties ast2600_3_6v_set_props[] = { +static GPIOSetProperties ast2600_3_3v_set_props[ASPEED_GPIO_MAX_NR_SETS] = { [0] = {0xffffffff, 0xffffffff, {"A", "B", "C", "D"} }, [1] = {0xffffffff, 0xffffffff, {"E", "F", "G", "H"} }, [2] = {0xffffffff, 0xffffffff, {"I", "J", "K", "L"} }, [3] = {0xffffffff, 0xffffffff, {"M", "N", "O", "P"} }, - [4] = {0xffffffff, 0xffffffff, {"Q", "R", "S", "T"} }, - [5] = {0xffffffff, 0x0000ffff, {"U", "V", "W", "X"} }, - [6] = {0xffff0000, 0x0fff0000, {"Y", "Z", "", ""} }, + [4] = {0xffffffff, 0x00ffffff, {"Q", "R", "S", "T"} }, + [5] = {0xffffffff, 0xffffff00, {"U", "V", "W", "X"} }, + [6] = {0x0000ffff, 0x0000ffff, {"Y", "Z"} }, }; -static GPIOSetProperties ast2600_1_8v_set_props[] = { +static GPIOSetProperties ast2600_1_8v_set_props[ASPEED_GPIO_MAX_NR_SETS] = { [0] = {0xffffffff, 0xffffffff, {"18A", "18B", "18C", "18D"} }, [1] = {0x0000000f, 0x0000000f, {"18E"} }, }; @@ -838,18 +816,24 @@ static void aspeed_gpio_realize(DeviceState *dev, Error **errp) AspeedGPIOState *s = ASPEED_GPIO(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); AspeedGPIOClass *agc = ASPEED_GPIO_GET_CLASS(s); - int pin; /* Interrupt parent line */ sysbus_init_irq(sbd, &s->irq); /* Individual GPIOs */ - for (pin = 0; pin < agc->nr_gpio_pins; pin++) { - sysbus_init_irq(sbd, &s->gpios[pin]); + for (int i = 0; i < ASPEED_GPIO_MAX_NR_SETS; i++) { + const GPIOSetProperties *props = &agc->props[i]; + uint32_t skip = ~(props->input | props->output); + for (int j = 0; j < ASPEED_GPIOS_PER_SET; j++) { + if (skip >> j & 1) { + continue; + } + sysbus_init_irq(sbd, &s->gpios[i][j]); + } } memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_gpio_ops, s, - TYPE_ASPEED_GPIO, GPIO_MAX_MEM_SIZE); + TYPE_ASPEED_GPIO, 0x800); sysbus_init_mmio(sbd, &s->iomem); } @@ -858,20 +842,22 @@ static void aspeed_gpio_init(Object *obj) { AspeedGPIOState *s = ASPEED_GPIO(obj); AspeedGPIOClass *agc = ASPEED_GPIO_GET_CLASS(s); - int pin; - - for (pin = 0; pin < agc->nr_gpio_pins; pin++) { - char *name; - int set_idx = pin / GPIOS_PER_SET; - int pin_idx = aspeed_adjust_pin(s, pin) - (set_idx * GPIOS_PER_SET); - int group_idx = pin_idx >> GPIO_GROUP_SHIFT; - const GPIOSetProperties *props = &agc->props[set_idx]; - - name = g_strdup_printf("gpio%s%d", props->group_label[group_idx], - pin_idx % GPIOS_PER_GROUP); - object_property_add(obj, name, "bool", aspeed_gpio_get_pin, - aspeed_gpio_set_pin, NULL, NULL); - g_free(name); + + for (int i = 0; i < ASPEED_GPIO_MAX_NR_SETS; i++) { + const GPIOSetProperties *props = &agc->props[i]; + uint32_t skip = ~(props->input | props->output); + for (int j = 0; j < ASPEED_GPIOS_PER_SET; j++) { + if (skip >> j & 1) { + continue; + } + int group_idx = j / GPIOS_PER_GROUP; + int pin_idx = j % GPIOS_PER_GROUP; + const char *group = &props->group_label[group_idx][0]; + char *name = g_strdup_printf("gpio%s%d", group, pin_idx); + object_property_add(obj, name, 
"bool", aspeed_gpio_get_pin, + aspeed_gpio_set_pin, NULL, NULL); + g_free(name); + } } } @@ -928,8 +914,7 @@ static void aspeed_gpio_ast2400_class_init(ObjectClass *klass, void *data) agc->props = ast2400_set_props; agc->nr_gpio_pins = 216; agc->nr_gpio_sets = 7; - agc->gap = 196; - agc->reg_table = aspeed_3_6v_gpios; + agc->reg_table = aspeed_3_3v_gpios; } static void aspeed_gpio_2500_class_init(ObjectClass *klass, void *data) @@ -939,18 +924,17 @@ static void aspeed_gpio_2500_class_init(ObjectClass *klass, void *data) agc->props = ast2500_set_props; agc->nr_gpio_pins = 228; agc->nr_gpio_sets = 8; - agc->gap = 220; - agc->reg_table = aspeed_3_6v_gpios; + agc->reg_table = aspeed_3_3v_gpios; } -static void aspeed_gpio_ast2600_3_6v_class_init(ObjectClass *klass, void *data) +static void aspeed_gpio_ast2600_3_3v_class_init(ObjectClass *klass, void *data) { AspeedGPIOClass *agc = ASPEED_GPIO_CLASS(klass); - agc->props = ast2600_3_6v_set_props; + agc->props = ast2600_3_3v_set_props; agc->nr_gpio_pins = 208; agc->nr_gpio_sets = 7; - agc->reg_table = aspeed_3_6v_gpios; + agc->reg_table = aspeed_3_3v_gpios; } static void aspeed_gpio_ast2600_1_8v_class_init(ObjectClass *klass, void *data) @@ -986,10 +970,10 @@ static const TypeInfo aspeed_gpio_ast2500_info = { .instance_init = aspeed_gpio_init, }; -static const TypeInfo aspeed_gpio_ast2600_3_6v_info = { +static const TypeInfo aspeed_gpio_ast2600_3_3v_info = { .name = TYPE_ASPEED_GPIO "-ast2600", .parent = TYPE_ASPEED_GPIO, - .class_init = aspeed_gpio_ast2600_3_6v_class_init, + .class_init = aspeed_gpio_ast2600_3_3v_class_init, .instance_init = aspeed_gpio_init, }; @@ -1005,7 +989,7 @@ static void aspeed_gpio_register_types(void) type_register_static(&aspeed_gpio_info); type_register_static(&aspeed_gpio_ast2400_info); type_register_static(&aspeed_gpio_ast2500_info); - type_register_static(&aspeed_gpio_ast2600_3_6v_info); + type_register_static(&aspeed_gpio_ast2600_3_3v_info); type_register_static(&aspeed_gpio_ast2600_1_8v_info); } diff --git a/hw/gpio/bcm2835_gpio.c b/hw/gpio/bcm2835_gpio.c index abdddbc67c2..c995bba1d9f 100644 --- a/hw/gpio/bcm2835_gpio.c +++ b/hw/gpio/bcm2835_gpio.c @@ -299,8 +299,7 @@ static void bcm2835_gpio_init(Object *obj) DeviceState *dev = DEVICE(obj); SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), - TYPE_SD_BUS, DEVICE(s), "sd-bus"); + qbus_init(&s->sdbus, sizeof(s->sdbus), TYPE_SD_BUS, DEVICE(s), "sd-bus"); memory_region_init_io(&s->iomem, obj, &bcm2835_gpio_ops, s, "bcm2835_gpio", 0x1000); diff --git a/hw/gpio/gpio_pwr.c b/hw/gpio/gpio_pwr.c index 7714fa0dc4d..dbaf1c70c88 100644 --- a/hw/gpio/gpio_pwr.c +++ b/hw/gpio/gpio_pwr.c @@ -43,7 +43,7 @@ static void gpio_pwr_reset(void *opaque, int n, int level) static void gpio_pwr_shutdown(void *opaque, int n, int level) { if (level) { - qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } } diff --git a/hw/gpio/meson.build b/hw/gpio/meson.build index 79568f00ce3..7bd6a57264b 100644 --- a/hw/gpio/meson.build +++ b/hw/gpio/meson.build @@ -3,7 +3,6 @@ softmmu_ss.add(when: 'CONFIG_GPIO_KEY', if_true: files('gpio_key.c')) softmmu_ss.add(when: 'CONFIG_GPIO_PWR', if_true: files('gpio_pwr.c')) softmmu_ss.add(when: 'CONFIG_MAX7310', if_true: files('max7310.c')) softmmu_ss.add(when: 'CONFIG_PL061', if_true: files('pl061.c')) -softmmu_ss.add(when: 'CONFIG_PUV3', if_true: files('puv3_gpio.c')) softmmu_ss.add(when: 'CONFIG_ZAURUS', if_true: files('zaurus.c')) 
softmmu_ss.add(when: 'CONFIG_IMX', if_true: files('imx_gpio.c')) diff --git a/hw/gpio/pl061.c b/hw/gpio/pl061.c index e72e77572a0..899be861cc5 100644 --- a/hw/gpio/pl061.c +++ b/hw/gpio/pl061.c @@ -6,28 +6,39 @@ * Written by Paul Brook * * This code is licensed under the GPL. + * + * QEMU interface: + * + sysbus MMIO region 0: the device registers + * + sysbus IRQ: the GPIOINTR interrupt line + * + unnamed GPIO inputs 0..7: inputs to connect to the emulated GPIO lines + * + unnamed GPIO outputs 0..7: the emulated GPIO lines, considered as + * outputs + * + QOM property "pullups": an integer defining whether non-floating lines + * configured as inputs should be pulled up to logical 1 (ie whether in + * real hardware they have a pullup resistor on the line out of the PL061). + * This should be an 8-bit value, where bit 0 is 1 if GPIO line 0 should + * be pulled high, bit 1 configures line 1, and so on. The default is 0xff, + * indicating that all GPIO lines are pulled up to logical 1. + * + QOM property "pulldowns": an integer defining whether non-floating lines + * configured as inputs should be pulled down to logical 0 (ie whether in + * real hardware they have a pulldown resistor on the line out of the PL061). + * This should be an 8-bit value, where bit 0 is 1 if GPIO line 0 should + * be pulled low, bit 1 configures line 1, and so on. The default is 0x0. + * It is an error to set a bit in both "pullups" and "pulldowns". If a bit + * is 0 in both, then the line is considered to be floating, and it will + * not have qemu_set_irq() called on it when it is configured as an input. */ #include "qemu/osdep.h" #include "hw/irq.h" #include "hw/sysbus.h" +#include "hw/qdev-properties.h" #include "migration/vmstate.h" +#include "qapi/error.h" #include "qemu/log.h" #include "qemu/module.h" #include "qom/object.h" - -//#define DEBUG_PL061 1 - -#ifdef DEBUG_PL061 -#define DPRINTF(fmt, ...) \ -do { printf("pl061: " fmt , ## __VA_ARGS__); } while (0) -#define BADF(fmt, ...) \ -do { fprintf(stderr, "pl061: error: " fmt , ## __VA_ARGS__); exit(1);} while (0) -#else -#define DPRINTF(fmt, ...) do {} while(0) -#define BADF(fmt, ...) \ -do { fprintf(stderr, "pl061: error: " fmt , ## __VA_ARGS__);} while (0) -#endif +#include "trace.h" static const uint8_t pl061_id[12] = { 0x00, 0x00, 0x00, 0x00, 0x61, 0x10, 0x04, 0x00, 0x0d, 0xf0, 0x05, 0xb1 }; @@ -67,7 +78,9 @@ struct PL061State { qemu_irq irq; qemu_irq out[N_GPIOS]; const unsigned char *id; - uint32_t rsvd_start; /* reserved area: [rsvd_start, 0xfcc] */ + /* Properties, for non-Luminary PL061 */ + uint32_t pullups; + uint32_t pulldowns; }; static const VMStateDescription vmstate_pl061 = { @@ -100,26 +113,75 @@ static const VMStateDescription vmstate_pl061 = { } }; +static uint8_t pl061_floating(PL061State *s) +{ + /* + * Return mask of bits which correspond to pins configured as inputs + * and which are floating (neither pulled up to 1 nor down to 0). + */ + uint8_t floating; + + if (s->id == pl061_id_luminary) { + /* + * If both PUR and PDR bits are clear, there is neither a pullup + * nor a pulldown in place, and the output truly floats. + */ + floating = ~(s->pur | s->pdr); + } else { + floating = ~(s->pullups | s->pulldowns); + } + return floating & ~s->dir; +} + +static uint8_t pl061_pullups(PL061State *s) +{ + /* + * Return mask of bits which correspond to pins configured as inputs + * and which are pulled up to 1. 
+ */ + uint8_t pullups; + + if (s->id == pl061_id_luminary) { + /* + * The Luminary variant of the PL061 has an extra registers which + * the guest can use to configure whether lines should be pullup + * or pulldown. + */ + pullups = s->pur; + } else { + pullups = s->pullups; + } + return pullups & ~s->dir; +} + static void pl061_update(PL061State *s) { uint8_t changed; uint8_t mask; uint8_t out; int i; - - DPRINTF("dir = %d, data = %d\n", s->dir, s->data); - - /* Outputs float high. */ - /* FIXME: This is board dependent. */ - out = (s->data & s->dir) | ~s->dir; + uint8_t pullups = pl061_pullups(s); + uint8_t floating = pl061_floating(s); + + trace_pl061_update(DEVICE(s)->canonical_path, s->dir, s->data, + pullups, floating); + + /* + * Pins configured as output are driven from the data register; + * otherwise if they're pulled up they're 1, and if they're floating + * then we give them the same value they had previously, so we don't + * report any change to the other end. + */ + out = (s->data & s->dir) | pullups | (s->old_out_data & floating); changed = s->old_out_data ^ out; if (changed) { s->old_out_data = out; for (i = 0; i < N_GPIOS; i++) { mask = 1 << i; if (changed & mask) { - DPRINTF("Set output %d = %d\n", i, (out & mask) != 0); - qemu_set_irq(s->out[i], (out & mask) != 0); + int level = (out & mask) != 0; + trace_pl061_set_output(DEVICE(s)->canonical_path, i, level); + qemu_set_irq(s->out[i], level); } } } @@ -131,7 +193,8 @@ static void pl061_update(PL061State *s) for (i = 0; i < N_GPIOS; i++) { mask = 1 << i; if (changed & mask) { - DPRINTF("Changed input %d = %d\n", i, (s->data & mask) != 0); + trace_pl061_input_change(DEVICE(s)->canonical_path, i, + (s->data & mask) != 0); if (!(s->isense & mask)) { /* Edge interrupt */ @@ -150,7 +213,8 @@ static void pl061_update(PL061State *s) /* Level interrupt */ s->istate |= ~(s->data ^ s->iev) & s->isense; - DPRINTF("istate = %02X\n", s->istate); + trace_pl061_update_istate(DEVICE(s)->canonical_path, + s->istate, s->im, (s->istate & s->im) != 0); qemu_set_irq(s->irq, (s->istate & s->im) != 0); } @@ -159,62 +223,114 @@ static uint64_t pl061_read(void *opaque, hwaddr offset, unsigned size) { PL061State *s = (PL061State *)opaque; + uint64_t r = 0; - if (offset < 0x400) { - return s->data & (offset >> 2); - } - if (offset >= s->rsvd_start && offset <= 0xfcc) { - goto err_out; - } - if (offset >= 0xfd0 && offset < 0x1000) { - return s->id[(offset - 0xfd0) >> 2]; - } switch (offset) { + case 0x0 ... 
0x3ff: /* Data */ + r = s->data & (offset >> 2); + break; case 0x400: /* Direction */ - return s->dir; + r = s->dir; + break; case 0x404: /* Interrupt sense */ - return s->isense; + r = s->isense; + break; case 0x408: /* Interrupt both edges */ - return s->ibe; + r = s->ibe; + break; case 0x40c: /* Interrupt event */ - return s->iev; + r = s->iev; + break; case 0x410: /* Interrupt mask */ - return s->im; + r = s->im; + break; case 0x414: /* Raw interrupt status */ - return s->istate; + r = s->istate; + break; case 0x418: /* Masked interrupt status */ - return s->istate & s->im; + r = s->istate & s->im; + break; case 0x420: /* Alternate function select */ - return s->afsel; + r = s->afsel; + break; case 0x500: /* 2mA drive */ - return s->dr2r; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->dr2r; + break; case 0x504: /* 4mA drive */ - return s->dr4r; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->dr4r; + break; case 0x508: /* 8mA drive */ - return s->dr8r; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->dr8r; + break; case 0x50c: /* Open drain */ - return s->odr; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->odr; + break; case 0x510: /* Pull-up */ - return s->pur; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->pur; + break; case 0x514: /* Pull-down */ - return s->pdr; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->pdr; + break; case 0x518: /* Slew rate control */ - return s->slr; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->slr; + break; case 0x51c: /* Digital enable */ - return s->den; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->den; + break; case 0x520: /* Lock */ - return s->locked; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->locked; + break; case 0x524: /* Commit */ - return s->cr; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->cr; + break; case 0x528: /* Analog mode select */ - return s->amsel; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->amsel; + break; + case 0xfd0 ... 0xfff: /* ID registers */ + r = s->id[(offset - 0xfd0) >> 2]; + break; default: + bad_offset: + qemu_log_mask(LOG_GUEST_ERROR, + "pl061_read: Bad offset %x\n", (int)offset); break; } -err_out: - qemu_log_mask(LOG_GUEST_ERROR, - "pl061_read: Bad offset %x\n", (int)offset); - return 0; + + trace_pl061_read(DEVICE(s)->canonical_path, offset, r); + return r; } static void pl061_write(void *opaque, hwaddr offset, @@ -223,16 +339,14 @@ static void pl061_write(void *opaque, hwaddr offset, PL061State *s = (PL061State *)opaque; uint8_t mask; - if (offset < 0x400) { + trace_pl061_write(DEVICE(s)->canonical_path, offset, value); + + switch (offset) { + case 0 ... 
0x3ff: mask = (offset >> 2) & s->dir; s->data = (s->data & ~mask) | (value & mask); pl061_update(s); return; - } - if (offset >= s->rsvd_start) { - goto err_out; - } - switch (offset) { case 0x400: /* Direction */ s->dir = value & 0xff; break; @@ -256,56 +370,99 @@ static void pl061_write(void *opaque, hwaddr offset, s->afsel = (s->afsel & ~mask) | (value & mask); break; case 0x500: /* 2mA drive */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->dr2r = value & 0xff; break; case 0x504: /* 4mA drive */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->dr4r = value & 0xff; break; case 0x508: /* 8mA drive */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->dr8r = value & 0xff; break; case 0x50c: /* Open drain */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->odr = value & 0xff; break; case 0x510: /* Pull-up */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->pur = value & 0xff; break; case 0x514: /* Pull-down */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->pdr = value & 0xff; break; case 0x518: /* Slew rate control */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->slr = value & 0xff; break; case 0x51c: /* Digital enable */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->den = value & 0xff; break; case 0x520: /* Lock */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->locked = (value != 0xacce551); break; case 0x524: /* Commit */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } if (!s->locked) s->cr = value & 0xff; break; case 0x528: + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->amsel = value & 0xff; break; default: - goto err_out; + bad_offset: + qemu_log_mask(LOG_GUEST_ERROR, + "pl061_write: Bad offset %x\n", (int)offset); + return; } pl061_update(s); return; -err_out: - qemu_log_mask(LOG_GUEST_ERROR, - "pl061_write: Bad offset %x\n", (int)offset); } -static void pl061_reset(DeviceState *dev) +static void pl061_enter_reset(Object *obj, ResetType type) { - PL061State *s = PL061(dev); + PL061State *s = PL061(obj); + + trace_pl061_reset(DEVICE(s)->canonical_path); /* reset values from PL061 TRM, Stellaris LM3S5P31 & LM3S8962 Data Sheet */ + + /* + * FIXME: For the LM3S6965, not all of the PL061 instances have the + * same reset values for GPIOPUR, GPIOAFSEL and GPIODEN, so in theory + * we should allow the board to configure these via properties. + * In practice, we don't wire anything up to the affected GPIO lines + * (PB7, PC0, PC1, PC2, PC3 -- they're used for JTAG), so we can + * get away with this inaccuracy. 
+ */ s->data = 0; - s->old_out_data = 0; s->old_in_data = 0; s->dir = 0; s->isense = 0; @@ -327,6 +484,24 @@ static void pl061_reset(DeviceState *dev) s->amsel = 0; } +static void pl061_hold_reset(Object *obj) +{ + PL061State *s = PL061(obj); + int i, level; + uint8_t floating = pl061_floating(s); + uint8_t pullups = pl061_pullups(s); + + for (i = 0; i < N_GPIOS; i++) { + if (extract32(floating, i, 1)) { + continue; + } + level = extract32(pullups, i, 1); + trace_pl061_set_output(DEVICE(s)->canonical_path, i, level); + qemu_set_irq(s->out[i], level); + } + s->old_out_data = pullups; +} + static void pl061_set_irq(void * opaque, int irq, int level) { PL061State *s = (PL061State *)opaque; @@ -352,7 +527,6 @@ static void pl061_luminary_init(Object *obj) PL061State *s = PL061(obj); s->id = pl061_id_luminary; - s->rsvd_start = 0x52c; } static void pl061_init(Object *obj) @@ -362,7 +536,6 @@ static void pl061_init(Object *obj) SysBusDevice *sbd = SYS_BUS_DEVICE(obj); s->id = pl061_id; - s->rsvd_start = 0x424; memory_region_init_io(&s->iomem, obj, &pl061_ops, s, "pl061", 0x1000); sysbus_init_mmio(sbd, &s->iomem); @@ -371,12 +544,40 @@ static void pl061_init(Object *obj) qdev_init_gpio_out(dev, s->out, N_GPIOS); } +static void pl061_realize(DeviceState *dev, Error **errp) +{ + PL061State *s = PL061(dev); + + if (s->pullups > 0xff) { + error_setg(errp, "pullups property must be between 0 and 0xff"); + return; + } + if (s->pulldowns > 0xff) { + error_setg(errp, "pulldowns property must be between 0 and 0xff"); + return; + } + if (s->pullups & s->pulldowns) { + error_setg(errp, "no bit may be set both in pullups and pulldowns"); + return; + } +} + +static Property pl061_props[] = { + DEFINE_PROP_UINT32("pullups", PL061State, pullups, 0xff), + DEFINE_PROP_UINT32("pulldowns", PL061State, pulldowns, 0x0), + DEFINE_PROP_END_OF_LIST() +}; + static void pl061_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); dc->vmsd = &vmstate_pl061; - dc->reset = &pl061_reset; + dc->realize = pl061_realize; + device_class_set_props(dc, pl061_props); + rc->phases.enter = pl061_enter_reset; + rc->phases.hold = pl061_hold_reset; } static const TypeInfo pl061_info = { diff --git a/hw/gpio/puv3_gpio.c b/hw/gpio/puv3_gpio.c deleted file mode 100644 index e003ae505cf..00000000000 --- a/hw/gpio/puv3_gpio.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * GPIO device simulation in PKUnity SoC - * - * Copyright (C) 2010-2012 Guan Xuetao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation, or any later version. - * See the COPYING file in the top-level directory. 
- */ - -#include "qemu/osdep.h" -#include "hw/sysbus.h" -#include "qom/object.h" - -#undef DEBUG_PUV3 -#include "hw/unicore32/puv3.h" -#include "qemu/module.h" -#include "qemu/log.h" - -#define TYPE_PUV3_GPIO "puv3_gpio" -OBJECT_DECLARE_SIMPLE_TYPE(PUV3GPIOState, PUV3_GPIO) - -struct PUV3GPIOState { - SysBusDevice parent_obj; - - MemoryRegion iomem; - qemu_irq irq[9]; - - uint32_t reg_GPLR; - uint32_t reg_GPDR; - uint32_t reg_GPIR; -}; - -static uint64_t puv3_gpio_read(void *opaque, hwaddr offset, - unsigned size) -{ - PUV3GPIOState *s = opaque; - uint32_t ret = 0; - - switch (offset) { - case 0x00: - ret = s->reg_GPLR; - break; - case 0x04: - ret = s->reg_GPDR; - break; - case 0x20: - ret = s->reg_GPIR; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad read offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } - DPRINTF("offset 0x%x, value 0x%x\n", offset, ret); - - return ret; -} - -static void puv3_gpio_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) -{ - PUV3GPIOState *s = opaque; - - DPRINTF("offset 0x%x, value 0x%x\n", offset, value); - switch (offset) { - case 0x04: - s->reg_GPDR = value; - break; - case 0x08: - if (s->reg_GPDR & value) { - s->reg_GPLR |= value; - } else { - qemu_log_mask(LOG_GUEST_ERROR, "%s: Write gpio input port\n", - __func__); - } - break; - case 0x0c: - if (s->reg_GPDR & value) { - s->reg_GPLR &= ~value; - } else { - qemu_log_mask(LOG_GUEST_ERROR, "%s: Write gpio input port\n", - __func__); - } - break; - case 0x10: /* GRER */ - case 0x14: /* GFER */ - case 0x18: /* GEDR */ - break; - case 0x20: /* GPIR */ - s->reg_GPIR = value; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad write offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } -} - -static const MemoryRegionOps puv3_gpio_ops = { - .read = puv3_gpio_read, - .write = puv3_gpio_write, - .impl = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void puv3_gpio_realize(DeviceState *dev, Error **errp) -{ - PUV3GPIOState *s = PUV3_GPIO(dev); - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - - s->reg_GPLR = 0; - s->reg_GPDR = 0; - - /* FIXME: these irqs not handled yet */ - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW0]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW1]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW2]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW3]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW4]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW5]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW6]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW7]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOHIGH]); - - memory_region_init_io(&s->iomem, OBJECT(s), &puv3_gpio_ops, s, "puv3_gpio", - PUV3_REGS_OFFSET); - sysbus_init_mmio(sbd, &s->iomem); -} - -static void puv3_gpio_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = puv3_gpio_realize; -} - -static const TypeInfo puv3_gpio_info = { - .name = TYPE_PUV3_GPIO, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(PUV3GPIOState), - .class_init = puv3_gpio_class_init, -}; - -static void puv3_gpio_register_type(void) -{ - type_register_static(&puv3_gpio_info); -} - -type_init(puv3_gpio_register_type) diff --git a/hw/gpio/trace-events b/hw/gpio/trace-events index 46ab9323bd0..1dab99c5604 100644 --- a/hw/gpio/trace-events +++ b/hw/gpio/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
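+# Example (not part of this patch): the pl061 trace points added below can be
+# enabled at run time with "-trace 'pl061_*'" on the QEMU command line, or
+# with "trace-event pl061_read on" in the HMP monitor.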
# npcm7xx_gpio.c npcm7xx_gpio_read(const char *id, uint64_t offset, uint64_t value) " %s offset: 0x%04" PRIx64 " value 0x%08" PRIx64 @@ -13,6 +13,15 @@ nrf51_gpio_write(uint64_t offset, uint64_t value) "offset 0x%" PRIx64 " value 0x nrf51_gpio_set(int64_t line, int64_t value) "line %" PRIi64 " value %" PRIi64 nrf51_gpio_update_output_irq(int64_t line, int64_t value) "line %" PRIi64 " value %" PRIi64 +# pl061.c +pl061_update(const char *id, uint32_t dir, uint32_t data, uint32_t pullups, uint32_t floating) "%s GPIODIR 0x%x GPIODATA 0x%x pullups 0x%x floating 0x%x" +pl061_set_output(const char *id, int gpio, int level) "%s setting output %d to %d" +pl061_input_change(const char *id, int gpio, int level) "%s input %d changed to %d" +pl061_update_istate(const char *id, uint32_t istate, uint32_t im, int level) "%s GPIORIS 0x%x GPIOIE 0x%x interrupt level %d" +pl061_read(const char *id, uint64_t offset, uint64_t r) "%s offset 0x%" PRIx64 " value 0x%" PRIx64 +pl061_write(const char *id, uint64_t offset, uint64_t value) "%s offset 0x%" PRIx64 " value 0x%" PRIx64 +pl061_reset(const char *id) "%s reset" + # sifive_gpio.c sifive_gpio_read(uint64_t offset, uint64_t r) "offset 0x%" PRIx64 " value 0x%" PRIx64 sifive_gpio_write(uint64_t offset, uint64_t value) "offset 0x%" PRIx64 " value 0x%" PRIx64 diff --git a/hw/hppa/dino.c b/hw/hppa/dino.c index 5b82c9440d1..eab96dd84ef 100644 --- a/hw/hppa/dino.c +++ b/hw/hppa/dino.c @@ -14,13 +14,11 @@ #include "qemu/module.h" #include "qemu/units.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/irq.h" #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" #include "migration/vmstate.h" #include "hppa_sys.h" -#include "exec/address-spaces.h" #include "trace.h" #include "qom/object.h" diff --git a/hw/hppa/lasi.c b/hw/hppa/lasi.c index 1a856579484..88c3791eb68 100644 --- a/hw/hppa/lasi.c +++ b/hw/hppa/lasi.c @@ -13,9 +13,7 @@ #include "qemu/units.h" #include "qemu/log.h" #include "qapi/error.h" -#include "cpu.h" #include "trace.h" -#include "hw/hw.h" #include "hw/irq.h" #include "sysemu/sysemu.h" #include "sysemu/runstate.h" @@ -24,7 +22,6 @@ #include "hw/char/parallel.h" #include "hw/char/serial.h" #include "hw/input/lasips2.h" -#include "exec/address-spaces.h" #include "migration/vmstate.h" #include "qom/object.h" diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c index f2b71db9bd7..2a46af5bc9b 100644 --- a/hw/hppa/machine.c +++ b/hw/hppa/machine.c @@ -9,7 +9,6 @@ #include "cpu.h" #include "elf.h" #include "hw/loader.h" -#include "hw/boards.h" #include "qemu/error-report.h" #include "sysemu/reset.h" #include "sysemu/sysemu.h" diff --git a/hw/hppa/trace-events b/hw/hppa/trace-events index 3ff620319a6..3f42be9056f 100644 --- a/hw/hppa/trace-events +++ b/hw/hppa/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# pci.c hppa_pci_iack_write(void) "" diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c index 984caf898dc..dbce3b35fba 100644 --- a/hw/hyperv/vmbus.c +++ b/hw/hyperv/vmbus.c @@ -2372,6 +2372,14 @@ static void vmbus_dev_realize(DeviceState *dev, Error **errp) assert(!qemu_uuid_is_null(&vdev->instanceid)); + if (!qemu_uuid_is_null(&vdc->instanceid)) { + /* Class wants to only have a single instance with a fixed UUID */ + if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) { + error_setg(&err, "instance id can't be changed"); + goto error_out; + } + } + /* Check for instance id collision for this class id */ QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) { VMBusDevice *child_dev = VMBUS_DEVICE(child->child); @@ -2438,18 +2446,22 @@ static void vmbus_dev_unrealize(DeviceState *dev) free_channels(vdev); } +static Property vmbus_dev_props[] = { + DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid), + DEFINE_PROP_END_OF_LIST() +}; + + static void vmbus_dev_class_init(ObjectClass *klass, void *data) { DeviceClass *kdev = DEVICE_CLASS(klass); + device_class_set_props(kdev, vmbus_dev_props); kdev->bus_type = TYPE_VMBUS; kdev->realize = vmbus_dev_realize; kdev->unrealize = vmbus_dev_unrealize; kdev->reset = vmbus_dev_reset; } -static Property vmbus_dev_instanceid = - DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid); - static void vmbus_dev_instance_init(Object *obj) { VMBusDevice *vdev = VMBUS_DEVICE(obj); @@ -2458,8 +2470,6 @@ static void vmbus_dev_instance_init(Object *obj) if (!qemu_uuid_is_null(&vdc->instanceid)) { /* Class wants to only have a single instance with a fixed UUID */ vdev->instanceid = vdc->instanceid; - } else { - qdev_property_add_static(DEVICE(vdev), &vmbus_dev_instanceid); } } @@ -2719,7 +2729,7 @@ static void vmbus_bridge_realize(DeviceState *dev, Error **errp) return; } - bridge->bus = VMBUS(qbus_create(TYPE_VMBUS, dev, "vmbus")); + bridge->bus = VMBUS(qbus_new(TYPE_VMBUS, dev, "vmbus")); } static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev) diff --git a/hw/i2c/Kconfig b/hw/i2c/Kconfig index 09642a6dcb7..8217cb50411 100644 --- a/hw/i2c/Kconfig +++ b/hw/i2c/Kconfig @@ -28,3 +28,11 @@ config IMX_I2C config MPC_I2C bool select I2C + +config PCA954X + bool + select I2C + +config PMBUS + bool + select SMBUS diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c index 518a3f5c6f9..03a4f5a9101 100644 --- a/hw/i2c/aspeed_i2c.c +++ b/hw/i2c/aspeed_i2c.c @@ -601,7 +601,7 @@ static void aspeed_i2c_bus_write(void *opaque, hwaddr offset, break; } - bus->dma_addr = value & 0xfffffffc; + bus->dma_addr = value & 0x3ffffffc; break; case I2CD_DMA_LEN: @@ -740,20 +740,20 @@ static const VMStateDescription aspeed_i2c_vmstate = { static void aspeed_i2c_reset(DeviceState *dev) { - int i; AspeedI2CState *s = ASPEED_I2C(dev); - AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(s); s->intr_status = 0; +} + +static void aspeed_i2c_instance_init(Object *obj) +{ + AspeedI2CState *s = ASPEED_I2C(obj); + AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(s); + int i; for (i = 0; i < aic->num_busses; i++) { - s->busses[i].intr_ctrl = 0; - s->busses[i].intr_status = 0; - s->busses[i].cmd = 0; - s->busses[i].buf = 0; - s->busses[i].dma_addr = 0; - s->busses[i].dma_len = 0; - i2c_end_transfer(s->busses[i].bus); + object_initialize_child(obj, "bus[*]", &s->busses[i], + TYPE_ASPEED_I2C_BUS); } } @@ -791,17 +791,21 @@ static void aspeed_i2c_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(sbd, &s->iomem); for (i = 0; i < aic->num_busses; i++) { - char name[32]; + Object *bus = 
OBJECT(&s->busses[i]); int offset = i < aic->gap ? 1 : 5; - sysbus_init_irq(sbd, &s->busses[i].irq); - snprintf(name, sizeof(name), "aspeed.i2c.%d", i); - s->busses[i].controller = s; - s->busses[i].id = i; - s->busses[i].bus = i2c_init_bus(dev, name); - memory_region_init_io(&s->busses[i].mr, OBJECT(dev), - &aspeed_i2c_bus_ops, &s->busses[i], name, - aic->reg_size); + if (!object_property_set_link(bus, "controller", OBJECT(s), errp)) { + return; + } + + if (!object_property_set_uint(bus, "bus-id", i, errp)) { + return; + } + + if (!sysbus_realize(SYS_BUS_DEVICE(bus), errp)) { + return; + } + memory_region_add_subregion(&s->iomem, aic->reg_size * (i + offset), &s->busses[i].mr); } @@ -816,7 +820,8 @@ static void aspeed_i2c_realize(DeviceState *dev, Error **errp) return; } - address_space_init(&s->dram_as, s->dram_mr, "dma-dram"); + address_space_init(&s->dram_as, s->dram_mr, + TYPE_ASPEED_I2C "-dma-dram"); } } @@ -840,12 +845,72 @@ static void aspeed_i2c_class_init(ObjectClass *klass, void *data) static const TypeInfo aspeed_i2c_info = { .name = TYPE_ASPEED_I2C, .parent = TYPE_SYS_BUS_DEVICE, + .instance_init = aspeed_i2c_instance_init, .instance_size = sizeof(AspeedI2CState), .class_init = aspeed_i2c_class_init, .class_size = sizeof(AspeedI2CClass), .abstract = true, }; +static void aspeed_i2c_bus_reset(DeviceState *dev) +{ + AspeedI2CBus *s = ASPEED_I2C_BUS(dev); + + s->intr_ctrl = 0; + s->intr_status = 0; + s->cmd = 0; + s->buf = 0; + s->dma_addr = 0; + s->dma_len = 0; + i2c_end_transfer(s->bus); +} + +static void aspeed_i2c_bus_realize(DeviceState *dev, Error **errp) +{ + AspeedI2CBus *s = ASPEED_I2C_BUS(dev); + AspeedI2CClass *aic; + g_autofree char *name = g_strdup_printf(TYPE_ASPEED_I2C_BUS ".%d", s->id); + + if (!s->controller) { + error_setg(errp, TYPE_ASPEED_I2C_BUS ": 'controller' link not set"); + return; + } + + aic = ASPEED_I2C_GET_CLASS(s->controller); + + sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq); + + s->bus = i2c_init_bus(dev, name); + + memory_region_init_io(&s->mr, OBJECT(s), &aspeed_i2c_bus_ops, + s, name, aic->reg_size); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mr); +} + +static Property aspeed_i2c_bus_properties[] = { + DEFINE_PROP_UINT8("bus-id", AspeedI2CBus, id, 0), + DEFINE_PROP_LINK("controller", AspeedI2CBus, controller, TYPE_ASPEED_I2C, + AspeedI2CState *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void aspeed_i2c_bus_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "Aspeed I2C Bus"; + dc->realize = aspeed_i2c_bus_realize; + dc->reset = aspeed_i2c_bus_reset; + device_class_set_props(dc, aspeed_i2c_bus_properties); +} + +static const TypeInfo aspeed_i2c_bus_info = { + .name = TYPE_ASPEED_I2C_BUS, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(AspeedI2CBus), + .class_init = aspeed_i2c_bus_class_init, +}; + static qemu_irq aspeed_2400_i2c_bus_get_irq(AspeedI2CBus *bus) { return bus->controller->irq; @@ -950,6 +1015,7 @@ static const TypeInfo aspeed_2600_i2c_info = { static void aspeed_i2c_register_types(void) { + type_register_static(&aspeed_i2c_bus_info); type_register_static(&aspeed_i2c_info); type_register_static(&aspeed_2400_i2c_info); type_register_static(&aspeed_2500_i2c_info); diff --git a/hw/i2c/core.c b/hw/i2c/core.c index 21ec52ac5ad..0e7d2763b9e 100644 --- a/hw/i2c/core.c +++ b/hw/i2c/core.c @@ -60,13 +60,13 @@ I2CBus *i2c_init_bus(DeviceState *parent, const char *name) { I2CBus *bus; - bus = I2C_BUS(qbus_create(TYPE_I2C_BUS, parent, name)); + bus = I2C_BUS(qbus_new(TYPE_I2C_BUS, 
parent, name)); QLIST_INIT(&bus->current_devs); vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_i2c_bus, bus); return bus; } -void i2c_set_slave_address(I2CSlave *dev, uint8_t address) +void i2c_slave_set_address(I2CSlave *dev, uint8_t address) { dev->address = address; } @@ -77,6 +77,30 @@ int i2c_bus_busy(I2CBus *bus) return !QLIST_EMPTY(&bus->current_devs); } +bool i2c_scan_bus(I2CBus *bus, uint8_t address, bool broadcast, + I2CNodeList *current_devs) +{ + BusChild *kid; + + QTAILQ_FOREACH(kid, &bus->qbus.children, sibling) { + DeviceState *qdev = kid->child; + I2CSlave *candidate = I2C_SLAVE(qdev); + I2CSlaveClass *sc = I2C_SLAVE_GET_CLASS(candidate); + + if (sc->match_and_add(candidate, address, broadcast, current_devs)) { + if (!broadcast) { + return true; + } + } + } + + /* + * If broadcast was true, and the list was full or empty, return true. If + * broadcast was false, return false. + */ + return broadcast; +} + /* TODO: Make this handle multiple masters. */ /* * Start or continue an i2c transaction. When this is called for the @@ -90,10 +114,12 @@ int i2c_bus_busy(I2CBus *bus) * protocol uses a start transfer to switch from write to read mode * without releasing the bus. If that fails, the bus is still * in a transaction. + * + * @event must be I2C_START_RECV or I2C_START_SEND. */ -int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv) +static int i2c_do_start_transfer(I2CBus *bus, uint8_t address, + enum i2c_event event) { - BusChild *kid; I2CSlaveClass *sc; I2CNode *node; bool bus_scanned = false; @@ -115,18 +141,8 @@ int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv) * terminating the previous transaction. */ if (QLIST_EMPTY(&bus->current_devs)) { - QTAILQ_FOREACH(kid, &bus->qbus.children, sibling) { - DeviceState *qdev = kid->child; - I2CSlave *candidate = I2C_SLAVE(qdev); - if ((candidate->address == address) || (bus->broadcast)) { - node = g_malloc(sizeof(struct I2CNode)); - node->elt = candidate; - QLIST_INSERT_HEAD(&bus->current_devs, node, next); - if (!bus->broadcast) { - break; - } - } - } + /* Disregard whether devices were found. */ + (void)i2c_scan_bus(bus, address, bus->broadcast, &bus->current_devs); bus_scanned = true; } @@ -144,7 +160,7 @@ int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv) if (sc->event) { trace_i2c_event("start", s->address); - rv = sc->event(s, recv ? I2C_START_RECV : I2C_START_SEND); + rv = sc->event(s, event); if (rv && !bus->broadcast) { if (bus_scanned) { /* First call, terminate the transfer. */ @@ -157,6 +173,23 @@ int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv) return 0; } +int i2c_start_transfer(I2CBus *bus, uint8_t address, bool is_recv) +{ + return i2c_do_start_transfer(bus, address, is_recv + ? 
I2C_START_RECV + : I2C_START_SEND); +} + +int i2c_start_recv(I2CBus *bus, uint8_t address) +{ + return i2c_do_start_transfer(bus, address, I2C_START_RECV); +} + +int i2c_start_send(I2CBus *bus, uint8_t address) +{ + return i2c_do_start_transfer(bus, address, I2C_START_SEND); +} + void i2c_end_transfer(I2CBus *bus) { I2CSlaveClass *sc; @@ -175,50 +208,42 @@ void i2c_end_transfer(I2CBus *bus) bus->broadcast = false; } -int i2c_send_recv(I2CBus *bus, uint8_t *data, bool send) +int i2c_send(I2CBus *bus, uint8_t data) { I2CSlaveClass *sc; I2CSlave *s; I2CNode *node; int ret = 0; - if (send) { - QLIST_FOREACH(node, &bus->current_devs, next) { - s = node->elt; - sc = I2C_SLAVE_GET_CLASS(s); - if (sc->send) { - trace_i2c_send(s->address, *data); - ret = ret || sc->send(s, *data); - } else { - ret = -1; - } - } - return ret ? -1 : 0; - } else { - ret = 0xff; - if (!QLIST_EMPTY(&bus->current_devs) && !bus->broadcast) { - sc = I2C_SLAVE_GET_CLASS(QLIST_FIRST(&bus->current_devs)->elt); - if (sc->recv) { - s = QLIST_FIRST(&bus->current_devs)->elt; - ret = sc->recv(s); - trace_i2c_recv(s->address, ret); - } + QLIST_FOREACH(node, &bus->current_devs, next) { + s = node->elt; + sc = I2C_SLAVE_GET_CLASS(s); + if (sc->send) { + trace_i2c_send(s->address, data); + ret = ret || sc->send(s, data); + } else { + ret = -1; } - *data = ret; - return 0; } -} -int i2c_send(I2CBus *bus, uint8_t data) -{ - return i2c_send_recv(bus, &data, true); + return ret ? -1 : 0; } uint8_t i2c_recv(I2CBus *bus) { uint8_t data = 0xff; + I2CSlaveClass *sc; + I2CSlave *s; + + if (!QLIST_EMPTY(&bus->current_devs) && !bus->broadcast) { + sc = I2C_SLAVE_GET_CLASS(QLIST_FIRST(&bus->current_devs)->elt); + if (sc->recv) { + s = QLIST_FIRST(&bus->current_devs)->elt; + data = sc->recv(s); + trace_i2c_recv(s->address, data); + } + } - i2c_send_recv(bus, &data, false); return data; } @@ -290,12 +315,28 @@ I2CSlave *i2c_slave_create_simple(I2CBus *bus, const char *name, uint8_t addr) return dev; } +static bool i2c_slave_match(I2CSlave *candidate, uint8_t address, + bool broadcast, I2CNodeList *current_devs) +{ + if ((candidate->address == address) || (broadcast)) { + I2CNode *node = g_malloc(sizeof(struct I2CNode)); + node->elt = candidate; + QLIST_INSERT_HEAD(current_devs, node, next); + return true; + } + + /* Not found and not broadcast. */ + return false; +} + static void i2c_slave_class_init(ObjectClass *klass, void *data) { DeviceClass *k = DEVICE_CLASS(klass); + I2CSlaveClass *sc = I2C_SLAVE_CLASS(klass); set_bit(DEVICE_CATEGORY_MISC, k->categories); k->bus_type = TYPE_I2C_BUS; device_class_set_props(k, i2c_props); + sc->match_and_add = i2c_slave_match; } static const TypeInfo i2c_slave_type_info = { diff --git a/hw/i2c/i2c_mux_pca954x.c b/hw/i2c/i2c_mux_pca954x.c new file mode 100644 index 00000000000..847c59921cf --- /dev/null +++ b/hw/i2c/i2c_mux_pca954x.c @@ -0,0 +1,290 @@ +/* + * I2C multiplexer for PCA954x series of I2C multiplexer/switch chips. + * + * Copyright 2021 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/i2c/i2c.h" +#include "hw/i2c/i2c_mux_pca954x.h" +#include "hw/i2c/smbus_slave.h" +#include "hw/qdev-core.h" +#include "hw/sysbus.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "qemu/queue.h" +#include "qom/object.h" +#include "trace.h" + +#define PCA9548_CHANNEL_COUNT 8 +#define PCA9546_CHANNEL_COUNT 4 + +/* + * struct Pca954xChannel - The i2c mux device will have N of these states + * that own the i2c channel bus. + * @bus: The owned channel bus. + * @enabled: Is this channel active? + */ +typedef struct Pca954xChannel { + SysBusDevice parent; + + I2CBus *bus; + + bool enabled; +} Pca954xChannel; + +#define TYPE_PCA954X_CHANNEL "pca954x-channel" +#define PCA954X_CHANNEL(obj) \ + OBJECT_CHECK(Pca954xChannel, (obj), TYPE_PCA954X_CHANNEL) + +/* + * struct Pca954xState - The pca954x state object. + * @control: The value written to the mux control. + * @channel: The set of i2c channel buses that act as channels which own the + * i2c children. + */ +typedef struct Pca954xState { + SMBusDevice parent; + + uint8_t control; + + /* The channel i2c buses. */ + Pca954xChannel channel[PCA9548_CHANNEL_COUNT]; +} Pca954xState; + +/* + * struct Pca954xClass - The pca954x class object. + * @nchans: The number of i2c channels this device has. + */ +typedef struct Pca954xClass { + SMBusDeviceClass parent; + + uint8_t nchans; +} Pca954xClass; + +#define TYPE_PCA954X "pca954x" +OBJECT_DECLARE_TYPE(Pca954xState, Pca954xClass, PCA954X) + +/* + * For each channel, if it's enabled, recursively call match on those children. + */ +static bool pca954x_match(I2CSlave *candidate, uint8_t address, + bool broadcast, + I2CNodeList *current_devs) +{ + Pca954xState *mux = PCA954X(candidate); + Pca954xClass *mc = PCA954X_GET_CLASS(mux); + int i; + + /* They are talking to the mux itself (or all devices enabled). */ + if ((candidate->address == address) || broadcast) { + I2CNode *node = g_malloc(sizeof(struct I2CNode)); + node->elt = candidate; + QLIST_INSERT_HEAD(current_devs, node, next); + if (!broadcast) { + return true; + } + } + + for (i = 0; i < mc->nchans; i++) { + if (!mux->channel[i].enabled) { + continue; + } + + if (i2c_scan_bus(mux->channel[i].bus, address, broadcast, + current_devs)) { + if (!broadcast) { + return true; + } + } + } + + /* If we arrived here we didn't find a match, return broadcast. */ + return broadcast; +} + +static void pca954x_enable_channel(Pca954xState *s, uint8_t enable_mask) +{ + Pca954xClass *mc = PCA954X_GET_CLASS(s); + int i; + + /* + * For each channel, check if their bit is set in enable_mask and if yes, + * enable it, otherwise disable, hide it. + */ + for (i = 0; i < mc->nchans; i++) { + if (enable_mask & (1 << i)) { + s->channel[i].enabled = true; + } else { + s->channel[i].enabled = false; + } + } +} + +static void pca954x_write(Pca954xState *s, uint8_t data) +{ + s->control = data; + pca954x_enable_channel(s, data); + + trace_pca954x_write_bytes(data); +} + +static int pca954x_write_data(SMBusDevice *d, uint8_t *buf, uint8_t len) +{ + Pca954xState *s = PCA954X(d); + + if (len == 0) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: writing empty data\n", __func__); + return -1; + } + + /* + * len should be 1, because they write one byte to enable/disable channels. 
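+ * As an illustration of the mask decoded by pca954x_enable_channel() above:
+ * a write of 0x05 enables channels 0 and 2 and hides the remaining children,
+ * while a write of 0x00 disconnects every downstream device from the bus.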
+ */ + if (len > 1) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: extra data after channel selection mask\n", + __func__); + return -1; + } + + pca954x_write(s, buf[0]); + return 0; +} + +static uint8_t pca954x_read_byte(SMBusDevice *d) +{ + Pca954xState *s = PCA954X(d); + uint8_t data = s->control; + trace_pca954x_read_data(data); + return data; +} + +static void pca954x_enter_reset(Object *obj, ResetType type) +{ + Pca954xState *s = PCA954X(obj); + /* Reset will disable all channels. */ + pca954x_write(s, 0); +} + +I2CBus *pca954x_i2c_get_bus(I2CSlave *mux, uint8_t channel) +{ + Pca954xClass *pc = PCA954X_GET_CLASS(mux); + Pca954xState *pca954x = PCA954X(mux); + + g_assert(channel < pc->nchans); + return I2C_BUS(qdev_get_child_bus(DEVICE(&pca954x->channel[channel]), + "i2c-bus")); +} + +static void pca954x_channel_init(Object *obj) +{ + Pca954xChannel *s = PCA954X_CHANNEL(obj); + s->bus = i2c_init_bus(DEVICE(s), "i2c-bus"); + + /* Start all channels as disabled. */ + s->enabled = false; +} + +static void pca954x_channel_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + dc->desc = "Pca954x Channel"; +} + +static void pca9546_class_init(ObjectClass *klass, void *data) +{ + Pca954xClass *s = PCA954X_CLASS(klass); + s->nchans = PCA9546_CHANNEL_COUNT; +} + +static void pca9548_class_init(ObjectClass *klass, void *data) +{ + Pca954xClass *s = PCA954X_CLASS(klass); + s->nchans = PCA9548_CHANNEL_COUNT; +} + +static void pca954x_realize(DeviceState *dev, Error **errp) +{ + Pca954xState *s = PCA954X(dev); + Pca954xClass *c = PCA954X_GET_CLASS(s); + int i; + + /* SMBus modules. Cannot fail. */ + for (i = 0; i < c->nchans; i++) { + sysbus_realize(SYS_BUS_DEVICE(&s->channel[i]), &error_abort); + } +} + +static void pca954x_init(Object *obj) +{ + Pca954xState *s = PCA954X(obj); + Pca954xClass *c = PCA954X_GET_CLASS(obj); + int i; + + /* Only initialize the children we expect. 
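+ * Boards attach devices behind a given channel via pca954x_i2c_get_bus()
+ * above; a hypothetical example (device model and address are only
+ * illustrative):
+ *   i2c_slave_create_simple(pca954x_i2c_get_bus(mux, 2), "tmp105", 0x48);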
*/ + for (i = 0; i < c->nchans; i++) { + object_initialize_child(obj, "channel[*]", &s->channel[i], + TYPE_PCA954X_CHANNEL); + } +} + +static void pca954x_class_init(ObjectClass *klass, void *data) +{ + I2CSlaveClass *sc = I2C_SLAVE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + SMBusDeviceClass *k = SMBUS_DEVICE_CLASS(klass); + + sc->match_and_add = pca954x_match; + + rc->phases.enter = pca954x_enter_reset; + + dc->desc = "Pca954x i2c-mux"; + dc->realize = pca954x_realize; + + k->write_data = pca954x_write_data; + k->receive_byte = pca954x_read_byte; +} + +static const TypeInfo pca954x_info[] = { + { + .name = TYPE_PCA954X, + .parent = TYPE_SMBUS_DEVICE, + .instance_size = sizeof(Pca954xState), + .instance_init = pca954x_init, + .class_size = sizeof(Pca954xClass), + .class_init = pca954x_class_init, + .abstract = true, + }, + { + .name = TYPE_PCA9546, + .parent = TYPE_PCA954X, + .class_init = pca9546_class_init, + }, + { + .name = TYPE_PCA9548, + .parent = TYPE_PCA954X, + .class_init = pca9548_class_init, + }, + { + .name = TYPE_PCA954X_CHANNEL, + .parent = TYPE_SYS_BUS_DEVICE, + .class_init = pca954x_channel_class_init, + .instance_size = sizeof(Pca954xChannel), + .instance_init = pca954x_channel_init, + } +}; + +DEFINE_TYPES(pca954x_info) diff --git a/hw/i2c/imx_i2c.c b/hw/i2c/imx_i2c.c index 2e02e1c4faa..9792583fea7 100644 --- a/hw/i2c/imx_i2c.c +++ b/hw/i2c/imx_i2c.c @@ -171,7 +171,7 @@ static void imx_i2c_write(void *opaque, hwaddr offset, switch (offset) { case IADR_ADDR: s->iadr = value & IADR_MASK; - /* i2c_set_slave_address(s->bus, (uint8_t)s->iadr); */ + /* i2c_slave_set_address(s->bus, (uint8_t)s->iadr); */ break; case IFDR_ADDR: s->ifdr = value & IFDR_MASK; diff --git a/hw/i2c/meson.build b/hw/i2c/meson.build index cdcd694a7fb..d3df273251f 100644 --- a/hw/i2c/meson.build +++ b/hw/i2c/meson.build @@ -14,4 +14,6 @@ i2c_ss.add(when: 'CONFIG_SMBUS_EEPROM', if_true: files('smbus_eeprom.c')) i2c_ss.add(when: 'CONFIG_VERSATILE_I2C', if_true: files('versatile_i2c.c')) i2c_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_i2c.c')) i2c_ss.add(when: 'CONFIG_PPC4XX', if_true: files('ppc4xx_i2c.c')) +i2c_ss.add(when: 'CONFIG_PCA954X', if_true: files('i2c_mux_pca954x.c')) +i2c_ss.add(when: 'CONFIG_PMBUS', if_true: files('pmbus_device.c')) softmmu_ss.add_all(when: 'CONFIG_I2C', if_true: i2c_ss) diff --git a/hw/i2c/mpc_i2c.c b/hw/i2c/mpc_i2c.c index 720d2331e95..845392505ff 100644 --- a/hw/i2c/mpc_i2c.c +++ b/hw/i2c/mpc_i2c.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "hw/i2c/i2c.h" #include "hw/irq.h" -#include "qemu/log.h" #include "qemu/module.h" #include "hw/sysbus.h" #include "migration/vmstate.h" diff --git a/hw/i2c/pm_smbus.c b/hw/i2c/pm_smbus.c index 06e1e5321b9..d7eae548cbc 100644 --- a/hw/i2c/pm_smbus.c +++ b/hw/i2c/pm_smbus.c @@ -128,14 +128,14 @@ static void smb_transaction(PMSMBus *s) * So at least Linux may or may not set the read bit here. * So just ignore the read bit for this command. 
*/ - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { goto error; } ret = i2c_send(bus, s->smb_data1); if (ret) { goto error; } - if (i2c_start_transfer(bus, addr, 1)) { + if (i2c_start_recv(bus, addr)) { goto error; } s->in_i2c_block_read = true; diff --git a/hw/i2c/pmbus_device.c b/hw/i2c/pmbus_device.c new file mode 100644 index 00000000000..24f8f522d9f --- /dev/null +++ b/hw/i2c/pmbus_device.c @@ -0,0 +1,1612 @@ +/* + * PMBus wrapper over SMBus + * + * Copyright 2021 Google LLC + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include +#include +#include "hw/i2c/pmbus_device.h" +#include "migration/vmstate.h" +#include "qemu/module.h" +#include "qemu/log.h" + +uint16_t pmbus_data2direct_mode(PMBusCoefficients c, uint32_t value) +{ + /* R is usually negative to fit large readings into 16 bits */ + uint16_t y = (c.m * value + c.b) * pow(10, c.R); + return y; +} + +uint32_t pmbus_direct_mode2data(PMBusCoefficients c, uint16_t value) +{ + /* X = (Y * 10^-R - b) / m */ + uint32_t x = (value / pow(10, c.R) - c.b) / c.m; + return x; +} + +void pmbus_send(PMBusDevice *pmdev, const uint8_t *data, uint16_t len) +{ + if (pmdev->out_buf_len + len > SMBUS_DATA_MAX_LEN) { + qemu_log_mask(LOG_GUEST_ERROR, + "PMBus device tried to send too much data"); + len = 0; + } + + for (int i = len - 1; i >= 0; i--) { + pmdev->out_buf[i + pmdev->out_buf_len] = data[len - i - 1]; + } + pmdev->out_buf_len += len; +} + +/* Internal only, convert unsigned ints to the little endian bus */ +static void pmbus_send_uint(PMBusDevice *pmdev, uint64_t data, uint8_t size) +{ + uint8_t bytes[8]; + g_assert(size <= 8); + + for (int i = 0; i < size; i++) { + bytes[i] = data & 0xFF; + data = data >> 8; + } + pmbus_send(pmdev, bytes, size); +} + +void pmbus_send8(PMBusDevice *pmdev, uint8_t data) +{ + pmbus_send_uint(pmdev, data, 1); +} + +void pmbus_send16(PMBusDevice *pmdev, uint16_t data) +{ + pmbus_send_uint(pmdev, data, 2); +} + +void pmbus_send32(PMBusDevice *pmdev, uint32_t data) +{ + pmbus_send_uint(pmdev, data, 4); +} + +void pmbus_send64(PMBusDevice *pmdev, uint64_t data) +{ + pmbus_send_uint(pmdev, data, 8); +} + +void pmbus_send_string(PMBusDevice *pmdev, const char *data) +{ + size_t len = strlen(data); + g_assert(len > 0); + g_assert(len + pmdev->out_buf_len < SMBUS_DATA_MAX_LEN); + pmdev->out_buf[len + pmdev->out_buf_len] = len; + + for (int i = len - 1; i >= 0; i--) { + pmdev->out_buf[i + pmdev->out_buf_len] = data[len - 1 - i]; + } + pmdev->out_buf_len += len + 1; +} + + +static uint64_t pmbus_receive_uint(const uint8_t *buf, uint8_t len) +{ + uint64_t ret = 0; + + /* Exclude command code from return value */ + buf++; + len--; + + for (int i = len - 1; i >= 0; i--) { + ret = ret << 8 | buf[i]; + } + return ret; +} + +uint8_t pmbus_receive8(PMBusDevice *pmdev) +{ + if (pmdev->in_buf_len - 1 != 1) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: length mismatch. Expected 1 byte, got %d bytes\n", + __func__, pmdev->in_buf_len - 1); + } + return pmbus_receive_uint(pmdev->in_buf, pmdev->in_buf_len); +} + +uint16_t pmbus_receive16(PMBusDevice *pmdev) +{ + if (pmdev->in_buf_len - 1 != 2) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: length mismatch. Expected 2 bytes, got %d bytes\n", + __func__, pmdev->in_buf_len - 1); + } + return pmbus_receive_uint(pmdev->in_buf, pmdev->in_buf_len); +} + +uint32_t pmbus_receive32(PMBusDevice *pmdev) +{ + if (pmdev->in_buf_len - 1 != 4) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: length mismatch. 
Expected 4 bytes, got %d bytes\n", + __func__, pmdev->in_buf_len - 1); + } + return pmbus_receive_uint(pmdev->in_buf, pmdev->in_buf_len); +} + +uint64_t pmbus_receive64(PMBusDevice *pmdev) +{ + if (pmdev->in_buf_len - 1 != 8) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: length mismatch. Expected 8 bytes, got %d bytes\n", + __func__, pmdev->in_buf_len - 1); + } + return pmbus_receive_uint(pmdev->in_buf, pmdev->in_buf_len); +} + +static uint8_t pmbus_out_buf_pop(PMBusDevice *pmdev) +{ + if (pmdev->out_buf_len == 0) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: tried to read from empty buffer", + __func__); + return 0xFF; + } + uint8_t data = pmdev->out_buf[pmdev->out_buf_len - 1]; + pmdev->out_buf_len--; + return data; +} + +static void pmbus_quick_cmd(SMBusDevice *smd, uint8_t read) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(smd); + PMBusDeviceClass *pmdc = PMBUS_DEVICE_GET_CLASS(pmdev); + + if (pmdc->quick_cmd) { + pmdc->quick_cmd(pmdev, read); + } +} + +static void pmbus_pages_alloc(PMBusDevice *pmdev) +{ + /* some PMBus devices don't use the PAGE command, so they get 1 page */ + PMBusDeviceClass *k = PMBUS_DEVICE_GET_CLASS(pmdev); + if (k->device_num_pages == 0) { + k->device_num_pages = 1; + } + pmdev->num_pages = k->device_num_pages; + pmdev->pages = g_new0(PMBusPage, k->device_num_pages); +} + +void pmbus_check_limits(PMBusDevice *pmdev) +{ + for (int i = 0; i < pmdev->num_pages; i++) { + if ((pmdev->pages[i].operation & PB_OP_ON) == 0) { + continue; /* don't check powered off devices */ + } + + if (pmdev->pages[i].read_vout > pmdev->pages[i].vout_ov_fault_limit) { + pmdev->pages[i].status_word |= PB_STATUS_VOUT; + pmdev->pages[i].status_vout |= PB_STATUS_VOUT_OV_FAULT; + } + + if (pmdev->pages[i].read_vout > pmdev->pages[i].vout_ov_warn_limit) { + pmdev->pages[i].status_word |= PB_STATUS_VOUT; + pmdev->pages[i].status_vout |= PB_STATUS_VOUT_OV_WARN; + } + + if (pmdev->pages[i].read_vout < pmdev->pages[i].vout_uv_warn_limit) { + pmdev->pages[i].status_word |= PB_STATUS_VOUT; + pmdev->pages[i].status_vout |= PB_STATUS_VOUT_UV_WARN; + } + + if (pmdev->pages[i].read_vout < pmdev->pages[i].vout_uv_fault_limit) { + pmdev->pages[i].status_word |= PB_STATUS_VOUT; + pmdev->pages[i].status_vout |= PB_STATUS_VOUT_UV_FAULT; + } + + if (pmdev->pages[i].read_vin > pmdev->pages[i].vin_ov_warn_limit) { + pmdev->pages[i].status_word |= PB_STATUS_INPUT; + pmdev->pages[i].status_input |= PB_STATUS_INPUT_VIN_OV_WARN; + } + + if (pmdev->pages[i].read_vin < pmdev->pages[i].vin_uv_warn_limit) { + pmdev->pages[i].status_word |= PB_STATUS_INPUT; + pmdev->pages[i].status_input |= PB_STATUS_INPUT_VIN_UV_WARN; + } + + if (pmdev->pages[i].read_iout > pmdev->pages[i].iout_oc_warn_limit) { + pmdev->pages[i].status_word |= PB_STATUS_IOUT_POUT; + pmdev->pages[i].status_iout |= PB_STATUS_IOUT_OC_WARN; + } + + if (pmdev->pages[i].read_iout > pmdev->pages[i].iout_oc_fault_limit) { + pmdev->pages[i].status_word |= PB_STATUS_IOUT_POUT; + pmdev->pages[i].status_iout |= PB_STATUS_IOUT_OC_FAULT; + } + + if (pmdev->pages[i].read_pin > pmdev->pages[i].pin_op_warn_limit) { + pmdev->pages[i].status_word |= PB_STATUS_INPUT; + pmdev->pages[i].status_input |= PB_STATUS_INPUT_PIN_OP_WARN; + } + + if (pmdev->pages[i].read_temperature_1 + > pmdev->pages[i].ot_fault_limit) { + pmdev->pages[i].status_word |= PB_STATUS_TEMPERATURE; + pmdev->pages[i].status_temperature |= PB_STATUS_OT_FAULT; + } + + if (pmdev->pages[i].read_temperature_1 + > pmdev->pages[i].ot_warn_limit) { + pmdev->pages[i].status_word |= PB_STATUS_TEMPERATURE; + 
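+            /*
+             * Both the summary bit in STATUS_WORD and the detailed bit in
+             * STATUS_TEMPERATURE are raised here; every limit check in this
+             * function follows the same summary/detail pairing, and the bits
+             * stay set until the guest issues CLEAR_FAULTS.
+             */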
pmdev->pages[i].status_temperature |= PB_STATUS_OT_WARN; + } + } +} + +static uint8_t pmbus_receive_byte(SMBusDevice *smd) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(smd); + PMBusDeviceClass *pmdc = PMBUS_DEVICE_GET_CLASS(pmdev); + uint8_t ret = 0xFF; + uint8_t index = pmdev->page; + + if (pmdev->out_buf_len != 0) { + ret = pmbus_out_buf_pop(pmdev); + return ret; + } + + switch (pmdev->code) { + case PMBUS_PAGE: + pmbus_send8(pmdev, pmdev->page); + break; + + case PMBUS_OPERATION: /* R/W byte */ + pmbus_send8(pmdev, pmdev->pages[index].operation); + break; + + case PMBUS_ON_OFF_CONFIG: /* R/W byte */ + pmbus_send8(pmdev, pmdev->pages[index].on_off_config); + break; + + case PMBUS_PHASE: /* R/W byte */ + pmbus_send8(pmdev, pmdev->pages[index].phase); + break; + + case PMBUS_WRITE_PROTECT: /* R/W byte */ + pmbus_send8(pmdev, pmdev->pages[index].write_protect); + break; + + case PMBUS_CAPABILITY: + pmbus_send8(pmdev, pmdev->capability); + break; + + case PMBUS_VOUT_MODE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_MODE) { + pmbus_send8(pmdev, pmdev->pages[index].vout_mode); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_COMMAND: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_command); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_TRIM: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_trim); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_CAL_OFFSET: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_cal_offset); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_MAX: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_max); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_MARGIN_HIGH: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_MARGIN) { + pmbus_send16(pmdev, pmdev->pages[index].vout_margin_high); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_MARGIN_LOW: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_MARGIN) { + pmbus_send16(pmdev, pmdev->pages[index].vout_margin_low); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_TRANSITION_RATE: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_transition_rate); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_DROOP: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_droop); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_SCALE_LOOP: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_scale_loop); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_SCALE_MONITOR: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_scale_monitor); + } else { + goto passthough; + } + break; + + /* TODO: implement coefficients support */ + + case PMBUS_POUT_MAX: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_POUT) { + pmbus_send16(pmdev, pmdev->pages[index].pout_max); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_ON: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send16(pmdev, 
pmdev->pages[index].vin_on); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_OFF: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send16(pmdev, pmdev->pages[index].vin_off); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_CAL_GAIN: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT_GAIN) { + pmbus_send16(pmdev, pmdev->pages[index].iout_cal_gain); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_OV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_ov_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_OV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send8(pmdev, pmdev->pages[index].vout_ov_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_OV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_ov_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_UV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_uv_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_UV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_uv_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_UV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send8(pmdev, pmdev->pages[index].vout_uv_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_OC_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send16(pmdev, pmdev->pages[index].iout_oc_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_OC_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send8(pmdev, pmdev->pages[index].iout_oc_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_OC_LV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send16(pmdev, pmdev->pages[index].iout_oc_lv_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_OC_LV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send8(pmdev, pmdev->pages[index].iout_oc_lv_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_OC_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send16(pmdev, pmdev->pages[index].iout_oc_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_UC_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send16(pmdev, pmdev->pages[index].iout_uc_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_UC_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send8(pmdev, pmdev->pages[index].iout_uc_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_OT_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmbus_send16(pmdev, pmdev->pages[index].ot_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_OT_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & 
PB_HAS_TEMPERATURE) { + pmbus_send8(pmdev, pmdev->pages[index].ot_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_OT_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmbus_send16(pmdev, pmdev->pages[index].ot_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_UT_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmbus_send16(pmdev, pmdev->pages[index].ut_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_UT_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmbus_send16(pmdev, pmdev->pages[index].ut_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_UT_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmbus_send8(pmdev, pmdev->pages[index].ut_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_OV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send16(pmdev, pmdev->pages[index].vin_ov_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_OV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send8(pmdev, pmdev->pages[index].vin_ov_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_OV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send16(pmdev, pmdev->pages[index].vin_ov_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_UV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send16(pmdev, pmdev->pages[index].vin_uv_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_UV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send16(pmdev, pmdev->pages[index].vin_uv_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_UV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send8(pmdev, pmdev->pages[index].vin_uv_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_IIN_OC_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN) { + pmbus_send16(pmdev, pmdev->pages[index].iin_oc_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_IIN_OC_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN) { + pmbus_send8(pmdev, pmdev->pages[index].iin_oc_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_IIN_OC_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN) { + pmbus_send16(pmdev, pmdev->pages[index].iin_oc_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_POUT_OP_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_POUT) { + pmbus_send16(pmdev, pmdev->pages[index].pout_op_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_POUT_OP_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_POUT) { + pmbus_send8(pmdev, pmdev->pages[index].pout_op_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_POUT_OP_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_POUT) { + pmbus_send16(pmdev, pmdev->pages[index].pout_op_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_PIN_OP_WARN_LIMIT: /* R/W word */ + if 
(pmdev->pages[index].page_flags & PB_HAS_PIN) { + pmbus_send16(pmdev, pmdev->pages[index].pin_op_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_STATUS_BYTE: /* R/W byte */ + pmbus_send8(pmdev, pmdev->pages[index].status_word & 0xFF); + break; + + case PMBUS_STATUS_WORD: /* R/W word */ + pmbus_send16(pmdev, pmdev->pages[index].status_word); + break; + + case PMBUS_STATUS_VOUT: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send8(pmdev, pmdev->pages[index].status_vout); + } else { + goto passthough; + } + break; + + case PMBUS_STATUS_IOUT: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send8(pmdev, pmdev->pages[index].status_iout); + } else { + goto passthough; + } + break; + + case PMBUS_STATUS_INPUT: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN || + pmdev->pages[index].page_flags & PB_HAS_IIN || + pmdev->pages[index].page_flags & PB_HAS_PIN) { + pmbus_send8(pmdev, pmdev->pages[index].status_input); + } else { + goto passthough; + } + break; + + case PMBUS_STATUS_TEMPERATURE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmbus_send8(pmdev, pmdev->pages[index].status_temperature); + } else { + goto passthough; + } + break; + + case PMBUS_STATUS_CML: /* R/W byte */ + pmbus_send8(pmdev, pmdev->pages[index].status_cml); + break; + + case PMBUS_STATUS_OTHER: /* R/W byte */ + pmbus_send8(pmdev, pmdev->pages[index].status_other); + break; + + case PMBUS_READ_EIN: /* Read-Only block 5 bytes */ + if (pmdev->pages[index].page_flags & PB_HAS_EIN) { + pmbus_send(pmdev, pmdev->pages[index].read_ein, 5); + } else { + goto passthough; + } + break; + + case PMBUS_READ_EOUT: /* Read-Only block 5 bytes */ + if (pmdev->pages[index].page_flags & PB_HAS_EOUT) { + pmbus_send(pmdev, pmdev->pages[index].read_eout, 5); + } else { + goto passthough; + } + break; + + case PMBUS_READ_VIN: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send16(pmdev, pmdev->pages[index].read_vin); + } else { + goto passthough; + } + break; + + case PMBUS_READ_IIN: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN) { + pmbus_send16(pmdev, pmdev->pages[index].read_iin); + } else { + goto passthough; + } + break; + + case PMBUS_READ_VOUT: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].read_vout); + } else { + goto passthough; + } + break; + + case PMBUS_READ_IOUT: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send16(pmdev, pmdev->pages[index].read_iout); + } else { + goto passthough; + } + break; + + case PMBUS_READ_TEMPERATURE_1: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmbus_send16(pmdev, pmdev->pages[index].read_temperature_1); + } else { + goto passthough; + } + break; + + case PMBUS_READ_TEMPERATURE_2: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMP2) { + pmbus_send16(pmdev, pmdev->pages[index].read_temperature_2); + } else { + goto passthough; + } + break; + + case PMBUS_READ_TEMPERATURE_3: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMP3) { + pmbus_send16(pmdev, pmdev->pages[index].read_temperature_3); + } else { + goto passthough; + } + break; + + case PMBUS_READ_POUT: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_POUT) { + pmbus_send16(pmdev, pmdev->pages[index].read_pout); + } else { + goto passthough; + } + break; + + case 
PMBUS_READ_PIN: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_PIN) { + pmbus_send16(pmdev, pmdev->pages[index].read_pin); + } else { + goto passthough; + } + break; + + case PMBUS_REVISION: /* Read-Only byte */ + pmbus_send8(pmdev, pmdev->pages[index].revision); + break; + + case PMBUS_MFR_ID: /* R/W block */ + if (pmdev->pages[index].page_flags & PB_HAS_MFR_INFO) { + pmbus_send_string(pmdev, pmdev->pages[index].mfr_id); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_MODEL: /* R/W block */ + if (pmdev->pages[index].page_flags & PB_HAS_MFR_INFO) { + pmbus_send_string(pmdev, pmdev->pages[index].mfr_model); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_REVISION: /* R/W block */ + if (pmdev->pages[index].page_flags & PB_HAS_MFR_INFO) { + pmbus_send_string(pmdev, pmdev->pages[index].mfr_revision); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_LOCATION: /* R/W block */ + if (pmdev->pages[index].page_flags & PB_HAS_MFR_INFO) { + pmbus_send_string(pmdev, pmdev->pages[index].mfr_location); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_VIN_MIN: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_vin_min); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_VIN_MAX: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_vin_max); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_IIN_MAX: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_iin_max); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_PIN_MAX: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_PIN_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_pin_max); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_VOUT_MIN: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_vout_min); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_VOUT_MAX: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_vout_max); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_IOUT_MAX: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_iout_max); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_POUT_MAX: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_POUT_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_pout_max); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_MAX_TEMP_1: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMP_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_max_temp_1); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_MAX_TEMP_2: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMP_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_max_temp_2); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_MAX_TEMP_3: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMP_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_max_temp_3); + } else { + goto passthough; + } + break; + + case PMBUS_CLEAR_FAULTS: /* Send Byte */ + case PMBUS_PAGE_PLUS_WRITE: /* Block Write-only */ + case PMBUS_STORE_DEFAULT_ALL: /* Send Byte 
*/ + case PMBUS_RESTORE_DEFAULT_ALL: /* Send Byte */ + case PMBUS_STORE_DEFAULT_CODE: /* Write-only Byte */ + case PMBUS_RESTORE_DEFAULT_CODE: /* Write-only Byte */ + case PMBUS_STORE_USER_ALL: /* Send Byte */ + case PMBUS_RESTORE_USER_ALL: /* Send Byte */ + case PMBUS_STORE_USER_CODE: /* Write-only Byte */ + case PMBUS_RESTORE_USER_CODE: /* Write-only Byte */ + case PMBUS_QUERY: /* Write-Only */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: reading from write only register 0x%02x\n", + __func__, pmdev->code); + break; + +passthough: + default: + /* Pass through read request if not handled */ + if (pmdc->receive_byte) { + ret = pmdc->receive_byte(pmdev); + } + break; + } + + if (pmdev->out_buf_len != 0) { + ret = pmbus_out_buf_pop(pmdev); + return ret; + } + + return ret; +} + +/* + * PMBus clear faults command applies to all status registers, existing faults + * should separately get re-asserted. + */ +static void pmbus_clear_faults(PMBusDevice *pmdev) +{ + for (uint8_t i = 0; i < pmdev->num_pages; i++) { + pmdev->pages[i].status_word = 0; + pmdev->pages[i].status_vout = 0; + pmdev->pages[i].status_iout = 0; + pmdev->pages[i].status_input = 0; + pmdev->pages[i].status_temperature = 0; + pmdev->pages[i].status_cml = 0; + pmdev->pages[i].status_other = 0; + pmdev->pages[i].status_mfr_specific = 0; + pmdev->pages[i].status_fans_1_2 = 0; + pmdev->pages[i].status_fans_3_4 = 0; + } + +} + +/* + * PMBus operation is used to turn On and Off PSUs + * Therefore, default value for the Operation should be PB_OP_ON or 0x80 + */ +static void pmbus_operation(PMBusDevice *pmdev) +{ + uint8_t index = pmdev->page; + if ((pmdev->pages[index].operation & PB_OP_ON) == 0) { + pmdev->pages[index].read_vout = 0; + pmdev->pages[index].read_iout = 0; + pmdev->pages[index].read_pout = 0; + return; + } + + if (pmdev->pages[index].operation & (PB_OP_ON | PB_OP_MARGIN_HIGH)) { + pmdev->pages[index].read_vout = pmdev->pages[index].vout_margin_high; + } + + if (pmdev->pages[index].operation & (PB_OP_ON | PB_OP_MARGIN_LOW)) { + pmdev->pages[index].read_vout = pmdev->pages[index].vout_margin_low; + } + pmbus_check_limits(pmdev); +} + +static int pmbus_write_data(SMBusDevice *smd, uint8_t *buf, uint8_t len) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(smd); + PMBusDeviceClass *pmdc = PMBUS_DEVICE_GET_CLASS(pmdev); + int ret = 0; + uint8_t index; + + if (len == 0) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: writing empty data\n", __func__); + return -1; + } + + if (!pmdev->pages) { /* allocate memory for pages on first use */ + pmbus_pages_alloc(pmdev); + } + + pmdev->in_buf_len = len; + pmdev->in_buf = buf; + + pmdev->code = buf[0]; /* PMBus command code */ + if (len == 1) { /* Single length writes are command codes only */ + return 0; + } + + if (pmdev->code == PMBUS_PAGE) { + pmdev->page = pmbus_receive8(pmdev); + return 0; + } + /* loop through all the pages when 0xFF is received */ + if (pmdev->page == PB_ALL_PAGES) { + for (int i = 0; i < pmdev->num_pages; i++) { + pmdev->page = i; + pmbus_write_data(smd, buf, len); + } + pmdev->page = PB_ALL_PAGES; + return 0; + } + + index = pmdev->page; + + switch (pmdev->code) { + case PMBUS_OPERATION: /* R/W byte */ + pmdev->pages[index].operation = pmbus_receive8(pmdev); + pmbus_operation(pmdev); + break; + + case PMBUS_ON_OFF_CONFIG: /* R/W byte */ + pmdev->pages[index].on_off_config = pmbus_receive8(pmdev); + break; + + case PMBUS_CLEAR_FAULTS: /* Send Byte */ + pmbus_clear_faults(pmdev); + break; + + case PMBUS_PHASE: /* R/W byte */ + pmdev->pages[index].phase = pmbus_receive8(pmdev); + 
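+        /*
+         * Note on the page handling above (illustration, not new behaviour):
+         * a guest that writes PAGE = 0xFF (PB_ALL_PAGES) and then
+         * OPERATION = 0x80 (PB_OP_ON) has the OPERATION write replayed on
+         * every page by the loop at the top of this function, turning all
+         * rails on in a single transaction.
+         */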
break; + + case PMBUS_PAGE_PLUS_WRITE: /* Block Write-only */ + case PMBUS_WRITE_PROTECT: /* R/W byte */ + pmdev->pages[index].write_protect = pmbus_receive8(pmdev); + break; + + case PMBUS_VOUT_MODE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_MODE) { + pmdev->pages[index].vout_mode = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_COMMAND: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_command = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_TRIM: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_trim = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_CAL_OFFSET: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_cal_offset = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_MAX: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_max = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_MARGIN_HIGH: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_MARGIN) { + pmdev->pages[index].vout_margin_high = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_MARGIN_LOW: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_MARGIN) { + pmdev->pages[index].vout_margin_low = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_TRANSITION_RATE: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_transition_rate = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_DROOP: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_droop = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_SCALE_LOOP: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_scale_loop = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_SCALE_MONITOR: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_scale_monitor = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_POUT_MAX: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].pout_max = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_ON: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_on = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_OFF: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_off = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_CAL_GAIN: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT_GAIN) { + pmdev->pages[index].iout_cal_gain = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_OV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_ov_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_OV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & 
PB_HAS_VOUT) { + pmdev->pages[index].vout_ov_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_OV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_ov_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_UV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_uv_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_UV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_uv_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_UV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_uv_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_OC_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].iout_oc_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_OC_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].iout_oc_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_OC_LV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].iout_oc_lv_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_OC_LV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].iout_oc_lv_fault_response + = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_OC_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].iout_oc_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_UC_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].iout_uc_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_UC_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].iout_uc_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_OT_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmdev->pages[index].ot_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_OT_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmdev->pages[index].ot_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_OT_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmdev->pages[index].ot_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_UT_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmdev->pages[index].ut_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_UT_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmdev->pages[index].ut_fault_limit = 
pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_UT_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmdev->pages[index].ut_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_OV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_ov_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_OV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_ov_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_OV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_ov_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_UV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_uv_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_UV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_uv_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_UV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_uv_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IIN_OC_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN) { + pmdev->pages[index].iin_oc_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IIN_OC_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN) { + pmdev->pages[index].iin_oc_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IIN_OC_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN) { + pmdev->pages[index].iin_oc_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_POUT_OP_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].pout_op_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_POUT_OP_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].pout_op_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_POUT_OP_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].pout_op_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_PIN_OP_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_PIN) { + pmdev->pages[index].pin_op_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_STATUS_BYTE: /* R/W byte */ + pmdev->pages[index].status_word = pmbus_receive8(pmdev); + break; + + case PMBUS_STATUS_WORD: /* R/W word */ + pmdev->pages[index].status_word = pmbus_receive16(pmdev); + break; + + case PMBUS_STATUS_VOUT: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].status_vout = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_STATUS_IOUT: /* R/W byte */ 
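+        /*
+         * The STATUS_* cases in this group latch whatever value the bus
+         * master wrote into the per-page status field; the same fields are
+         * also updated from the device side by pmbus_check_limits() and
+         * zeroed again by pmbus_clear_faults().
+         */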
+ if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].status_iout = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_STATUS_INPUT: /* R/W byte */ + pmdev->pages[index].status_input = pmbus_receive8(pmdev); + break; + + case PMBUS_STATUS_TEMPERATURE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmdev->pages[index].status_temperature = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_STATUS_CML: /* R/W byte */ + pmdev->pages[index].status_cml = pmbus_receive8(pmdev); + break; + + case PMBUS_STATUS_OTHER: /* R/W byte */ + pmdev->pages[index].status_other = pmbus_receive8(pmdev); + break; + + case PMBUS_PAGE_PLUS_READ: /* Block Read-only */ + case PMBUS_CAPABILITY: /* Read-Only byte */ + case PMBUS_COEFFICIENTS: /* Read-only block 5 bytes */ + case PMBUS_READ_EIN: /* Read-Only block 5 bytes */ + case PMBUS_READ_EOUT: /* Read-Only block 5 bytes */ + case PMBUS_READ_VIN: /* Read-Only word */ + case PMBUS_READ_IIN: /* Read-Only word */ + case PMBUS_READ_VCAP: /* Read-Only word */ + case PMBUS_READ_VOUT: /* Read-Only word */ + case PMBUS_READ_IOUT: /* Read-Only word */ + case PMBUS_READ_TEMPERATURE_1: /* Read-Only word */ + case PMBUS_READ_TEMPERATURE_2: /* Read-Only word */ + case PMBUS_READ_TEMPERATURE_3: /* Read-Only word */ + case PMBUS_READ_FAN_SPEED_1: /* Read-Only word */ + case PMBUS_READ_FAN_SPEED_2: /* Read-Only word */ + case PMBUS_READ_FAN_SPEED_3: /* Read-Only word */ + case PMBUS_READ_FAN_SPEED_4: /* Read-Only word */ + case PMBUS_READ_DUTY_CYCLE: /* Read-Only word */ + case PMBUS_READ_FREQUENCY: /* Read-Only word */ + case PMBUS_READ_POUT: /* Read-Only word */ + case PMBUS_READ_PIN: /* Read-Only word */ + case PMBUS_REVISION: /* Read-Only byte */ + case PMBUS_APP_PROFILE_SUPPORT: /* Read-Only block-read */ + case PMBUS_MFR_VIN_MIN: /* Read-Only word */ + case PMBUS_MFR_VIN_MAX: /* Read-Only word */ + case PMBUS_MFR_IIN_MAX: /* Read-Only word */ + case PMBUS_MFR_PIN_MAX: /* Read-Only word */ + case PMBUS_MFR_VOUT_MIN: /* Read-Only word */ + case PMBUS_MFR_VOUT_MAX: /* Read-Only word */ + case PMBUS_MFR_IOUT_MAX: /* Read-Only word */ + case PMBUS_MFR_POUT_MAX: /* Read-Only word */ + case PMBUS_MFR_TAMBIENT_MAX: /* Read-Only word */ + case PMBUS_MFR_TAMBIENT_MIN: /* Read-Only word */ + case PMBUS_MFR_EFFICIENCY_LL: /* Read-Only block 14 bytes */ + case PMBUS_MFR_EFFICIENCY_HL: /* Read-Only block 14 bytes */ + case PMBUS_MFR_PIN_ACCURACY: /* Read-Only byte */ + case PMBUS_IC_DEVICE_ID: /* Read-Only block-read */ + case PMBUS_IC_DEVICE_REV: /* Read-Only block-read */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: writing to read-only register 0x%02x\n", + __func__, pmdev->code); + break; + +passthrough: + /* Unimplimented registers get passed to the device */ + default: + if (pmdc->write_data) { + ret = pmdc->write_data(pmdev, buf, len); + } + break; + } + pmbus_check_limits(pmdev); + pmdev->in_buf_len = 0; + return ret; +} + +int pmbus_page_config(PMBusDevice *pmdev, uint8_t index, uint64_t flags) +{ + if (!pmdev->pages) { /* allocate memory for pages on first use */ + pmbus_pages_alloc(pmdev); + } + + /* The 0xFF page is special for commands applying to all pages */ + if (index == PB_ALL_PAGES) { + for (int i = 0; i < pmdev->num_pages; i++) { + pmdev->pages[i].page_flags = flags; + } + return 0; + } + + if (index > pmdev->num_pages - 1) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: index %u is out of range\n", + __func__, index); + return -1; + } + + 
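+    /*
+     * Device models typically call pmbus_page_config() from their init or
+     * realize code to declare which quantities each page exposes.  For
+     * illustration only (a hypothetical two-rail regulator; the flag
+     * combination is an example, not taken from a real device):
+     *
+     *     pmbus_page_config(pmdev, 0,
+     *                       PB_HAS_VOUT | PB_HAS_VOUT_MODE | PB_HAS_IOUT);
+     *     pmbus_page_config(pmdev, 1, PB_HAS_VOUT | PB_HAS_TEMPERATURE);
+     *
+     * Commands whose feature flag is not set fall through to the subclass
+     * receive_byte()/write_data() callbacks in the read and write paths
+     * above.
+     */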
pmdev->pages[index].page_flags = flags; + + return 0; +} + +/* TODO: include pmbus page info in vmstate */ +const VMStateDescription vmstate_pmbus_device = { + .name = TYPE_PMBUS_DEVICE, + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_SMBUS_DEVICE(smb, PMBusDevice), + VMSTATE_UINT8(num_pages, PMBusDevice), + VMSTATE_UINT8(code, PMBusDevice), + VMSTATE_UINT8(page, PMBusDevice), + VMSTATE_UINT8(capability, PMBusDevice), + VMSTATE_END_OF_LIST() + } +}; + +static void pmbus_device_finalize(Object *obj) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(obj); + g_free(pmdev->pages); +} + +static void pmbus_device_class_init(ObjectClass *klass, void *data) +{ + SMBusDeviceClass *k = SMBUS_DEVICE_CLASS(klass); + + k->quick_cmd = pmbus_quick_cmd; + k->write_data = pmbus_write_data; + k->receive_byte = pmbus_receive_byte; +} + +static const TypeInfo pmbus_device_type_info = { + .name = TYPE_PMBUS_DEVICE, + .parent = TYPE_SMBUS_DEVICE, + .instance_size = sizeof(PMBusDevice), + .instance_finalize = pmbus_device_finalize, + .abstract = true, + .class_size = sizeof(PMBusDeviceClass), + .class_init = pmbus_device_class_init, +}; + +static void pmbus_device_register_types(void) +{ + type_register_static(&pmbus_device_type_info); +} + +type_init(pmbus_device_register_types) diff --git a/hw/i2c/ppc4xx_i2c.c b/hw/i2c/ppc4xx_i2c.c index c0a8e045670..75d50f15158 100644 --- a/hw/i2c/ppc4xx_i2c.c +++ b/hw/i2c/ppc4xx_i2c.c @@ -1,6 +1,8 @@ /* * PPC4xx I2C controller emulation * + * Documentation: PPC405GP User's Manual, Chapter 22. IIC Bus Interface + * * Copyright (c) 2007 Jocelyn Mayer * Copyright (c) 2012 François Revol * Copyright (c) 2016-2018 BALATON Zoltan @@ -238,11 +240,14 @@ static void ppc4xx_i2c_writeb(void *opaque, hwaddr addr, uint64_t value, i2c->sts &= ~IIC_STS_ERR; } } - if (!(i2c->sts & IIC_STS_ERR) && - i2c_send_recv(i2c->bus, &i2c->mdata[i], !recv)) { - i2c->sts |= IIC_STS_ERR; - i2c->extsts |= IIC_EXTSTS_XFRA; - break; + if (!(i2c->sts & IIC_STS_ERR)) { + if (recv) { + i2c->mdata[i] = i2c_recv(i2c->bus); + } else if (i2c_send(i2c->bus, i2c->mdata[i]) < 0) { + i2c->sts |= IIC_STS_ERR; + i2c->extsts |= IIC_EXTSTS_XFRA; + break; + } } if (value & IIC_CNTL_RPST || !(value & IIC_CNTL_CHT)) { i2c_end_transfer(i2c->bus); diff --git a/hw/i2c/smbus_eeprom.c b/hw/i2c/smbus_eeprom.c index 4d2bf99207a..12c5741f388 100644 --- a/hw/i2c/smbus_eeprom.c +++ b/hw/i2c/smbus_eeprom.c @@ -276,7 +276,7 @@ uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t ram_size) spd[18] = 12; /* ~CAS latencies supported */ spd[19] = (type == DDR2 ? 0 : 1); /* reserved / ~CS latencies supported */ spd[20] = 2; /* DIMM type / ~WE latencies */ - /* module features */ + spd[21] = (type < DDR2 ? 
0x20 : 0); /* module features */ /* memory chip features */ spd[23] = 0x12; /* clock cycle time @ medium CAS latency */ /* data access time */ diff --git a/hw/i2c/smbus_master.c b/hw/i2c/smbus_master.c index dc43b8637d1..6a53c34e70b 100644 --- a/hw/i2c/smbus_master.c +++ b/hw/i2c/smbus_master.c @@ -29,7 +29,7 @@ int smbus_receive_byte(I2CBus *bus, uint8_t addr) { uint8_t data; - if (i2c_start_transfer(bus, addr, 1)) { + if (i2c_start_recv(bus, addr)) { return -1; } data = i2c_recv(bus); @@ -40,7 +40,7 @@ int smbus_receive_byte(I2CBus *bus, uint8_t addr) int smbus_send_byte(I2CBus *bus, uint8_t addr, uint8_t data) { - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { return -1; } i2c_send(bus, data); @@ -51,11 +51,11 @@ int smbus_send_byte(I2CBus *bus, uint8_t addr, uint8_t data) int smbus_read_byte(I2CBus *bus, uint8_t addr, uint8_t command) { uint8_t data; - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { return -1; } i2c_send(bus, command); - if (i2c_start_transfer(bus, addr, 1)) { + if (i2c_start_recv(bus, addr)) { i2c_end_transfer(bus); return -1; } @@ -67,7 +67,7 @@ int smbus_read_byte(I2CBus *bus, uint8_t addr, uint8_t command) int smbus_write_byte(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t data) { - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { return -1; } i2c_send(bus, command); @@ -79,11 +79,11 @@ int smbus_write_byte(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t data) int smbus_read_word(I2CBus *bus, uint8_t addr, uint8_t command) { uint16_t data; - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { return -1; } i2c_send(bus, command); - if (i2c_start_transfer(bus, addr, 1)) { + if (i2c_start_recv(bus, addr)) { i2c_end_transfer(bus); return -1; } @@ -96,7 +96,7 @@ int smbus_read_word(I2CBus *bus, uint8_t addr, uint8_t command) int smbus_write_word(I2CBus *bus, uint8_t addr, uint8_t command, uint16_t data) { - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { return -1; } i2c_send(bus, command); @@ -113,12 +113,12 @@ int smbus_read_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data, int i; if (send_cmd) { - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { return -1; } i2c_send(bus, command); } - if (i2c_start_transfer(bus, addr, 1)) { + if (i2c_start_recv(bus, addr)) { if (send_cmd) { i2c_end_transfer(bus); } @@ -149,7 +149,7 @@ int smbus_write_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data, len = 32; } - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { return -1; } i2c_send(bus, command); diff --git a/hw/i2c/trace-events b/hw/i2c/trace-events index 82fe6f965f4..7d8907c1eed 100644 --- a/hw/i2c/trace-events +++ b/hw/i2c/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
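+# Each event below follows the usual trace-events syntax,
+#   <event-name>(<C argument list>) "<printf-style format string>",
+# and is emitted from C via the generated trace_<event-name>() call, e.g.
+# trace_pca954x_read_data(data) for the pca954x events added further down.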
# core.c @@ -26,3 +26,8 @@ npcm7xx_smbus_recv_byte(const char *id, uint8_t value) "%s recv byte: 0x%02x" npcm7xx_smbus_stop(const char *id) "%s stopping" npcm7xx_smbus_nack(const char *id) "%s nacking" npcm7xx_smbus_recv_fifo(const char *id, uint8_t received, uint8_t expected) "%s recv fifo: received %u, expected %u" + +# i2c-mux-pca954x.c + +pca954x_write_bytes(uint8_t value) "PCA954X write data: 0x%02x" +pca954x_read_data(uint8_t value) "PCA954X read data: 0x%02x" diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index 7f91f30877f..d22ac4a4b95 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -1,4 +1,12 @@ +config X86_FW_OVMF + bool + config SEV + bool + select X86_FW_OVMF + depends on KVM + +config SGX bool depends on KVM @@ -17,13 +25,15 @@ config PC imply PVPANIC_ISA imply QXL imply SEV + imply SGX imply SGA imply TEST_DEVICES imply TPM_CRB imply TPM_TIS_ISA imply VGA_PCI imply VIRTIO_VGA - select FDC + imply NVDIMM + select FDC_ISA select I8259 select I8254 select PCKBD @@ -49,6 +59,7 @@ config PC_ACPI select ACPI_X86 select ACPI_CPU_HOTPLUG select ACPI_MEMORY_HOTPLUG + select ACPI_VIOT select SMBUS_EEPROM select PFLASH_CFI01 depends on ACPI_SMBUS @@ -106,6 +117,7 @@ config MICROVM select ACPI_HW_REDUCED select PCI_EXPRESS_GENERIC_BRIDGE select USB_XHCI_SYSBUS + select I8254 config X86_IOMMU bool diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index de98750aeff..a99c6e4fe3f 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -43,7 +43,6 @@ #include "sysemu/tpm.h" #include "hw/acpi/tpm.h" #include "hw/acpi/vmgenid.h" -#include "hw/boards.h" #include "sysemu/tpm_backend.h" #include "hw/rtc/mc146818rtc_regs.h" #include "migration/vmstate.h" @@ -69,9 +68,11 @@ #include "qom/qom-qobject.h" #include "hw/i386/amd_iommu.h" #include "hw/i386/intel_iommu.h" +#include "hw/virtio/virtio-iommu.h" #include "hw/acpi/ipmi.h" #include "hw/acpi/hmat.h" +#include "hw/acpi/viot.h" /* These are used to size the ACPI tables for -M pc-i440fx-1.7 and * -M pc-i440fx-2.0. Even if the actual amount of AML generated grows @@ -108,7 +109,9 @@ typedef struct AcpiPmInfo { typedef struct AcpiMiscInfo { bool is_piix4; bool has_hpet; +#ifdef CONFIG_TPM TPMVersion tpm_version; +#endif const unsigned char *dsdt_code; unsigned dsdt_size; uint16_t pvpanic_port; @@ -218,10 +221,6 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) /* w2k requires FADT(rev1) or it won't boot, keep PC compatible */ pm->fadt.rev = 1; pm->cpu_hp_io_base = PIIX4_CPU_HOTPLUG_IO_BASE; - pm->pcihp_io_base = - object_property_get_uint(obj, ACPI_PCIHP_IO_BASE_PROP, NULL); - pm->pcihp_io_len = - object_property_get_uint(obj, ACPI_PCIHP_IO_LEN_PROP, NULL); } if (lpc) { uint64_t smi_features = object_property_get_uint(lpc, @@ -237,6 +236,10 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) pm->smi_on_cpu_unplug = !!(smi_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT)); } + pm->pcihp_io_base = + object_property_get_uint(obj, ACPI_PCIHP_IO_BASE_PROP, NULL); + pm->pcihp_io_len = + object_property_get_uint(obj, ACPI_PCIHP_IO_LEN_PROP, NULL); /* The above need not be conditional on machine type because the reset port * happens to be the same on PIIX (pc) and ICH9 (q35). 
*/ @@ -266,10 +269,10 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) qobject_unref(o); pm->pcihp_bridge_en = - object_property_get_bool(obj, "acpi-pci-hotplug-with-bridge-support", + object_property_get_bool(obj, ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, NULL); pm->pcihp_root_en = - object_property_get_bool(obj, "acpi-root-pci-hotplug", + object_property_get_bool(obj, ACPI_PM_PROP_ACPI_PCI_ROOTHP, NULL); } @@ -287,7 +290,9 @@ static void acpi_get_misc_info(AcpiMiscInfo *info) } info->has_hpet = hpet_find(); +#ifdef CONFIG_TPM info->tpm_version = tpm_get_version(tpm_find()); +#endif info->pvpanic_port = pvpanic_port(); info->applesmc_io_base = applesmc_port(); } @@ -296,17 +301,13 @@ static void acpi_get_misc_info(AcpiMiscInfo *info) * Because of the PXB hosts we cannot simply query TYPE_PCI_HOST_BRIDGE. * On i386 arch we only have two pci hosts, so we can look only for them. */ -static Object *acpi_get_i386_pci_host(void) +Object *acpi_get_i386_pci_host(void) { PCIHostState *host; - host = OBJECT_CHECK(PCIHostState, - object_resolve_path("/machine/i440fx", NULL), - TYPE_PCI_HOST_BRIDGE); + host = PCI_HOST_BRIDGE(object_resolve_path("/machine/i440fx", NULL)); if (!host) { - host = OBJECT_CHECK(PCIHostState, - object_resolve_path("/machine/q35", NULL), - TYPE_PCI_HOST_BRIDGE); + host = PCI_HOST_BRIDGE(object_resolve_path("/machine/q35", NULL)); } return OBJECT(host); @@ -317,7 +318,10 @@ static void acpi_get_pci_holes(Range *hole, Range *hole64) Object *pci_host; pci_host = acpi_get_i386_pci_host(); - g_assert(pci_host); + + if (!pci_host) { + return; + } range_set_bounds1(hole, object_property_get_uint(pci_host, @@ -343,13 +347,23 @@ static void acpi_align_size(GArray *blob, unsigned align) g_array_set_size(blob, ROUND_UP(acpi_data_len(blob), align)); } -/* FACS */ +/* + * ACPI spec 1.0b, + * 5.2.6 Firmware ACPI Control Structure + */ static void build_facs(GArray *table_data) { - AcpiFacsDescriptorRev1 *facs = acpi_data_push(table_data, sizeof *facs); - memcpy(&facs->signature, "FACS", 4); - facs->length = cpu_to_le32(sizeof(*facs)); + const char *sig = "FACS"; + const uint8_t reserved[40] = {}; + + g_array_append_vals(table_data, sig, 4); /* Signature */ + build_append_int_noprefix(table_data, 64, 4); /* Length */ + build_append_int_noprefix(table_data, 0, 4); /* Hardware Signature */ + build_append_int_noprefix(table_data, 0, 4); /* Firmware Waking Vector */ + build_append_int_noprefix(table_data, 0, 4); /* Global Lock */ + build_append_int_noprefix(table_data, 0, 4); /* Flags */ + g_array_append_vals(table_data, reserved, 40); /* Reserved */ } static void build_append_pcihp_notify_entry(Aml *method, int slot) @@ -368,7 +382,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, Aml *dev, *notify_method = NULL, *method; QObject *bsel; PCIBus *sec; - int i; + int devfn; bsel = object_property_get_qobject(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, NULL); if (bsel) { @@ -378,20 +392,31 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, notify_method = aml_method("DVNT", 2, AML_NOTSERIALIZED); } - for (i = 0; i < ARRAY_SIZE(bus->devices); i += PCI_FUNC_MAX) { + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { DeviceClass *dc; PCIDeviceClass *pc; - PCIDevice *pdev = bus->devices[i]; - int slot = PCI_SLOT(i); + PCIDevice *pdev = bus->devices[devfn]; + int slot = PCI_SLOT(devfn); + int func = PCI_FUNC(devfn); + /* ACPI spec: 1.0b: Table 6-2 _ADR Object Bus Types, PCI type */ + int adr = slot << 16 | func; bool 
hotplug_enabled_dev; bool bridge_in_acpi; bool cold_plugged_bridge; if (!pdev) { - if (bsel) { /* add hotplug slots for non present devices */ - dev = aml_device("S%.02X", PCI_DEVFN(slot, 0)); + /* + * add hotplug slots for non present devices. + * hotplug is supported only for non-multifunction device + * so generate device description only for function 0 + */ + if (bsel && !func) { + if (pci_bus_is_express(bus) && slot > 0) { + break; + } + dev = aml_device("S%.02X", devfn); aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); - aml_append(dev, aml_name_decl("_ADR", aml_int(slot << 16))); + aml_append(dev, aml_name_decl("_ADR", aml_int(adr))); method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); aml_append(method, aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN")) @@ -427,16 +452,29 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, continue; } - /* start to compose PCI slot descriptor */ - dev = aml_device("S%.02X", PCI_DEVFN(slot, 0)); - aml_append(dev, aml_name_decl("_ADR", aml_int(slot << 16))); + /* + * allow describing coldplugged bridges in ACPI even if they are not + * on function 0, as they are not unpluggable, for all other devices + * generate description only for function 0 per slot + */ + if (func && !bridge_in_acpi) { + continue; + } + + /* start to compose PCI device descriptor */ + dev = aml_device("S%.02X", devfn); + aml_append(dev, aml_name_decl("_ADR", aml_int(adr))); if (bsel) { - aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); + /* + * Can't declare _SUN here for every device as it changes 'slot' + * enumeration order in linux kernel, so use another variable for it + */ + aml_append(dev, aml_name_decl("ASUN", aml_int(slot))); method = aml_method("_DSM", 4, AML_SERIALIZED); aml_append(method, aml_return( aml_call6("PDSM", aml_arg(0), aml_arg(1), aml_arg(2), - aml_arg(3), aml_name("BSEL"), aml_name("_SUN")) + aml_arg(3), aml_name("BSEL"), aml_name("ASUN")) )); aml_append(dev, method); } @@ -463,6 +501,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, aml_append(method, aml_return(aml_int(s3d))); aml_append(dev, method); } else if (hotplug_enabled_dev) { + aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); /* add _EJ0 to make slot hotpluggable */ method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); aml_append(method, @@ -482,7 +521,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, build_append_pci_bus_devices(dev, sec_bus, pcihp_bridge_en); } - /* slot descriptor has been composed, add it into parent context */ + /* device descriptor has been composed, add it into parent context */ aml_append(parent_scope, dev); } @@ -511,13 +550,12 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, /* Notify about child bus events in any case */ if (pcihp_bridge_en) { QLIST_FOREACH(sec, &bus->child, sibling) { - int32_t devfn = sec->parent_dev->devfn; - - if (pci_bus_is_root(sec) || pci_bus_is_express(sec)) { + if (pci_bus_is_root(sec)) { continue; } - aml_append(method, aml_name("^S%.02X.PCNT", devfn)); + aml_append(method, aml_name("^S%.02X.PCNT", + sec->parent_dev->devfn)); } } @@ -1243,7 +1281,7 @@ static void build_piix4_isa_bridge(Aml *table) aml_append(table, scope); } -static void build_piix4_pci_hotplug(Aml *table) +static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr) { Aml *scope; Aml *field; @@ -1252,20 +1290,22 @@ static void build_piix4_pci_hotplug(Aml *table) scope = aml_scope("_SB.PCI0"); aml_append(scope, - aml_operation_region("PCST", 
AML_SYSTEM_IO, aml_int(0xae00), 0x08)); + aml_operation_region("PCST", AML_SYSTEM_IO, aml_int(pcihp_addr), 0x08)); field = aml_field("PCST", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("PCIU", 32)); aml_append(field, aml_named_field("PCID", 32)); aml_append(scope, field); aml_append(scope, - aml_operation_region("SEJ", AML_SYSTEM_IO, aml_int(0xae08), 0x04)); + aml_operation_region("SEJ", AML_SYSTEM_IO, + aml_int(pcihp_addr + ACPI_PCIHP_SEJ_BASE), 0x04)); field = aml_field("SEJ", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("B0EJ", 32)); aml_append(scope, field); aml_append(scope, - aml_operation_region("BNMR", AML_SYSTEM_IO, aml_int(0xae10), 0x08)); + aml_operation_region("BNMR", AML_SYSTEM_IO, + aml_int(pcihp_addr + ACPI_PCIHP_BNMR_BASE), 0x08)); field = aml_field("BNMR", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("BNUM", 32)); aml_append(field, aml_named_field("PIDX", 32)); @@ -1297,7 +1337,7 @@ static void build_piix4_pci_hotplug(Aml *table) aml_append(table, scope); } -static Aml *build_q35_osc_method(void) +static Aml *build_q35_osc_method(bool enable_native_pcie_hotplug) { Aml *if_ctx; Aml *if_ctx2; @@ -1319,8 +1359,10 @@ static Aml *build_q35_osc_method(void) /* * Always allow native PME, AER (no dependencies) * Allow SHPC (PCI bridges can have SHPC controller) + * Disable PCIe Native Hot-plug if ACPI PCI Hot-plug is enabled. */ - aml_append(if_ctx, aml_and(a_ctrl, aml_int(0x1F), a_ctrl)); + aml_append(if_ctx, aml_and(a_ctrl, + aml_int(0x1E | (enable_native_pcie_hotplug ? 0x1 : 0x0)), a_ctrl)); if_ctx2 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1)))); /* Unknown revision */ @@ -1372,15 +1414,17 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, uint32_t nr_mem = machine->ram_slots; int root_bus_limit = 0xFF; PCIBus *bus = NULL; +#ifdef CONFIG_TPM TPMIf *tpm = tpm_find(); +#endif int i; VMBusBridge *vmbus_bridge = vmbus_bridge_find(); + AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = x86ms->oem_id, + .oem_table_id = x86ms->oem_table_id }; + acpi_table_begin(&table, table_data); dsdt = init_aml_allocator(); - /* Reserve space for header */ - acpi_data_push(dsdt->buf, sizeof(AcpiTableHeader)); - build_dbg_aml(dsdt); if (misc->is_piix4) { sb_scope = aml_scope("_SB"); @@ -1397,7 +1441,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, build_piix4_isa_bridge(dsdt); build_isa_devices_aml(dsdt); if (pm->pcihp_bridge_en || pm->pcihp_root_en) { - build_piix4_pci_hotplug(dsdt); + build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); } build_piix4_pci0_int(dsdt); } else { @@ -1407,7 +1451,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_ADR", aml_int(0))); aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid))); - aml_append(dev, build_q35_osc_method()); + aml_append(dev, build_q35_osc_method(!pm->pcihp_bridge_en)); aml_append(sb_scope, dev); if (mcfg_valid) { aml_append(sb_scope, build_q35_dram_controller(&mcfg)); @@ -1445,6 +1489,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, } build_q35_isa_bridge(dsdt); build_isa_devices_aml(dsdt); + if (pm->pcihp_bridge_en) { + build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); + } build_q35_pci0_int(dsdt); if (pcms->smbus && !pcmc->do_not_add_smb_acpi) { build_smb0(dsdt, pcms->smbus, ICH9_SMB_DEV, ICH9_SMB_FUNC); @@ -1479,7 +1526,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, { 
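+        /*
+         * \_GPE._E01 below is the handler for the GPE bit QEMU raises on
+         * ACPI PCI hotplug events: it serializes on \_SB.PCI0.BLCK and
+         * rescans the slots via the PCNT notify methods.  It is now
+         * generated whenever ACPI PCI hotplug is enabled, on q35 as well
+         * as piix4.
+         */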
aml_append(scope, aml_name_decl("_HID", aml_string("ACPI0006"))); - if (misc->is_piix4 && (pm->pcihp_bridge_en || pm->pcihp_root_en)) { + if (pm->pcihp_bridge_en || pm->pcihp_root_en) { method = aml_method("_E01", 0, AML_NOTSERIALIZED); aml_append(method, aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF)); @@ -1520,7 +1567,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, if (pci_bus_is_express(bus)) { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); - aml_append(dev, build_q35_osc_method()); + + /* Expander bridges do not have ACPI PCI Hot-plug enabled */ + aml_append(dev, build_q35_osc_method(true)); } else { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); } @@ -1605,10 +1654,12 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, } } +#ifdef CONFIG_TPM if (TPM_IS_TIS_ISA(tpm_find())) { aml_append(crs, aml_memory32_fixed(TPM_TIS_ADDR_BASE, TPM_TIS_ADDR_SIZE, AML_READ_WRITE)); } +#endif aml_append(scope, aml_name_decl("_CRS", crs)); /* reserve GPE0 block resources */ @@ -1745,6 +1796,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, PCIBus *bus = NULL; pci_host = acpi_get_i386_pci_host(); + if (pci_host) { bus = PCI_HOST_BRIDGE(pci_host)->bus; } @@ -1754,6 +1806,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, /* Scan all PCI buses. Generate tables to support hotplug. */ build_append_pci_bus_devices(scope, bus, pm->pcihp_bridge_en); +#ifdef CONFIG_TPM if (TPM_IS_TIS_ISA(tpm)) { if (misc->tpm_version == TPM_VERSION_2_0) { dev = aml_device("TPM"); @@ -1781,11 +1834,13 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(scope, dev); } +#endif aml_append(sb_scope, scope); } } +#ifdef CONFIG_TPM if (TPM_IS_CRB(tpm)) { dev = aml_device("TPM"); aml_append(dev, aml_name_decl("_HID", aml_string("MSFT0101"))); @@ -1800,120 +1855,176 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(sb_scope, dev); } +#endif + + if (pcms->sgx_epc.size != 0) { + uint64_t epc_base = pcms->sgx_epc.base; + uint64_t epc_size = pcms->sgx_epc.size; + dev = aml_device("EPC"); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("INT0E0C"))); + aml_append(dev, aml_name_decl("_STR", + aml_unicode("Enclave Page Cache 1.0"))); + crs = aml_resource_template(); + aml_append(crs, + aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, + AML_MAX_FIXED, AML_NON_CACHEABLE, + AML_READ_WRITE, 0, epc_base, + epc_base + epc_size - 1, 0, epc_size)); + aml_append(dev, aml_name_decl("_CRS", crs)); + + method = aml_method("_STA", 0, AML_NOTSERIALIZED); + aml_append(method, aml_return(aml_int(0x0f))); + aml_append(dev, method); + + aml_append(sb_scope, dev); + } aml_append(dsdt, sb_scope); /* copy AML table into ACPI tables blob and patch header there */ g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); - build_header(linker, table_data, - (void *)(table_data->data + table_data->len - dsdt->buf->len), - "DSDT", dsdt->buf->len, 1, x86ms->oem_id, x86ms->oem_table_id); + acpi_table_end(linker, &table); free_aml_allocator(); } +/* + * IA-PC HPET (High Precision Event Timers) Specification (Revision: 1.0a) + * 3.2.4The ACPI 2.0 HPET Description Table (HPET) + */ static void build_hpet(GArray *table_data, BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { - Acpi20Hpet *hpet; + AcpiTable table = { .sig = "HPET", .rev = 1, + .oem_id = oem_id, .oem_table_id = oem_table_id }; - hpet = acpi_data_push(table_data, sizeof(*hpet)); + acpi_table_begin(&table, table_data); /* Note timer_block_id 
value must be kept in sync with value advertised by * emulated hpet */ - hpet->timer_block_id = cpu_to_le32(0x8086a201); - hpet->addr.address = cpu_to_le64(HPET_BASE); - build_header(linker, table_data, - (void *)hpet, "HPET", sizeof(*hpet), 1, oem_id, oem_table_id); + /* Event Timer Block ID */ + build_append_int_noprefix(table_data, 0x8086a201, 4); + /* BASE_ADDRESS */ + build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0, 0, 0, HPET_BASE); + /* HPET Number */ + build_append_int_noprefix(table_data, 0, 1); + /* Main Counter Minimum Clock_tick in Periodic Mode */ + build_append_int_noprefix(table_data, 0, 2); + /* Page Protection And OEM Attribute */ + build_append_int_noprefix(table_data, 0, 1); + acpi_table_end(linker, &table); } +#ifdef CONFIG_TPM +/* + * TCPA Description Table + * + * Following Level 00, Rev 00.37 of specs: + * http://www.trustedcomputinggroup.org/resources/tcg_acpi_specification + * 7.1.2 ACPI Table Layout + */ static void build_tpm_tcpa(GArray *table_data, BIOSLinker *linker, GArray *tcpalog, const char *oem_id, const char *oem_table_id) { - Acpi20Tcpa *tcpa = acpi_data_push(table_data, sizeof *tcpa); - unsigned log_addr_size = sizeof(tcpa->log_area_start_address); - unsigned log_addr_offset = - (char *)&tcpa->log_area_start_address - table_data->data; - - tcpa->platform_class = cpu_to_le16(TPM_TCPA_ACPI_CLASS_CLIENT); - tcpa->log_area_minimum_length = cpu_to_le32(TPM_LOG_AREA_MINIMUM_SIZE); - acpi_data_push(tcpalog, le32_to_cpu(tcpa->log_area_minimum_length)); + unsigned log_addr_offset; + AcpiTable table = { .sig = "TCPA", .rev = 2, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + + acpi_table_begin(&table, table_data); + /* Platform Class */ + build_append_int_noprefix(table_data, TPM_TCPA_ACPI_CLASS_CLIENT, 2); + /* Log Area Minimum Length (LAML) */ + build_append_int_noprefix(table_data, TPM_LOG_AREA_MINIMUM_SIZE, 4); + /* Log Area Start Address (LASA) */ + log_addr_offset = table_data->len; + build_append_int_noprefix(table_data, 0, 8); + /* allocate/reserve space for TPM log area */ + acpi_data_push(tcpalog, TPM_LOG_AREA_MINIMUM_SIZE); bios_linker_loader_alloc(linker, ACPI_BUILD_TPMLOG_FILE, tcpalog, 1, false /* high memory */); - /* log area start address to be filled by Guest linker */ - bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, log_addr_offset, log_addr_size, - ACPI_BUILD_TPMLOG_FILE, 0); + bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, + log_addr_offset, 8, ACPI_BUILD_TPMLOG_FILE, 0); - build_header(linker, table_data, - (void *)tcpa, "TCPA", sizeof(*tcpa), 2, oem_id, oem_table_id); + acpi_table_end(linker, &table); } +#endif #define HOLE_640K_START (640 * KiB) #define HOLE_640K_END (1 * MiB) +/* + * ACPI spec, Revision 3.0 + * 5.2.15 System Resource Affinity Table (SRAT) + */ static void build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) { - AcpiSystemResourceAffinityTable *srat; - AcpiSratMemoryAffinity *numamem; - int i; - int srat_start, numa_start, slots; + int numa_mem_start, slots; uint64_t mem_len, mem_base, next_base; MachineClass *mc = MACHINE_GET_CLASS(machine); X86MachineState *x86ms = X86_MACHINE(machine); const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); PCMachineState *pcms = PC_MACHINE(machine); - ram_addr_t hotplugabble_address_space_size = + int nb_numa_nodes = machine->numa_state->num_nodes; + NodeInfo *numa_info = machine->numa_state->nodes; + ram_addr_t hotpluggable_address_space_size = object_property_get_int(OBJECT(pcms), 
PC_MACHINE_DEVMEM_REGION_SIZE, NULL); + AcpiTable table = { .sig = "SRAT", .rev = 1, .oem_id = x86ms->oem_id, + .oem_table_id = x86ms->oem_table_id }; - srat_start = table_data->len; - - srat = acpi_data_push(table_data, sizeof *srat); - srat->reserved1 = cpu_to_le32(1); + acpi_table_begin(&table, table_data); + build_append_int_noprefix(table_data, 1, 4); /* Reserved */ + build_append_int_noprefix(table_data, 0, 8); /* Reserved */ for (i = 0; i < apic_ids->len; i++) { int node_id = apic_ids->cpus[i].props.node_id; uint32_t apic_id = apic_ids->cpus[i].arch_id; if (apic_id < 255) { - AcpiSratProcessorAffinity *core; - - core = acpi_data_push(table_data, sizeof *core); - core->type = ACPI_SRAT_PROCESSOR_APIC; - core->length = sizeof(*core); - core->local_apic_id = apic_id; - core->proximity_lo = node_id; - memset(core->proximity_hi, 0, 3); - core->local_sapic_eid = 0; - core->flags = cpu_to_le32(1); + /* 5.2.15.1 Processor Local APIC/SAPIC Affinity Structure */ + build_append_int_noprefix(table_data, 0, 1); /* Type */ + build_append_int_noprefix(table_data, 16, 1); /* Length */ + /* Proximity Domain [7:0] */ + build_append_int_noprefix(table_data, node_id, 1); + build_append_int_noprefix(table_data, apic_id, 1); /* APIC ID */ + /* Flags, Table 5-36 */ + build_append_int_noprefix(table_data, 1, 4); + build_append_int_noprefix(table_data, 0, 1); /* Local SAPIC EID */ + /* Proximity Domain [31:8] */ + build_append_int_noprefix(table_data, 0, 3); + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ } else { - AcpiSratProcessorX2ApicAffinity *core; - - core = acpi_data_push(table_data, sizeof *core); - core->type = ACPI_SRAT_PROCESSOR_x2APIC; - core->length = sizeof(*core); - core->x2apic_id = cpu_to_le32(apic_id); - core->proximity_domain = cpu_to_le32(node_id); - core->flags = cpu_to_le32(1); + /* + * ACPI spec, Revision 4.0 + * 5.2.16.3 Processor Local x2APIC Affinity Structure + */ + build_append_int_noprefix(table_data, 2, 1); /* Type */ + build_append_int_noprefix(table_data, 24, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + /* Proximity Domain */ + build_append_int_noprefix(table_data, node_id, 4); + build_append_int_noprefix(table_data, apic_id, 4); /* X2APIC ID */ + /* Flags, Table 5-39 */ + build_append_int_noprefix(table_data, 1 /* Enabled */, 4); + build_append_int_noprefix(table_data, 0, 4); /* Clock Domain */ + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ } } - /* the memory map is a bit tricky, it contains at least one hole * from 640k-1M and possibly another one from 3.5G-4G. 
*/ next_base = 0; - numa_start = table_data->len; + numa_mem_start = table_data->len; - for (i = 1; i < pcms->numa_nodes + 1; ++i) { + for (i = 1; i < nb_numa_nodes + 1; ++i) { mem_base = next_base; - mem_len = pcms->node_mem[i - 1]; + mem_len = numa_info[i - 1].node_mem; next_base = mem_base + mem_len; /* Cut out the 640K hole */ @@ -1921,8 +2032,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) next_base > HOLE_640K_START) { mem_len -= next_base - HOLE_640K_START; if (mem_len > 0) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, mem_base, mem_len, i - 1, + build_srat_memory(table_data, mem_base, mem_len, i - 1, MEM_AFFINITY_ENABLED); } @@ -1940,8 +2050,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) next_base > x86ms->below_4g_mem_size) { mem_len -= next_base - x86ms->below_4g_mem_size; if (mem_len > 0) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, mem_base, mem_len, i - 1, + build_srat_memory(table_data, mem_base, mem_len, i - 1, MEM_AFFINITY_ENABLED); } mem_base = 1ULL << 32; @@ -1950,8 +2059,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) } if (mem_len > 0) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, mem_base, mem_len, i - 1, + build_srat_memory(table_data, mem_base, mem_len, i - 1, MEM_AFFINITY_ENABLED); } } @@ -1960,10 +2068,15 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) nvdimm_build_srat(table_data); } - slots = (table_data->len - numa_start) / sizeof *numamem; - for (; slots < pcms->numa_nodes + 2; slots++) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, 0, 0, 0, MEM_AFFINITY_NOFLAGS); + /* + * TODO: this part is not in ACPI spec and current linux kernel boots fine + * without these entries. But I recall there were issues the last time I + * tried to remove it with some ancient guest OS, however I can't remember + * what that was so keep this around for now + */ + slots = (table_data->len - numa_mem_start) / 40 /* mem affinity len */; + for (; slots < nb_numa_nodes + 2; slots++) { + build_srat_memory(table_data, 0, 0, 0, MEM_AFFINITY_NOFLAGS); } /* @@ -1974,77 +2087,144 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) * Memory devices may override proximity set by this entry, * providing _PXM method if necessary. */ - if (hotplugabble_address_space_size) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, machine->device_memory->base, - hotplugabble_address_space_size, pcms->numa_nodes - 1, + if (hotpluggable_address_space_size) { + build_srat_memory(table_data, machine->device_memory->base, + hotpluggable_address_space_size, nb_numa_nodes - 1, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); } - build_header(linker, table_data, - (void *)(table_data->data + srat_start), - "SRAT", - table_data->len - srat_start, 1, x86ms->oem_id, - x86ms->oem_table_id); + acpi_table_end(linker, &table); } /* - * VT-d spec 8.1 DMA Remapping Reporting Structure - * (version Oct. 
2014 or later) + * Insert DMAR scope for PCI bridges and endpoint devcie + */ +static void +insert_scope(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + const size_t device_scope_size = 6 /* device scope structure */ + + 2 /* 1 path entry */; + GArray *scope_blob = opaque; + + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { + /* Dmar Scope Type: 0x02 for PCI Bridge */ + build_append_int_noprefix(scope_blob, 0x02, 1); + } else { + /* Dmar Scope Type: 0x01 for PCI Endpoint Device */ + build_append_int_noprefix(scope_blob, 0x01, 1); + } + + /* length */ + build_append_int_noprefix(scope_blob, device_scope_size, 1); + /* reserved */ + build_append_int_noprefix(scope_blob, 0, 2); + /* enumeration_id */ + build_append_int_noprefix(scope_blob, 0, 1); + /* bus */ + build_append_int_noprefix(scope_blob, pci_bus_num(bus), 1); + /* device */ + build_append_int_noprefix(scope_blob, PCI_SLOT(dev->devfn), 1); + /* function */ + build_append_int_noprefix(scope_blob, PCI_FUNC(dev->devfn), 1); +} + +/* For a given PCI host bridge, walk and insert DMAR scope */ +static int +dmar_host_bridges(Object *obj, void *opaque) +{ + GArray *scope_blob = opaque; + + if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { + PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; + + if (bus && !pci_bus_bypass_iommu(bus)) { + pci_for_each_device_under_bus(bus, insert_scope, scope_blob); + } + } + + return 0; +} + +/* + * Intel ® Virtualization Technology for Directed I/O + * Architecture Specification. Revision 3.3 + * 8.1 DMA Remapping Reporting Structure */ static void build_dmar_q35(GArray *table_data, BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { - int dmar_start = table_data->len; - - AcpiTableDmar *dmar; - AcpiDmarHardwareUnit *drhd; - AcpiDmarRootPortATS *atsr; uint8_t dmar_flags = 0; + uint8_t rsvd10[10] = {}; + /* Root complex IOAPIC uses one path only */ + const size_t ioapic_scope_size = 6 /* device scope structure */ + + 2 /* 1 path entry */; X86IOMMUState *iommu = x86_iommu_get_default(); - AcpiDmarDeviceScope *scope = NULL; - /* Root complex IOAPIC use one path[0] only */ - size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]); IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu); + GArray *scope_blob = g_array_new(false, true, 1); + + AcpiTable table = { .sig = "DMAR", .rev = 1, .oem_id = oem_id, + .oem_table_id = oem_table_id }; + + /* + * A PCI bus walk, for each PCI host bridge. + * Insert scope for each PCI bridge and endpoint device which + * is attached to a bus with iommu enabled. 
+ */ + object_child_foreach_recursive(object_get_root(), + dmar_host_bridges, scope_blob); assert(iommu); if (x86_iommu_ir_supported(iommu)) { dmar_flags |= 0x1; /* Flags: 0x1: INT_REMAP */ } - dmar = acpi_data_push(table_data, sizeof(*dmar)); - dmar->host_address_width = intel_iommu->aw_bits - 1; - dmar->flags = dmar_flags; + acpi_table_begin(&table, table_data); + /* Host Address Width */ + build_append_int_noprefix(table_data, intel_iommu->aw_bits - 1, 1); + build_append_int_noprefix(table_data, dmar_flags, 1); /* Flags */ + g_array_append_vals(table_data, rsvd10, sizeof(rsvd10)); /* Reserved */ - /* DMAR Remapping Hardware Unit Definition structure */ - drhd = acpi_data_push(table_data, sizeof(*drhd) + ioapic_scope_size); - drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT); - drhd->length = cpu_to_le16(sizeof(*drhd) + ioapic_scope_size); - drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL; - drhd->pci_segment = cpu_to_le16(0); - drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR); + /* 8.3 DMAR Remapping Hardware Unit Definition structure */ + build_append_int_noprefix(table_data, 0, 2); /* Type */ + /* Length */ + build_append_int_noprefix(table_data, + 16 + ioapic_scope_size + scope_blob->len, 2); + /* Flags */ + build_append_int_noprefix(table_data, 0 /* Don't include all pci device */ , + 1); + build_append_int_noprefix(table_data, 0 , 1); /* Reserved */ + build_append_int_noprefix(table_data, 0 , 2); /* Segment Number */ + /* Register Base Address */ + build_append_int_noprefix(table_data, Q35_HOST_BRIDGE_IOMMU_ADDR , 8); /* Scope definition for the root-complex IOAPIC. See VT-d spec * 8.3.1 (version Oct. 2014 or later). */ - scope = &drhd->scope[0]; - scope->entry_type = 0x03; /* Type: 0x03 for IOAPIC */ - scope->length = ioapic_scope_size; - scope->enumeration_id = ACPI_BUILD_IOAPIC_ID; - scope->bus = Q35_PSEUDO_BUS_PLATFORM; - scope->path[0].device = PCI_SLOT(Q35_PSEUDO_DEVFN_IOAPIC); - scope->path[0].function = PCI_FUNC(Q35_PSEUDO_DEVFN_IOAPIC); + build_append_int_noprefix(table_data, 0x03 /* IOAPIC */, 1); /* Type */ + build_append_int_noprefix(table_data, ioapic_scope_size, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + /* Enumeration ID */ + build_append_int_noprefix(table_data, ACPI_BUILD_IOAPIC_ID, 1); + /* Start Bus Number */ + build_append_int_noprefix(table_data, Q35_PSEUDO_BUS_PLATFORM, 1); + /* Path, {Device, Function} pair */ + build_append_int_noprefix(table_data, PCI_SLOT(Q35_PSEUDO_DEVFN_IOAPIC), 1); + build_append_int_noprefix(table_data, PCI_FUNC(Q35_PSEUDO_DEVFN_IOAPIC), 1); + + /* Add scope found above */ + g_array_append_vals(table_data, scope_blob->data, scope_blob->len); + g_array_free(scope_blob, true); if (iommu->dt_supported) { - atsr = acpi_data_push(table_data, sizeof(*atsr)); - atsr->type = cpu_to_le16(ACPI_DMAR_TYPE_ATSR); - atsr->length = cpu_to_le16(sizeof(*atsr)); - atsr->flags = ACPI_DMAR_ATSR_ALL_PORTS; - atsr->pci_segment = cpu_to_le16(0); + /* 8.5 Root Port ATS Capability Reporting Structure */ + build_append_int_noprefix(table_data, 2, 2); /* Type */ + build_append_int_noprefix(table_data, 8, 2); /* Length */ + build_append_int_noprefix(table_data, 1 /* ALL_PORTS */, 1); /* Flags */ + build_append_int_noprefix(table_data, 0, 1); /* Reserved */ + build_append_int_noprefix(table_data, 0, 2); /* Segment Number */ } - build_header(linker, table_data, (void *)(table_data->data + dmar_start), - "DMAR", table_data->len - dmar_start, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } /* @@ -2058,10 
+2238,10 @@ static void build_waet(GArray *table_data, BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { - int waet_start = table_data->len; + AcpiTable table = { .sig = "WAET", .rev = 1, .oem_id = oem_id, + .oem_table_id = oem_table_id }; - /* WAET header */ - acpi_data_push(table_data, sizeof(AcpiTableHeader)); + acpi_table_begin(&table, table_data); /* * Set "ACPI PM timer good" flag. * @@ -2070,9 +2250,7 @@ build_waet(GArray *table_data, BIOSLinker *linker, const char *oem_id, * Which avoids costly VMExits caused by guest re-reading it unnecessarily. */ build_append_int_noprefix(table_data, 1 << 1 /* ACPI PM timer good */, 4); - - build_header(linker, table_data, (void *)(table_data->data + waet_start), - "WAET", table_data->len - waet_start, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } /* @@ -2165,8 +2343,8 @@ ivrs_host_bridges(Object *obj, void *opaque) if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; - if (bus) { - pci_for_each_device(bus, pci_bus_num(bus), insert_ivhd, ivhd_blob); + if (bus && !pci_bus_bypass_iommu(bus)) { + pci_for_each_device_under_bus(bus, insert_ivhd, ivhd_blob); } } @@ -2178,12 +2356,12 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { int ivhd_table_len = 24; - int iommu_start = table_data->len; AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default()); GArray *ivhd_blob = g_array_new(false, true, 1); + AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id, + .oem_table_id = oem_table_id }; - /* IVRS header */ - acpi_data_push(table_data, sizeof(AcpiTableHeader)); + acpi_table_begin(&table, table_data); /* IVinfo - IO virtualization information common to all * IOMMU units in a system */ @@ -2268,10 +2446,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, 0x48, /* special device */ 8); } - - build_header(linker, table_data, (void *)(table_data->data + iommu_start), - "IVRS", table_data->len - iommu_start, 1, oem_id, - oem_table_id); + acpi_table_end(linker, &table); } typedef @@ -2291,7 +2466,9 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg) QObject *o; pci_host = acpi_get_i386_pci_host(); - g_assert(pci_host); + if (!pci_host) { + return false; + } o = object_property_get_qobject(pci_host, PCIE_HOST_MCFG_BASE, NULL); if (!o) { @@ -2316,12 +2493,13 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) PCMachineState *pcms = PC_MACHINE(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); X86MachineState *x86ms = X86_MACHINE(machine); + DeviceState *iommu = pcms->iommu; GArray *table_offsets; unsigned facs, dsdt, rsdt, fadt; AcpiPmInfo pm; AcpiMiscInfo misc; AcpiMcfgInfo mcfg; - Range pci_hole, pci_hole64; + Range pci_hole = {}, pci_hole64 = {}; uint8_t *u; size_t aml_len = 0; GArray *tables_blob = tables->table_data; @@ -2400,6 +2578,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) build_hpet(tables_blob, tables->linker, x86ms->oem_id, x86ms->oem_table_id); } +#ifdef CONFIG_TPM if (misc.tpm_version != TPM_VERSION_UNSPEC) { if (misc.tpm_version == TPM_VERSION_1_2) { acpi_add_table(table_offsets, tables_blob); @@ -2411,7 +2590,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) x86ms->oem_id, x86ms->oem_table_id); } } - if (pcms->numa_nodes) { +#endif + if (machine->numa_state->num_nodes) { acpi_add_table(table_offsets, tables_blob); build_srat(tables_blob, tables->linker, machine); if (machine->numa_state->have_numa_distance) { @@ 
-2430,17 +2610,20 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) build_mcfg(tables_blob, tables->linker, &mcfg, x86ms->oem_id, x86ms->oem_table_id); } - if (x86_iommu_get_default()) { - IommuType IOMMUType = x86_iommu_get_type(); - if (IOMMUType == TYPE_AMD) { - acpi_add_table(table_offsets, tables_blob); - build_amd_iommu(tables_blob, tables->linker, x86ms->oem_id, - x86ms->oem_table_id); - } else if (IOMMUType == TYPE_INTEL) { - acpi_add_table(table_offsets, tables_blob); - build_dmar_q35(tables_blob, tables->linker, x86ms->oem_id, - x86ms->oem_table_id); - } + if (object_dynamic_cast(OBJECT(iommu), TYPE_AMD_IOMMU_DEVICE)) { + acpi_add_table(table_offsets, tables_blob); + build_amd_iommu(tables_blob, tables->linker, x86ms->oem_id, + x86ms->oem_table_id); + } else if (object_dynamic_cast(OBJECT(iommu), TYPE_INTEL_IOMMU_DEVICE)) { + acpi_add_table(table_offsets, tables_blob); + build_dmar_q35(tables_blob, tables->linker, x86ms->oem_id, + x86ms->oem_table_id); + } else if (object_dynamic_cast(OBJECT(iommu), TYPE_VIRTIO_IOMMU_PCI)) { + PCIDevice *pdev = PCI_DEVICE(iommu); + + acpi_add_table(table_offsets, tables_blob); + build_viot(machine, tables_blob, tables->linker, pci_get_bdf(pdev), + x86ms->oem_id, x86ms->oem_table_id); } if (machine->nvdimms_state->is_enabled) { nvdimm_build_acpi(table_offsets, tables_blob, tables->linker, @@ -2602,8 +2785,10 @@ void acpi_setup(void) AcpiBuildTables tables; AcpiBuildState *build_state; Object *vmgenid_dev; +#ifdef CONFIG_TPM TPMIf *tpm; static FwCfgTPMConfig tpm_config; +#endif if (!x86ms->fw_cfg) { ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n"); @@ -2635,6 +2820,7 @@ void acpi_setup(void) acpi_add_rom_blob(acpi_build_update, build_state, tables.linker->cmd_blob, ACPI_BUILD_LOADER_FILE); +#ifdef CONFIG_TPM fw_cfg_add_file(x86ms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data, acpi_data_len(tables.tcpalog)); @@ -2648,6 +2834,7 @@ void acpi_setup(void) fw_cfg_add_file(x86ms->fw_cfg, "etc/tpm/config", &tpm_config, sizeof tpm_config); } +#endif vmgenid_dev = find_vmgenid_dev(); if (vmgenid_dev) { diff --git a/hw/i386/acpi-build.h b/hw/i386/acpi-build.h index 74df5fc6128..0dce155c8cc 100644 --- a/hw/i386/acpi-build.h +++ b/hw/i386/acpi-build.h @@ -5,6 +5,11 @@ extern const struct AcpiGenericAddress x86_nvdimm_acpi_dsmio; +/* PCI Hot-plug registers bases. See docs/spec/acpi_pci_hotplug.txt */ +#define ACPI_PCIHP_SEJ_BASE 0x8 +#define ACPI_PCIHP_BNMR_BASE 0x10 + void acpi_setup(void); +Object *acpi_get_i386_pci_host(void); #endif diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c index 1f5947fcf96..4aaafbdd7b5 100644 --- a/hw/i386/acpi-common.c +++ b/hw/i386/acpi-common.c @@ -34,9 +34,13 @@ #include "acpi-common.h" void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, - const CPUArchIdList *apic_ids, GArray *entry) + const CPUArchIdList *apic_ids, GArray *entry, + bool force_enabled) { uint32_t apic_id = apic_ids->cpus[uid].arch_id; + /* Flags – Local APIC Flags */ + uint32_t flags = apic_ids->cpus[uid].cpu != NULL || force_enabled ? + 1 /* Enabled */ : 0; /* ACPI spec says that LAPIC entry for non present * CPU may be omitted from MADT or it must be marked @@ -45,82 +49,84 @@ void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, * should be put in MADT but kept disabled. 
*/ if (apic_id < 255) { - AcpiMadtProcessorApic *apic = acpi_data_push(entry, sizeof *apic); - - apic->type = ACPI_APIC_PROCESSOR; - apic->length = sizeof(*apic); - apic->processor_id = uid; - apic->local_apic_id = apic_id; - if (apic_ids->cpus[uid].cpu != NULL) { - apic->flags = cpu_to_le32(1); - } else { - apic->flags = cpu_to_le32(0); - } + /* Rev 1.0b, Table 5-13 Processor Local APIC Structure */ + build_append_int_noprefix(entry, 0, 1); /* Type */ + build_append_int_noprefix(entry, 8, 1); /* Length */ + build_append_int_noprefix(entry, uid, 1); /* ACPI Processor ID */ + build_append_int_noprefix(entry, apic_id, 1); /* APIC ID */ + build_append_int_noprefix(entry, flags, 4); /* Flags */ } else { - AcpiMadtProcessorX2Apic *apic = acpi_data_push(entry, sizeof *apic); - - apic->type = ACPI_APIC_LOCAL_X2APIC; - apic->length = sizeof(*apic); - apic->uid = cpu_to_le32(uid); - apic->x2apic_id = cpu_to_le32(apic_id); - if (apic_ids->cpus[uid].cpu != NULL) { - apic->flags = cpu_to_le32(1); - } else { - apic->flags = cpu_to_le32(0); - } + /* Rev 4.0, 5.2.12.12 Processor Local x2APIC Structure */ + build_append_int_noprefix(entry, 9, 1); /* Type */ + build_append_int_noprefix(entry, 16, 1); /* Length */ + build_append_int_noprefix(entry, 0, 2); /* Reserved */ + build_append_int_noprefix(entry, apic_id, 4); /* X2APIC ID */ + build_append_int_noprefix(entry, flags, 4); /* Flags */ + build_append_int_noprefix(entry, uid, 4); /* ACPI Processor UID */ } } +static void build_ioapic(GArray *entry, uint8_t id, uint32_t addr, uint32_t irq) +{ + /* Rev 1.0b, 5.2.8.2 IO APIC */ + build_append_int_noprefix(entry, 1, 1); /* Type */ + build_append_int_noprefix(entry, 12, 1); /* Length */ + build_append_int_noprefix(entry, id, 1); /* IO APIC ID */ + build_append_int_noprefix(entry, 0, 1); /* Reserved */ + build_append_int_noprefix(entry, addr, 4); /* IO APIC Address */ + build_append_int_noprefix(entry, irq, 4); /* System Vector Base */ +} + +static void +build_xrupt_override(GArray *entry, uint8_t src, uint32_t gsi, uint16_t flags) +{ + /* Rev 1.0b, 5.2.8.3.1 Interrupt Source Overrides */ + build_append_int_noprefix(entry, 2, 1); /* Type */ + build_append_int_noprefix(entry, 10, 1); /* Length */ + build_append_int_noprefix(entry, 0, 1); /* Bus */ + build_append_int_noprefix(entry, src, 1); /* Source */ + /* Global System Interrupt Vector */ + build_append_int_noprefix(entry, gsi, 4); + build_append_int_noprefix(entry, flags, 2); /* Flags */ +} + +/* + * ACPI spec, Revision 1.0b + * 5.2.8 Multiple APIC Description Table + */ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, X86MachineState *x86ms, AcpiDeviceIf *adev, const char *oem_id, const char *oem_table_id) { + int i; + bool x2apic_mode = false; MachineClass *mc = MACHINE_GET_CLASS(x86ms); const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(x86ms)); - int madt_start = table_data->len; AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(adev); - bool x2apic_mode = false; + AcpiTable table = { .sig = "APIC", .rev = 1, .oem_id = oem_id, + .oem_table_id = oem_table_id }; - AcpiMultipleApicTable *madt; - AcpiMadtIoApic *io_apic; - AcpiMadtIntsrcovr *intsrcovr; - int i; - - madt = acpi_data_push(table_data, sizeof *madt); - madt->local_apic_address = cpu_to_le32(APIC_DEFAULT_ADDRESS); - madt->flags = cpu_to_le32(1); + acpi_table_begin(&table, table_data); + /* Local APIC Address */ + build_append_int_noprefix(table_data, APIC_DEFAULT_ADDRESS, 4); + build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */ for (i = 0; i < 
apic_ids->len; i++) { - adevc->madt_cpu(adev, i, apic_ids, table_data); + adevc->madt_cpu(adev, i, apic_ids, table_data, false); if (apic_ids->cpus[i].arch_id > 254) { x2apic_mode = true; } } - io_apic = acpi_data_push(table_data, sizeof *io_apic); - io_apic->type = ACPI_APIC_IO; - io_apic->length = sizeof(*io_apic); - io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID; - io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS); - io_apic->interrupt = cpu_to_le32(0); - + build_ioapic(table_data, ACPI_BUILD_IOAPIC_ID, IO_APIC_DEFAULT_ADDRESS, 0); if (x86ms->ioapic2) { - AcpiMadtIoApic *io_apic2; - io_apic2 = acpi_data_push(table_data, sizeof *io_apic); - io_apic2->type = ACPI_APIC_IO; - io_apic2->length = sizeof(*io_apic); - io_apic2->io_apic_id = ACPI_BUILD_IOAPIC_ID + 1; - io_apic2->address = cpu_to_le32(IO_APIC_SECONDARY_ADDRESS); - io_apic2->interrupt = cpu_to_le32(IO_APIC_SECONDARY_IRQBASE); + build_ioapic(table_data, ACPI_BUILD_IOAPIC_ID + 1, + IO_APIC_SECONDARY_ADDRESS, IO_APIC_SECONDARY_IRQBASE); } if (x86ms->apic_xrupt_override) { - intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); - intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; - intsrcovr->length = sizeof(*intsrcovr); - intsrcovr->source = 0; - intsrcovr->gsi = cpu_to_le32(2); - intsrcovr->flags = cpu_to_le16(0); /* conforms to bus specifications */ + build_xrupt_override(table_data, 0, 2, + 0 /* Flags: Conforms to the specifications of the bus */); } for (i = 1; i < 16; i++) { @@ -128,36 +134,32 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, /* No need for a INT source override structure. */ continue; } - intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); - intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; - intsrcovr->length = sizeof(*intsrcovr); - intsrcovr->source = i; - intsrcovr->gsi = cpu_to_le32(i); - intsrcovr->flags = cpu_to_le16(0xd); /* active high, level triggered */ + build_xrupt_override(table_data, i, i, + 0xd /* Flags: Active high, Level Triggered */); } if (x2apic_mode) { - AcpiMadtLocalX2ApicNmi *local_nmi; - - local_nmi = acpi_data_push(table_data, sizeof *local_nmi); - local_nmi->type = ACPI_APIC_LOCAL_X2APIC_NMI; - local_nmi->length = sizeof(*local_nmi); - local_nmi->uid = 0xFFFFFFFF; /* all processors */ - local_nmi->flags = cpu_to_le16(0); - local_nmi->lint = 1; /* ACPI_LINT1 */ + /* Rev 4.0, 5.2.12.13 Local x2APIC NMI Structure*/ + build_append_int_noprefix(table_data, 0xA, 1); /* Type */ + build_append_int_noprefix(table_data, 12, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Flags */ + /* ACPI Processor UID */ + build_append_int_noprefix(table_data, 0xFFFFFFFF /* all processors */, + 4); + /* Local x2APIC LINT# */ + build_append_int_noprefix(table_data, 1 /* ACPI_LINT1 */, 1); + build_append_int_noprefix(table_data, 0, 3); /* Reserved */ } else { - AcpiMadtLocalNmi *local_nmi; - - local_nmi = acpi_data_push(table_data, sizeof *local_nmi); - local_nmi->type = ACPI_APIC_LOCAL_NMI; - local_nmi->length = sizeof(*local_nmi); - local_nmi->processor_id = 0xff; /* all processors */ - local_nmi->flags = cpu_to_le16(0); - local_nmi->lint = 1; /* ACPI_LINT1 */ + /* Rev 1.0b, 5.2.8.3.3 Local APIC NMI */ + build_append_int_noprefix(table_data, 4, 1); /* Type */ + build_append_int_noprefix(table_data, 6, 1); /* Length */ + /* ACPI Processor ID */ + build_append_int_noprefix(table_data, 0xFF /* all processors */, 1); + build_append_int_noprefix(table_data, 0, 2); /* Flags */ + /* Local APIC INTI# */ + build_append_int_noprefix(table_data, 1 /* ACPI_LINT1 */, 1); } - 
build_header(linker, table_data, - (void *)(table_data->data + madt_start), "APIC", - table_data->len - madt_start, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } diff --git a/hw/i386/acpi-common.h b/hw/i386/acpi-common.h index b12cd73ea5d..a68825acf50 100644 --- a/hw/i386/acpi-common.h +++ b/hw/i386/acpi-common.h @@ -1,9 +1,9 @@ #ifndef HW_I386_ACPI_COMMON_H #define HW_I386_ACPI_COMMON_H -#include "include/hw/acpi/acpi_dev_interface.h" -#include "include/hw/acpi/bios-linker-loader.h" -#include "include/hw/i386/x86.h" +#include "hw/acpi/acpi_dev_interface.h" +#include "hw/acpi/bios-linker-loader.h" +#include "hw/i386/x86.h" /* Default IOAPIC ID */ #define ACPI_BUILD_IOAPIC_ID 0x0 diff --git a/hw/i386/acpi-microvm.c b/hw/i386/acpi-microvm.c index ccd3303aaca..196d3184995 100644 --- a/hw/i386/acpi-microvm.c +++ b/hw/i386/acpi-microvm.c @@ -30,7 +30,6 @@ #include "hw/acpi/bios-linker-loader.h" #include "hw/acpi/generic_event_device.h" #include "hw/acpi/utils.h" -#include "hw/boards.h" #include "hw/i386/fw_cfg.h" #include "hw/i386/microvm.h" #include "hw/pci/pci.h" @@ -114,16 +113,16 @@ build_dsdt_microvm(GArray *table_data, BIOSLinker *linker, Aml *dsdt, *sb_scope, *scope, *pkg; bool ambiguous; Object *isabus; + AcpiTable table = { .sig = "DSDT", .rev = 2, .oem_id = x86ms->oem_id, + .oem_table_id = x86ms->oem_table_id }; isabus = object_resolve_path_type("", TYPE_ISA_BUS, &ambiguous); assert(isabus); assert(!ambiguous); + acpi_table_begin(&table, table_data); dsdt = init_aml_allocator(); - /* Reserve space for header */ - acpi_data_push(dsdt->buf, sizeof(AcpiTableHeader)); - sb_scope = aml_scope("_SB"); fw_cfg_add_acpi_dsdt(sb_scope, x86ms->fw_cfg); isa_build_aml(ISA_BUS(isabus), sb_scope); @@ -145,11 +144,10 @@ build_dsdt_microvm(GArray *table_data, BIOSLinker *linker, aml_append(scope, aml_name_decl("_S5", pkg)); aml_append(dsdt, scope); - /* copy AML table into ACPI tables blob and patch header there */ + /* copy AML bytecode into ACPI tables blob */ g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); - build_header(linker, table_data, - (void *)(table_data->data + table_data->len - dsdt->buf->len), - "DSDT", dsdt->buf->len, 2, x86ms->oem_id, x86ms->oem_table_id); + + acpi_table_end(linker, &table); free_aml_allocator(); } diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 74a93a5d93f..91fe34ae589 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -99,7 +99,7 @@ static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr) } /* internal write */ -static void amdvi_writeq_raw(AMDVIState *s, uint64_t val, hwaddr addr) +static void amdvi_writeq_raw(AMDVIState *s, hwaddr addr, uint64_t val) { stq_le_p(&s->mmior[addr], val); } @@ -382,7 +382,7 @@ static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd) } /* set completion interrupt */ if (extract64(cmd[0], 1, 1)) { - amdvi_test_mask(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT); + amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT); /* generate interrupt */ amdvi_generate_msi_interrupt(s); } @@ -553,7 +553,7 @@ static void amdvi_cmdbuf_run(AMDVIState *s) trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf); amdvi_cmdbuf_exec(s); s->cmdbuf_head += AMDVI_COMMAND_SIZE; - amdvi_writeq_raw(s, s->cmdbuf_head, AMDVI_MMIO_COMMAND_HEAD); + amdvi_writeq_raw(s, AMDVI_MMIO_COMMAND_HEAD, s->cmdbuf_head); /* wrap head pointer */ if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) { @@ -860,8 +860,8 @@ static inline uint8_t get_pte_translation_mode(uint64_t pte) static 
inline uint64_t pte_override_page_mask(uint64_t pte) { - uint8_t page_mask = 12; - uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) ^ AMDVI_DEV_PT_ROOT_MASK; + uint8_t page_mask = 13; + uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) >> 12; /* find the first zero bit */ while (addr & 1) { page_mask++; @@ -1526,7 +1526,7 @@ static void amdvi_init(AMDVIState *s) AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR); } -static void amdvi_reset(DeviceState *dev) +static void amdvi_sysbus_reset(DeviceState *dev) { AMDVIState *s = AMD_IOMMU_DEVICE(dev); @@ -1534,11 +1534,10 @@ static void amdvi_reset(DeviceState *dev) amdvi_init(s); } -static void amdvi_realize(DeviceState *dev, Error **errp) +static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) { int ret = 0; AMDVIState *s = AMD_IOMMU_DEVICE(dev); - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); MachineState *ms = MACHINE(qdev_get_machine()); PCMachineState *pcms = PC_MACHINE(ms); X86MachineState *x86ms = X86_MACHINE(ms); @@ -1548,7 +1547,6 @@ static void amdvi_realize(DeviceState *dev, Error **errp) amdvi_uint64_equal, g_free, g_free); /* This device should take care of IOMMU PCI properties */ - x86_iommu->type = TYPE_AMD; if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) { return; } @@ -1585,27 +1583,27 @@ static void amdvi_realize(DeviceState *dev, Error **errp) amdvi_init(s); } -static const VMStateDescription vmstate_amdvi = { +static const VMStateDescription vmstate_amdvi_sysbus = { .name = "amd-iommu", .unmigratable = 1 }; -static void amdvi_instance_init(Object *klass) +static void amdvi_sysbus_instance_init(Object *klass) { AMDVIState *s = AMD_IOMMU_DEVICE(klass); object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI); } -static void amdvi_class_init(ObjectClass *klass, void* data) +static void amdvi_sysbus_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); X86IOMMUClass *dc_class = X86_IOMMU_DEVICE_CLASS(klass); - dc->reset = amdvi_reset; - dc->vmsd = &vmstate_amdvi; + dc->reset = amdvi_sysbus_reset; + dc->vmsd = &vmstate_amdvi_sysbus; dc->hotpluggable = false; - dc_class->realize = amdvi_realize; + dc_class->realize = amdvi_sysbus_realize; dc_class->int_remap = amdvi_int_remap; /* Supported by the pc-q35-* machine types */ dc->user_creatable = true; @@ -1613,18 +1611,27 @@ static void amdvi_class_init(ObjectClass *klass, void* data) dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device"; } -static const TypeInfo amdvi = { +static const TypeInfo amdvi_sysbus = { .name = TYPE_AMD_IOMMU_DEVICE, .parent = TYPE_X86_IOMMU_DEVICE, .instance_size = sizeof(AMDVIState), - .instance_init = amdvi_instance_init, - .class_init = amdvi_class_init + .instance_init = amdvi_sysbus_instance_init, + .class_init = amdvi_sysbus_class_init }; -static const TypeInfo amdviPCI = { +static void amdvi_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device"; +} + +static const TypeInfo amdvi_pci = { .name = TYPE_AMD_IOMMU_PCI, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(AMDVIPCIState), + .class_init = amdvi_pci_class_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_CONVENTIONAL_PCI_DEVICE }, { }, @@ -1645,11 +1652,11 @@ static const TypeInfo amdvi_iommu_memory_region_info = { .class_init = amdvi_iommu_memory_region_class_init, }; -static void amdviPCI_register_types(void) +static void amdvi_register_types(void) { - type_register_static(&amdviPCI); - 
type_register_static(&amdvi); + type_register_static(&amdvi_pci); + type_register_static(&amdvi_sysbus); type_register_static(&amdvi_iommu_memory_region_info); } -type_init(amdviPCI_register_types); +type_init(amdvi_register_types); diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c index e48a54fa364..a283785a8de 100644 --- a/hw/i386/fw_cfg.c +++ b/hw/i386/fw_cfg.c @@ -22,6 +22,7 @@ #include "hw/nvram/fw_cfg.h" #include "e820_memory_layout.h" #include "kvm/kvm_i386.h" +#include "qapi/error.h" #include CONFIG_DEVICES struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; @@ -78,7 +79,8 @@ void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg) } smbios_get_tables(ms, mem_array, array_count, &smbios_tables, &smbios_tables_len, - &smbios_anchor, &smbios_anchor_len); + &smbios_anchor, &smbios_anchor_len, + &error_fatal); g_free(mem_array); if (smbios_anchor) { @@ -157,7 +159,7 @@ void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg) { X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); CPUX86State *env = &cpu->env; - uint32_t unused, ecx, edx; + uint32_t unused, ebx, ecx, edx; uint64_t feature_control_bits = 0; uint64_t *val; @@ -172,6 +174,16 @@ void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg) feature_control_bits |= FEATURE_CONTROL_LMCE; } + if (env->cpuid_level >= 7) { + cpu_x86_cpuid(env, 0x7, 0, &unused, &ebx, &ecx, &unused); + if (ebx & CPUID_7_0_EBX_SGX) { + feature_control_bits |= FEATURE_CONTROL_SGX; + } + if (ecx & CPUID_7_0_ECX_SGX_LC) { + feature_control_bits |= FEATURE_CONTROL_SGX_LC; + } + } + if (!feature_control_bits) { return; } diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 6be8f329185..f584449d8d1 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -24,14 +24,12 @@ #include "qemu/main-loop.h" #include "qapi/error.h" #include "hw/sysbus.h" -#include "exec/address-spaces.h" #include "intel_iommu_internal.h" #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" #include "hw/qdev-properties.h" #include "hw/i386/pc.h" #include "hw/i386/apic-msidef.h" -#include "hw/boards.h" #include "hw/i386/x86-iommu.h" #include "hw/pci-host/q35.h" #include "sysemu/kvm.h" @@ -681,7 +679,7 @@ static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu, } break; default: - /* Unknwon type */ + /* Unknown type */ return false; } return true; @@ -694,7 +692,7 @@ static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire) /** * Caller of this function should check present bit if wants - * to use pdir entry for futher usage except for fpd bit check. + * to use pdir entry for further usage except for fpd bit check. */ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid, @@ -748,7 +746,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, /** * Caller of this function should check present bit if wants - * to use pasid entry for futher usage except for fpd bit check. + * to use pasid entry for further usage except for fpd bit check. 
*/ static int vtd_get_pe_from_pdire(IntelIOMMUState *s, uint32_t pasid, @@ -1107,7 +1105,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) .translated_addr = entry->translated_addr, .perm = entry->perm, }; - DMAMap *mapped = iova_tree_find(as->iova_tree, &target); + const DMAMap *mapped = iova_tree_find(as->iova_tree, &target); if (event->type == IOMMU_NOTIFIER_UNMAP && !info->notify_unmap) { trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask); @@ -1509,7 +1507,7 @@ static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as) } /* - * Check if specific device is configed to bypass address + * Check if specific device is configured to bypass address * translation for DMA requests. In Scalable Mode, bypass * 1st-level translation or 2nd-level translation, it depends * on PGTT setting. @@ -3631,6 +3629,12 @@ static void vtd_init(IntelIOMMUState *s) vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits, x86_iommu->dt_supported); + if (s->scalable_mode) { + vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP; + vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP; + vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; + } + if (x86_iommu_ir_supported(x86_iommu)) { s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV; if (s->intr_eim == ON_OFF_AUTO_ON) { @@ -3808,9 +3812,6 @@ static void vtd_realize(DeviceState *dev, Error **errp) X86MachineState *x86ms = X86_MACHINE(ms); PCIBus *bus = pcms->bus; IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); - - x86_iommu->type = TYPE_INTEL; if (!vtd_decide_config(s, errp)) { return; diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 3d5487fe2ce..a6c788049ba 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -388,6 +388,8 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8 /* Rsvd field masks for spte */ +#define VTD_SPTE_SNP 0x800ULL + #define VTD_SPTE_PAGE_L1_RSVD_MASK(aw, dt_supported) \ dt_supported ? 
\ (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \ diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c index 3dbff2be2e2..1e89ca0899c 100644 --- a/hw/i386/kvm/apic.c +++ b/hw/i386/kvm/apic.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qemu/module.h" -#include "cpu.h" #include "hw/i386/apic_internal.h" #include "hw/pci/msi.h" #include "sysemu/hw_accel.h" @@ -146,7 +145,7 @@ static void kvm_apic_put(CPUState *cs, run_on_cpu_data data) ret = kvm_vcpu_ioctl(CPU(s->cpu), KVM_SET_LAPIC, &kapic); if (ret < 0) { - fprintf(stderr, "KVM_SET_LAPIC failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_SET_LAPIC failed: %s\n", strerror(-ret)); abort(); } } diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index 51872dd84c0..df70b4a0338 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -14,7 +14,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "qemu/host-utils.h" #include "qemu/module.h" #include "sysemu/kvm.h" @@ -106,7 +105,7 @@ static void kvm_update_clock(KVMClockState *s) ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); if (ret < 0) { - fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(-ret)); abort(); } s->clock = data.clock; @@ -190,7 +189,7 @@ static void kvmclock_vm_state_change(void *opaque, bool running, data.clock = s->clock; ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); if (ret < 0) { - fprintf(stderr, "KVM_SET_CLOCK failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_SET_CLOCK failed: %s\n", strerror(-ret)); abort(); } diff --git a/hw/i386/kvm/i8254.c b/hw/i386/kvm/i8254.c index c558893961b..191a26fa57e 100644 --- a/hw/i386/kvm/i8254.c +++ b/hw/i386/kvm/i8254.c @@ -59,11 +59,6 @@ struct KVMPITClass { DeviceRealize parent_realize; }; -static int64_t abs64(int64_t v) -{ - return v < 0 ? -v : v; -} - static void kvm_pit_update_clock_offset(KVMPITState *s) { int64_t offset, clock_offset; @@ -81,7 +76,7 @@ static void kvm_pit_update_clock_offset(KVMPITState *s) clock_gettime(CLOCK_MONOTONIC, &ts); offset -= ts.tv_nsec; offset -= (int64_t)ts.tv_sec * 1000000000; - if (abs64(offset) < abs64(clock_offset)) { + if (uabs64(offset) < uabs64(clock_offset)) { clock_offset = offset; } } @@ -104,7 +99,7 @@ static void kvm_pit_get(PITCommonState *pit) if (kvm_has_pit_state2()) { ret = kvm_vm_ioctl(kvm_state, KVM_GET_PIT2, &kpit); if (ret < 0) { - fprintf(stderr, "KVM_GET_PIT2 failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_GET_PIT2 failed: %s\n", strerror(-ret)); abort(); } pit->channels[0].irq_disabled = kpit.flags & KVM_PIT_FLAGS_HPET_LEGACY; @@ -115,7 +110,7 @@ static void kvm_pit_get(PITCommonState *pit) */ ret = kvm_vm_ioctl(kvm_state, KVM_GET_PIT, &kpit); if (ret < 0) { - fprintf(stderr, "KVM_GET_PIT failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_GET_PIT failed: %s\n", strerror(-ret)); abort(); } } @@ -180,7 +175,7 @@ static void kvm_pit_put(PITCommonState *pit) if (ret < 0) { fprintf(stderr, "%s failed: %s\n", kvm_has_pit_state2() ? 
"KVM_SET_PIT2" : "KVM_SET_PIT", - strerror(ret)); + strerror(-ret)); abort(); } } @@ -272,7 +267,7 @@ static void kvm_pit_realizefn(DeviceState *dev, Error **errp) } if (ret < 0) { error_setg(errp, "Create kernel PIC irqchip failed: %s", - strerror(ret)); + strerror(-ret)); return; } switch (s->lost_tick_policy) { @@ -286,7 +281,7 @@ static void kvm_pit_realizefn(DeviceState *dev, Error **errp) if (ret < 0) { error_setg(errp, "Can't disable in-kernel PIT reinjection: %s", - strerror(ret)); + strerror(-ret)); return; } } diff --git a/hw/i386/kvm/i8259.c b/hw/i386/kvm/i8259.c index 3f8bf69e9ca..d61bae4dc35 100644 --- a/hw/i386/kvm/i8259.c +++ b/hw/i386/kvm/i8259.c @@ -43,7 +43,7 @@ static void kvm_pic_get(PICCommonState *s) chip.chip_id = s->master ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE; ret = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, &chip); if (ret < 0) { - fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(-ret)); abort(); } @@ -96,7 +96,7 @@ static void kvm_pic_put(PICCommonState *s) ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip); if (ret < 0) { - fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(-ret)); abort(); } } diff --git a/hw/i386/kvm/ioapic.c b/hw/i386/kvm/ioapic.c index dfc3c980057..ee7c8ef68be 100644 --- a/hw/i386/kvm/ioapic.c +++ b/hw/i386/kvm/ioapic.c @@ -13,7 +13,6 @@ #include "qemu/osdep.h" #include "monitor/monitor.h" #include "hw/i386/x86.h" -#include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/i386/ioapic_internal.h" #include "hw/i386/apic_internal.h" @@ -63,7 +62,7 @@ static void kvm_ioapic_get(IOAPICCommonState *s) chip.chip_id = KVM_IRQCHIP_IOAPIC; ret = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, &chip); if (ret < 0) { - fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(-ret)); abort(); } @@ -96,7 +95,7 @@ static void kvm_ioapic_put(IOAPICCommonState *s) ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip); if (ret < 0) { - fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(-ret)); abort(); } } diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c index 46315445d22..43f8a8f679e 100644 --- a/hw/i386/kvmvapic.c +++ b/hw/i386/kvmvapic.c @@ -11,7 +11,6 @@ #include "qemu/osdep.h" #include "qemu/module.h" -#include "cpu.h" #include "sysemu/sysemu.h" #include "sysemu/cpus.h" #include "sysemu/hw_accel.h" diff --git a/hw/i386/meson.build b/hw/i386/meson.build index e5d109f5c64..213e2e82b3d 100644 --- a/hw/i386/meson.build +++ b/hw/i386/meson.build @@ -11,11 +11,13 @@ i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'), if_false: files('x86-iommu-stub.c')) i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c')) i386_ss.add(when: 'CONFIG_I440FX', if_true: files('pc_piix.c')) -i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('microvm.c', 'acpi-microvm.c')) +i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c')) i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c')) i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c')) i386_ss.add(when: 'CONFIG_VTD', if_true: files('intel_iommu.c')) +i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'), + if_false: files('sgx-stub.c')) i386_ss.add(when: 'CONFIG_ACPI', 
if_true: files('acpi-common.c')) i386_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device_x86.c')) @@ -24,6 +26,8 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files( 'pc_sysfw.c', 'acpi-build.c', 'port92.c')) +i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'), + if_false: files('pc_sysfw_ovmf-stubs.c')) subdir('kvm') subdir('xen') diff --git a/hw/i386/microvm-dt.c b/hw/i386/microvm-dt.c new file mode 100644 index 00000000000..9c3c4995b41 --- /dev/null +++ b/hw/i386/microvm-dt.c @@ -0,0 +1,348 @@ +/* + * microvm device tree support + * + * This generates an device tree for microvm and exports it via fw_cfg + * as "etc/fdt" to the firmware (edk2 specifically). + * + * The use case is to allow edk2 find the pcie ecam and the virtio + * devices, without adding an ACPI parser, reusing the fdt parser + * which is needed anyway for the arm platform. + * + * Note 1: The device tree is incomplete. CPUs and memory is missing + * for example, those can be detected using other fw_cfg files. + * Also pci ecam irq routing is not there, edk2 doesn't use + * interrupts. + * + * Note 2: This is for firmware only. OSes should use the more + * complete ACPI tables for hardware discovery. + * + * ---------------------------------------------------------------------- + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "sysemu/device_tree.h" +#include "hw/char/serial.h" +#include "hw/i386/fw_cfg.h" +#include "hw/rtc/mc146818rtc.h" +#include "hw/sysbus.h" +#include "hw/virtio/virtio-mmio.h" +#include "hw/usb/xhci.h" + +#include "microvm-dt.h" + +static bool debug; + +static void dt_add_microvm_irq(MicrovmMachineState *mms, + const char *nodename, uint32_t irq) +{ + int index = 0; + + if (irq >= IO_APIC_SECONDARY_IRQBASE) { + irq -= IO_APIC_SECONDARY_IRQBASE; + index++; + } + + qemu_fdt_setprop_cell(mms->fdt, nodename, "interrupt-parent", + mms->ioapic_phandle[index]); + qemu_fdt_setprop_cells(mms->fdt, nodename, "interrupts", irq, 0); +} + +static void dt_add_virtio(MicrovmMachineState *mms, VirtIOMMIOProxy *mmio) +{ + SysBusDevice *dev = SYS_BUS_DEVICE(mmio); + VirtioBusState *mmio_virtio_bus = &mmio->bus; + BusState *mmio_bus = &mmio_virtio_bus->parent_obj; + char *nodename; + + if (QTAILQ_EMPTY(&mmio_bus->children)) { + return; + } + + hwaddr base = dev->mmio[0].addr; + hwaddr size = 512; + unsigned index = (base - VIRTIO_MMIO_BASE) / size; + uint32_t irq = mms->virtio_irq_base + index; + + nodename = g_strdup_printf("/virtio_mmio@%" PRIx64, base); + qemu_fdt_add_subnode(mms->fdt, nodename); + qemu_fdt_setprop_string(mms->fdt, nodename, "compatible", "virtio,mmio"); + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "reg", 2, base, 2, size); + qemu_fdt_setprop(mms->fdt, nodename, "dma-coherent", NULL, 0); + dt_add_microvm_irq(mms, nodename, irq); + g_free(nodename); +} + +static void dt_add_xhci(MicrovmMachineState *mms) +{ + const char compat[] = "generic-xhci"; + uint32_t irq = MICROVM_XHCI_IRQ; + hwaddr base = MICROVM_XHCI_BASE; + hwaddr size = XHCI_LEN_REGS; + char *nodename; + + nodename = g_strdup_printf("/usb@%" PRIx64, base); + qemu_fdt_add_subnode(mms->fdt, nodename); + qemu_fdt_setprop(mms->fdt, nodename, "compatible", compat, sizeof(compat)); + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "reg", 2, base, 2, size); + qemu_fdt_setprop(mms->fdt, nodename, "dma-coherent", NULL, 0); + dt_add_microvm_irq(mms, nodename, irq); + g_free(nodename); +} + +static void dt_add_pcie(MicrovmMachineState *mms) +{ + hwaddr base = PCIE_MMIO_BASE; + int nr_pcie_buses; + char *nodename; + + nodename = g_strdup_printf("/pcie@%" PRIx64, base); + qemu_fdt_add_subnode(mms->fdt, nodename); + qemu_fdt_setprop_string(mms->fdt, nodename, + "compatible", "pci-host-ecam-generic"); + qemu_fdt_setprop_string(mms->fdt, nodename, "device_type", "pci"); + qemu_fdt_setprop_cell(mms->fdt, nodename, "#address-cells", 3); + qemu_fdt_setprop_cell(mms->fdt, nodename, "#size-cells", 2); + qemu_fdt_setprop_cell(mms->fdt, nodename, "linux,pci-domain", 0); + qemu_fdt_setprop(mms->fdt, nodename, "dma-coherent", NULL, 0); + + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "reg", + 2, PCIE_ECAM_BASE, 2, PCIE_ECAM_SIZE); + if (mms->gpex.mmio64.size) { + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "ranges", + + 1, FDT_PCI_RANGE_MMIO, + 2, mms->gpex.mmio32.base, + 2, mms->gpex.mmio32.base, + 2, mms->gpex.mmio32.size, + + 1, FDT_PCI_RANGE_MMIO_64BIT, + 2, mms->gpex.mmio64.base, + 2, mms->gpex.mmio64.base, + 2, mms->gpex.mmio64.size); + } else { + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "ranges", + + 1, FDT_PCI_RANGE_MMIO, + 2, mms->gpex.mmio32.base, + 2, mms->gpex.mmio32.base, + 2, mms->gpex.mmio32.size); + } + + nr_pcie_buses = PCIE_ECAM_SIZE / PCIE_MMCFG_SIZE_MIN; + qemu_fdt_setprop_cells(mms->fdt, nodename, "bus-range", 0, + nr_pcie_buses - 1); + + 
g_free(nodename); +} + +static void dt_add_ioapic(MicrovmMachineState *mms, SysBusDevice *dev) +{ + hwaddr base = dev->mmio[0].addr; + char *nodename; + uint32_t ph; + int index; + + switch (base) { + case IO_APIC_DEFAULT_ADDRESS: + index = 0; + break; + case IO_APIC_SECONDARY_ADDRESS: + index = 1; + break; + default: + fprintf(stderr, "unknown ioapic @ %" PRIx64 "\n", base); + return; + } + + nodename = g_strdup_printf("/ioapic%d@%" PRIx64, index + 1, base); + qemu_fdt_add_subnode(mms->fdt, nodename); + qemu_fdt_setprop_string(mms->fdt, nodename, + "compatible", "intel,ce4100-ioapic"); + qemu_fdt_setprop(mms->fdt, nodename, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(mms->fdt, nodename, "#interrupt-cells", 0x2); + qemu_fdt_setprop_cell(mms->fdt, nodename, "#address-cells", 0x2); + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "reg", + 2, base, 2, 0x1000); + + ph = qemu_fdt_alloc_phandle(mms->fdt); + qemu_fdt_setprop_cell(mms->fdt, nodename, "phandle", ph); + qemu_fdt_setprop_cell(mms->fdt, nodename, "linux,phandle", ph); + mms->ioapic_phandle[index] = ph; + + g_free(nodename); +} + +static void dt_add_isa_serial(MicrovmMachineState *mms, ISADevice *dev) +{ + const char compat[] = "ns16550"; + uint32_t irq = object_property_get_int(OBJECT(dev), "irq", NULL); + hwaddr base = object_property_get_int(OBJECT(dev), "iobase", NULL); + hwaddr size = 8; + char *nodename; + + nodename = g_strdup_printf("/serial@%" PRIx64, base); + qemu_fdt_add_subnode(mms->fdt, nodename); + qemu_fdt_setprop(mms->fdt, nodename, "compatible", compat, sizeof(compat)); + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "reg", 2, base, 2, size); + dt_add_microvm_irq(mms, nodename, irq); + + if (base == 0x3f8 /* com1 */) { + qemu_fdt_setprop_string(mms->fdt, "/chosen", "stdout-path", nodename); + } + + g_free(nodename); +} + +static void dt_add_isa_rtc(MicrovmMachineState *mms, ISADevice *dev) +{ + const char compat[] = "motorola,mc146818"; + uint32_t irq = RTC_ISA_IRQ; + hwaddr base = RTC_ISA_BASE; + hwaddr size = 8; + char *nodename; + + nodename = g_strdup_printf("/rtc@%" PRIx64, base); + qemu_fdt_add_subnode(mms->fdt, nodename); + qemu_fdt_setprop(mms->fdt, nodename, "compatible", compat, sizeof(compat)); + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "reg", 2, base, 2, size); + dt_add_microvm_irq(mms, nodename, irq); + g_free(nodename); +} + +static void dt_setup_isa_bus(MicrovmMachineState *mms, DeviceState *bridge) +{ + BusState *bus = qdev_get_child_bus(bridge, "isa.0"); + BusChild *kid; + Object *obj; + + QTAILQ_FOREACH(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + + /* serial */ + obj = object_dynamic_cast(OBJECT(dev), TYPE_ISA_SERIAL); + if (obj) { + dt_add_isa_serial(mms, ISA_DEVICE(obj)); + continue; + } + + /* rtc */ + obj = object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC); + if (obj) { + dt_add_isa_rtc(mms, ISA_DEVICE(obj)); + continue; + } + + if (debug) { + fprintf(stderr, "%s: unhandled: %s\n", __func__, + object_get_typename(OBJECT(dev))); + } + } +} + +static void dt_setup_sys_bus(MicrovmMachineState *mms) +{ + BusState *bus; + BusChild *kid; + Object *obj; + + /* sysbus devices */ + bus = sysbus_get_default(); + QTAILQ_FOREACH(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + + /* ioapic */ + obj = object_dynamic_cast(OBJECT(dev), TYPE_IOAPIC); + if (obj) { + dt_add_ioapic(mms, SYS_BUS_DEVICE(obj)); + continue; + } + } + + QTAILQ_FOREACH(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + + /* virtio */ + obj = 
object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MMIO); + if (obj) { + dt_add_virtio(mms, VIRTIO_MMIO(obj)); + continue; + } + + /* xhci */ + obj = object_dynamic_cast(OBJECT(dev), TYPE_XHCI_SYSBUS); + if (obj) { + dt_add_xhci(mms); + continue; + } + + /* pcie */ + obj = object_dynamic_cast(OBJECT(dev), TYPE_GPEX_HOST); + if (obj) { + dt_add_pcie(mms); + continue; + } + + /* isa */ + obj = object_dynamic_cast(OBJECT(dev), "isabus-bridge"); + if (obj) { + dt_setup_isa_bus(mms, DEVICE(obj)); + continue; + } + + if (debug) { + obj = object_dynamic_cast(OBJECT(dev), TYPE_IOAPIC); + if (obj) { + /* ioapic already added in first pass */ + continue; + } + fprintf(stderr, "%s: unhandled: %s\n", __func__, + object_get_typename(OBJECT(dev))); + } + } +} + +void dt_setup_microvm(MicrovmMachineState *mms) +{ + X86MachineState *x86ms = X86_MACHINE(mms); + int size = 0; + + mms->fdt = create_device_tree(&size); + + /* root node */ + qemu_fdt_setprop_string(mms->fdt, "/", "compatible", "linux,microvm"); + qemu_fdt_setprop_cell(mms->fdt, "/", "#address-cells", 0x2); + qemu_fdt_setprop_cell(mms->fdt, "/", "#size-cells", 0x2); + + qemu_fdt_add_subnode(mms->fdt, "/chosen"); + dt_setup_sys_bus(mms); + + /* add to fw_cfg */ + if (debug) { + fprintf(stderr, "%s: add etc/fdt to fw_cfg\n", __func__); + } + fw_cfg_add_file(x86ms->fw_cfg, "etc/fdt", mms->fdt, size); + + if (debug) { + fprintf(stderr, "%s: writing microvm.fdt\n", __func__); + if (!g_file_set_contents("microvm.fdt", mms->fdt, size, NULL)) { + fprintf(stderr, "%s: writing microvm.fdt failed\n", __func__); + return; + } + int ret = system("dtc -I dtb -O dts microvm.fdt"); + if (ret != 0) { + fprintf(stderr, "%s: oops, dtc not installed?\n", __func__); + } + } +} diff --git a/hw/i386/microvm-dt.h b/hw/i386/microvm-dt.h new file mode 100644 index 00000000000..77c79cbdd9f --- /dev/null +++ b/hw/i386/microvm-dt.h @@ -0,0 +1,8 @@ +#ifndef HW_I386_MICROVM_DT_H +#define HW_I386_MICROVM_DT_H + +#include "hw/i386/microvm.h" + +void dt_setup_microvm(MicrovmMachineState *mms); + +#endif diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index edf2b0f0618..4b3b1dd262f 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -28,6 +28,7 @@ #include "sysemu/reset.h" #include "sysemu/runstate.h" #include "acpi-microvm.h" +#include "microvm-dt.h" #include "hw/loader.h" #include "hw/irq.h" @@ -49,7 +50,6 @@ #include "hw/pci-host/gpex.h" #include "hw/usb/xhci.h" -#include "cpu.h" #include "elf.h" #include "kvm/kvm_i386.h" #include "hw/xen/start_info.h" @@ -332,7 +332,7 @@ static void microvm_memory_init(MicrovmMachineState *mms) rom_set_fw(fw_cfg); if (machine->kernel_filename != NULL) { - x86_load_linux(x86ms, fw_cfg, 0, true, true); + x86_load_linux(x86ms, fw_cfg, 0, true); } if (mms->option_roms) { @@ -459,15 +459,10 @@ static void microvm_machine_state_init(MachineState *machine) { MicrovmMachineState *mms = MICROVM_MACHINE(machine); X86MachineState *x86ms = X86_MACHINE(machine); - Error *local_err = NULL; microvm_memory_init(mms); x86_cpus_init(x86ms, CPU_VERSION_LATEST); - if (local_err) { - error_report_err(local_err); - exit(1); - } microvm_devices_init(mms); } @@ -632,6 +627,7 @@ static void microvm_machine_done(Notifier *notifier, void *data) machine_done); acpi_setup_microvm(mms); + dt_setup_microvm(mms); } static void microvm_powerdown_req(Notifier *notifier, void *data) @@ -673,6 +669,7 @@ static void microvm_machine_initfn(Object *obj) static void microvm_class_init(ObjectClass *oc, void *data) { + X86MachineClass *x86mc = X86_MACHINE_CLASS(oc); MachineClass 
*mc = MACHINE_CLASS(oc); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); @@ -703,6 +700,8 @@ static void microvm_class_init(ObjectClass *oc, void *data) hc->unplug_request = microvm_device_unplug_request_cb; hc->unplug = microvm_device_unplug_cb; + x86mc->fwcfg_dma_enabled = true; + object_class_property_add(oc, MICROVM_MACHINE_PIC, "OnOffAuto", microvm_machine_get_pic, microvm_machine_set_pic, diff --git a/hw/i386/multiboot.c b/hw/i386/multiboot.c index 9e7d69d4705..0a10089f14b 100644 --- a/hw/i386/multiboot.c +++ b/hw/i386/multiboot.c @@ -143,7 +143,8 @@ static void mb_add_mod(MultibootState *s, s->mb_mods_count++; } -int load_multiboot(FWCfgState *fw_cfg, +int load_multiboot(X86MachineState *x86ms, + FWCfgState *fw_cfg, FILE *f, const char *kernel_filename, const char *initrd_filename, @@ -151,6 +152,7 @@ int load_multiboot(FWCfgState *fw_cfg, int kernel_file_size, uint8_t *header) { + bool multiboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; int i, is_multiboot = 0; uint32_t flags = 0; uint32_t mh_entry_addr; @@ -401,7 +403,11 @@ int load_multiboot(FWCfgState *fw_cfg, fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, mb_bootinfo_data, sizeof(bootinfo)); - option_rom[nb_option_roms].name = "multiboot.bin"; + if (multiboot_dma_enabled) { + option_rom[nb_option_roms].name = "multiboot_dma.bin"; + } else { + option_rom[nb_option_roms].name = "multiboot.bin"; + } option_rom[nb_option_roms].bootindex = 0; nb_option_roms++; diff --git a/hw/i386/multiboot.h b/hw/i386/multiboot.h index 60de309cd13..2b9182a8ea8 100644 --- a/hw/i386/multiboot.h +++ b/hw/i386/multiboot.h @@ -2,8 +2,10 @@ #define QEMU_MULTIBOOT_H #include "hw/nvram/fw_cfg.h" +#include "hw/i386/x86.h" -int load_multiboot(FWCfgState *fw_cfg, +int load_multiboot(X86MachineState *x86ms, + FWCfgState *fw_cfg, FILE *f, const char *kernel_filename, const char *initrd_filename, diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 8a84b25a031..a2ef40ecbc2 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -65,8 +65,6 @@ #include "hw/xen/start_info.h" #include "ui/qemu-spice.h" #include "exec/memory.h" -#include "exec/address-spaces.h" -#include "sysemu/arch_init.h" #include "qemu/bitmap.h" #include "qemu/config-file.h" #include "qemu/error-report.h" @@ -74,7 +72,6 @@ #include "qemu/cutils.h" #include "hw/acpi/acpi.h" #include "hw/acpi/cpu_hotplug.h" -#include "hw/boards.h" #include "acpi-build.h" #include "hw/mem/pc-dimm.h" #include "hw/mem/nvdimm.h" @@ -86,6 +83,7 @@ #include "hw/i386/intel_iommu.h" #include "hw/net/ne2000-isa.h" #include "standard-headers/asm-x86/bootparam.h" +#include "hw/virtio/virtio-iommu.h" #include "hw/virtio/virtio-pmem-pci.h" #include "hw/virtio/virtio-mem-pci.h" #include "hw/mem/memory-device.h" @@ -96,6 +94,24 @@ #include "trace.h" #include CONFIG_DEVICES +GlobalProperty pc_compat_6_1[] = { + { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" }, + { TYPE_X86_CPU, "hv-version-id-major", "0x0006" }, + { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" }, + { "ICH9-LPC", "x-keep-pci-slot-hpc", "false" }, +}; +const size_t pc_compat_6_1_len = G_N_ELEMENTS(pc_compat_6_1); + +GlobalProperty pc_compat_6_0[] = { + { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, + { "qemu64" "-" TYPE_X86_CPU, "model", "6" }, + { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, + { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, + { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, + { "ICH9-LPC", "x-keep-pci-slot-hpc", "true" }, +}; +const size_t pc_compat_6_0_len = G_N_ELEMENTS(pc_compat_6_0); + GlobalProperty pc_compat_5_2[] = { { 
"ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, }; @@ -304,7 +320,7 @@ const size_t pc_compat_2_0_len = G_N_ELEMENTS(pc_compat_2_0); GlobalProperty pc_compat_1_7[] = { PC_CPU_MODEL_IDS("1.7.0") { TYPE_USB_DEVICE, "msos-desc", "no" }, - { "PIIX4_PM", "acpi-pci-hotplug-with-bridge-support", "off" }, + { "PIIX4_PM", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, { "hpet", HPET_INTCAP, "4" }, }; const size_t pc_compat_1_7_len = G_N_ELEMENTS(pc_compat_1_7); @@ -701,79 +717,6 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level) } } -/* - * This function is very similar to smp_parse() - * in hw/core/machine.c but includes CPU die support. - */ -void pc_smp_parse(MachineState *ms, QemuOpts *opts) -{ - X86MachineState *x86ms = X86_MACHINE(ms); - - if (opts) { - unsigned cpus = qemu_opt_get_number(opts, "cpus", 0); - unsigned sockets = qemu_opt_get_number(opts, "sockets", 0); - unsigned dies = qemu_opt_get_number(opts, "dies", 1); - unsigned cores = qemu_opt_get_number(opts, "cores", 0); - unsigned threads = qemu_opt_get_number(opts, "threads", 0); - - /* compute missing values, prefer sockets over cores over threads */ - if (cpus == 0 || sockets == 0) { - cores = cores > 0 ? cores : 1; - threads = threads > 0 ? threads : 1; - if (cpus == 0) { - sockets = sockets > 0 ? sockets : 1; - cpus = cores * threads * dies * sockets; - } else { - ms->smp.max_cpus = - qemu_opt_get_number(opts, "maxcpus", cpus); - sockets = ms->smp.max_cpus / (cores * threads * dies); - } - } else if (cores == 0) { - threads = threads > 0 ? threads : 1; - cores = cpus / (sockets * dies * threads); - cores = cores > 0 ? cores : 1; - } else if (threads == 0) { - threads = cpus / (cores * dies * sockets); - threads = threads > 0 ? threads : 1; - } else if (sockets * dies * cores * threads < cpus) { - error_report("cpu topology: " - "sockets (%u) * dies (%u) * cores (%u) * threads (%u) < " - "smp_cpus (%u)", - sockets, dies, cores, threads, cpus); - exit(1); - } - - ms->smp.max_cpus = - qemu_opt_get_number(opts, "maxcpus", cpus); - - if (ms->smp.max_cpus < cpus) { - error_report("maxcpus must be equal to or greater than smp"); - exit(1); - } - - if (sockets * dies * cores * threads != ms->smp.max_cpus) { - error_report("Invalid CPU topology deprecated: " - "sockets (%u) * dies (%u) * cores (%u) * threads (%u) " - "!= maxcpus (%u)", - sockets, dies, cores, threads, - ms->smp.max_cpus); - exit(1); - } - - ms->smp.cpus = cpus; - ms->smp.cores = cores; - ms->smp.threads = threads; - ms->smp.sockets = sockets; - x86ms->smp_dies = dies; - } - - if (ms->smp.cpus > 1) { - Error *blocker = NULL; - error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp"); - replay_add_blocker(blocker); - } -} - static void pc_machine_done(Notifier *notifier, void *data) { @@ -805,18 +748,9 @@ void pc_machine_done(Notifier *notifier, void *data) void pc_guest_info_init(PCMachineState *pcms) { - int i; - MachineState *ms = MACHINE(pcms); X86MachineState *x86ms = X86_MACHINE(pcms); x86ms->apic_xrupt_override = true; - pcms->numa_nodes = ms->numa_state->num_nodes; - pcms->node_mem = g_malloc0(pcms->numa_nodes * - sizeof *pcms->node_mem); - for (i = 0; i < ms->numa_state->num_nodes; i++) { - pcms->node_mem[i] = ms->numa_state->nodes[i].node_mem; - } - pcms->machine_done.notify = pc_machine_done; qemu_add_machine_init_done_notifier(&pcms->machine_done); } @@ -844,17 +778,24 @@ void xen_load_linux(PCMachineState *pcms) rom_set_fw(fw_cfg); x86_load_linux(x86ms, fw_cfg, pcmc->acpi_data_size, - pcmc->pvh_enabled, pcmc->linuxboot_dma_enabled); + pcmc->pvh_enabled); for 
(i = 0; i < nb_option_roms; i++) { assert(!strcmp(option_rom[i].name, "linuxboot.bin") || !strcmp(option_rom[i].name, "linuxboot_dma.bin") || !strcmp(option_rom[i].name, "pvh.bin") || - !strcmp(option_rom[i].name, "multiboot.bin")); + !strcmp(option_rom[i].name, "multiboot.bin") || + !strcmp(option_rom[i].name, "multiboot_dma.bin")); rom_add_option(option_rom[i].name, option_rom[i].bootindex); } x86ms->fw_cfg = fw_cfg; } +#define PC_ROM_MIN_VGA 0xc0000 +#define PC_ROM_MIN_OPTION 0xc8000 +#define PC_ROM_MAX 0xe0000 +#define PC_ROM_ALIGN 0x800 +#define PC_ROM_SIZE (PC_ROM_MAX - PC_ROM_MIN_VGA) + void pc_memory_init(PCMachineState *pcms, MemoryRegion *system_memory, MemoryRegion *rom_memory, @@ -895,6 +836,10 @@ void pc_memory_init(PCMachineState *pcms, e820_add_entry(0x100000000ULL, x86ms->above_4g_mem_size, E820_RAM); } + if (pcms->sgx_epc.size != 0) { + e820_add_entry(pcms->sgx_epc.base, pcms->sgx_epc.size, E820_RESERVED); + } + if (!pcmc->has_reserved_memory && (machine->ram_slots || (machine->maxram_size > machine->ram_size))) { @@ -925,8 +870,15 @@ void pc_memory_init(PCMachineState *pcms, exit(EXIT_FAILURE); } + if (pcms->sgx_epc.size != 0) { + machine->device_memory->base = sgx_epc_above_4g_end(&pcms->sgx_epc); + } else { + machine->device_memory->base = + 0x100000000ULL + x86ms->above_4g_mem_size; + } + machine->device_memory->base = - ROUND_UP(0x100000000ULL + x86ms->above_4g_mem_size, 1 * GiB); + ROUND_UP(machine->device_memory->base, 1 * GiB); if (pcmc->enforce_aligned_dimm) { /* size device region assuming 1G page max alignment per slot */ @@ -979,7 +931,7 @@ void pc_memory_init(PCMachineState *pcms, if (linux_boot) { x86_load_linux(x86ms, fw_cfg, pcmc->acpi_data_size, - pcmc->pvh_enabled, pcmc->linuxboot_dma_enabled); + pcmc->pvh_enabled); } for (i = 0; i < nb_option_roms; i++) { @@ -1011,6 +963,8 @@ uint64_t pc_pci_hole64_start(void) if (!pcmc->broken_reserved_end) { hole64_start += memory_region_size(&ms->device_memory->mr); } + } else if (pcms->sgx_epc.size != 0) { + hole64_start = sgx_epc_above_4g_end(&pcms->sgx_epc); } else { hole64_start = 0x100000000ULL + x86ms->above_4g_mem_size; } @@ -1380,6 +1334,27 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { pc_virtio_md_pci_pre_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { + /* Declare the APIC range as the reserved MSI region */ + char *resv_prop_str = g_strdup_printf("0xfee00000:0xfeefffff:%d", + VIRTIO_IOMMU_RESV_MEM_T_MSI); + + object_property_set_uint(OBJECT(dev), "len-reserved-regions", 1, errp); + object_property_set_str(OBJECT(dev), "reserved-regions[0]", + resv_prop_str, errp); + g_free(resv_prop_str); + } + + if (object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { + PCMachineState *pcms = PC_MACHINE(hotplug_dev); + + if (pcms->iommu) { + error_setg(errp, "QEMU does not support multiple vIOMMUs " + "for x86 yet."); + return; + } + pcms->iommu = dev; } } @@ -1434,7 +1409,9 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine, if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || object_dynamic_cast(OBJECT(dev), TYPE_CPU) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI) || + object_dynamic_cast(OBJECT(dev), 
TYPE_VIRTIO_IOMMU_PCI) || + object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE)) { return HOTPLUG_HANDLER(machine); } @@ -1529,6 +1506,21 @@ static void pc_machine_set_hpet(Object *obj, bool value, Error **errp) pcms->hpet_enabled = value; } +static bool pc_machine_get_default_bus_bypass_iommu(Object *obj, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + return pcms->default_bus_bypass_iommu; +} + +static void pc_machine_set_default_bus_bypass_iommu(Object *obj, bool value, + Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + pcms->default_bus_bypass_iommu = value; +} + static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) @@ -1628,6 +1620,7 @@ static void pc_machine_initfn(Object *obj) #ifdef CONFIG_HPET pcms->hpet_enabled = true; #endif + pcms->default_bus_bypass_iommu = false; pc_system_flash_create(pcms); pcms->pcspk = isa_new(TYPE_PC_SPEAKER); @@ -1698,7 +1691,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported * to be used at the moment, 32K should be enough for a while. */ pcmc->acpi_data_size = 0x20000 + 0x8000; - pcmc->linuxboot_dma_enabled = true; pcmc->pvh_enabled = true; pcmc->kvmclock_create_always = true; assert(!mc->get_hotplug_handler); @@ -1711,7 +1703,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->auto_enable_numa_with_memdev = true; mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; - mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; mc->max_cpus = 255; mc->reset = pc_machine_reset; @@ -1722,6 +1713,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) hc->unplug = pc_machine_device_unplug_cb; mc->default_cpu_type = TARGET_DEFAULT_CPU_TYPE; mc->nvdimm_supported = true; + mc->smp_props.dies_supported = true; mc->default_ram_id = "pc.ram"; object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size", @@ -1752,6 +1744,10 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) object_class_property_add_bool(oc, "hpet", pc_machine_get_hpet, pc_machine_set_hpet); + object_class_property_add_bool(oc, "default-bus-bypass-iommu", + pc_machine_get_default_bus_bypass_iommu, + pc_machine_set_default_bus_bypass_iommu); + object_class_property_add(oc, PC_MACHINE_MAX_FW_SIZE, "size", pc_machine_get_max_fw_size, pc_machine_set_max_fw_size, NULL, NULL); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 46cc951073b..223dd3e05d1 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -42,15 +42,11 @@ #include "hw/irq.h" #include "sysemu/kvm.h" #include "hw/kvm/clock.h" -#include "sysemu/sysemu.h" #include "hw/sysbus.h" -#include "sysemu/arch_init.h" #include "hw/i2c/smbus_eeprom.h" #include "hw/xen/xen-x86.h" #include "exec/memory.h" -#include "exec/address-spaces.h" #include "hw/acpi/acpi.h" -#include "cpu.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -64,6 +60,7 @@ #include "hw/hyperv/vmbus-bridge.h" #include "hw/mem/nvdimm.h" #include "hw/i386/acpi-build.h" +#include "kvm/kvm-cpu.h" #define MAX_IDE_BUS 2 @@ -156,6 +153,7 @@ static void pc_init1(MachineState *machine, } } + pc_machine_init_sgx_epc(pcms); x86_cpus_init(x86ms, pcmc->default_cpu_version); if (pcmc->kvmclock_enabled) { @@ -415,7 +413,7 @@ static void pc_i440fx_machine_options(MachineClass *m) machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE); } -static void pc_i440fx_6_0_machine_options(MachineClass *m) 
+static void pc_i440fx_6_2_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_machine_options(m); @@ -424,6 +422,31 @@ static void pc_i440fx_6_0_machine_options(MachineClass *m) pcmc->default_cpu_version = 1; } +DEFINE_I440FX_MACHINE(v6_2, "pc-i440fx-6.2", NULL, + pc_i440fx_6_2_machine_options); + +static void pc_i440fx_6_1_machine_options(MachineClass *m) +{ + pc_i440fx_6_2_machine_options(m); + m->alias = NULL; + m->is_default = false; + compat_props_add(m->compat_props, hw_compat_6_1, hw_compat_6_1_len); + compat_props_add(m->compat_props, pc_compat_6_1, pc_compat_6_1_len); + m->smp_props.prefer_sockets = true; +} + +DEFINE_I440FX_MACHINE(v6_1, "pc-i440fx-6.1", NULL, + pc_i440fx_6_1_machine_options); + +static void pc_i440fx_6_0_machine_options(MachineClass *m) +{ + pc_i440fx_6_1_machine_options(m); + m->alias = NULL; + m->is_default = false; + compat_props_add(m->compat_props, hw_compat_6_0, hw_compat_6_0_len); + compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len); +} + DEFINE_I440FX_MACHINE(v6_0, "pc-i440fx-6.0", NULL, pc_i440fx_6_0_machine_options); @@ -597,11 +620,12 @@ DEFINE_I440FX_MACHINE(v2_7, "pc-i440fx-2.7", NULL, static void pc_i440fx_2_6_machine_options(MachineClass *m) { + X86MachineClass *x86mc = X86_MACHINE_CLASS(m); PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_2_7_machine_options(m); pcmc->legacy_cpu_hotplug = true; - pcmc->linuxboot_dma_enabled = false; + x86mc->fwcfg_dma_enabled = false; compat_props_add(m->compat_props, hw_compat_2_6, hw_compat_2_6_len); compat_props_add(m->compat_props, pc_compat_2_6, pc_compat_2_6_len); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 53450190f54..e1e100316d9 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -31,14 +31,13 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "hw/loader.h" -#include "sysemu/arch_init.h" #include "hw/i2c/smbus_eeprom.h" #include "hw/rtc/mc146818rtc.h" #include "sysemu/kvm.h" #include "hw/kvm/clock.h" #include "hw/pci-host/q35.h" +#include "hw/pci/pcie_port.h" #include "hw/qdev-properties.h" -#include "exec/address-spaces.h" #include "hw/i386/x86.h" #include "hw/i386/pc.h" #include "hw/i386/ich9.h" @@ -137,6 +136,8 @@ static void pc_q35_init(MachineState *machine) ram_addr_t lowmem; DriveInfo *hd[MAX_SATA_PORTS]; MachineClass *mc = MACHINE_GET_CLASS(machine); + bool acpi_pcihp; + bool keep_pci_slot_hpc; /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping @@ -177,6 +178,7 @@ static void pc_q35_init(MachineState *machine) x86ms->below_4g_mem_size = machine->ram_size; } + pc_machine_init_sgx_epc(pcms); x86_cpus_init(x86ms, pcmc->default_cpu_version); kvmclock_create(pcmc->kvmclock_create_always); @@ -237,6 +239,19 @@ static void pc_q35_init(MachineState *machine) object_property_set_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP, OBJECT(lpc), &error_abort); + acpi_pcihp = object_property_get_bool(OBJECT(lpc), + ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, + NULL); + + keep_pci_slot_hpc = object_property_get_bool(OBJECT(lpc), + "x-keep-pci-slot-hpc", + NULL); + + if (!keep_pci_slot_hpc && acpi_pcihp) { + object_register_sugar_prop(TYPE_PCIE_SLOT, "x-native-hotplug", + "false", true); + } + /* irq lines */ gsi_state = pc_gsi_create(&x86ms->gsi, pcmc->pci_enabled); @@ -345,7 +360,7 @@ static void pc_q35_machine_options(MachineClass *m) m->max_cpus = 288; } -static void pc_q35_6_0_machine_options(MachineClass *m) +static void 
pc_q35_6_2_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_machine_options(m); @@ -353,6 +368,29 @@ static void pc_q35_6_0_machine_options(MachineClass *m) pcmc->default_cpu_version = 1; } +DEFINE_Q35_MACHINE(v6_2, "pc-q35-6.2", NULL, + pc_q35_6_2_machine_options); + +static void pc_q35_6_1_machine_options(MachineClass *m) +{ + pc_q35_6_2_machine_options(m); + m->alias = NULL; + compat_props_add(m->compat_props, hw_compat_6_1, hw_compat_6_1_len); + compat_props_add(m->compat_props, pc_compat_6_1, pc_compat_6_1_len); + m->smp_props.prefer_sockets = true; +} + +DEFINE_Q35_MACHINE(v6_1, "pc-q35-6.1", NULL, + pc_q35_6_1_machine_options); + +static void pc_q35_6_0_machine_options(MachineClass *m) +{ + pc_q35_6_1_machine_options(m); + m->alias = NULL; + compat_props_add(m->compat_props, hw_compat_6_0, hw_compat_6_0_len); + compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len); +} + DEFINE_Q35_MACHINE(v6_0, "pc-q35-6.0", NULL, pc_q35_6_0_machine_options); @@ -540,11 +578,12 @@ DEFINE_Q35_MACHINE(v2_7, "pc-q35-2.7", NULL, static void pc_q35_2_6_machine_options(MachineClass *m) { + X86MachineClass *x86mc = X86_MACHINE_CLASS(m); PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_2_7_machine_options(m); pcmc->legacy_cpu_hotplug = true; - pcmc->linuxboot_dma_enabled = false; + x86mc->fwcfg_dma_enabled = false; compat_props_add(m->compat_props, hw_compat_2_6, hw_compat_2_6_len); compat_props_add(m->compat_props, pc_compat_2_6, pc_compat_2_6_len); } diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 9fe72b370e8..c8b17af9535 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -35,10 +35,9 @@ #include "hw/i386/pc.h" #include "hw/loader.h" #include "hw/qdev-properties.h" -#include "sysemu/sysemu.h" #include "hw/block/flash.h" #include "sysemu/kvm.h" -#include "sysemu/sev.h" +#include "sev.h" #define FLASH_SECTOR_SIZE 4096 @@ -125,113 +124,6 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms) } } -#define OVMF_TABLE_FOOTER_GUID "96b582de-1fb2-45f7-baea-a366c55a082d" - -static uint8_t *ovmf_table; -static int ovmf_table_len; - -static void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, size_t flash_size) -{ - uint8_t *ptr; - QemuUUID guid; - int tot_len; - - /* should only be called once */ - if (ovmf_table) { - return; - } - - if (flash_size < TARGET_PAGE_SIZE) { - return; - } - - /* - * if this is OVMF there will be a table footer - * guid 48 bytes before the end of the flash file. If it's - * not found, silently abort the flash parsing. 
- */ - qemu_uuid_parse(OVMF_TABLE_FOOTER_GUID, &guid); - guid = qemu_uuid_bswap(guid); /* guids are LE */ - ptr = flash_ptr + flash_size - 48; - if (!qemu_uuid_is_equal((QemuUUID *)ptr, &guid)) { - return; - } - - /* if found, just before is two byte table length */ - ptr -= sizeof(uint16_t); - tot_len = le16_to_cpu(*(uint16_t *)ptr) - sizeof(guid) - sizeof(uint16_t); - - if (tot_len <= 0) { - return; - } - - ovmf_table = g_malloc(tot_len); - ovmf_table_len = tot_len; - - /* - * ptr is the foot of the table, so copy it all to the newly - * allocated ovmf_table and then set the ovmf_table pointer - * to the table foot - */ - memcpy(ovmf_table, ptr - tot_len, tot_len); - ovmf_table += tot_len; -} - -bool pc_system_ovmf_table_find(const char *entry, uint8_t **data, - int *data_len) -{ - uint8_t *ptr = ovmf_table; - int tot_len = ovmf_table_len; - QemuUUID entry_guid; - - if (qemu_uuid_parse(entry, &entry_guid) < 0) { - return false; - } - - if (!ptr) { - return false; - } - - entry_guid = qemu_uuid_bswap(entry_guid); /* guids are LE */ - while (tot_len >= sizeof(QemuUUID) + sizeof(uint16_t)) { - int len; - QemuUUID *guid; - - /* - * The data structure is - * arbitrary length data - * 2 byte length of entire entry - * 16 byte guid - */ - guid = (QemuUUID *)(ptr - sizeof(QemuUUID)); - len = le16_to_cpu(*(uint16_t *)(ptr - sizeof(QemuUUID) - - sizeof(uint16_t))); - - /* - * just in case the table is corrupt, wouldn't want to spin in - * the zero case - */ - if (len < sizeof(QemuUUID) + sizeof(uint16_t)) { - return false; - } else if (len > tot_len) { - return false; - } - - ptr -= len; - tot_len -= len; - if (qemu_uuid_is_equal(guid, &entry_guid)) { - if (data) { - *data = ptr; - } - if (data_len) { - *data_len = len - sizeof(QemuUUID) - sizeof(uint16_t); - } - return true; - } - } - return false; -} - /* * Map the pcms->flash[] from 4GiB downward, and realize. * Map them in descending order, i.e. pcms->flash[0] at the top, diff --git a/hw/i386/pc_sysfw_ovmf-stubs.c b/hw/i386/pc_sysfw_ovmf-stubs.c new file mode 100644 index 00000000000..aabe78b2710 --- /dev/null +++ b/hw/i386/pc_sysfw_ovmf-stubs.c @@ -0,0 +1,26 @@ +/* + * QEMU PC System Firmware (OVMF stubs) + * + * Copyright (c) 2021 Red Hat, Inc. + * + * Author: + * Philippe Mathieu-Daudé + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "hw/i386/pc.h" + +bool pc_system_ovmf_table_find(const char *entry, uint8_t **data, int *data_len) +{ + g_assert_not_reached(); +} + +void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, size_t flash_size) +{ + g_assert_not_reached(); +} diff --git a/hw/i386/pc_sysfw_ovmf.c b/hw/i386/pc_sysfw_ovmf.c new file mode 100644 index 00000000000..f4dd92c5882 --- /dev/null +++ b/hw/i386/pc_sysfw_ovmf.c @@ -0,0 +1,151 @@ +/* + * QEMU PC System Firmware (OVMF specific) + * + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2011-2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/i386/pc.h" +#include "cpu.h" + +#define OVMF_TABLE_FOOTER_GUID "96b582de-1fb2-45f7-baea-a366c55a082d" + +static bool ovmf_flash_parsed; +static uint8_t *ovmf_table; +static int ovmf_table_len; + +void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, size_t flash_size) +{ + uint8_t *ptr; + QemuUUID guid; + int tot_len; + + /* should only be called once */ + if (ovmf_flash_parsed) { + return; + } + + ovmf_flash_parsed = true; + + if (flash_size < TARGET_PAGE_SIZE) { + return; + } + + /* + * if this is OVMF there will be a table footer + * guid 48 bytes before the end of the flash file. If it's + * not found, silently abort the flash parsing. + */ + qemu_uuid_parse(OVMF_TABLE_FOOTER_GUID, &guid); + guid = qemu_uuid_bswap(guid); /* guids are LE */ + ptr = flash_ptr + flash_size - 48; + if (!qemu_uuid_is_equal((QemuUUID *)ptr, &guid)) { + return; + } + + /* if found, just before is two byte table length */ + ptr -= sizeof(uint16_t); + tot_len = le16_to_cpu(*(uint16_t *)ptr) - sizeof(guid) - sizeof(uint16_t); + + if (tot_len <= 0) { + return; + } + + ovmf_table = g_malloc(tot_len); + ovmf_table_len = tot_len; + + /* + * ptr is the foot of the table, so copy it all to the newly + * allocated ovmf_table and then set the ovmf_table pointer + * to the table foot + */ + memcpy(ovmf_table, ptr - tot_len, tot_len); + ovmf_table += tot_len; +} + +/** + * pc_system_ovmf_table_find - Find the data associated with an entry in OVMF's + * reset vector GUIDed table. + * + * @entry: GUID string of the entry to lookup + * @data: Filled with a pointer to the entry's value (if not NULL) + * @data_len: Filled with the length of the entry's value (if not NULL). Pass + * NULL here if the length of data is known. + * + * Return: true if the entry was found in the OVMF table; false otherwise. 
+ */ +bool pc_system_ovmf_table_find(const char *entry, uint8_t **data, + int *data_len) +{ + uint8_t *ptr = ovmf_table; + int tot_len = ovmf_table_len; + QemuUUID entry_guid; + + assert(ovmf_flash_parsed); + + if (qemu_uuid_parse(entry, &entry_guid) < 0) { + return false; + } + + if (!ptr) { + return false; + } + + entry_guid = qemu_uuid_bswap(entry_guid); /* guids are LE */ + while (tot_len >= sizeof(QemuUUID) + sizeof(uint16_t)) { + int len; + QemuUUID *guid; + + /* + * The data structure is + * arbitrary length data + * 2 byte length of entire entry + * 16 byte guid + */ + guid = (QemuUUID *)(ptr - sizeof(QemuUUID)); + len = le16_to_cpu(*(uint16_t *)(ptr - sizeof(QemuUUID) - + sizeof(uint16_t))); + + /* + * just in case the table is corrupt, wouldn't want to spin in + * the zero case + */ + if (len < sizeof(QemuUUID) + sizeof(uint16_t)) { + return false; + } else if (len > tot_len) { + return false; + } + + ptr -= len; + tot_len -= len; + if (qemu_uuid_is_equal(guid, &entry_guid)) { + if (data) { + *data = ptr; + } + if (data_len) { + *data_len = len - sizeof(QemuUUID) - sizeof(uint16_t); + } + return true; + } + } + return false; +} diff --git a/hw/i386/sgx-epc.c b/hw/i386/sgx-epc.c new file mode 100644 index 00000000000..e508827e787 --- /dev/null +++ b/hw/i386/sgx-epc.c @@ -0,0 +1,185 @@ +/* + * SGX EPC device + * + * Copyright (C) 2019 Intel Corporation + * + * Authors: + * Sean Christopherson + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "hw/i386/pc.h" +#include "hw/i386/sgx-epc.h" +#include "hw/mem/memory-device.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "target/i386/cpu.h" +#include "exec/address-spaces.h" + +static Property sgx_epc_properties[] = { + DEFINE_PROP_UINT64(SGX_EPC_ADDR_PROP, SGXEPCDevice, addr, 0), + DEFINE_PROP_LINK(SGX_EPC_MEMDEV_PROP, SGXEPCDevice, hostmem, + TYPE_MEMORY_BACKEND_EPC, HostMemoryBackendEpc *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void sgx_epc_get_size(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + Error *local_err = NULL; + uint64_t value; + + value = memory_device_get_region_size(MEMORY_DEVICE(obj), &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + visit_type_uint64(v, name, &value, errp); +} + +static void sgx_epc_init(Object *obj) +{ + object_property_add(obj, SGX_EPC_SIZE_PROP, "uint64", sgx_epc_get_size, + NULL, NULL, NULL); +} + +static void sgx_epc_realize(DeviceState *dev, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + X86MachineState *x86ms = X86_MACHINE(pcms); + MemoryDeviceState *md = MEMORY_DEVICE(dev); + SGXEPCState *sgx_epc = &pcms->sgx_epc; + SGXEPCDevice *epc = SGX_EPC(dev); + HostMemoryBackend *hostmem; + const char *path; + + if (x86ms->boot_cpus != 0) { + error_setg(errp, "'" TYPE_SGX_EPC "' can't be created after vCPUs," + "e.g. 
via -device"); + return; + } + + if (!epc->hostmem) { + error_setg(errp, "'" SGX_EPC_MEMDEV_PROP "' property is not set"); + return; + } + hostmem = MEMORY_BACKEND(epc->hostmem); + if (host_memory_backend_is_mapped(hostmem)) { + path = object_get_canonical_path_component(OBJECT(hostmem)); + error_setg(errp, "can't use already busy memdev: %s", path); + return; + } + + epc->addr = sgx_epc->base + sgx_epc->size; + + memory_region_add_subregion(&sgx_epc->mr, epc->addr - sgx_epc->base, + host_memory_backend_get_memory(hostmem)); + + host_memory_backend_set_mapped(hostmem, true); + + sgx_epc->sections = g_renew(SGXEPCDevice *, sgx_epc->sections, + sgx_epc->nr_sections + 1); + sgx_epc->sections[sgx_epc->nr_sections++] = epc; + + sgx_epc->size += memory_device_get_region_size(md, errp); +} + +static void sgx_epc_unrealize(DeviceState *dev) +{ + SGXEPCDevice *epc = SGX_EPC(dev); + HostMemoryBackend *hostmem = MEMORY_BACKEND(epc->hostmem); + + host_memory_backend_set_mapped(hostmem, false); +} + +static uint64_t sgx_epc_md_get_addr(const MemoryDeviceState *md) +{ + const SGXEPCDevice *epc = SGX_EPC(md); + + return epc->addr; +} + +static void sgx_epc_md_set_addr(MemoryDeviceState *md, uint64_t addr, + Error **errp) +{ + object_property_set_uint(OBJECT(md), SGX_EPC_ADDR_PROP, addr, errp); +} + +static uint64_t sgx_epc_md_get_plugged_size(const MemoryDeviceState *md, + Error **errp) +{ + return 0; +} + +static MemoryRegion *sgx_epc_md_get_memory_region(MemoryDeviceState *md, + Error **errp) +{ + SGXEPCDevice *epc = SGX_EPC(md); + HostMemoryBackend *hostmem; + + if (!epc->hostmem) { + error_setg(errp, "'" SGX_EPC_MEMDEV_PROP "' property must be set"); + return NULL; + } + + hostmem = MEMORY_BACKEND(epc->hostmem); + return host_memory_backend_get_memory(hostmem); +} + +static void sgx_epc_md_fill_device_info(const MemoryDeviceState *md, + MemoryDeviceInfo *info) +{ + SgxEPCDeviceInfo *se = g_new0(SgxEPCDeviceInfo, 1); + SGXEPCDevice *epc = SGX_EPC(md); + + se->memaddr = epc->addr; + se->size = object_property_get_uint(OBJECT(epc), SGX_EPC_SIZE_PROP, + NULL); + se->memdev = object_get_canonical_path(OBJECT(epc->hostmem)); + + info->u.sgx_epc.data = se; + info->type = MEMORY_DEVICE_INFO_KIND_SGX_EPC; +} + +static void sgx_epc_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(oc); + + dc->hotpluggable = false; + dc->realize = sgx_epc_realize; + dc->unrealize = sgx_epc_unrealize; + dc->desc = "SGX EPC section"; + dc->user_creatable = false; + device_class_set_props(dc, sgx_epc_properties); + + mdc->get_addr = sgx_epc_md_get_addr; + mdc->set_addr = sgx_epc_md_set_addr; + mdc->get_plugged_size = sgx_epc_md_get_plugged_size; + mdc->get_memory_region = sgx_epc_md_get_memory_region; + mdc->fill_device_info = sgx_epc_md_fill_device_info; +} + +static TypeInfo sgx_epc_info = { + .name = TYPE_SGX_EPC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SGXEPCDevice), + .instance_init = sgx_epc_init, + .class_init = sgx_epc_class_init, + .class_size = sizeof(DeviceClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_MEMORY_DEVICE }, + { } + }, +}; + +static void sgx_epc_register_types(void) +{ + type_register_static(&sgx_epc_info); +} + +type_init(sgx_epc_register_types) diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c new file mode 100644 index 00000000000..c9b379e6651 --- /dev/null +++ b/hw/i386/sgx-stub.c @@ -0,0 +1,34 @@ +#include "qemu/osdep.h" +#include "monitor/monitor.h" +#include "monitor/hmp-target.h" +#include "hw/i386/pc.h" 
+#include "hw/i386/sgx-epc.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-misc-target.h" + +SGXInfo *qmp_query_sgx(Error **errp) +{ + error_setg(errp, "SGX support is not compiled in"); + return NULL; +} + +SGXInfo *qmp_query_sgx_capabilities(Error **errp) +{ + error_setg(errp, "SGX support is not compiled in"); + return NULL; +} + +void hmp_info_sgx(Monitor *mon, const QDict *qdict) +{ + monitor_printf(mon, "SGX is not available in this QEMU\n"); +} + +void pc_machine_init_sgx_epc(PCMachineState *pcms) +{ + memset(&pcms->sgx_epc, 0, sizeof(SGXEPCState)); +} + +bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size) +{ + g_assert_not_reached(); +} diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c new file mode 100644 index 00000000000..8fef3dd8fad --- /dev/null +++ b/hw/i386/sgx.c @@ -0,0 +1,243 @@ +/* + * SGX common code + * + * Copyright (C) 2021 Intel Corporation + * + * Authors: + * Yang Zhong + * Sean Christopherson + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "hw/i386/pc.h" +#include "hw/i386/sgx-epc.h" +#include "hw/mem/memory-device.h" +#include "monitor/qdev.h" +#include "monitor/monitor.h" +#include "monitor/hmp-target.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-misc-target.h" +#include "exec/address-spaces.h" +#include "sysemu/hw_accel.h" +#include "sysemu/reset.h" +#include + +#define SGX_MAX_EPC_SECTIONS 8 +#define SGX_CPUID_EPC_INVALID 0x0 + +/* A valid EPC section. */ +#define SGX_CPUID_EPC_SECTION 0x1 +#define SGX_CPUID_EPC_MASK 0xF + +#define SGX_MAGIC 0xA4 +#define SGX_IOC_VEPC_REMOVE_ALL _IO(SGX_MAGIC, 0x04) + +#define RETRY_NUM 2 + +static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) +{ + return (low & MAKE_64BIT_MASK(12, 20)) + + ((high & MAKE_64BIT_MASK(0, 20)) << 32); +} + +static uint64_t sgx_calc_host_epc_section_size(void) +{ + uint32_t i, type; + uint32_t eax, ebx, ecx, edx; + uint64_t size = 0; + + for (i = 0; i < SGX_MAX_EPC_SECTIONS; i++) { + host_cpuid(0x12, i + 2, &eax, &ebx, &ecx, &edx); + + type = eax & SGX_CPUID_EPC_MASK; + if (type == SGX_CPUID_EPC_INVALID) { + break; + } + + if (type != SGX_CPUID_EPC_SECTION) { + break; + } + + size += sgx_calc_section_metric(ecx, edx); + } + + return size; +} + +static void sgx_epc_reset(void *opaque) +{ + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + HostMemoryBackend *hostmem; + SGXEPCDevice *epc; + int failures; + int fd, i, j, r; + static bool warned = false; + + /* + * The second pass is needed to remove SECS pages that could not + * be removed during the first. 
+ */ + for (i = 0; i < RETRY_NUM; i++) { + failures = 0; + for (j = 0; j < pcms->sgx_epc.nr_sections; j++) { + epc = pcms->sgx_epc.sections[j]; + hostmem = MEMORY_BACKEND(epc->hostmem); + fd = memory_region_get_fd(host_memory_backend_get_memory(hostmem)); + + r = ioctl(fd, SGX_IOC_VEPC_REMOVE_ALL); + if (r == -ENOTTY && !warned) { + warned = true; + warn_report("kernel does not support SGX_IOC_VEPC_REMOVE_ALL"); + warn_report("SGX might operate incorrectly in the guest after reset"); + break; + } else if (r > 0) { + /* SECS pages remain */ + failures++; + if (i == 1) { + error_report("cannot reset vEPC section %d", j); + } + } + } + if (!failures) { + break; + } + } +} + +SGXInfo *qmp_query_sgx_capabilities(Error **errp) +{ + SGXInfo *info = NULL; + uint32_t eax, ebx, ecx, edx; + + int fd = qemu_open_old("/dev/sgx_vepc", O_RDWR); + if (fd < 0) { + error_setg(errp, "SGX is not enabled in KVM"); + return NULL; + } + + info = g_new0(SGXInfo, 1); + host_cpuid(0x7, 0, &eax, &ebx, &ecx, &edx); + + info->sgx = ebx & (1U << 2) ? true : false; + info->flc = ecx & (1U << 30) ? true : false; + + host_cpuid(0x12, 0, &eax, &ebx, &ecx, &edx); + info->sgx1 = eax & (1U << 0) ? true : false; + info->sgx2 = eax & (1U << 1) ? true : false; + + info->section_size = sgx_calc_host_epc_section_size(); + + close(fd); + + return info; +} + +SGXInfo *qmp_query_sgx(Error **errp) +{ + SGXInfo *info = NULL; + X86MachineState *x86ms; + PCMachineState *pcms = + (PCMachineState *)object_dynamic_cast(qdev_get_machine(), + TYPE_PC_MACHINE); + if (!pcms) { + error_setg(errp, "SGX is only supported on PC machines"); + return NULL; + } + + x86ms = X86_MACHINE(pcms); + if (!x86ms->sgx_epc_list) { + error_setg(errp, "No EPC regions defined, SGX not available"); + return NULL; + } + + SGXEPCState *sgx_epc = &pcms->sgx_epc; + info = g_new0(SGXInfo, 1); + + info->sgx = true; + info->sgx1 = true; + info->sgx2 = true; + info->flc = true; + info->section_size = sgx_epc->size; + + return info; +} + +void hmp_info_sgx(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + g_autoptr(SGXInfo) info = qmp_query_sgx(&err); + + if (err) { + error_report_err(err); + return; + } + monitor_printf(mon, "SGX support: %s\n", + info->sgx ? "enabled" : "disabled"); + monitor_printf(mon, "SGX1 support: %s\n", + info->sgx1 ? "enabled" : "disabled"); + monitor_printf(mon, "SGX2 support: %s\n", + info->sgx2 ? "enabled" : "disabled"); + monitor_printf(mon, "FLC support: %s\n", + info->flc ? 
"enabled" : "disabled"); + monitor_printf(mon, "size: %" PRIu64 "\n", + info->section_size); +} + +bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size) +{ + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + SGXEPCDevice *epc; + + if (pcms->sgx_epc.size == 0 || pcms->sgx_epc.nr_sections <= section_nr) { + return true; + } + + epc = pcms->sgx_epc.sections[section_nr]; + + *addr = epc->addr; + *size = memory_device_get_region_size(MEMORY_DEVICE(epc), &error_fatal); + + return false; +} + +void pc_machine_init_sgx_epc(PCMachineState *pcms) +{ + SGXEPCState *sgx_epc = &pcms->sgx_epc; + X86MachineState *x86ms = X86_MACHINE(pcms); + SgxEPCList *list = NULL; + Object *obj; + + memset(sgx_epc, 0, sizeof(SGXEPCState)); + if (!x86ms->sgx_epc_list) { + return; + } + + sgx_epc->base = 0x100000000ULL + x86ms->above_4g_mem_size; + + memory_region_init(&sgx_epc->mr, OBJECT(pcms), "sgx-epc", UINT64_MAX); + memory_region_add_subregion(get_system_memory(), sgx_epc->base, + &sgx_epc->mr); + + for (list = x86ms->sgx_epc_list; list; list = list->next) { + obj = object_new("sgx-epc"); + + /* set the memdev link with memory backend */ + object_property_parse(obj, SGX_EPC_MEMDEV_PROP, list->value->memdev, + &error_fatal); + object_property_set_bool(obj, "realized", true, &error_fatal); + object_unref(obj); + } + + if ((sgx_epc->base + sgx_epc->size) < sgx_epc->base) { + error_report("Size of all 'sgx-epc' =0x%"PRIu64" causes EPC to wrap", + sgx_epc->size); + exit(EXIT_FAILURE); + } + + memory_region_set_size(&sgx_epc->mr, sgx_epc->size); + + /* register the reset callback for sgx epc */ + qemu_register_reset(sgx_epc_reset, NULL); +} diff --git a/hw/i386/trace-events b/hw/i386/trace-events index e48bef2b0d6..5bf7e52bf52 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# x86-iommu.c x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32 diff --git a/hw/i386/vmmouse.c b/hw/i386/vmmouse.c index df4798f5028..3d663682863 100644 --- a/hw/i386/vmmouse.c +++ b/hw/i386/vmmouse.c @@ -158,6 +158,7 @@ static void vmmouse_read_id(VMMouseState *s) s->queue[s->nb_queue++] = VMMOUSE_VERSION; s->status = 0; + vmmouse_update_handler(s, s->absolute); } static void vmmouse_request_relative(VMMouseState *s) diff --git a/hw/i386/vmport.c b/hw/i386/vmport.c index 490a57f52c8..7cc75dbc6da 100644 --- a/hw/i386/vmport.c +++ b/hw/i386/vmport.c @@ -37,7 +37,6 @@ #include "sysemu/hw_accel.h" #include "sysemu/qtest.h" #include "qemu/log.h" -#include "cpu.h" #include "trace.h" #include "qom/object.h" diff --git a/hw/i386/x86-iommu-stub.c b/hw/i386/x86-iommu-stub.c index c5ba077f9d1..781b5ff9222 100644 --- a/hw/i386/x86-iommu-stub.c +++ b/hw/i386/x86-iommu-stub.c @@ -36,8 +36,3 @@ bool x86_iommu_ir_supported(X86IOMMUState *s) { return false; } - -IommuType x86_iommu_get_type(void) -{ - abort(); -} diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index 5f4301639c5..01d11325a69 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "hw/sysbus.h" -#include "hw/boards.h" #include "hw/i386/x86-iommu.h" #include "hw/qdev-properties.h" #include "hw/i386/pc.h" @@ -78,30 +77,17 @@ void x86_iommu_irq_to_msi_message(X86IOMMUIrq *irq, MSIMessage *msg_out) msg_out->data = msg.msi_data; } -/* Default X86 IOMMU device */ -static X86IOMMUState *x86_iommu_default = NULL; - -static void x86_iommu_set_default(X86IOMMUState *x86_iommu) -{ - assert(x86_iommu); - - if (x86_iommu_default) { - error_report("QEMU does not support multiple vIOMMUs " - "for x86 yet."); - exit(1); - } - - x86_iommu_default = x86_iommu; -} - X86IOMMUState *x86_iommu_get_default(void) { - return x86_iommu_default; -} + MachineState *ms = MACHINE(qdev_get_machine()); + PCMachineState *pcms = + PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)); -IommuType x86_iommu_get_type(void) -{ - return x86_iommu_default->type; + if (pcms && + object_dynamic_cast(OBJECT(pcms->iommu), TYPE_X86_IOMMU_DEVICE)) { + return X86_IOMMU_DEVICE(pcms->iommu); + } + return NULL; } static void x86_iommu_realize(DeviceState *dev, Error **errp) @@ -137,8 +123,6 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) if (x86_class->realize) { x86_class->realize(dev, errp); } - - x86_iommu_set_default(X86_IOMMU_DEVICE(dev)); } static Property x86_iommu_properties[] = { diff --git a/hw/i386/x86.c b/hw/i386/x86.c index ed796fe6bad..b84840a1bb9 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -30,6 +30,8 @@ #include "qapi/error.h" #include "qapi/qmp/qerror.h" #include "qapi/qapi-visit-common.h" +#include "qapi/clone-visitor.h" +#include "qapi/qapi-visit-machine.h" #include "qapi/visitor.h" #include "sysemu/qtest.h" #include "sysemu/whpx.h" @@ -45,6 +47,7 @@ #include "hw/i386/fw_cfg.h" #include "hw/intc/i8259.h" #include "hw/rtc/mc146818rtc.h" +#include "target/i386/sev.h" #include "hw/acpi/cpu_hotplug.h" #include "hw/irq.h" @@ -64,7 +67,7 @@ inline void init_topo_info(X86CPUTopoInfo *topo_info, { MachineState *ms = MACHINE(x86ms); - topo_info->dies_per_pkg = x86ms->smp_dies; + topo_info->dies_per_pkg = ms->smp.dies; topo_info->cores_per_die = ms->smp.cores; topo_info->threads_per_core = ms->smp.threads; } @@ -293,7 +296,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, init_topo_info(&topo_info, 
x86ms); - env->nr_dies = x86ms->smp_dies; + env->nr_dies = ms->smp.dies; /* * If APIC ID is not set, @@ -301,13 +304,13 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, */ if (cpu->apic_id == UNASSIGNED_APIC_ID) { int max_socket = (ms->smp.max_cpus - 1) / - smp_threads / smp_cores / x86ms->smp_dies; + smp_threads / smp_cores / ms->smp.dies; /* * die-id was optional in QEMU 4.0 and older, so keep it optional * if there's only one die per socket. */ - if (cpu->die_id < 0 && x86ms->smp_dies == 1) { + if (cpu->die_id < 0 && ms->smp.dies == 1) { cpu->die_id = 0; } @@ -322,9 +325,9 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, if (cpu->die_id < 0) { error_setg(errp, "CPU die-id is not set"); return; - } else if (cpu->die_id > x86ms->smp_dies - 1) { + } else if (cpu->die_id > ms->smp.dies - 1) { error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", - cpu->die_id, x86ms->smp_dies - 1); + cpu->die_id, ms->smp.dies - 1); return; } if (cpu->core_id < 0) { @@ -477,7 +480,7 @@ const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) &topo_info, &topo_ids); ms->possible_cpus->cpus[i].props.has_socket_id = true; ms->possible_cpus->cpus[i].props.socket_id = topo_ids.pkg_id; - if (x86ms->smp_dies > 1) { + if (ms->smp.dies > 1) { ms->possible_cpus->cpus[i].props.has_die_id = true; ms->possible_cpus->cpus[i].props.die_id = topo_ids.die_id; } @@ -761,9 +764,9 @@ static bool load_elfboot(const char *kernel_filename, void x86_load_linux(X86MachineState *x86ms, FWCfgState *fw_cfg, int acpi_data_size, - bool pvh_enabled, - bool linuxboot_dma_enabled) + bool pvh_enabled) { + bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; uint16_t protocol; int setup_size, kernel_size, cmdline_size; int dtb_size, setup_data_offset; @@ -778,6 +781,7 @@ void x86_load_linux(X86MachineState *x86ms, const char *initrd_filename = machine->initrd_filename; const char *dtb_filename = machine->dtb; const char *kernel_cmdline = machine->kernel_cmdline; + SevKernelLoaderContext sev_load_ctx = {}; /* Align to 16 bytes as a paranoia measure */ cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; @@ -810,7 +814,7 @@ void x86_load_linux(X86MachineState *x86ms, * PVH), so we try multiboot first since we check the multiboot magic * header before to load it. 
*/ - if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, + if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename, kernel_cmdline, kernel_size, header)) { return; } @@ -924,6 +928,8 @@ void x86_load_linux(X86MachineState *x86ms, fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); + sev_load_ctx.cmdline_data = (char *)kernel_cmdline; + sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; if (protocol >= 0x202) { stl_p(header + 0x228, cmdline_addr); @@ -1005,6 +1011,8 @@ void x86_load_linux(X86MachineState *x86ms, fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); + sev_load_ctx.initrd_data = initrd_data; + sev_load_ctx.initrd_size = initrd_size; stl_p(header + 0x218, initrd_addr); stl_p(header + 0x21c, initrd_size); @@ -1063,15 +1071,32 @@ void x86_load_linux(X86MachineState *x86ms, load_image_size(dtb_filename, setup_data->data, dtb_size); } - memcpy(setup, header, MIN(sizeof(header), setup_size)); + /* + * If we're starting an encrypted VM, it will be OVMF based, which uses the + * efi stub for booting and doesn't require any values to be placed in the + * kernel header. We therefore don't update the header so the hash of the + * kernel on the other side of the fw_cfg interface matches the hash of the + * file the user passed in. + */ + if (!sev_enabled()) { + memcpy(setup, header, MIN(sizeof(header), setup_size)); + } fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); + sev_load_ctx.kernel_data = (char *)kernel; + sev_load_ctx.kernel_size = kernel_size; fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); + sev_load_ctx.setup_data = (char *)setup; + sev_load_ctx.setup_size = setup_size; + + if (sev_enabled()) { + sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal); + } option_rom[nb_option_roms].bootindex = 0; option_rom[nb_option_roms].name = "linuxboot.bin"; @@ -1246,16 +1271,54 @@ static void x86_machine_set_oem_table_id(Object *obj, const char *value, strncpy(x86ms->oem_table_id, value, 8); } +static void x86_machine_get_bus_lock_ratelimit(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + uint64_t bus_lock_ratelimit = x86ms->bus_lock_ratelimit; + + visit_type_uint64(v, name, &bus_lock_ratelimit, errp); +} + +static void x86_machine_set_bus_lock_ratelimit(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + + visit_type_uint64(v, name, &x86ms->bus_lock_ratelimit, errp); +} + +static void machine_get_sgx_epc(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + SgxEPCList *list = x86ms->sgx_epc_list; + + visit_type_SgxEPCList(v, name, &list, errp); +} + +static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + SgxEPCList *list; + + list = x86ms->sgx_epc_list; + visit_type_SgxEPCList(v, name, &x86ms->sgx_epc_list, errp); + + 
qapi_free_SgxEPCList(list); +} + static void x86_machine_initfn(Object *obj) { X86MachineState *x86ms = X86_MACHINE(obj); x86ms->smm = ON_OFF_AUTO_AUTO; x86ms->acpi = ON_OFF_AUTO_AUTO; - x86ms->smp_dies = 1; x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS; x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); + x86ms->bus_lock_ratelimit = 0; } static void x86_machine_class_init(ObjectClass *oc, void *data) @@ -1269,6 +1332,7 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; x86mc->compat_apic_id_mode = false; x86mc->save_tsc_khz = true; + x86mc->fwcfg_dma_enabled = true; nc->nmi_monitor_handler = x86_nmi; object_class_property_add(oc, X86_MACHINE_SMM, "OnOffAuto", @@ -1299,6 +1363,18 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) "Override the default value of field OEM Table ID " "in ACPI table header." "The string may be up to 8 bytes in size"); + + object_class_property_add(oc, X86_MACHINE_BUS_LOCK_RATELIMIT, "uint64_t", + x86_machine_get_bus_lock_ratelimit, + x86_machine_set_bus_lock_ratelimit, NULL, NULL); + object_class_property_set_description(oc, X86_MACHINE_BUS_LOCK_RATELIMIT, + "Set the ratelimit for the bus locks acquired in VMs"); + + object_class_property_add(oc, "sgx-epc", "SgxEPC", + machine_get_sgx_epc, machine_set_sgx_epc, + NULL, NULL); + object_class_property_set_description(oc, "sgx-epc", + "SGX EPC device"); } static const TypeInfo x86_machine_info = { diff --git a/hw/i386/xen/trace-events b/hw/i386/xen/trace-events index ca3a4948baa..5d6be610908 100644 --- a/hw/i386/xen/trace-events +++ b/hw/i386/xen/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
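A minimal worked example of the socket/die bounds that x86_cpu_pre_plug() above derives from the new ms->smp.dies field, assuming a hypothetical -smp 32,sockets=2,dies=2,cores=4,threads=2 topology (a sketch, not QEMU code):

#include <assert.h>

int main(void)
{
    int max_cpus = 32, dies = 2, cores = 4, threads = 2;

    /* highest valid socket-id, as computed in x86_cpu_pre_plug() */
    int max_socket = (max_cpus - 1) / threads / cores / dies;   /* 31/2/4/2 = 1 */
    assert(max_socket == 1);

    /* die-id must stay within 0..dies-1, i.e. 0..1 for this topology */
    assert(dies - 1 == 1);
    return 0;
}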
# xen_platform.c xen_platform_log(char *s) "xen platform: %s" diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index 7ce672e5a5c..482be95415e 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -33,7 +33,6 @@ #include "sysemu/xen.h" #include "sysemu/xen-mapcache.h" #include "trace.h" -#include "exec/address-spaces.h" #include #include @@ -109,6 +108,7 @@ typedef struct XenIOState { shared_iopage_t *shared_page; shared_vmport_iopage_t *shared_vmport_page; buffered_iopage_t *buffered_io_page; + xenforeignmemory_resource_handle *fres; QEMUTimer *buffered_io_timer; CPUState **cpu_by_vcpu_id; /* the evtchn port for polling the notification, */ @@ -721,6 +721,7 @@ static void xen_log_global_stop(MemoryListener *listener) } static MemoryListener xen_memory_listener = { + .name = "xen-memory", .region_add = xen_region_add, .region_del = xen_region_del, .log_start = xen_log_start, @@ -732,6 +733,7 @@ static MemoryListener xen_memory_listener = { }; static MemoryListener xen_io_listener = { + .name = "xen-io", .region_add = xen_io_add, .region_del = xen_io_del, .priority = 10, @@ -1254,6 +1256,9 @@ static void xen_exit_notifier(Notifier *n, void *data) XenIOState *state = container_of(n, XenIOState, exit); xen_destroy_ioreq_server(xen_domid, state->ioservid); + if (state->fres != NULL) { + xenforeignmemory_unmap_resource(xen_fmem, state->fres); + } xenevtchn_close(state->xce_handle); xs_daemon_close(state->xenstore); @@ -1321,7 +1326,6 @@ static void xen_wakeup_notifier(Notifier *notifier, void *data) static int xen_map_ioreq_server(XenIOState *state) { void *addr = NULL; - xenforeignmemory_resource_handle *fres; xen_pfn_t ioreq_pfn; xen_pfn_t bufioreq_pfn; evtchn_port_t bufioreq_evtchn; @@ -1333,12 +1337,12 @@ static int xen_map_ioreq_server(XenIOState *state) */ QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0); QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1); - fres = xenforeignmemory_map_resource(xen_fmem, xen_domid, + state->fres = xenforeignmemory_map_resource(xen_fmem, xen_domid, XENMEM_resource_ioreq_server, state->ioservid, 0, 2, &addr, PROT_READ | PROT_WRITE, 0); - if (fres != NULL) { + if (state->fres != NULL) { trace_xen_map_resource_ioreq(state->ioservid, addr); state->buffered_io_page = addr; state->shared_page = addr + TARGET_PAGE_SIZE; @@ -1609,8 +1613,8 @@ void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) void qmp_xen_set_global_dirty_log(bool enable, Error **errp) { if (enable) { - memory_global_dirty_log_start(); + memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION); } else { - memory_global_dirty_log_stop(); + memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION); } } diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c index 5b120ed44b1..bd47c3d672f 100644 --- a/hw/i386/xen/xen-mapcache.c +++ b/hw/i386/xen/xen-mapcache.c @@ -169,9 +169,23 @@ static void xen_remap_bucket(MapCacheEntry *entry, if (entry->vaddr_base != NULL) { if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) { - ram_block_notify_remove(entry->vaddr_base, entry->size); + ram_block_notify_remove(entry->vaddr_base, entry->size, + entry->size); } - if (munmap(entry->vaddr_base, entry->size) != 0) { + + /* + * If an entry is being replaced by another mapping and we're using + * MAP_FIXED flag for it - there is possibility of a race for vaddr + * address with another thread doing an mmap call itself + * (see man 2 mmap). 
To avoid that we skip explicit unmapping here + * and allow the kernel to destroy the previous mappings by replacing + * them in mmap call later. + * + * Non-identical replacements are not allowed therefore. + */ + assert(!vaddr || (entry->vaddr_base == vaddr && entry->size == size)); + + if (!vaddr && munmap(entry->vaddr_base, entry->size) != 0) { perror("unmap fails"); exit(-1); } @@ -211,7 +225,7 @@ static void xen_remap_bucket(MapCacheEntry *entry, } if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) { - ram_block_notify_add(vaddr_base, size); + ram_block_notify_add(vaddr_base, size, size); } entry->vaddr_base = vaddr_base; @@ -452,7 +466,7 @@ static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer) } pentry->next = entry->next; - ram_block_notify_remove(entry->vaddr_base, entry->size); + ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size); if (munmap(entry->vaddr_base, entry->size) != 0) { perror("unmap fails"); exit(-1); diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c index 01ae1fb1618..72028449bae 100644 --- a/hw/i386/xen/xen_platform.c +++ b/hw/i386/xen/xen_platform.c @@ -27,12 +27,10 @@ #include "qapi/error.h" #include "hw/ide.h" #include "hw/pci/pci.h" -#include "hw/irq.h" #include "hw/xen/xen_common.h" #include "migration/vmstate.h" #include "hw/xen/xen-legacy-backend.h" #include "trace.h" -#include "exec/address-spaces.h" #include "sysemu/xen.h" #include "sysemu/block-backend.h" #include "qemu/error-report.h" diff --git a/hw/ide/Kconfig b/hw/ide/Kconfig index 5d9106b1ac2..dd85fa3619f 100644 --- a/hw/ide/Kconfig +++ b/hw/ide/Kconfig @@ -8,7 +8,7 @@ config IDE_QDEV config IDE_PCI bool depends on PCI - select IDE_CORE + select IDE_QDEV config IDE_ISA bool @@ -41,6 +41,7 @@ config IDE_VIA config MICRODRIVE bool select IDE_QDEV + depends on PCMCIA config AHCI bool diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index f2c51574839..a94c6e26fb0 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1548,7 +1548,7 @@ void ahci_realize(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports) for (i = 0; i < s->ports; i++) { AHCIDevice *ad = &s->dev[i]; - ide_bus_new(&ad->port, sizeof(ad->port), qdev, i, 1); + ide_bus_init(&ad->port, sizeof(ad->port), qdev, i, 1); ide_init2(&ad->port, irqs[i]); ad->hba = s; diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h index 7f32e87731d..109de9e2d11 100644 --- a/hw/ide/ahci_internal.h +++ b/hw/ide/ahci_internal.h @@ -26,7 +26,6 @@ #include "hw/ide/ahci.h" #include "hw/ide/internal.h" -#include "hw/sysbus.h" #include "hw/pci/pci.h" #define AHCI_MEM_BAR_SIZE 0x1000 diff --git a/hw/ide/cmd646.c b/hw/ide/cmd646.c index c2546314855..94c576262c1 100644 --- a/hw/ide/cmd646.c +++ b/hw/ide/cmd646.c @@ -293,7 +293,7 @@ static void pci_cmd646_ide_realize(PCIDevice *dev, Error **errp) qdev_init_gpio_in(ds, cmd646_set_irq, 2); for (i = 0; i < 2; i++) { - ide_bus_new(&d->bus[i], sizeof(d->bus[i]), ds, i, 2); + ide_bus_init(&d->bus[i], sizeof(d->bus[i]), ds, i, 2); ide_init2(&d->bus[i], qdev_get_gpio_in(ds, i)); bmdma_init(&d->bus[i], &d->bmdma[i], d); diff --git a/hw/ide/core.c b/hw/ide/core.c index fd69ca3167d..e28f8aad611 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -98,8 +98,12 @@ static void put_le16(uint16_t *p, unsigned int v) static void ide_identify_size(IDEState *s) { uint16_t *p = (uint16_t *)s->identify_data; - put_le16(p + 60, s->nb_sectors); - put_le16(p + 61, s->nb_sectors >> 16); + int64_t nb_sectors_lba28 = s->nb_sectors; + if (nb_sectors_lba28 >= 1 << 28) { + nb_sectors_lba28 = (1 << 28) - 
1; + } + put_le16(p + 60, nb_sectors_lba28); + put_le16(p + 61, nb_sectors_lba28 >> 16); put_le16(p + 100, s->nb_sectors); put_le16(p + 101, s->nb_sectors >> 16); put_le16(p + 102, s->nb_sectors >> 32); diff --git a/hw/ide/ioport.c b/hw/ide/ioport.c index b613ff3bbaf..e6caa537fa8 100644 --- a/hw/ide/ioport.c +++ b/hw/ide/ioport.c @@ -50,15 +50,19 @@ static const MemoryRegionPortio ide_portio2_list[] = { PORTIO_END_OF_LIST(), }; -void ide_init_ioport(IDEBus *bus, ISADevice *dev, int iobase, int iobase2) +int ide_init_ioport(IDEBus *bus, ISADevice *dev, int iobase, int iobase2) { + int ret; + /* ??? Assume only ISA and PCI configurations, and that the PCI-ISA bridge has been setup properly to always register with ISA. */ - isa_register_portio_list(dev, &bus->portio_list, - iobase, ide_portio_list, bus, "ide"); + ret = isa_register_portio_list(dev, &bus->portio_list, + iobase, ide_portio_list, bus, "ide"); - if (iobase2) { - isa_register_portio_list(dev, &bus->portio2_list, - iobase2, ide_portio2_list, bus, "ide"); + if (ret == 0 && iobase2) { + ret = isa_register_portio_list(dev, &bus->portio2_list, + iobase2, ide_portio2_list, bus, "ide"); } + + return ret; } diff --git a/hw/ide/isa.c b/hw/ide/isa.c index 6bc19de2265..24bbde24c2b 100644 --- a/hw/ide/isa.c +++ b/hw/ide/isa.c @@ -73,7 +73,7 @@ static void isa_ide_realizefn(DeviceState *dev, Error **errp) ISADevice *isadev = ISA_DEVICE(dev); ISAIDEState *s = ISA_IDE(dev); - ide_bus_new(&s->bus, sizeof(s->bus), dev, 0, 2); + ide_bus_init(&s->bus, sizeof(s->bus), dev, 0, 2); ide_init_ioport(&s->bus, isadev, s->iobase, s->iobase2); isa_init_irq(isadev, &s->irq, s->isairq); ide_init2(&s->bus, s->irq); diff --git a/hw/ide/macio.c b/hw/ide/macio.c index b270a101632..b03d401ceb5 100644 --- a/hw/ide/macio.c +++ b/hw/ide/macio.c @@ -449,7 +449,7 @@ static void macio_ide_initfn(Object *obj) SysBusDevice *d = SYS_BUS_DEVICE(obj); MACIOIDEState *s = MACIO_IDE(obj); - ide_bus_new(&s->bus, sizeof(s->bus), DEVICE(obj), 0, 2); + ide_bus_init(&s->bus, sizeof(s->bus), DEVICE(obj), 0, 2); memory_region_init_io(&s->mem, obj, &pmac_ide_ops, s, "pmac-ide", 0x1000); sysbus_init_mmio(d, &s->mem); sysbus_init_irq(d, &s->real_ide_irq); diff --git a/hw/ide/microdrive.c b/hw/ide/microdrive.c index 58a14fea363..6df9b4cbbe1 100644 --- a/hw/ide/microdrive.c +++ b/hw/ide/microdrive.c @@ -605,7 +605,7 @@ static void microdrive_init(Object *obj) { MicroDriveState *md = MICRODRIVE(obj); - ide_bus_new(&md->bus, sizeof(md->bus), DEVICE(obj), 0, 1); + ide_bus_init(&md->bus, sizeof(md->bus), DEVICE(obj), 0, 1); } static void microdrive_class_init(ObjectClass *oc, void *data) diff --git a/hw/ide/mmio.c b/hw/ide/mmio.c index 36e2f4790ab..fb2ebd4847f 100644 --- a/hw/ide/mmio.c +++ b/hw/ide/mmio.c @@ -142,7 +142,7 @@ static void mmio_ide_initfn(Object *obj) SysBusDevice *d = SYS_BUS_DEVICE(obj); MMIOState *s = MMIO_IDE(obj); - ide_bus_new(&s->bus, sizeof(s->bus), DEVICE(obj), 0, 2); + ide_bus_init(&s->bus, sizeof(s->bus), DEVICE(obj), 0, 2); sysbus_init_irq(d, &s->irq); } diff --git a/hw/ide/piix.c b/hw/ide/piix.c index b9860e35a5c..ce89fd0aa36 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -26,6 +26,7 @@ #include "qemu/osdep.h" #include "hw/pci/pci.h" #include "migration/vmstate.h" +#include "qapi/error.h" #include "qemu/module.h" #include "sysemu/block-backend.h" #include "sysemu/blockdev.h" @@ -123,7 +124,8 @@ static void piix_ide_reset(DeviceState *dev) pci_conf[0x20] = 0x01; /* BMIBA: 20-23h */ } -static void pci_piix_init_ports(PCIIDEState *d) { +static int 
pci_piix_init_ports(PCIIDEState *d) +{ static const struct { int iobase; int iobase2; @@ -132,24 +134,30 @@ static void pci_piix_init_ports(PCIIDEState *d) { {0x1f0, 0x3f6, 14}, {0x170, 0x376, 15}, }; - int i; + int i, ret; for (i = 0; i < 2; i++) { - ide_bus_new(&d->bus[i], sizeof(d->bus[i]), DEVICE(d), i, 2); - ide_init_ioport(&d->bus[i], NULL, port_info[i].iobase, - port_info[i].iobase2); + ide_bus_init(&d->bus[i], sizeof(d->bus[i]), DEVICE(d), i, 2); + ret = ide_init_ioport(&d->bus[i], NULL, port_info[i].iobase, + port_info[i].iobase2); + if (ret) { + return ret; + } ide_init2(&d->bus[i], isa_get_irq(NULL, port_info[i].isairq)); bmdma_init(&d->bus[i], &d->bmdma[i], d); d->bmdma[i].bus = &d->bus[i]; ide_register_restart_cb(&d->bus[i]); } + + return 0; } static void pci_piix_ide_realize(PCIDevice *dev, Error **errp) { PCIIDEState *d = PCI_IDE(dev); uint8_t *pci_conf = dev->config; + int rc; pci_conf[PCI_CLASS_PROG] = 0x80; // legacy ATA mode @@ -158,7 +166,11 @@ static void pci_piix_ide_realize(PCIDevice *dev, Error **errp) vmstate_register(VMSTATE_IF(dev), 0, &vmstate_ide_pci, d); - pci_piix_init_ports(d); + rc = pci_piix_init_ports(d); + if (rc) { + error_setg_errno(errp, -rc, "Failed to realize %s", + object_get_typename(OBJECT(dev))); + } } int pci_piix3_xen_ide_unplug(DeviceState *dev, bool aux) diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index e70ebc83a07..618045b85ac 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -68,10 +68,10 @@ static const TypeInfo ide_bus_info = { .class_init = ide_bus_class_init, }; -void ide_bus_new(IDEBus *idebus, size_t idebus_size, DeviceState *dev, +void ide_bus_init(IDEBus *idebus, size_t idebus_size, DeviceState *dev, int bus_id, int max_units) { - qbus_create_inplace(idebus, idebus_size, TYPE_IDE_BUS, dev, NULL); + qbus_init(idebus, idebus_size, TYPE_IDE_BUS, dev, NULL); idebus->bus_id = bus_id; idebus->max_units = max_units; } diff --git a/hw/ide/sii3112.c b/hw/ide/sii3112.c index 34c347b9c20..46204f10d75 100644 --- a/hw/ide/sii3112.c +++ b/hw/ide/sii3112.c @@ -283,7 +283,7 @@ static void sii3112_pci_realize(PCIDevice *dev, Error **errp) qdev_init_gpio_in(ds, sii3112_set_irq, 2); for (i = 0; i < 2; i++) { - ide_bus_new(&s->bus[i], sizeof(s->bus[i]), ds, i, 1); + ide_bus_init(&s->bus[i], sizeof(s->bus[i]), ds, i, 1); ide_init2(&s->bus[i], qdev_get_gpio_in(ds, i)); bmdma_init(&s->bus[i], &s->bmdma[i], s); diff --git a/hw/ide/trace-events b/hw/ide/trace-events index 6e357685f9b..15d7921f156 100644 --- a/hw/ide/trace-events +++ b/hw/ide/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
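A small, self-contained sketch (not QEMU code) of the IDENTIFY word 60/61 clamp added to ide_identify_size() above: the LBA28 capacity is capped at 0x0FFFFFFF sectors (about 128 GiB), while words 100-103 continue to report the full LBA48 sector count. The disk sizes below are arbitrary examples:

#include <stdint.h>
#include <assert.h>

static uint32_t lba28_capacity(int64_t nb_sectors)
{
    /* mirrors the new clamp: anything >= 1 << 28 is reported as (1 << 28) - 1 */
    return nb_sectors >= (1 << 28) ? (1u << 28) - 1 : (uint32_t)nb_sectors;
}

int main(void)
{
    assert(lba28_capacity(4294967296LL) == 0x0fffffffu); /* 2 TiB disk, clamped   */
    assert(lba28_capacity(209715200LL) == 209715200u);   /* 100 GiB disk, as-is   */
    return 0;
}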
# core.c # portio diff --git a/hw/ide/via.c b/hw/ide/via.c index be09912b334..82def819c41 100644 --- a/hw/ide/via.c +++ b/hw/ide/via.c @@ -29,7 +29,7 @@ #include "migration/vmstate.h" #include "qemu/module.h" #include "sysemu/dma.h" - +#include "hw/isa/vt82c686.h" #include "hw/ide/pci.h" #include "trace.h" @@ -112,7 +112,7 @@ static void via_ide_set_irq(void *opaque, int n, int level) d->config[0x70 + n * 8] &= ~0x80; } - qemu_set_irq(isa_get_irq(NULL, 14 + n), level); + via_isa_set_irq(pci_get_function_0(d), 14 + n, level); } static void via_ide_reset(DeviceState *dev) @@ -190,7 +190,7 @@ static void via_ide_realize(PCIDevice *dev, Error **errp) qdev_init_gpio_in(ds, via_ide_set_irq, 2); for (i = 0; i < 2; i++) { - ide_bus_new(&d->bus[i], sizeof(d->bus[i]), ds, i, 2); + ide_bus_init(&d->bus[i], sizeof(d->bus[i]), ds, i, 2); ide_init2(&d->bus[i], qdev_get_gpio_in(ds, i)); bmdma_init(&d->bus[i], &d->bmdma[i], d); @@ -217,6 +217,9 @@ static void via_ide_class_init(ObjectClass *klass, void *data) dc->reset = via_ide_reset; dc->vmsd = &vmstate_ide_pci; + /* Reason: only works as function of VIA southbridge */ + dc->user_creatable = false; + k->realize = via_ide_realize; k->exit = via_ide_exitfn; k->vendor_id = PCI_VENDOR_ID_VIA; diff --git a/hw/input/hid.c b/hw/input/hid.c index e1d2e460837..8aab0521f40 100644 --- a/hw/input/hid.c +++ b/hw/input/hid.c @@ -51,8 +51,8 @@ static const uint8_t hid_usage_keys[0x100] = { 0x45, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0xe8, 0xe9, 0x71, 0x72, 0x73, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0xe3, 0xe7, 0x65, + 0x88, 0x00, 0x00, 0x87, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x8a, 0x00, 0x8b, 0x00, 0x89, 0xe7, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, diff --git a/hw/input/lasips2.c b/hw/input/lasips2.c index 0786e573388..68d741d3421 100644 --- a/hw/input/lasips2.c +++ b/hw/input/lasips2.c @@ -24,12 +24,9 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "hw/qdev-properties.h" -#include "hw/hw.h" #include "hw/input/ps2.h" #include "hw/input/lasips2.h" -#include "hw/sysbus.h" #include "exec/hwaddr.h" -#include "sysemu/sysemu.h" #include "trace.h" #include "exec/address-spaces.h" #include "migration/vmstate.h" @@ -99,7 +96,7 @@ typedef enum { LASIPS2_STATUS_CLKSHD = 0x80, } lasips2_status_reg_t; -static const char *artist_read_reg_name(uint64_t addr) +static const char *lasips2_read_reg_name(uint64_t addr) { switch (addr & 0xc) { case REG_PS2_ID: @@ -119,7 +116,7 @@ static const char *artist_read_reg_name(uint64_t addr) } } -static const char *artist_write_reg_name(uint64_t addr) +static const char *lasips2_write_reg_name(uint64_t addr) { switch (addr & 0x0c) { case REG_PS2_RESET: @@ -148,7 +145,7 @@ static void lasips2_reg_write(void *opaque, hwaddr addr, uint64_t val, LASIPS2Port *port = opaque; trace_lasips2_reg_write(size, port->id, addr, - artist_write_reg_name(addr), val); + lasips2_write_reg_name(addr), val); switch (addr & 0xc) { case REG_PS2_CONTROL: @@ -242,7 +239,7 @@ static uint64_t lasips2_reg_read(void *opaque, hwaddr addr, unsigned size) break; } trace_lasips2_reg_read(size, port->id, addr, - artist_read_reg_name(addr), ret); + lasips2_read_reg_name(addr), ret); return ret; } diff --git a/hw/input/lm832x.c b/hw/input/lm832x.c index 4cb1e9de01f..19a646d9bb4 100644 --- a/hw/input/lm832x.c +++ b/hw/input/lm832x.c @@ -19,6 +19,7 @@ */ #include "qemu/osdep.h" +#include 
"hw/input/lm832x.h" #include "hw/i2c/i2c.h" #include "hw/irq.h" #include "migration/vmstate.h" @@ -27,7 +28,6 @@ #include "ui/console.h" #include "qom/object.h" -#define TYPE_LM8323 "lm8323" OBJECT_DECLARE_SIMPLE_TYPE(LM823KbdState, LM8323) struct LM823KbdState { diff --git a/hw/input/meson.build b/hw/input/meson.build index 0042c3f0dc5..8deb011d4a6 100644 --- a/hw/input/meson.build +++ b/hw/input/meson.build @@ -13,7 +13,6 @@ softmmu_ss.add(when: 'CONFIG_VIRTIO_INPUT', if_true: files('virtio-input-hid.c') softmmu_ss.add(when: 'CONFIG_VIRTIO_INPUT_HOST', if_true: files('virtio-input-host.c')) softmmu_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input.c')) -softmmu_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-softusb.c')) softmmu_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx_keypad.c')) softmmu_ss.add(when: 'CONFIG_TSC210X', if_true: files('tsc210x.c')) softmmu_ss.add(when: 'CONFIG_LASIPS2', if_true: files('lasips2.c')) diff --git a/hw/input/milkymist-softusb.c b/hw/input/milkymist-softusb.c deleted file mode 100644 index d885c708d7c..00000000000 --- a/hw/input/milkymist-softusb.c +++ /dev/null @@ -1,319 +0,0 @@ -/* - * QEMU model of the Milkymist SoftUSB block. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- * - * - * Specification available at: - * not available yet - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "ui/console.h" -#include "hw/input/hid.h" -#include "hw/irq.h" -#include "hw/qdev-properties.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -enum { - R_CTRL = 0, - R_MAX -}; - -enum { - CTRL_RESET = (1<<0), -}; - -#define COMLOC_DEBUG_PRODUCE 0x1000 -#define COMLOC_DEBUG_BASE 0x1001 -#define COMLOC_MEVT_PRODUCE 0x1101 -#define COMLOC_MEVT_BASE 0x1102 -#define COMLOC_KEVT_PRODUCE 0x1142 -#define COMLOC_KEVT_BASE 0x1143 - -#define TYPE_MILKYMIST_SOFTUSB "milkymist-softusb" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistSoftUsbState, MILKYMIST_SOFTUSB) - -struct MilkymistSoftUsbState { - SysBusDevice parent_obj; - - HIDState hid_kbd; - HIDState hid_mouse; - - MemoryRegion regs_region; - MemoryRegion pmem; - MemoryRegion dmem; - qemu_irq irq; - - void *pmem_ptr; - void *dmem_ptr; - - /* device properties */ - uint32_t pmem_size; - uint32_t dmem_size; - - /* device registers */ - uint32_t regs[R_MAX]; - - /* mouse state */ - uint8_t mouse_hid_buffer[4]; - - /* keyboard state */ - uint8_t kbd_hid_buffer[8]; -}; - -static uint64_t softusb_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistSoftUsbState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_CTRL: - r = s->regs[addr]; - break; - - default: - error_report("milkymist_softusb: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_softusb_memory_read(addr << 2, r); - - return r; -} - -static void -softusb_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistSoftUsbState *s = opaque; - - trace_milkymist_softusb_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_CTRL: - s->regs[addr] = value; - break; - - default: - error_report("milkymist_softusb: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } -} - -static const MemoryRegionOps softusb_mmio_ops = { - .read = softusb_read, - .write = softusb_write, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, -}; - -static inline void softusb_read_dmem(MilkymistSoftUsbState *s, - uint32_t offset, uint8_t *buf, uint32_t len) -{ - if (offset + len >= s->dmem_size) { - error_report("milkymist_softusb: read dmem out of bounds " - "at offset 0x%x, len %d", offset, len); - memset(buf, 0, len); - return; - } - - memcpy(buf, s->dmem_ptr + offset, len); -} - -static inline void softusb_write_dmem(MilkymistSoftUsbState *s, - uint32_t offset, uint8_t *buf, uint32_t len) -{ - if (offset + len >= s->dmem_size) { - error_report("milkymist_softusb: write dmem out of bounds " - "at offset 0x%x, len %d", offset, len); - return; - } - - memcpy(s->dmem_ptr + offset, buf, len); -} - -static void softusb_mouse_changed(MilkymistSoftUsbState *s) -{ - uint8_t m; - - softusb_read_dmem(s, COMLOC_MEVT_PRODUCE, &m, 1); - trace_milkymist_softusb_mevt(m); - softusb_write_dmem(s, COMLOC_MEVT_BASE + 4 * m, s->mouse_hid_buffer, 4); - m = (m + 1) & 0xf; - softusb_write_dmem(s, COMLOC_MEVT_PRODUCE, &m, 1); - - trace_milkymist_softusb_pulse_irq(); - qemu_irq_pulse(s->irq); -} - -static void softusb_kbd_changed(MilkymistSoftUsbState *s) -{ - uint8_t m; - - softusb_read_dmem(s, COMLOC_KEVT_PRODUCE, &m, 1); - trace_milkymist_softusb_kevt(m); - softusb_write_dmem(s, COMLOC_KEVT_BASE + 8 * m, 
s->kbd_hid_buffer, 8); - m = (m + 1) & 0x7; - softusb_write_dmem(s, COMLOC_KEVT_PRODUCE, &m, 1); - - trace_milkymist_softusb_pulse_irq(); - qemu_irq_pulse(s->irq); -} - -static void softusb_kbd_hid_datain(HIDState *hs) -{ - MilkymistSoftUsbState *s = container_of(hs, MilkymistSoftUsbState, hid_kbd); - int len; - - /* if device is in reset, do nothing */ - if (s->regs[R_CTRL] & CTRL_RESET) { - return; - } - - while (hid_has_events(hs)) { - len = hid_keyboard_poll(hs, s->kbd_hid_buffer, - sizeof(s->kbd_hid_buffer)); - - if (len == 8) { - softusb_kbd_changed(s); - } - } -} - -static void softusb_mouse_hid_datain(HIDState *hs) -{ - MilkymistSoftUsbState *s = - container_of(hs, MilkymistSoftUsbState, hid_mouse); - int len; - - /* if device is in reset, do nothing */ - if (s->regs[R_CTRL] & CTRL_RESET) { - return; - } - - while (hid_has_events(hs)) { - len = hid_pointer_poll(hs, s->mouse_hid_buffer, - sizeof(s->mouse_hid_buffer)); - - if (len == 4) { - softusb_mouse_changed(s); - } - } -} - -static void milkymist_softusb_reset(DeviceState *d) -{ - MilkymistSoftUsbState *s = MILKYMIST_SOFTUSB(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - memset(s->kbd_hid_buffer, 0, sizeof(s->kbd_hid_buffer)); - memset(s->mouse_hid_buffer, 0, sizeof(s->mouse_hid_buffer)); - - hid_reset(&s->hid_kbd); - hid_reset(&s->hid_mouse); - - /* defaults */ - s->regs[R_CTRL] = CTRL_RESET; -} - -static void milkymist_softusb_realize(DeviceState *dev, Error **errp) -{ - MilkymistSoftUsbState *s = MILKYMIST_SOFTUSB(dev); - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - - sysbus_init_irq(sbd, &s->irq); - - memory_region_init_io(&s->regs_region, OBJECT(s), &softusb_mmio_ops, s, - "milkymist-softusb", R_MAX * 4); - sysbus_init_mmio(sbd, &s->regs_region); - - /* register pmem and dmem */ - memory_region_init_ram_nomigrate(&s->pmem, OBJECT(s), "milkymist-softusb.pmem", - s->pmem_size, &error_fatal); - vmstate_register_ram_global(&s->pmem); - s->pmem_ptr = memory_region_get_ram_ptr(&s->pmem); - sysbus_init_mmio(sbd, &s->pmem); - memory_region_init_ram_nomigrate(&s->dmem, OBJECT(s), "milkymist-softusb.dmem", - s->dmem_size, &error_fatal); - vmstate_register_ram_global(&s->dmem); - s->dmem_ptr = memory_region_get_ram_ptr(&s->dmem); - sysbus_init_mmio(sbd, &s->dmem); - - hid_init(&s->hid_kbd, HID_KEYBOARD, softusb_kbd_hid_datain); - hid_init(&s->hid_mouse, HID_MOUSE, softusb_mouse_hid_datain); -} - -static const VMStateDescription vmstate_milkymist_softusb = { - .name = "milkymist-softusb", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistSoftUsbState, R_MAX), - VMSTATE_HID_KEYBOARD_DEVICE(hid_kbd, MilkymistSoftUsbState), - VMSTATE_HID_POINTER_DEVICE(hid_mouse, MilkymistSoftUsbState), - VMSTATE_BUFFER(kbd_hid_buffer, MilkymistSoftUsbState), - VMSTATE_BUFFER(mouse_hid_buffer, MilkymistSoftUsbState), - VMSTATE_END_OF_LIST() - } -}; - -static Property milkymist_softusb_properties[] = { - DEFINE_PROP_UINT32("pmem_size", MilkymistSoftUsbState, pmem_size, 0x00001000), - DEFINE_PROP_UINT32("dmem_size", MilkymistSoftUsbState, dmem_size, 0x00002000), - DEFINE_PROP_END_OF_LIST(), -}; - -static void milkymist_softusb_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_softusb_realize; - dc->reset = milkymist_softusb_reset; - dc->vmsd = &vmstate_milkymist_softusb; - device_class_set_props(dc, milkymist_softusb_properties); -} - -static const TypeInfo milkymist_softusb_info = { - .name = 
TYPE_MILKYMIST_SOFTUSB, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistSoftUsbState), - .class_init = milkymist_softusb_class_init, -}; - -static void milkymist_softusb_register_types(void) -{ - type_register_static(&milkymist_softusb_info); -} - -type_init(milkymist_softusb_register_types) diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c index dde85ba6c68..baba62f357a 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c @@ -23,13 +23,16 @@ */ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "qemu/log.h" +#include "qemu/timer.h" #include "hw/isa/isa.h" #include "migration/vmstate.h" #include "hw/acpi/aml-build.h" #include "hw/input/ps2.h" #include "hw/irq.h" #include "hw/input/i8042.h" +#include "hw/qdev-properties.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" @@ -59,21 +62,6 @@ #define KBD_CCMD_RESET 0xFE /* Pulse bit 0 of the output port P2 = CPU reset. */ #define KBD_CCMD_NO_OP 0xFF /* Pulse no bits of the output port P2. */ -/* Keyboard Commands */ -#define KBD_CMD_SET_LEDS 0xED /* Set keyboard leds */ -#define KBD_CMD_ECHO 0xEE -#define KBD_CMD_GET_ID 0xF2 /* get keyboard ID */ -#define KBD_CMD_SET_RATE 0xF3 /* Set typematic rate */ -#define KBD_CMD_ENABLE 0xF4 /* Enable scanning */ -#define KBD_CMD_RESET_DISABLE 0xF5 /* reset and disable scanning */ -#define KBD_CMD_RESET_ENABLE 0xF6 /* reset and enable scanning */ -#define KBD_CMD_RESET 0xFF /* Reset */ - -/* Keyboard Replies */ -#define KBD_REPLY_POR 0xAA /* Power on reset */ -#define KBD_REPLY_ACK 0xFA /* Command ACK */ -#define KBD_REPLY_RESEND 0xFE /* Command NACK, send the cmd again */ - /* Status Register Bits */ #define KBD_STAT_OBF 0x01 /* Keyboard output buffer full */ #define KBD_STAT_IBF 0x02 /* Keyboard input buffer full */ @@ -106,41 +94,37 @@ */ #define KBD_OUT_ONES 0xcc -/* Mouse Commands */ -#define AUX_SET_SCALE11 0xE6 /* Set 1:1 scaling */ -#define AUX_SET_SCALE21 0xE7 /* Set 2:1 scaling */ -#define AUX_SET_RES 0xE8 /* Set resolution */ -#define AUX_GET_SCALE 0xE9 /* Get scaling factor */ -#define AUX_SET_STREAM 0xEA /* Set stream mode */ -#define AUX_POLL 0xEB /* Poll */ -#define AUX_RESET_WRAP 0xEC /* Reset wrap mode */ -#define AUX_SET_WRAP 0xEE /* Set wrap mode */ -#define AUX_SET_REMOTE 0xF0 /* Set remote mode */ -#define AUX_GET_TYPE 0xF2 /* Get type */ -#define AUX_SET_SAMPLE 0xF3 /* Set sample rate */ -#define AUX_ENABLE_DEV 0xF4 /* Enable aux device */ -#define AUX_DISABLE_DEV 0xF5 /* Disable aux device */ -#define AUX_SET_DEFAULT 0xF6 -#define AUX_RESET 0xFF /* Reset aux device */ -#define AUX_ACK 0xFA /* Command byte ACK. */ - -#define MOUSE_STATUS_REMOTE 0x40 -#define MOUSE_STATUS_ENABLED 0x20 -#define MOUSE_STATUS_SCALE21 0x10 - -#define KBD_PENDING_KBD 1 -#define KBD_PENDING_AUX 2 +#define KBD_PENDING_KBD_COMPAT 0x01 +#define KBD_PENDING_AUX_COMPAT 0x02 +#define KBD_PENDING_CTRL_KBD 0x04 +#define KBD_PENDING_CTRL_AUX 0x08 +#define KBD_PENDING_KBD KBD_MODE_DISABLE_KBD /* 0x10 */ +#define KBD_PENDING_AUX KBD_MODE_DISABLE_MOUSE /* 0x20 */ + +#define KBD_MIGR_TIMER_PENDING 0x1 + +#define KBD_OBSRC_KBD 0x01 +#define KBD_OBSRC_MOUSE 0x02 +#define KBD_OBSRC_CTRL 0x04 typedef struct KBDState { uint8_t write_cmd; /* if non zero, write data to port 60 is expected */ uint8_t status; uint8_t mode; uint8_t outport; + uint32_t migration_flags; + uint32_t obsrc; bool outport_present; + bool extended_state; + bool extended_state_loaded; /* Bitmask of devices with data available. 
*/ uint8_t pending; + uint8_t obdata; + uint8_t cbdata; + uint8_t pending_tmp; void *kbd; void *mouse; + QEMUTimer *throttle_timer; qemu_irq irq_kbd; qemu_irq irq_mouse; @@ -148,56 +132,123 @@ typedef struct KBDState { hwaddr mask; } KBDState; -/* update irq and KBD_STAT_[MOUSE_]OBF */ /* XXX: not generating the irqs if KBD_MODE_DISABLE_KBD is set may be incorrect, but it avoids having to simulate exact delays */ -static void kbd_update_irq(KBDState *s) +static void kbd_update_irq_lines(KBDState *s) { int irq_kbd_level, irq_mouse_level; irq_kbd_level = 0; irq_mouse_level = 0; + + if (s->status & KBD_STAT_OBF) { + if (s->status & KBD_STAT_MOUSE_OBF) { + if (s->mode & KBD_MODE_MOUSE_INT) { + irq_mouse_level = 1; + } + } else { + if ((s->mode & KBD_MODE_KBD_INT) && + !(s->mode & KBD_MODE_DISABLE_KBD)) { + irq_kbd_level = 1; + } + } + } + qemu_set_irq(s->irq_kbd, irq_kbd_level); + qemu_set_irq(s->irq_mouse, irq_mouse_level); +} + +static void kbd_deassert_irq(KBDState *s) +{ + s->status &= ~(KBD_STAT_OBF | KBD_STAT_MOUSE_OBF); + s->outport &= ~(KBD_OUT_OBF | KBD_OUT_MOUSE_OBF); + kbd_update_irq_lines(s); +} + +static uint8_t kbd_pending(KBDState *s) +{ + if (s->extended_state) { + return s->pending & (~s->mode | ~(KBD_PENDING_KBD | KBD_PENDING_AUX)); + } else { + return s->pending; + } +} + +/* update irq and KBD_STAT_[MOUSE_]OBF */ +static void kbd_update_irq(KBDState *s) +{ + uint8_t pending = kbd_pending(s); + s->status &= ~(KBD_STAT_OBF | KBD_STAT_MOUSE_OBF); s->outport &= ~(KBD_OUT_OBF | KBD_OUT_MOUSE_OBF); - if (s->pending) { + if (pending) { s->status |= KBD_STAT_OBF; s->outport |= KBD_OUT_OBF; - /* kbd data takes priority over aux data. */ - if (s->pending == KBD_PENDING_AUX) { + if (pending & KBD_PENDING_CTRL_KBD) { + s->obsrc = KBD_OBSRC_CTRL; + } else if (pending & KBD_PENDING_CTRL_AUX) { s->status |= KBD_STAT_MOUSE_OBF; s->outport |= KBD_OUT_MOUSE_OBF; - if (s->mode & KBD_MODE_MOUSE_INT) - irq_mouse_level = 1; + s->obsrc = KBD_OBSRC_CTRL; + } else if (pending & KBD_PENDING_KBD) { + s->obsrc = KBD_OBSRC_KBD; } else { - if ((s->mode & KBD_MODE_KBD_INT) && - !(s->mode & KBD_MODE_DISABLE_KBD)) - irq_kbd_level = 1; + s->status |= KBD_STAT_MOUSE_OBF; + s->outport |= KBD_OUT_MOUSE_OBF; + s->obsrc = KBD_OBSRC_MOUSE; } } - qemu_set_irq(s->irq_kbd, irq_kbd_level); - qemu_set_irq(s->irq_mouse, irq_mouse_level); + kbd_update_irq_lines(s); +} + +static void kbd_safe_update_irq(KBDState *s) +{ + /* + * with KBD_STAT_OBF set, a call to kbd_read_data() will eventually call + * kbd_update_irq() + */ + if (s->status & KBD_STAT_OBF) { + return; + } + /* the throttle timer is pending and will call kbd_update_irq() */ + if (s->throttle_timer && timer_pending(s->throttle_timer)) { + return; + } + if (kbd_pending(s)) { + kbd_update_irq(s); + } } static void kbd_update_kbd_irq(void *opaque, int level) { - KBDState *s = (KBDState *)opaque; + KBDState *s = opaque; - if (level) + if (level) { s->pending |= KBD_PENDING_KBD; - else + } else { s->pending &= ~KBD_PENDING_KBD; - kbd_update_irq(s); + } + kbd_safe_update_irq(s); } static void kbd_update_aux_irq(void *opaque, int level) { - KBDState *s = (KBDState *)opaque; + KBDState *s = opaque; - if (level) + if (level) { s->pending |= KBD_PENDING_AUX; - else + } else { s->pending &= ~KBD_PENDING_AUX; - kbd_update_irq(s); + } + kbd_safe_update_irq(s); +} + +static void kbd_throttle_timeout(void *opaque) +{ + KBDState *s = opaque; + + if (kbd_pending(s)) { + kbd_update_irq(s); + } } static uint64_t kbd_read_status(void *opaque, hwaddr addr, @@ -212,10 +263,25 @@ 
static uint64_t kbd_read_status(void *opaque, hwaddr addr, static void kbd_queue(KBDState *s, int b, int aux) { - if (aux) - ps2_queue(s->mouse, b); - else - ps2_queue(s->kbd, b); + if (s->extended_state) { + s->cbdata = b; + s->pending &= ~KBD_PENDING_CTRL_KBD & ~KBD_PENDING_CTRL_AUX; + s->pending |= aux ? KBD_PENDING_CTRL_AUX : KBD_PENDING_CTRL_KBD; + kbd_safe_update_irq(s); + } else { + ps2_queue(aux ? s->mouse : s->kbd, b); + } +} + +static uint8_t kbd_dequeue(KBDState *s) +{ + uint8_t b = s->cbdata; + + s->pending &= ~KBD_PENDING_CTRL_KBD & ~KBD_PENDING_CTRL_AUX; + if (kbd_pending(s)) { + kbd_update_irq(s); + } + return b; } static void outport_write(KBDState *s, uint32_t val) @@ -265,6 +331,7 @@ static void kbd_write_command(void *opaque, hwaddr addr, break; case KBD_CCMD_MOUSE_ENABLE: s->mode &= ~KBD_MODE_DISABLE_MOUSE; + kbd_safe_update_irq(s); break; case KBD_CCMD_TEST_MOUSE: kbd_queue(s, 0x00, 0); @@ -278,11 +345,10 @@ static void kbd_write_command(void *opaque, hwaddr addr, break; case KBD_CCMD_KBD_DISABLE: s->mode |= KBD_MODE_DISABLE_KBD; - kbd_update_irq(s); break; case KBD_CCMD_KBD_ENABLE: s->mode &= ~KBD_MODE_DISABLE_KBD; - kbd_update_irq(s); + kbd_safe_update_irq(s); break; case KBD_CCMD_READ_INPORT: kbd_queue(s, 0x80, 0); @@ -315,15 +381,24 @@ static uint64_t kbd_read_data(void *opaque, hwaddr addr, unsigned size) { KBDState *s = opaque; - uint32_t val; - if (s->pending == KBD_PENDING_AUX) - val = ps2_read_data(s->mouse); - else - val = ps2_read_data(s->kbd); + if (s->status & KBD_STAT_OBF) { + kbd_deassert_irq(s); + if (s->obsrc & KBD_OBSRC_KBD) { + if (s->throttle_timer) { + timer_mod(s->throttle_timer, + qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + 1000); + } + s->obdata = ps2_read_data(s->kbd); + } else if (s->obsrc & KBD_OBSRC_MOUSE) { + s->obdata = ps2_read_data(s->mouse); + } else if (s->obsrc & KBD_OBSRC_CTRL) { + s->obdata = kbd_dequeue(s); + } + } - trace_pckbd_kbd_read_data(val); - return val; + trace_pckbd_kbd_read_data(s->obdata); + return s->obdata; } static void kbd_write_data(void *opaque, hwaddr addr, @@ -336,12 +411,23 @@ static void kbd_write_data(void *opaque, hwaddr addr, switch(s->write_cmd) { case 0: ps2_write_keyboard(s->kbd, val); + /* sending data to the keyboard reenables PS/2 communication */ + s->mode &= ~KBD_MODE_DISABLE_KBD; + kbd_safe_update_irq(s); break; case KBD_CCMD_WRITE_MODE: s->mode = val; ps2_keyboard_set_translation(s->kbd, (s->mode & KBD_MODE_KCC) != 0); - /* ??? */ - kbd_update_irq(s); + /* + * a write to the mode byte interrupt enable flags directly updates + * the irq lines + */ + kbd_update_irq_lines(s); + /* + * a write to the mode byte disable interface flags may raise + * an irq if there is pending data in the PS/2 queues. 
+ */ + kbd_safe_update_irq(s); break; case KBD_CCMD_WRITE_OBUF: kbd_queue(s, val, 0); @@ -354,6 +440,9 @@ static void kbd_write_data(void *opaque, hwaddr addr, break; case KBD_CCMD_WRITE_MOUSE: ps2_write_mouse(s->mouse, val); + /* sending data to the mouse reenables PS/2 communication */ + s->mode &= ~KBD_MODE_DISABLE_MOUSE; + kbd_safe_update_irq(s); break; default: break; @@ -368,7 +457,11 @@ static void kbd_reset(void *opaque) s->mode = KBD_MODE_KBD_INT | KBD_MODE_MOUSE_INT; s->status = KBD_STAT_CMD | KBD_STAT_UNLOCKED; s->outport = KBD_OUT_RESET | KBD_OUT_A20 | KBD_OUT_ONES; - s->outport_present = false; + s->pending = 0; + kbd_deassert_irq(s); + if (s->throttle_timer) { + timer_del(s->throttle_timer); + } } static uint8_t kbd_outport_default(KBDState *s) @@ -403,13 +496,99 @@ static const VMStateDescription vmstate_kbd_outport = { } }; +static int kbd_extended_state_pre_save(void *opaque) +{ + KBDState *s = opaque; + + s->migration_flags = 0; + if (s->throttle_timer && timer_pending(s->throttle_timer)) { + s->migration_flags |= KBD_MIGR_TIMER_PENDING; + } + + return 0; +} + +static int kbd_extended_state_post_load(void *opaque, int version_id) +{ + KBDState *s = opaque; + + if (s->migration_flags & KBD_MIGR_TIMER_PENDING) { + kbd_throttle_timeout(s); + } + s->extended_state_loaded = true; + + return 0; +} + +static bool kbd_extended_state_needed(void *opaque) +{ + KBDState *s = opaque; + + return s->extended_state; +} + +static const VMStateDescription vmstate_kbd_extended_state = { + .name = "pckbd/extended_state", + .post_load = kbd_extended_state_post_load, + .pre_save = kbd_extended_state_pre_save, + .needed = kbd_extended_state_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT32(migration_flags, KBDState), + VMSTATE_UINT32(obsrc, KBDState), + VMSTATE_UINT8(obdata, KBDState), + VMSTATE_UINT8(cbdata, KBDState), + VMSTATE_END_OF_LIST() + } +}; + +static int kbd_pre_save(void *opaque) +{ + KBDState *s = opaque; + + if (s->extended_state) { + s->pending_tmp = s->pending; + } else { + s->pending_tmp = 0; + if (s->pending & KBD_PENDING_KBD) { + s->pending_tmp |= KBD_PENDING_KBD_COMPAT; + } + if (s->pending & KBD_PENDING_AUX) { + s->pending_tmp |= KBD_PENDING_AUX_COMPAT; + } + } + return 0; +} + +static int kbd_pre_load(void *opaque) +{ + KBDState *s = opaque; + + s->outport_present = false; + s->extended_state_loaded = false; + return 0; +} + static int kbd_post_load(void *opaque, int version_id) { KBDState *s = opaque; if (!s->outport_present) { s->outport = kbd_outport_default(s); } - s->outport_present = false; + s->pending = s->pending_tmp; + if (!s->extended_state_loaded) { + s->obsrc = s->status & KBD_STAT_OBF ? + (s->status & KBD_STAT_MOUSE_OBF ? 
KBD_OBSRC_MOUSE : KBD_OBSRC_KBD) : + 0; + if (s->pending & KBD_PENDING_KBD_COMPAT) { + s->pending |= KBD_PENDING_KBD; + } + if (s->pending & KBD_PENDING_AUX_COMPAT) { + s->pending |= KBD_PENDING_AUX; + } + } + /* clear all unused flags */ + s->pending &= KBD_PENDING_CTRL_KBD | KBD_PENDING_CTRL_AUX | + KBD_PENDING_KBD | KBD_PENDING_AUX; return 0; } @@ -417,16 +596,19 @@ static const VMStateDescription vmstate_kbd = { .name = "pckbd", .version_id = 3, .minimum_version_id = 3, + .pre_load = kbd_pre_load, .post_load = kbd_post_load, + .pre_save = kbd_pre_save, .fields = (VMStateField[]) { VMSTATE_UINT8(write_cmd, KBDState), VMSTATE_UINT8(status, KBDState), VMSTATE_UINT8(mode, KBDState), - VMSTATE_UINT8(pending, KBDState), + VMSTATE_UINT8(pending_tmp, KBDState), VMSTATE_END_OF_LIST() }, .subsections = (const VMStateDescription*[]) { &vmstate_kbd_outport, + &vmstate_kbd_extended_state, NULL } }; @@ -472,6 +654,8 @@ void i8042_mm_init(qemu_irq kbd_irq, qemu_irq mouse_irq, s->irq_mouse = mouse_irq; s->mask = mask; + s->extended_state = true; + vmstate_register(NULL, 0, &vmstate_kbd, s); memory_region_init_io(region, NULL, &i8042_mmio_ops, s, "i8042", size); @@ -485,6 +669,7 @@ struct ISAKBDState { ISADevice parent_obj; KBDState kbd; + bool kbd_throttle; MemoryRegion io[2]; }; @@ -557,6 +742,13 @@ static void i8042_realizefn(DeviceState *dev, Error **errp) s->kbd = ps2_kbd_init(kbd_update_kbd_irq, s); s->mouse = ps2_mouse_init(kbd_update_aux_irq, s); + if (isa_s->kbd_throttle && !isa_s->kbd.extended_state) { + warn_report(TYPE_I8042 ": can't enable kbd-throttle without" + " extended-state, disabling kbd-throttle"); + } else if (isa_s->kbd_throttle) { + s->throttle_timer = timer_new_us(QEMU_CLOCK_VIRTUAL, + kbd_throttle_timeout, s); + } qemu_register_reset(kbd_reset, s); } @@ -588,11 +780,18 @@ static void i8042_build_aml(ISADevice *isadev, Aml *scope) aml_append(scope, mou); } +static Property i8042_properties[] = { + DEFINE_PROP_BOOL("extended-state", ISAKBDState, kbd.extended_state, true), + DEFINE_PROP_BOOL("kbd-throttle", ISAKBDState, kbd_throttle, false), + DEFINE_PROP_END_OF_LIST(), +}; + static void i8042_class_initfn(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); ISADeviceClass *isa = ISA_DEVICE_CLASS(klass); + device_class_set_props(dc, i8042_properties); dc->realize = i8042_realizefn; dc->vmsd = &vmstate_kbd_isa; isa->build_aml = i8042_build_aml; diff --git a/hw/input/ps2.c b/hw/input/ps2.c index 72cdb80ae1c..9376a8f4ce5 100644 --- a/hw/input/ps2.c +++ b/hw/input/ps2.c @@ -74,7 +74,13 @@ #define MOUSE_STATUS_ENABLED 0x20 #define MOUSE_STATUS_SCALE21 0x10 -#define PS2_QUEUE_SIZE 16 /* Buffer size required by PS/2 protocol */ +/* + * PS/2 buffer size. Keep 256 bytes for compatibility with + * older QEMU versions. + */ +#define PS2_BUFFER_SIZE 256 +#define PS2_QUEUE_SIZE 16 /* Queue size required by PS/2 protocol */ +#define PS2_QUEUE_HEADROOM 8 /* Queue size for keyboard command replies */ /* Bits for 'modifiers' field in PS2KbdState */ #define MOD_CTRL_L (1 << 0) @@ -85,10 +91,8 @@ #define MOD_ALT_R (1 << 5) typedef struct { - /* Keep the data array 256 bytes long, which compatibility - with older qemu versions. 
*/ - uint8_t data[256]; - int rptr, wptr, count; + uint8_t data[PS2_BUFFER_SIZE]; + int rptr, wptr, cwptr, count; } PS2Queue; struct PS2State { @@ -183,6 +187,7 @@ static void ps2_reset_queue(PS2State *s) q->rptr = 0; q->wptr = 0; + q->cwptr = -1; q->count = 0; } @@ -195,13 +200,14 @@ void ps2_queue_noirq(PS2State *s, int b) { PS2Queue *q = &s->queue; - if (q->count == PS2_QUEUE_SIZE) { + if (q->count >= PS2_QUEUE_SIZE) { return; } q->data[q->wptr] = b; - if (++q->wptr == PS2_QUEUE_SIZE) + if (++q->wptr == PS2_BUFFER_SIZE) { q->wptr = 0; + } q->count++; } @@ -212,8 +218,12 @@ void ps2_raise_irq(PS2State *s) void ps2_queue(PS2State *s, int b) { + if (PS2_QUEUE_SIZE - s->queue.count < 1) { + return; + } + ps2_queue_noirq(s, b); - s->update_irq(s->update_arg, 1); + ps2_raise_irq(s); } void ps2_queue_2(PS2State *s, int b1, int b2) @@ -224,7 +234,7 @@ void ps2_queue_2(PS2State *s, int b1, int b2) ps2_queue_noirq(s, b1); ps2_queue_noirq(s, b2); - s->update_irq(s->update_arg, 1); + ps2_raise_irq(s); } void ps2_queue_3(PS2State *s, int b1, int b2, int b3) @@ -236,7 +246,7 @@ void ps2_queue_3(PS2State *s, int b1, int b2, int b3) ps2_queue_noirq(s, b1); ps2_queue_noirq(s, b2); ps2_queue_noirq(s, b3); - s->update_irq(s->update_arg, 1); + ps2_raise_irq(s); } void ps2_queue_4(PS2State *s, int b1, int b2, int b3, int b4) @@ -249,7 +259,64 @@ void ps2_queue_4(PS2State *s, int b1, int b2, int b3, int b4) ps2_queue_noirq(s, b2); ps2_queue_noirq(s, b3); ps2_queue_noirq(s, b4); - s->update_irq(s->update_arg, 1); + ps2_raise_irq(s); +} + +static void ps2_cqueue_data(PS2Queue *q, int b) +{ + q->data[q->cwptr] = b; + if (++q->cwptr >= PS2_BUFFER_SIZE) { + q->cwptr = 0; + } + q->count++; +} + +static void ps2_cqueue_1(PS2State *s, int b1) +{ + PS2Queue *q = &s->queue; + + q->rptr = (q->rptr - 1) & (PS2_BUFFER_SIZE - 1); + q->cwptr = q->rptr; + ps2_cqueue_data(q, b1); + ps2_raise_irq(s); +} + +static void ps2_cqueue_2(PS2State *s, int b1, int b2) +{ + PS2Queue *q = &s->queue; + + q->rptr = (q->rptr - 2) & (PS2_BUFFER_SIZE - 1); + q->cwptr = q->rptr; + ps2_cqueue_data(q, b1); + ps2_cqueue_data(q, b2); + ps2_raise_irq(s); +} + +static void ps2_cqueue_3(PS2State *s, int b1, int b2, int b3) +{ + PS2Queue *q = &s->queue; + + q->rptr = (q->rptr - 3) & (PS2_BUFFER_SIZE - 1); + q->cwptr = q->rptr; + ps2_cqueue_data(q, b1); + ps2_cqueue_data(q, b2); + ps2_cqueue_data(q, b3); + ps2_raise_irq(s); +} + +static void ps2_cqueue_reset(PS2State *s) +{ + PS2Queue *q = &s->queue; + int ccount; + + if (q->cwptr == -1) { + return; + } + + ccount = (q->cwptr - q->rptr) & (PS2_BUFFER_SIZE - 1); + q->count -= ccount; + q->rptr = q->cwptr; + q->cwptr = -1; } /* keycode is the untranslated scancode in the current scancode set. 
*/ @@ -293,7 +360,8 @@ static void ps2_keyboard_event(DeviceState *dev, QemuConsole *src, qcode = qemu_input_key_value_to_qcode(key->key); mod = ps2_modifier_bit(qcode); - trace_ps2_keyboard_event(s, qcode, key->down, mod, s->modifiers); + trace_ps2_keyboard_event(s, qcode, key->down, mod, + s->modifiers, s->scancode_set, s->translate); if (key->down) { s->modifiers |= mod; } else { @@ -504,18 +572,26 @@ uint32_t ps2_read_data(PS2State *s) (needed for EMM386) */ /* XXX: need a timer to do things correctly */ index = q->rptr - 1; - if (index < 0) - index = PS2_QUEUE_SIZE - 1; + if (index < 0) { + index = PS2_BUFFER_SIZE - 1; + } val = q->data[index]; } else { val = q->data[q->rptr]; - if (++q->rptr == PS2_QUEUE_SIZE) + if (++q->rptr == PS2_BUFFER_SIZE) { q->rptr = 0; + } q->count--; + if (q->rptr == q->cwptr) { + /* command reply queue is empty */ + q->cwptr = -1; + } /* reading deasserts IRQ */ s->update_irq(s->update_arg, 0); /* reassert IRQs if data left */ - s->update_irq(s->update_arg, q->count != 0); + if (q->count) { + s->update_irq(s->update_arg, 1); + } } return val; } @@ -541,92 +617,83 @@ void ps2_write_keyboard(void *opaque, int val) PS2KbdState *s = (PS2KbdState *)opaque; trace_ps2_write_keyboard(opaque, val); + ps2_cqueue_reset(&s->common); switch(s->common.write_cmd) { default: case -1: switch(val) { case 0x00: - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); break; case 0x05: - ps2_queue(&s->common, KBD_REPLY_RESEND); + ps2_cqueue_1(&s->common, KBD_REPLY_RESEND); break; case KBD_CMD_GET_ID: /* We emulate a MF2 AT keyboard here */ - if (s->translate) - ps2_queue_3(&s->common, - KBD_REPLY_ACK, - KBD_REPLY_ID, - 0x41); - else - ps2_queue_3(&s->common, - KBD_REPLY_ACK, - KBD_REPLY_ID, - 0x83); + ps2_cqueue_3(&s->common, KBD_REPLY_ACK, KBD_REPLY_ID, + s->translate ? 0x41 : 0x83); break; case KBD_CMD_ECHO: - ps2_queue(&s->common, KBD_CMD_ECHO); + ps2_cqueue_1(&s->common, KBD_CMD_ECHO); break; case KBD_CMD_ENABLE: s->scan_enabled = 1; - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); break; case KBD_CMD_SCANCODE: case KBD_CMD_SET_LEDS: case KBD_CMD_SET_RATE: case KBD_CMD_SET_MAKE_BREAK: s->common.write_cmd = val; - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); break; case KBD_CMD_RESET_DISABLE: ps2_reset_keyboard(s); s->scan_enabled = 0; - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); break; case KBD_CMD_RESET_ENABLE: ps2_reset_keyboard(s); s->scan_enabled = 1; - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); break; case KBD_CMD_RESET: ps2_reset_keyboard(s); - ps2_queue_2(&s->common, + ps2_cqueue_2(&s->common, KBD_REPLY_ACK, KBD_REPLY_POR); break; case KBD_CMD_SET_TYPEMATIC: - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); break; default: - ps2_queue(&s->common, KBD_REPLY_RESEND); + ps2_cqueue_1(&s->common, KBD_REPLY_RESEND); break; } break; case KBD_CMD_SET_MAKE_BREAK: - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); s->common.write_cmd = -1; break; case KBD_CMD_SCANCODE: if (val == 0) { - if (s->common.queue.count <= PS2_QUEUE_SIZE - 2) { - ps2_queue(&s->common, KBD_REPLY_ACK); - ps2_put_keycode(s, s->scancode_set); - } + ps2_cqueue_2(&s->common, KBD_REPLY_ACK, s->translate ? 
+ translate_table[s->scancode_set] : s->scancode_set); } else if (val >= 1 && val <= 3) { s->scancode_set = val; - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); } else { - ps2_queue(&s->common, KBD_REPLY_RESEND); + ps2_cqueue_1(&s->common, KBD_REPLY_RESEND); } s->common.write_cmd = -1; break; case KBD_CMD_SET_LEDS: ps2_set_ledstate(s, val); - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); s->common.write_cmd = -1; break; case KBD_CMD_SET_RATE: - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); s->common.write_cmd = -1; break; } @@ -645,7 +712,8 @@ void ps2_keyboard_set_translation(void *opaque, int mode) static int ps2_mouse_send_packet(PS2MouseState *s) { - const int needed = 3 + (s->mouse_type - 2); + /* IMPS/2 and IMEX send 4 bytes, PS2 sends 3 bytes */ + const int needed = s->mouse_type ? 4 : 3; unsigned int b; int dx1, dy1, dz1; @@ -918,30 +986,27 @@ static void ps2_common_reset(PS2State *s) static void ps2_common_post_load(PS2State *s) { PS2Queue *q = &s->queue; - uint8_t i, size; - uint8_t tmp_data[PS2_QUEUE_SIZE]; - - /* set the useful data buffer queue size, < PS2_QUEUE_SIZE */ - size = q->count; - if (q->count < 0) { - size = 0; - } else if (q->count > PS2_QUEUE_SIZE) { - size = PS2_QUEUE_SIZE; - } + int ccount = 0; - /* move the queue elements to the start of data array */ - for (i = 0; i < size; i++) { - if (q->rptr < 0 || q->rptr >= sizeof(q->data)) { - q->rptr = 0; + /* limit the number of queued command replies to PS2_QUEUE_HEADROOM */ + if (q->cwptr != -1) { + ccount = (q->cwptr - q->rptr) & (PS2_BUFFER_SIZE - 1); + if (ccount > PS2_QUEUE_HEADROOM) { + ccount = PS2_QUEUE_HEADROOM; } - tmp_data[i] = q->data[q->rptr++]; } - memcpy(q->data, tmp_data, size); - /* reset rptr/wptr/count */ - q->rptr = 0; - q->wptr = (size == PS2_QUEUE_SIZE) ? 0 : size; - q->count = size; + /* limit the scancode queue size to PS2_QUEUE_SIZE */ + if (q->count < ccount) { + q->count = ccount; + } else if (q->count > ccount + PS2_QUEUE_SIZE) { + q->count = ccount + PS2_QUEUE_SIZE; + } + + /* sanitize rptr and recalculate wptr and cwptr */ + q->rptr = q->rptr & (PS2_BUFFER_SIZE - 1); + q->wptr = (q->rptr + q->count) & (PS2_BUFFER_SIZE - 1); + q->cwptr = ccount ? 
(q->rptr + ccount) & (PS2_BUFFER_SIZE - 1) : -1; } static void ps2_kbd_reset(void *opaque) @@ -1032,6 +1097,22 @@ static const VMStateDescription vmstate_ps2_keyboard_need_high_bit = { } }; +static bool ps2_keyboard_cqueue_needed(void *opaque) +{ + PS2KbdState *s = opaque; + + return s->common.queue.cwptr != -1; /* the queue is mostly empty */ +} + +static const VMStateDescription vmstate_ps2_keyboard_cqueue = { + .name = "ps2kbd/command_reply_queue", + .needed = ps2_keyboard_cqueue_needed, + .fields = (VMStateField[]) { + VMSTATE_INT32(common.queue.cwptr, PS2KbdState), + VMSTATE_END_OF_LIST() + } +}; + static int ps2_kbd_post_load(void* opaque, int version_id) { PS2KbdState *s = (PS2KbdState*)opaque; @@ -1045,22 +1126,11 @@ static int ps2_kbd_post_load(void* opaque, int version_id) return 0; } -static int ps2_kbd_pre_save(void *opaque) -{ - PS2KbdState *s = (PS2KbdState *)opaque; - PS2State *ps2 = &s->common; - - ps2_common_post_load(ps2); - - return 0; -} - static const VMStateDescription vmstate_ps2_keyboard = { .name = "ps2kbd", .version_id = 3, .minimum_version_id = 2, .post_load = ps2_kbd_post_load, - .pre_save = ps2_kbd_pre_save, .fields = (VMStateField[]) { VMSTATE_STRUCT(common, PS2KbdState, 0, vmstate_ps2_common, PS2State), VMSTATE_INT32(scan_enabled, PS2KbdState), @@ -1071,6 +1141,7 @@ static const VMStateDescription vmstate_ps2_keyboard = { .subsections = (const VMStateDescription*[]) { &vmstate_ps2_keyboard_ledstate, &vmstate_ps2_keyboard_need_high_bit, + &vmstate_ps2_keyboard_cqueue, NULL } }; @@ -1085,22 +1156,11 @@ static int ps2_mouse_post_load(void *opaque, int version_id) return 0; } -static int ps2_mouse_pre_save(void *opaque) -{ - PS2MouseState *s = (PS2MouseState *)opaque; - PS2State *ps2 = &s->common; - - ps2_common_post_load(ps2); - - return 0; -} - static const VMStateDescription vmstate_ps2_mouse = { .name = "ps2mouse", .version_id = 2, .minimum_version_id = 2, .post_load = ps2_mouse_post_load, - .pre_save = ps2_mouse_pre_save, .fields = (VMStateField[]) { VMSTATE_STRUCT(common, PS2MouseState, 0, vmstate_ps2_common, PS2State), VMSTATE_UINT8(mouse_status, PS2MouseState), diff --git a/hw/input/trace-events b/hw/input/trace-events index 1dd8ad6018a..e0bfe7f3ee4 100644 --- a/hw/input/trace-events +++ b/hw/input/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# adb-kbd.c adb_device_kbd_no_key(void) "Ignoring NO_KEY" @@ -30,7 +30,7 @@ pckbd_kbd_write_data(uint64_t val) "0x%02"PRIx64 # ps2.c ps2_put_keycode(void *opaque, int keycode) "%p keycode 0x%02x" -ps2_keyboard_event(void *opaque, int qcode, int down, unsigned int modifier, unsigned int modifiers) "%p qcode %d down %d modifier 0x%x modifiers 0x%x" +ps2_keyboard_event(void *opaque, int qcode, int down, unsigned int modifier, unsigned int modifiers, int set, int xlate) "%p qcode %d down %d modifier 0x%x modifiers 0x%x set %d xlate %d" ps2_read_data(void *opaque) "%p" ps2_set_ledstate(void *s, int ledstate) "%p ledstate %d" ps2_reset_keyboard(void *s) "%p" @@ -44,13 +44,6 @@ ps2_mouse_reset(void *opaque) "%p" ps2_kbd_init(void *s) "%p" ps2_mouse_init(void *s) "%p" -# milkymist-softusb.c -milkymist_softusb_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_softusb_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_softusb_mevt(uint8_t m) "m %d" -milkymist_softusb_kevt(uint8_t m) "m %d" -milkymist_softusb_pulse_irq(void) "Pulse IRQ" - # hid.c hid_kbd_queue_full(void) "queue full" hid_kbd_queue_empty(void) "queue empty" diff --git a/hw/input/vhost-user-input.c b/hw/input/vhost-user-input.c index 63984a8ba78..273e96a7b12 100644 --- a/hw/input/vhost-user-input.c +++ b/hw/input/vhost-user-input.c @@ -49,13 +49,15 @@ static void vhost_input_get_config(VirtIODevice *vdev, uint8_t *config_data) { VirtIOInput *vinput = VIRTIO_INPUT(vdev); VHostUserInput *vhi = VHOST_USER_INPUT(vdev); + Error *local_err = NULL; int ret; memset(config_data, 0, vinput->cfg_size); - ret = vhost_dev_get_config(&vhi->vhost->dev, config_data, vinput->cfg_size); + ret = vhost_dev_get_config(&vhi->vhost->dev, config_data, vinput->cfg_size, + &local_err); if (ret) { - error_report("vhost-user-input: get device config space failed"); + error_report_err(local_err); return; } } diff --git a/hw/input/virtio-input-host.c b/hw/input/virtio-input-host.c index 85daf73f1a8..137efba57b0 100644 --- a/hw/input/virtio-input-host.c +++ b/hw/input/virtio-input-host.c @@ -193,13 +193,16 @@ static void virtio_input_host_handle_status(VirtIOInput *vinput, { VirtIOInputHost *vih = VIRTIO_INPUT_HOST(vinput); struct input_event evdev; + struct timeval tval; int rc; - if (gettimeofday(&evdev.time, NULL)) { + if (gettimeofday(&tval, NULL)) { perror("virtio_input_host_handle_status: gettimeofday"); return; } + evdev.input_event_sec = tval.tv_sec; + evdev.input_event_usec = tval.tv_usec; evdev.type = le16_to_cpu(event->type); evdev.code = le16_to_cpu(event->code); evdev.value = le32_to_cpu(event->value); diff --git a/hw/intc/Kconfig b/hw/intc/Kconfig index f4694088a48..78aed93c454 100644 --- a/hw/intc/Kconfig +++ b/hw/intc/Kconfig @@ -62,7 +62,7 @@ config RX_ICU config LOONGSON_LIOINTC bool -config SIFIVE_CLINT +config RISCV_ACLINT bool config SIFIVE_PLIC diff --git a/hw/intc/apic.c b/hw/intc/apic.c index f4f50f974e6..3df11c34d68 100644 --- a/hw/intc/apic.c +++ b/hw/intc/apic.c @@ -17,7 +17,6 @@ * License along with this library; if not, see */ #include "qemu/osdep.h" -#include "cpu.h" #include "qemu/thread.h" #include "hw/i386/apic_internal.h" #include "hw/i386/apic.h" diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index 97dd96dffaa..2a20982066d 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -22,7 +22,6 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "qapi/error.h" -#include "cpu.h" #include "qapi/visitor.h" #include "hw/i386/apic.h" #include 
"hw/i386/apic_internal.h" diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c index 9494185cf46..7d2a13273a4 100644 --- a/hw/intc/arm_gic_kvm.c +++ b/hw/intc/arm_gic_kvm.c @@ -22,8 +22,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/module.h" -#include "cpu.h" -#include "hw/sysbus.h" #include "migration/blocker.h" #include "sysemu/kvm.h" #include "kvm_arm.h" diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c index 66eaa971982..9f5f815db9b 100644 --- a/hw/intc/arm_gicv3.c +++ b/hw/intc/arm_gicv3.c @@ -18,7 +18,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/module.h" -#include "hw/sysbus.h" #include "hw/intc/arm_gicv3.h" #include "gicv3_internal.h" @@ -166,6 +165,16 @@ static void gicv3_redist_update_noirqset(GICv3CPUState *cs) cs->hppi.grp = gicv3_irq_group(cs->gic, cs, cs->hppi.irq); } + if ((cs->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) && cs->gic->lpi_enable && + (cs->hpplpi.prio != 0xff)) { + if (irqbetter(cs, cs->hpplpi.irq, cs->hpplpi.prio)) { + cs->hppi.irq = cs->hpplpi.irq; + cs->hppi.prio = cs->hpplpi.prio; + cs->hppi.grp = cs->hpplpi.grp; + seenbetter = true; + } + } + /* If the best interrupt we just found would preempt whatever * was the previous best interrupt before this update, then * we know it's definitely the best one now. @@ -177,7 +186,9 @@ static void gicv3_redist_update_noirqset(GICv3CPUState *cs) * interrupt has reduced in priority and any other interrupt could * now be the new best one). */ - if (!seenbetter && cs->hppi.prio != 0xff && cs->hppi.irq < GIC_INTERNAL) { + if (!seenbetter && cs->hppi.prio != 0xff && + (cs->hppi.irq < GIC_INTERNAL || + cs->hppi.irq >= GICV3_LPI_INTID_START)) { gicv3_full_update_noirqset(cs->gic); } } @@ -340,9 +351,13 @@ static void gicv3_set_irq(void *opaque, int irq, int level) static void arm_gicv3_post_load(GICv3State *s) { + int i; /* Recalculate our cached idea of the current highest priority * pending interrupt, but don't set IRQ or FIQ lines. */ + for (i = 0; i < s->num_cpu; i++) { + gicv3_redist_update_lpi_only(&s->cpu[i]); + } gicv3_full_update_noirqset(s); /* Repopulate the cache of GICv3CPUState pointers for target CPUs */ gicv3_cache_all_target_cpustates(s); @@ -374,17 +389,7 @@ static void arm_gic_realize(DeviceState *dev, Error **errp) return; } - if (s->nb_redist_regions != 1) { - error_setg(errp, "VGICv3 redist region number(%d) not equal to 1", - s->nb_redist_regions); - return; - } - - gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops); gicv3_init_cpuif(s); } diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c index 58ef65f589e..9884d2e39b9 100644 --- a/hw/intc/arm_gicv3_common.c +++ b/hw/intc/arm_gicv3_common.c @@ -250,21 +250,11 @@ static const VMStateDescription vmstate_gicv3 = { }; void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, - const MemoryRegionOps *ops, Error **errp) + const MemoryRegionOps *ops) { SysBusDevice *sbd = SYS_BUS_DEVICE(s); - int rdist_capacity = 0; int i; - - for (i = 0; i < s->nb_redist_regions; i++) { - rdist_capacity += s->redist_region_count[i]; - } - if (rdist_capacity < s->num_cpu) { - error_setg(errp, "Capacity of the redist regions(%d) " - "is less than number of vcpus(%d)", - rdist_capacity, s->num_cpu); - return; - } + int cpuidx; /* For the GIC, also expose incoming GPIO lines for PPIs for each CPU. 
* GPIO array layout is thus: @@ -293,14 +283,20 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, "gicv3_dist", 0x10000); sysbus_init_mmio(sbd, &s->iomem_dist); - s->iomem_redist = g_new0(MemoryRegion, s->nb_redist_regions); + s->redist_regions = g_new0(GICv3RedistRegion, s->nb_redist_regions); + cpuidx = 0; for (i = 0; i < s->nb_redist_regions; i++) { char *name = g_strdup_printf("gicv3_redist_region[%d]", i); + GICv3RedistRegion *region = &s->redist_regions[i]; - memory_region_init_io(&s->iomem_redist[i], OBJECT(s), - ops ? &ops[1] : NULL, s, name, + region->gic = s; + region->cpuidx = cpuidx; + cpuidx += s->redist_region_count[i]; + + memory_region_init_io(®ion->iomem, OBJECT(s), + ops ? &ops[1] : NULL, region, name, s->redist_region_count[i] * GICV3_REDIST_SIZE); - sysbus_init_mmio(sbd, &s->iomem_redist[i]); + sysbus_init_mmio(sbd, ®ion->iomem); g_free(name); } } @@ -308,7 +304,7 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) { GICv3State *s = ARM_GICV3_COMMON(dev); - int i; + int i, rdist_capacity, cpuidx; /* revision property is actually reserved and currently used only in order * to keep the interface compatible with GICv2 code, avoiding extra @@ -345,12 +341,27 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) return; } + if (s->lpi_enable && !s->dma) { + error_setg(errp, "Redist-ITS: Guest 'sysmem' reference link not set"); + return; + } + + rdist_capacity = 0; + for (i = 0; i < s->nb_redist_regions; i++) { + rdist_capacity += s->redist_region_count[i]; + } + if (rdist_capacity < s->num_cpu) { + error_setg(errp, "Capacity of the redist regions(%d) " + "is less than number of vcpus(%d)", + rdist_capacity, s->num_cpu); + return; + } + s->cpu = g_new0(GICv3CPUState, s->num_cpu); for (i = 0; i < s->num_cpu; i++) { CPUState *cpu = qemu_get_cpu(i); uint64_t cpu_affid; - int last; s->cpu[i].cpu = cpu; s->cpu[i].gic = s; @@ -370,7 +381,6 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) * PLPIS == 0 (physical LPIs not supported) */ cpu_affid = object_property_get_uint(OBJECT(cpu), "mp-affinity", NULL); - last = (i == s->num_cpu - 1); /* The CPU mp-affinity property is in MPIDR register format; squash * the affinity bytes into 32 bits as the GICR_TYPER has them. @@ -379,8 +389,21 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) (cpu_affid & 0xFFFFFF); s->cpu[i].gicr_typer = (cpu_affid << 32) | (1 << 24) | - (i << 8) | - (last << 4); + (i << 8); + + if (s->lpi_enable) { + s->cpu[i].gicr_typer |= GICR_TYPER_PLPIS; + } + } + + /* + * Now go through and set GICR_TYPER.Last for the final + * redistributor in each region. + */ + cpuidx = 0; + for (i = 0; i < s->nb_redist_regions; i++) { + cpuidx += s->redist_region_count[i]; + s->cpu[cpuidx - 1].gicr_typer |= GICR_TYPER_LAST; } } @@ -426,6 +449,7 @@ static void arm_gicv3_common_reset(DeviceState *dev) memset(cs->gicr_ipriorityr, 0, sizeof(cs->gicr_ipriorityr)); cs->hppi.prio = 0xff; + cs->hpplpi.prio = 0xff; /* State in the CPU interface must *not* be reset here, because it * is part of the CPU's reset domain, not the GIC device's. 
@@ -494,9 +518,12 @@ static Property arm_gicv3_common_properties[] = { DEFINE_PROP_UINT32("num-cpu", GICv3State, num_cpu, 1), DEFINE_PROP_UINT32("num-irq", GICv3State, num_irq, 32), DEFINE_PROP_UINT32("revision", GICv3State, revision, 3), + DEFINE_PROP_BOOL("has-lpi", GICv3State, lpi_enable, 0), DEFINE_PROP_BOOL("has-security-extensions", GICv3State, security_extn, 0), DEFINE_PROP_ARRAY("redist-region-count", GICv3State, nb_redist_regions, redist_region_count, qdev_prop_uint32, uint32_t), + DEFINE_PROP_LINK("sysmem", GICv3State, dma, TYPE_MEMORY_REGION, + MemoryRegion *), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c index 43ef1d7a840..85fc369e550 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include "qemu/bitops.h" +#include "qemu/log.h" #include "qemu/main-loop.h" #include "trace.h" #include "gicv3_internal.h" @@ -350,7 +351,8 @@ static uint32_t maintenance_interrupt_state(GICv3CPUState *cs) /* Scan list registers and fill in the U, NP and EOI bits */ eoi_maintenance_interrupt_state(cs, &value); - if (cs->ich_hcr_el2 & (ICH_HCR_EL2_LRENPIE | ICH_HCR_EL2_EOICOUNT_MASK)) { + if ((cs->ich_hcr_el2 & ICH_HCR_EL2_LRENPIE) && + (cs->ich_hcr_el2 & ICH_HCR_EL2_EOICOUNT_MASK)) { value |= ICH_MISR_EL2_LRENP; } @@ -416,8 +418,9 @@ static void gicv3_cpuif_virt_update(GICv3CPUState *cs) } } - if (cs->ich_hcr_el2 & ICH_HCR_EL2_EN) { - maintlevel = maintenance_interrupt_state(cs); + if ((cs->ich_hcr_el2 & ICH_HCR_EL2_EN) && + maintenance_interrupt_state(cs) != 0) { + maintlevel = 1; } trace_gicv3_cpuif_virt_set_irqs(gicv3_redist_affid(cs), fiqlevel, @@ -651,7 +654,7 @@ static uint64_t icv_iar_read(CPUARMState *env, const ARMCPRegInfo *ri) if (thisgrp == grp && icv_hppi_can_preempt(cs, lr)) { intid = ich_lr_vintid(lr); - if (intid < INTID_SECURE) { + if (!gicv3_intid_is_special(intid)) { icv_activate_irq(cs, idx, grp); } else { /* Interrupt goes from Pending to Invalid */ @@ -898,10 +901,12 @@ static void icc_activate_irq(GICv3CPUState *cs, int irq) cs->gicr_iactiver0 = deposit32(cs->gicr_iactiver0, irq, 1, 1); cs->gicr_ipendr0 = deposit32(cs->gicr_ipendr0, irq, 1, 0); gicv3_redist_update(cs); - } else { + } else if (irq < GICV3_LPI_INTID_START) { gicv3_gicd_active_set(cs->gic, irq); gicv3_gicd_pending_clear(cs->gic, irq); gicv3_update(cs->gic, irq, 1); + } else { + gicv3_redist_lpi_pending(cs, irq, 0); } } @@ -993,7 +998,7 @@ static uint64_t icc_iar0_read(CPUARMState *env, const ARMCPRegInfo *ri) intid = icc_hppir0_value(cs, env); } - if (!(intid >= INTID_SECURE && intid <= INTID_SPURIOUS)) { + if (!gicv3_intid_is_special(intid)) { icc_activate_irq(cs, intid); } @@ -1016,7 +1021,7 @@ static uint64_t icc_iar1_read(CPUARMState *env, const ARMCPRegInfo *ri) intid = icc_hppir1_value(cs, env); } - if (!(intid >= INTID_SECURE && intid <= INTID_SPURIOUS)) { + if (!gicv3_intid_is_special(intid)) { icc_activate_irq(cs, intid); } @@ -1226,7 +1231,7 @@ static void icv_dir_write(CPUARMState *env, const ARMCPRegInfo *ri, trace_gicv3_icv_dir_write(gicv3_redist_affid(cs), value); - if (irq >= cs->gic->num_irq) { + if (irq >= GICV3_MAXIRQ) { /* Also catches special interrupt numbers and LPIs */ return; } @@ -1261,8 +1266,7 @@ static void icv_eoir_write(CPUARMState *env, const ARMCPRegInfo *ri, trace_gicv3_icv_eoir_write(ri->crm == 8 ? 
0 : 1, gicv3_redist_affid(cs), value); - if (irq >= cs->gic->num_irq) { - /* Also catches special interrupt numbers and LPIs */ + if (gicv3_intid_is_special(irq)) { return; } @@ -1307,28 +1311,18 @@ static void icc_eoir_write(CPUARMState *env, const ARMCPRegInfo *ri, GICv3CPUState *cs = icc_cs_from_env(env); int irq = value & 0xffffff; int grp; + bool is_eoir0 = ri->crm == 8; - if (icv_access(env, ri->crm == 8 ? HCR_FMO : HCR_IMO)) { + if (icv_access(env, is_eoir0 ? HCR_FMO : HCR_IMO)) { icv_eoir_write(env, ri, value); return; } - trace_gicv3_icc_eoir_write(ri->crm == 8 ? 0 : 1, + trace_gicv3_icc_eoir_write(is_eoir0 ? 0 : 1, gicv3_redist_affid(cs), value); - if (ri->crm == 8) { - /* EOIR0 */ - grp = GICV3_G0; - } else { - /* EOIR1 */ - if (arm_is_secure(env)) { - grp = GICV3_G1; - } else { - grp = GICV3_G1NS; - } - } - - if (irq >= cs->gic->num_irq) { + if ((irq >= cs->gic->num_irq) && + !(cs->gic->lpi_enable && (irq >= GICV3_LPI_INTID_START))) { /* This handles two cases: * 1. If software writes the ID of a spurious interrupt [ie 1020-1023] * to the GICC_EOIR, the GIC ignores that write. @@ -1340,7 +1334,36 @@ static void icc_eoir_write(CPUARMState *env, const ARMCPRegInfo *ri, return; } - if (icc_highest_active_group(cs) != grp) { + grp = icc_highest_active_group(cs); + switch (grp) { + case GICV3_G0: + if (!is_eoir0) { + return; + } + if (!(cs->gic->gicd_ctlr & GICD_CTLR_DS) + && arm_feature(env, ARM_FEATURE_EL3) && !arm_is_secure(env)) { + return; + } + break; + case GICV3_G1: + if (is_eoir0) { + return; + } + if (!arm_is_secure(env)) { + return; + } + break; + case GICV3_G1NS: + if (is_eoir0) { + return; + } + if (!arm_is_el3_or_mon(env) && arm_is_secure(env)) { + return; + } + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: IRQ %d isn't active\n", __func__, irq); return; } diff --git a/hw/intc/arm_gicv3_dist.c b/hw/intc/arm_gicv3_dist.c index b65f56f9035..4164500ea96 100644 --- a/hw/intc/arm_gicv3_dist.c +++ b/hw/intc/arm_gicv3_dist.c @@ -262,8 +262,21 @@ static void gicd_write_irouter(GICv3State *s, MemTxAttrs attrs, int irq, gicv3_update(s, irq, 1); } -static MemTxResult gicd_readb(GICv3State *s, hwaddr offset, - uint64_t *data, MemTxAttrs attrs) +/** + * gicd_readb + * gicd_readw + * gicd_readl + * gicd_readq + * gicd_writeb + * gicd_writew + * gicd_writel + * gicd_writeq + * + * Return %true if the operation succeeded, %false otherwise. + */ + +static bool gicd_readb(GICv3State *s, hwaddr offset, + uint64_t *data, MemTxAttrs attrs) { /* Most GICv3 distributor registers do not support byte accesses. */ switch (offset) { @@ -273,17 +286,17 @@ static MemTxResult gicd_readb(GICv3State *s, hwaddr offset, /* This GIC implementation always has affinity routing enabled, * so these registers are all RAZ/WI. */ - return MEMTX_OK; + return true; case GICD_IPRIORITYR ... GICD_IPRIORITYR + 0x3ff: *data = gicd_read_ipriorityr(s, attrs, offset - GICD_IPRIORITYR); - return MEMTX_OK; + return true; default: - return MEMTX_ERROR; + return false; } } -static MemTxResult gicd_writeb(GICv3State *s, hwaddr offset, - uint64_t value, MemTxAttrs attrs) +static bool gicd_writeb(GICv3State *s, hwaddr offset, + uint64_t value, MemTxAttrs attrs) { /* Most GICv3 distributor registers do not support byte accesses. */ switch (offset) { @@ -293,25 +306,25 @@ static MemTxResult gicd_writeb(GICv3State *s, hwaddr offset, /* This GIC implementation always has affinity routing enabled, * so these registers are all RAZ/WI. */ - return MEMTX_OK; + return true; case GICD_IPRIORITYR ... 
GICD_IPRIORITYR + 0x3ff: { int irq = offset - GICD_IPRIORITYR; if (irq < GIC_INTERNAL || irq >= s->num_irq) { - return MEMTX_OK; + return true; } gicd_write_ipriorityr(s, attrs, irq, value); gicv3_update(s, irq, 1); - return MEMTX_OK; + return true; } default: - return MEMTX_ERROR; + return false; } } -static MemTxResult gicd_readw(GICv3State *s, hwaddr offset, - uint64_t *data, MemTxAttrs attrs) +static bool gicd_readw(GICv3State *s, hwaddr offset, + uint64_t *data, MemTxAttrs attrs) { /* Only GICD_SETSPI_NSR, GICD_CLRSPI_NSR, GICD_SETSPI_SR and GICD_SETSPI_NSR * support 16 bit accesses, and those registers are all part of the @@ -319,11 +332,11 @@ static MemTxResult gicd_readw(GICv3State *s, hwaddr offset, * implement (ie for us GICD_TYPER.MBIS == 0), so for us they are * reserved. */ - return MEMTX_ERROR; + return false; } -static MemTxResult gicd_writew(GICv3State *s, hwaddr offset, - uint64_t value, MemTxAttrs attrs) +static bool gicd_writew(GICv3State *s, hwaddr offset, + uint64_t value, MemTxAttrs attrs) { /* Only GICD_SETSPI_NSR, GICD_CLRSPI_NSR, GICD_SETSPI_SR and GICD_SETSPI_NSR * support 16 bit accesses, and those registers are all part of the @@ -331,11 +344,11 @@ static MemTxResult gicd_writew(GICv3State *s, hwaddr offset, * implement (ie for us GICD_TYPER.MBIS == 0), so for us they are * reserved. */ - return MEMTX_ERROR; + return false; } -static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, - uint64_t *data, MemTxAttrs attrs) +static bool gicd_readl(GICv3State *s, hwaddr offset, + uint64_t *data, MemTxAttrs attrs) { /* Almost all GICv3 distributor registers are 32-bit. * Note that WO registers must return an UNKNOWN value on reads, @@ -363,7 +376,7 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, } else { *data = s->gicd_ctlr; } - return MEMTX_OK; + return true; case GICD_TYPER: { /* For this implementation: @@ -371,7 +384,9 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, * A3V == 1 (non-zero values of Affinity level 3 supported) * IDbits == 0xf (we support 16-bit interrupt identifiers) * DVIS == 0 (Direct virtual LPI injection not supported) - * LPIS == 0 (LPIs not supported) + * LPIS == 1 (LPIs are supported if affinity routing is enabled) + * num_LPIs == 0b00000 (bits [15:11],Number of LPIs as indicated + * by GICD_TYPER.IDbits) * MBIS == 0 (message-based SPIs not supported) * SecurityExtn == 1 if security extns supported * CPUNumber == 0 since for us ARE is always 1 @@ -386,62 +401,63 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, bool sec_extn = !(s->gicd_ctlr & GICD_CTLR_DS); *data = (1 << 25) | (1 << 24) | (sec_extn << 10) | + (s->lpi_enable << GICD_TYPER_LPIS_SHIFT) | (0xf << 19) | itlinesnumber; - return MEMTX_OK; + return true; } case GICD_IIDR: /* We claim to be an ARM r0p0 with a zero ProductID. * This is the same as an r0p0 GIC-500. */ *data = gicv3_iidr(); - return MEMTX_OK; + return true; case GICD_STATUSR: /* RAZ/WI for us (this is an optional register and our implementation * does not track RO/WO/reserved violations to report them to the guest) */ *data = 0; - return MEMTX_OK; + return true; case GICD_IGROUPR ... 
GICD_IGROUPR + 0x7f: { int irq; if (!attrs.secure && !(s->gicd_ctlr & GICD_CTLR_DS)) { *data = 0; - return MEMTX_OK; + return true; } /* RAZ/WI for SGIs, PPIs, unimplemented irqs */ irq = (offset - GICD_IGROUPR) * 8; if (irq < GIC_INTERNAL || irq >= s->num_irq) { *data = 0; - return MEMTX_OK; + return true; } *data = *gic_bmp_ptr32(s->group, irq); - return MEMTX_OK; + return true; } case GICD_ISENABLER ... GICD_ISENABLER + 0x7f: *data = gicd_read_bitmap_reg(s, attrs, s->enabled, NULL, offset - GICD_ISENABLER); - return MEMTX_OK; + return true; case GICD_ICENABLER ... GICD_ICENABLER + 0x7f: *data = gicd_read_bitmap_reg(s, attrs, s->enabled, NULL, offset - GICD_ICENABLER); - return MEMTX_OK; + return true; case GICD_ISPENDR ... GICD_ISPENDR + 0x7f: *data = gicd_read_bitmap_reg(s, attrs, s->pending, mask_nsacr_ge1, offset - GICD_ISPENDR); - return MEMTX_OK; + return true; case GICD_ICPENDR ... GICD_ICPENDR + 0x7f: *data = gicd_read_bitmap_reg(s, attrs, s->pending, mask_nsacr_ge2, offset - GICD_ICPENDR); - return MEMTX_OK; + return true; case GICD_ISACTIVER ... GICD_ISACTIVER + 0x7f: *data = gicd_read_bitmap_reg(s, attrs, s->active, mask_nsacr_ge2, offset - GICD_ISACTIVER); - return MEMTX_OK; + return true; case GICD_ICACTIVER ... GICD_ICACTIVER + 0x7f: *data = gicd_read_bitmap_reg(s, attrs, s->active, mask_nsacr_ge2, offset - GICD_ICACTIVER); - return MEMTX_OK; + return true; case GICD_IPRIORITYR ... GICD_IPRIORITYR + 0x3ff: { int i, irq = offset - GICD_IPRIORITYR; @@ -452,12 +468,12 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, value |= gicd_read_ipriorityr(s, attrs, i); } *data = value; - return MEMTX_OK; + return true; } case GICD_ITARGETSR ... GICD_ITARGETSR + 0x3ff: /* RAZ/WI since affinity routing is always enabled */ *data = 0; - return MEMTX_OK; + return true; case GICD_ICFGR ... GICD_ICFGR + 0xff: { /* Here only the even bits are used; odd bits are RES0 */ @@ -466,7 +482,7 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, if (irq < GIC_INTERNAL || irq >= s->num_irq) { *data = 0; - return MEMTX_OK; + return true; } /* Since our edge_trigger bitmap is one bit per irq, we only need @@ -478,7 +494,7 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, value = extract32(value, (irq & 0x1f) ? 16 : 0, 16); value = half_shuffle32(value) << 1; *data = value; - return MEMTX_OK; + return true; } case GICD_IGRPMODR ... GICD_IGRPMODR + 0xff: { @@ -489,16 +505,16 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, * security enabled and this is an NS access */ *data = 0; - return MEMTX_OK; + return true; } /* RAZ/WI for SGIs, PPIs, unimplemented irqs */ irq = (offset - GICD_IGRPMODR) * 8; if (irq < GIC_INTERNAL || irq >= s->num_irq) { *data = 0; - return MEMTX_OK; + return true; } *data = *gic_bmp_ptr32(s->grpmod, irq); - return MEMTX_OK; + return true; } case GICD_NSACR ... GICD_NSACR + 0xff: { @@ -507,7 +523,7 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, if (irq < GIC_INTERNAL || irq >= s->num_irq) { *data = 0; - return MEMTX_OK; + return true; } if ((s->gicd_ctlr & GICD_CTLR_DS) || !attrs.secure) { @@ -515,17 +531,17 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, * security enabled and this is an NS access */ *data = 0; - return MEMTX_OK; + return true; } *data = s->gicd_nsacr[irq / 16]; - return MEMTX_OK; + return true; } case GICD_CPENDSGIR ... GICD_CPENDSGIR + 0xf: case GICD_SPENDSGIR ... 
GICD_SPENDSGIR + 0xf: /* RAZ/WI since affinity routing is always enabled */ *data = 0; - return MEMTX_OK; + return true; case GICD_IROUTER ... GICD_IROUTER + 0x1fdf: { uint64_t r; @@ -537,26 +553,26 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, } else { *data = (uint32_t)r; } - return MEMTX_OK; + return true; } case GICD_IDREGS ... GICD_IDREGS + 0x2f: /* ID registers */ *data = gicv3_idreg(offset - GICD_IDREGS); - return MEMTX_OK; + return true; case GICD_SGIR: /* WO registers, return unknown value */ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest read from WO register at offset " TARGET_FMT_plx "\n", __func__, offset); *data = 0; - return MEMTX_OK; + return true; default: - return MEMTX_ERROR; + return false; } } -static MemTxResult gicd_writel(GICv3State *s, hwaddr offset, - uint64_t value, MemTxAttrs attrs) +static bool gicd_writel(GICv3State *s, hwaddr offset, + uint64_t value, MemTxAttrs attrs) { /* Almost all GICv3 distributor registers are 32-bit. Note that * RO registers must ignore writes, not abort. @@ -600,68 +616,68 @@ static MemTxResult gicd_writel(GICv3State *s, hwaddr offset, s->gicd_ctlr &= ~(GICD_CTLR_EN_GRP1S | GICD_CTLR_ARE_NS); } gicv3_full_update(s); - return MEMTX_OK; + return true; } case GICD_STATUSR: /* RAZ/WI for our implementation */ - return MEMTX_OK; + return true; case GICD_IGROUPR ... GICD_IGROUPR + 0x7f: { int irq; if (!attrs.secure && !(s->gicd_ctlr & GICD_CTLR_DS)) { - return MEMTX_OK; + return true; } /* RAZ/WI for SGIs, PPIs, unimplemented irqs */ irq = (offset - GICD_IGROUPR) * 8; if (irq < GIC_INTERNAL || irq >= s->num_irq) { - return MEMTX_OK; + return true; } *gic_bmp_ptr32(s->group, irq) = value; gicv3_update(s, irq, 32); - return MEMTX_OK; + return true; } case GICD_ISENABLER ... GICD_ISENABLER + 0x7f: gicd_write_set_bitmap_reg(s, attrs, s->enabled, NULL, offset - GICD_ISENABLER, value); - return MEMTX_OK; + return true; case GICD_ICENABLER ... GICD_ICENABLER + 0x7f: gicd_write_clear_bitmap_reg(s, attrs, s->enabled, NULL, offset - GICD_ICENABLER, value); - return MEMTX_OK; + return true; case GICD_ISPENDR ... GICD_ISPENDR + 0x7f: gicd_write_set_bitmap_reg(s, attrs, s->pending, mask_nsacr_ge1, offset - GICD_ISPENDR, value); - return MEMTX_OK; + return true; case GICD_ICPENDR ... GICD_ICPENDR + 0x7f: gicd_write_clear_bitmap_reg(s, attrs, s->pending, mask_nsacr_ge2, offset - GICD_ICPENDR, value); - return MEMTX_OK; + return true; case GICD_ISACTIVER ... GICD_ISACTIVER + 0x7f: gicd_write_set_bitmap_reg(s, attrs, s->active, NULL, offset - GICD_ISACTIVER, value); - return MEMTX_OK; + return true; case GICD_ICACTIVER ... GICD_ICACTIVER + 0x7f: gicd_write_clear_bitmap_reg(s, attrs, s->active, NULL, offset - GICD_ICACTIVER, value); - return MEMTX_OK; + return true; case GICD_IPRIORITYR ... GICD_IPRIORITYR + 0x3ff: { int i, irq = offset - GICD_IPRIORITYR; if (irq < GIC_INTERNAL || irq + 3 >= s->num_irq) { - return MEMTX_OK; + return true; } for (i = irq; i < irq + 4; i++, value >>= 8) { gicd_write_ipriorityr(s, attrs, i, value); } gicv3_update(s, irq, 4); - return MEMTX_OK; + return true; } case GICD_ITARGETSR ... GICD_ITARGETSR + 0x3ff: /* RAZ/WI since affinity routing is always enabled */ - return MEMTX_OK; + return true; case GICD_ICFGR ... 
GICD_ICFGR + 0xff: { /* Here only the odd bits are used; even bits are RES0 */ @@ -669,7 +685,7 @@ static MemTxResult gicd_writel(GICv3State *s, hwaddr offset, uint32_t mask, oldval; if (irq < GIC_INTERNAL || irq >= s->num_irq) { - return MEMTX_OK; + return true; } /* Since our edge_trigger bitmap is one bit per irq, our input @@ -687,7 +703,7 @@ static MemTxResult gicd_writel(GICv3State *s, hwaddr offset, oldval = *gic_bmp_ptr32(s->edge_trigger, (irq & ~0x1f)); value = (oldval & ~mask) | (value & mask); *gic_bmp_ptr32(s->edge_trigger, irq & ~0x1f) = value; - return MEMTX_OK; + return true; } case GICD_IGRPMODR ... GICD_IGRPMODR + 0xff: { @@ -697,16 +713,16 @@ static MemTxResult gicd_writel(GICv3State *s, hwaddr offset, /* RAZ/WI if security disabled, or if * security enabled and this is an NS access */ - return MEMTX_OK; + return true; } /* RAZ/WI for SGIs, PPIs, unimplemented irqs */ irq = (offset - GICD_IGRPMODR) * 8; if (irq < GIC_INTERNAL || irq >= s->num_irq) { - return MEMTX_OK; + return true; } *gic_bmp_ptr32(s->grpmod, irq) = value; gicv3_update(s, irq, 32); - return MEMTX_OK; + return true; } case GICD_NSACR ... GICD_NSACR + 0xff: { @@ -714,41 +730,41 @@ static MemTxResult gicd_writel(GICv3State *s, hwaddr offset, int irq = (offset - GICD_NSACR) * 4; if (irq < GIC_INTERNAL || irq >= s->num_irq) { - return MEMTX_OK; + return true; } if ((s->gicd_ctlr & GICD_CTLR_DS) || !attrs.secure) { /* RAZ/WI if security disabled, or if * security enabled and this is an NS access */ - return MEMTX_OK; + return true; } s->gicd_nsacr[irq / 16] = value; /* No update required as this only affects access permission checks */ - return MEMTX_OK; + return true; } case GICD_SGIR: /* RES0 if affinity routing is enabled */ - return MEMTX_OK; + return true; case GICD_CPENDSGIR ... GICD_CPENDSGIR + 0xf: case GICD_SPENDSGIR ... GICD_SPENDSGIR + 0xf: /* RAZ/WI since affinity routing is always enabled */ - return MEMTX_OK; + return true; case GICD_IROUTER ... GICD_IROUTER + 0x1fdf: { uint64_t r; int irq = (offset - GICD_IROUTER) / 8; if (irq < GIC_INTERNAL || irq >= s->num_irq) { - return MEMTX_OK; + return true; } /* Write half of the 64-bit register */ r = gicd_read_irouter(s, attrs, irq); r = deposit64(r, (offset & 7) ? 32 : 0, 32, value); gicd_write_irouter(s, attrs, irq, r); - return MEMTX_OK; + return true; } case GICD_IDREGS ... GICD_IDREGS + 0x2f: case GICD_TYPER: @@ -757,14 +773,14 @@ static MemTxResult gicd_writel(GICv3State *s, hwaddr offset, qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest write to RO register at offset " TARGET_FMT_plx "\n", __func__, offset); - return MEMTX_OK; + return true; default: - return MEMTX_ERROR; + return false; } } -static MemTxResult gicd_writell(GICv3State *s, hwaddr offset, - uint64_t value, MemTxAttrs attrs) +static bool gicd_writeq(GICv3State *s, hwaddr offset, + uint64_t value, MemTxAttrs attrs) { /* Our only 64-bit registers are GICD_IROUTER */ int irq; @@ -773,14 +789,14 @@ static MemTxResult gicd_writell(GICv3State *s, hwaddr offset, case GICD_IROUTER ... 
GICD_IROUTER + 0x1fdf: irq = (offset - GICD_IROUTER) / 8; gicd_write_irouter(s, attrs, irq, value); - return MEMTX_OK; + return true; default: - return MEMTX_ERROR; + return false; } } -static MemTxResult gicd_readll(GICv3State *s, hwaddr offset, - uint64_t *data, MemTxAttrs attrs) +static bool gicd_readq(GICv3State *s, hwaddr offset, + uint64_t *data, MemTxAttrs attrs) { /* Our only 64-bit registers are GICD_IROUTER */ int irq; @@ -789,9 +805,9 @@ static MemTxResult gicd_readll(GICv3State *s, hwaddr offset, case GICD_IROUTER ... GICD_IROUTER + 0x1fdf: irq = (offset - GICD_IROUTER) / 8; *data = gicd_read_irouter(s, attrs, irq); - return MEMTX_OK; + return true; default: - return MEMTX_ERROR; + return false; } } @@ -799,7 +815,7 @@ MemTxResult gicv3_dist_read(void *opaque, hwaddr offset, uint64_t *data, unsigned size, MemTxAttrs attrs) { GICv3State *s = (GICv3State *)opaque; - MemTxResult r; + bool r; switch (size) { case 1: @@ -812,14 +828,14 @@ MemTxResult gicv3_dist_read(void *opaque, hwaddr offset, uint64_t *data, r = gicd_readl(s, offset, data, attrs); break; case 8: - r = gicd_readll(s, offset, data, attrs); + r = gicd_readq(s, offset, data, attrs); break; default: - r = MEMTX_ERROR; + r = false; break; } - if (r == MEMTX_ERROR) { + if (!r) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest read at offset " TARGET_FMT_plx "size %u\n", __func__, offset, size); @@ -829,19 +845,18 @@ MemTxResult gicv3_dist_read(void *opaque, hwaddr offset, uint64_t *data, * trigger the guest-error logging but don't return it to * the caller, or we'll cause a spurious guest data abort. */ - r = MEMTX_OK; *data = 0; } else { trace_gicv3_dist_read(offset, *data, size, attrs.secure); } - return r; + return MEMTX_OK; } MemTxResult gicv3_dist_write(void *opaque, hwaddr offset, uint64_t data, unsigned size, MemTxAttrs attrs) { GICv3State *s = (GICv3State *)opaque; - MemTxResult r; + bool r; switch (size) { case 1: @@ -854,14 +869,14 @@ MemTxResult gicv3_dist_write(void *opaque, hwaddr offset, uint64_t data, r = gicd_writel(s, offset, data, attrs); break; case 8: - r = gicd_writell(s, offset, data, attrs); + r = gicd_writeq(s, offset, data, attrs); break; default: - r = MEMTX_ERROR; + r = false; break; } - if (r == MEMTX_ERROR) { + if (!r) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest write at offset " TARGET_FMT_plx "size %u\n", __func__, offset, size); @@ -871,11 +886,10 @@ MemTxResult gicv3_dist_write(void *opaque, hwaddr offset, uint64_t data, * trigger the guest-error logging but don't return it to * the caller, or we'll cause a spurious guest data abort. */ - r = MEMTX_OK; } else { trace_gicv3_dist_write(offset, data, size, attrs.secure); } - return r; + return MEMTX_OK; } void gicv3_dist_set_irq(GICv3State *s, int irq, int level) diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c new file mode 100644 index 00000000000..c929a9cb5c3 --- /dev/null +++ b/hw/intc/arm_gicv3_its.c @@ -0,0 +1,1323 @@ +/* + * ITS emulation for a GICv3-based system + * + * Copyright Linaro.org 2021 + * + * Authors: + * Shashi Mallela + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at your + * option) any later version. See the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "hw/qdev-properties.h" +#include "hw/intc/arm_gicv3_its_common.h" +#include "gicv3_internal.h" +#include "qom/object.h" +#include "qapi/error.h" + +typedef struct GICv3ITSClass GICv3ITSClass; +/* This is reusing the GICv3ITSState typedef from ARM_GICV3_ITS_COMMON */ +DECLARE_OBJ_CHECKERS(GICv3ITSState, GICv3ITSClass, + ARM_GICV3_ITS, TYPE_ARM_GICV3_ITS) + +struct GICv3ITSClass { + GICv3ITSCommonClass parent_class; + void (*parent_reset)(DeviceState *dev); +}; + +/* + * This is an internal enum used to distinguish between LPI triggered + * via command queue and LPI triggered via gits_translater write. + */ +typedef enum ItsCmdType { + NONE = 0, /* internal indication for GITS_TRANSLATER write */ + CLEAR = 1, + DISCARD = 2, + INTERRUPT = 3, +} ItsCmdType; + +typedef struct { + uint32_t iteh; + uint64_t itel; +} IteEntry; + +static uint64_t baser_base_addr(uint64_t value, uint32_t page_sz) +{ + uint64_t result = 0; + + switch (page_sz) { + case GITS_PAGE_SIZE_4K: + case GITS_PAGE_SIZE_16K: + result = FIELD_EX64(value, GITS_BASER, PHYADDR) << 12; + break; + + case GITS_PAGE_SIZE_64K: + result = FIELD_EX64(value, GITS_BASER, PHYADDRL_64K) << 16; + result |= FIELD_EX64(value, GITS_BASER, PHYADDRH_64K) << 48; + break; + + default: + break; + } + return result; +} + +static bool get_cte(GICv3ITSState *s, uint16_t icid, uint64_t *cte, + MemTxResult *res) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint64_t l2t_addr; + uint64_t value; + bool valid_l2t; + uint32_t l2t_id; + uint32_t max_l2_entries; + + if (s->ct.indirect) { + l2t_id = icid / (s->ct.page_sz / L1TABLE_ENTRY_SIZE); + + value = address_space_ldq_le(as, + s->ct.base_addr + + (l2t_id * L1TABLE_ENTRY_SIZE), + MEMTXATTRS_UNSPECIFIED, res); + + if (*res == MEMTX_OK) { + valid_l2t = (value & L2_TABLE_VALID_MASK) != 0; + + if (valid_l2t) { + max_l2_entries = s->ct.page_sz / s->ct.entry_sz; + + l2t_addr = value & ((1ULL << 51) - 1); + + *cte = address_space_ldq_le(as, l2t_addr + + ((icid % max_l2_entries) * GITS_CTE_SIZE), + MEMTXATTRS_UNSPECIFIED, res); + } + } + } else { + /* Flat level table */ + *cte = address_space_ldq_le(as, s->ct.base_addr + + (icid * GITS_CTE_SIZE), + MEMTXATTRS_UNSPECIFIED, res); + } + + return (*cte & TABLE_ENTRY_VALID_MASK) != 0; +} + +static bool update_ite(GICv3ITSState *s, uint32_t eventid, uint64_t dte, + IteEntry ite) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint64_t itt_addr; + MemTxResult res = MEMTX_OK; + + itt_addr = (dte & GITS_DTE_ITTADDR_MASK) >> GITS_DTE_ITTADDR_SHIFT; + itt_addr <<= ITTADDR_SHIFT; /* 256 byte aligned */ + + address_space_stq_le(as, itt_addr + (eventid * (sizeof(uint64_t) + + sizeof(uint32_t))), ite.itel, MEMTXATTRS_UNSPECIFIED, + &res); + + if (res == MEMTX_OK) { + address_space_stl_le(as, itt_addr + (eventid * (sizeof(uint64_t) + + sizeof(uint32_t))) + sizeof(uint32_t), ite.iteh, + MEMTXATTRS_UNSPECIFIED, &res); + } + if (res != MEMTX_OK) { + return false; + } else { + return true; + } +} + +static bool get_ite(GICv3ITSState *s, uint32_t eventid, uint64_t dte, + uint16_t *icid, uint32_t *pIntid, MemTxResult *res) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint64_t itt_addr; + bool status = false; + IteEntry ite = {}; + + itt_addr = (dte & GITS_DTE_ITTADDR_MASK) >> GITS_DTE_ITTADDR_SHIFT; + itt_addr <<= ITTADDR_SHIFT; /* 256 byte aligned */ + + ite.itel = address_space_ldq_le(as, itt_addr + + (eventid * (sizeof(uint64_t) + + sizeof(uint32_t))), MEMTXATTRS_UNSPECIFIED, + res); + + if (*res == MEMTX_OK) { + 
ite.iteh = address_space_ldl_le(as, itt_addr + + (eventid * (sizeof(uint64_t) + + sizeof(uint32_t))) + sizeof(uint32_t), + MEMTXATTRS_UNSPECIFIED, res); + + if (*res == MEMTX_OK) { + if (ite.itel & TABLE_ENTRY_VALID_MASK) { + if ((ite.itel >> ITE_ENTRY_INTTYPE_SHIFT) & + GITS_TYPE_PHYSICAL) { + *pIntid = (ite.itel & ITE_ENTRY_INTID_MASK) >> + ITE_ENTRY_INTID_SHIFT; + *icid = ite.iteh & ITE_ENTRY_ICID_MASK; + status = true; + } + } + } + } + return status; +} + +static uint64_t get_dte(GICv3ITSState *s, uint32_t devid, MemTxResult *res) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint64_t l2t_addr; + uint64_t value; + bool valid_l2t; + uint32_t l2t_id; + uint32_t max_l2_entries; + + if (s->dt.indirect) { + l2t_id = devid / (s->dt.page_sz / L1TABLE_ENTRY_SIZE); + + value = address_space_ldq_le(as, + s->dt.base_addr + + (l2t_id * L1TABLE_ENTRY_SIZE), + MEMTXATTRS_UNSPECIFIED, res); + + if (*res == MEMTX_OK) { + valid_l2t = (value & L2_TABLE_VALID_MASK) != 0; + + if (valid_l2t) { + max_l2_entries = s->dt.page_sz / s->dt.entry_sz; + + l2t_addr = value & ((1ULL << 51) - 1); + + value = address_space_ldq_le(as, l2t_addr + + ((devid % max_l2_entries) * GITS_DTE_SIZE), + MEMTXATTRS_UNSPECIFIED, res); + } + } + } else { + /* Flat level table */ + value = address_space_ldq_le(as, s->dt.base_addr + + (devid * GITS_DTE_SIZE), + MEMTXATTRS_UNSPECIFIED, res); + } + + return value; +} + +/* + * This function handles the processing of following commands based on + * the ItsCmdType parameter passed:- + * 1. triggering of lpi interrupt translation via ITS INT command + * 2. triggering of lpi interrupt translation via gits_translater register + * 3. handling of ITS CLEAR command + * 4. handling of ITS DISCARD command + */ +static bool process_its_cmd(GICv3ITSState *s, uint64_t value, uint32_t offset, + ItsCmdType cmd) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint32_t devid, eventid; + MemTxResult res = MEMTX_OK; + bool dte_valid; + uint64_t dte = 0; + uint32_t max_eventid; + uint16_t icid = 0; + uint32_t pIntid = 0; + bool ite_valid = false; + uint64_t cte = 0; + bool cte_valid = false; + bool result = false; + uint64_t rdbase; + + if (cmd == NONE) { + devid = offset; + } else { + devid = ((value & DEVID_MASK) >> DEVID_SHIFT); + + offset += NUM_BYTES_IN_DW; + value = address_space_ldq_le(as, s->cq.base_addr + offset, + MEMTXATTRS_UNSPECIFIED, &res); + } + + if (res != MEMTX_OK) { + return result; + } + + eventid = (value & EVENTID_MASK); + + dte = get_dte(s, devid, &res); + + if (res != MEMTX_OK) { + return result; + } + dte_valid = dte & TABLE_ENTRY_VALID_MASK; + + if (dte_valid) { + max_eventid = (1UL << (((dte >> 1U) & SIZE_MASK) + 1)); + + ite_valid = get_ite(s, eventid, dte, &icid, &pIntid, &res); + + if (res != MEMTX_OK) { + return result; + } + + if (ite_valid) { + cte_valid = get_cte(s, icid, &cte, &res); + } + + if (res != MEMTX_OK) { + return result; + } + } + + if ((devid > s->dt.maxids.max_devids) || !dte_valid || !ite_valid || + !cte_valid || (eventid > max_eventid)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid command attributes " + "devid %d or eventid %d or invalid dte %d or" + "invalid cte %d or invalid ite %d\n", + __func__, devid, eventid, dte_valid, cte_valid, + ite_valid); + /* + * in this implementation, in case of error + * we ignore this command and move onto the next + * command in the queue + */ + } else { + /* + * Current implementation only supports rdbase == procnum + * Hence rdbase physical address is ignored + */ + rdbase = (cte & GITS_CTE_RDBASE_PROCNUM_MASK) >> 1U; + 
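/*
 * For illustration, with the collection table entry encoding used by
 * update_cte() below (valid flag in bit 0, redistributor/processor
 * number starting at bit 1), packing and unpacking a CTE looks like:
 *
 *   cte     = 1ULL | ((uint64_t)procnum << 1);             /* pack       */
 *   valid   = cte & TABLE_ENTRY_VALID_MASK;                /* bit 0      */
 *   procnum = (cte & GITS_CTE_RDBASE_PROCNUM_MASK) >> 1;   /* bits 1..16 */
 *
 * e.g. a CTE value of 0x7 routes the collection to redistributor 3.
 */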
+ if (rdbase > s->gicv3->num_cpu) { + return result; + } + + if ((cmd == CLEAR) || (cmd == DISCARD)) { + gicv3_redist_process_lpi(&s->gicv3->cpu[rdbase], pIntid, 0); + } else { + gicv3_redist_process_lpi(&s->gicv3->cpu[rdbase], pIntid, 1); + } + + if (cmd == DISCARD) { + IteEntry ite = {}; + /* remove mapping from interrupt translation table */ + result = update_ite(s, eventid, dte, ite); + } + } + + return result; +} + +static bool process_mapti(GICv3ITSState *s, uint64_t value, uint32_t offset, + bool ignore_pInt) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint32_t devid, eventid; + uint32_t pIntid = 0; + uint32_t max_eventid, max_Intid; + bool dte_valid; + MemTxResult res = MEMTX_OK; + uint16_t icid = 0; + uint64_t dte = 0; + IteEntry ite; + uint32_t int_spurious = INTID_SPURIOUS; + bool result = false; + + devid = ((value & DEVID_MASK) >> DEVID_SHIFT); + offset += NUM_BYTES_IN_DW; + value = address_space_ldq_le(as, s->cq.base_addr + offset, + MEMTXATTRS_UNSPECIFIED, &res); + + if (res != MEMTX_OK) { + return result; + } + + eventid = (value & EVENTID_MASK); + + if (!ignore_pInt) { + pIntid = ((value & pINTID_MASK) >> pINTID_SHIFT); + } + + offset += NUM_BYTES_IN_DW; + value = address_space_ldq_le(as, s->cq.base_addr + offset, + MEMTXATTRS_UNSPECIFIED, &res); + + if (res != MEMTX_OK) { + return result; + } + + icid = value & ICID_MASK; + + dte = get_dte(s, devid, &res); + + if (res != MEMTX_OK) { + return result; + } + dte_valid = dte & TABLE_ENTRY_VALID_MASK; + + max_eventid = (1UL << (((dte >> 1U) & SIZE_MASK) + 1)); + + if (!ignore_pInt) { + max_Intid = (1ULL << (GICD_TYPER_IDBITS + 1)) - 1; + } + + if ((devid > s->dt.maxids.max_devids) || (icid > s->ct.maxids.max_collids) + || !dte_valid || (eventid > max_eventid) || + (!ignore_pInt && (((pIntid < GICV3_LPI_INTID_START) || + (pIntid > max_Intid)) && (pIntid != INTID_SPURIOUS)))) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid command attributes " + "devid %d or icid %d or eventid %d or pIntid %d or" + "unmapped dte %d\n", __func__, devid, icid, eventid, + pIntid, dte_valid); + /* + * in this implementation, in case of error + * we ignore this command and move onto the next + * command in the queue + */ + } else { + /* add ite entry to interrupt translation table */ + ite.itel = (dte_valid & TABLE_ENTRY_VALID_MASK) | + (GITS_TYPE_PHYSICAL << ITE_ENTRY_INTTYPE_SHIFT); + + if (ignore_pInt) { + ite.itel |= (eventid << ITE_ENTRY_INTID_SHIFT); + } else { + ite.itel |= (pIntid << ITE_ENTRY_INTID_SHIFT); + } + ite.itel |= (int_spurious << ITE_ENTRY_INTSP_SHIFT); + ite.iteh = icid; + + result = update_ite(s, eventid, dte, ite); + } + + return result; +} + +static bool update_cte(GICv3ITSState *s, uint16_t icid, bool valid, + uint64_t rdbase) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint64_t value; + uint64_t l2t_addr; + bool valid_l2t; + uint32_t l2t_id; + uint32_t max_l2_entries; + uint64_t cte = 0; + MemTxResult res = MEMTX_OK; + + if (!s->ct.valid) { + return true; + } + + if (valid) { + /* add mapping entry to collection table */ + cte = (valid & TABLE_ENTRY_VALID_MASK) | (rdbase << 1ULL); + } + + /* + * The specification defines the format of level 1 entries of a + * 2-level table, but the format of level 2 entries and the format + * of flat-mapped tables is IMPDEF. 
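 *
 * As consumed by the indirect walk below, a level 1 entry is simply the
 * physical address of the level 2 page with a valid flag in bit 63 -- a
 * minimal sketch of what this implementation expects, with 4KB pages and
 * an 8-byte entry size assumed for the worked numbers:
 *
 *   valid    = l1_entry & L2_TABLE_VALID_MASK;             /* bit 63           */
 *   l2_base  = l1_entry & ((1ULL << 51) - 1);              /* L2 page address  */
 *   l1_index = icid / (page_sz / L1TABLE_ENTRY_SIZE);      /* 1000 / 512 = 1   */
 *   l2_index = icid % (page_sz / entry_sz);                /* 1000 % 512 = 488 */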
+ */ + if (s->ct.indirect) { + l2t_id = icid / (s->ct.page_sz / L1TABLE_ENTRY_SIZE); + + value = address_space_ldq_le(as, + s->ct.base_addr + + (l2t_id * L1TABLE_ENTRY_SIZE), + MEMTXATTRS_UNSPECIFIED, &res); + + if (res != MEMTX_OK) { + return false; + } + + valid_l2t = (value & L2_TABLE_VALID_MASK) != 0; + + if (valid_l2t) { + max_l2_entries = s->ct.page_sz / s->ct.entry_sz; + + l2t_addr = value & ((1ULL << 51) - 1); + + address_space_stq_le(as, l2t_addr + + ((icid % max_l2_entries) * GITS_CTE_SIZE), + cte, MEMTXATTRS_UNSPECIFIED, &res); + } + } else { + /* Flat level table */ + address_space_stq_le(as, s->ct.base_addr + (icid * GITS_CTE_SIZE), + cte, MEMTXATTRS_UNSPECIFIED, &res); + } + if (res != MEMTX_OK) { + return false; + } else { + return true; + } +} + +static bool process_mapc(GICv3ITSState *s, uint32_t offset) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint16_t icid; + uint64_t rdbase; + bool valid; + MemTxResult res = MEMTX_OK; + bool result = false; + uint64_t value; + + offset += NUM_BYTES_IN_DW; + offset += NUM_BYTES_IN_DW; + + value = address_space_ldq_le(as, s->cq.base_addr + offset, + MEMTXATTRS_UNSPECIFIED, &res); + + if (res != MEMTX_OK) { + return result; + } + + icid = value & ICID_MASK; + + rdbase = (value & R_MAPC_RDBASE_MASK) >> R_MAPC_RDBASE_SHIFT; + rdbase &= RDBASE_PROCNUM_MASK; + + valid = (value & CMD_FIELD_VALID_MASK); + + if ((icid > s->ct.maxids.max_collids) || (rdbase > s->gicv3->num_cpu)) { + qemu_log_mask(LOG_GUEST_ERROR, + "ITS MAPC: invalid collection table attributes " + "icid %d rdbase %" PRIu64 "\n", icid, rdbase); + /* + * in this implementation, in case of error + * we ignore this command and move onto the next + * command in the queue + */ + } else { + result = update_cte(s, icid, valid, rdbase); + } + + return result; +} + +static bool update_dte(GICv3ITSState *s, uint32_t devid, bool valid, + uint8_t size, uint64_t itt_addr) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint64_t value; + uint64_t l2t_addr; + bool valid_l2t; + uint32_t l2t_id; + uint32_t max_l2_entries; + uint64_t dte = 0; + MemTxResult res = MEMTX_OK; + + if (s->dt.valid) { + if (valid) { + /* add mapping entry to device table */ + dte = (valid & TABLE_ENTRY_VALID_MASK) | + ((size & SIZE_MASK) << 1U) | + (itt_addr << GITS_DTE_ITTADDR_SHIFT); + } + } else { + return true; + } + + /* + * The specification defines the format of level 1 entries of a + * 2-level table, but the format of level 2 entries and the format + * of flat-mapped tables is IMPDEF. 
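 *
 * For reference, the device table entry packed just above folds the valid
 * flag, the ITT size field and the ITT address into one 64-bit word (a
 * minimal sketch of this implementation's encoding):
 *
 *   dte      = 1ULL | ((uint64_t)size << 1) | (itt_addr << GITS_DTE_ITTADDR_SHIFT);
 *   valid    = dte & TABLE_ENTRY_VALID_MASK;                 /* bit 0     */
 *   size     = (dte >> 1) & SIZE_MASK;                       /* bits 1..5 */
 *   itt_addr = (dte & GITS_DTE_ITTADDR_MASK) >> GITS_DTE_ITTADDR_SHIFT;
 *
 * and the command handlers derive the EventID limit from it as
 * max_eventid = 1UL << (size + 1).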
+ */ + if (s->dt.indirect) { + l2t_id = devid / (s->dt.page_sz / L1TABLE_ENTRY_SIZE); + + value = address_space_ldq_le(as, + s->dt.base_addr + + (l2t_id * L1TABLE_ENTRY_SIZE), + MEMTXATTRS_UNSPECIFIED, &res); + + if (res != MEMTX_OK) { + return false; + } + + valid_l2t = (value & L2_TABLE_VALID_MASK) != 0; + + if (valid_l2t) { + max_l2_entries = s->dt.page_sz / s->dt.entry_sz; + + l2t_addr = value & ((1ULL << 51) - 1); + + address_space_stq_le(as, l2t_addr + + ((devid % max_l2_entries) * GITS_DTE_SIZE), + dte, MEMTXATTRS_UNSPECIFIED, &res); + } + } else { + /* Flat level table */ + address_space_stq_le(as, s->dt.base_addr + (devid * GITS_DTE_SIZE), + dte, MEMTXATTRS_UNSPECIFIED, &res); + } + if (res != MEMTX_OK) { + return false; + } else { + return true; + } +} + +static bool process_mapd(GICv3ITSState *s, uint64_t value, uint32_t offset) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint32_t devid; + uint8_t size; + uint64_t itt_addr; + bool valid; + MemTxResult res = MEMTX_OK; + bool result = false; + + devid = ((value & DEVID_MASK) >> DEVID_SHIFT); + + offset += NUM_BYTES_IN_DW; + value = address_space_ldq_le(as, s->cq.base_addr + offset, + MEMTXATTRS_UNSPECIFIED, &res); + + if (res != MEMTX_OK) { + return result; + } + + size = (value & SIZE_MASK); + + offset += NUM_BYTES_IN_DW; + value = address_space_ldq_le(as, s->cq.base_addr + offset, + MEMTXATTRS_UNSPECIFIED, &res); + + if (res != MEMTX_OK) { + return result; + } + + itt_addr = (value & ITTADDR_MASK) >> ITTADDR_SHIFT; + + valid = (value & CMD_FIELD_VALID_MASK); + + if ((devid > s->dt.maxids.max_devids) || + (size > FIELD_EX64(s->typer, GITS_TYPER, IDBITS))) { + qemu_log_mask(LOG_GUEST_ERROR, + "ITS MAPD: invalid device table attributes " + "devid %d or size %d\n", devid, size); + /* + * in this implementation, in case of error + * we ignore this command and move onto the next + * command in the queue + */ + } else { + result = update_dte(s, devid, valid, size, itt_addr); + } + + return result; +} + +/* + * Current implementation blocks until all + * commands are processed + */ +static void process_cmdq(GICv3ITSState *s) +{ + uint32_t wr_offset = 0; + uint32_t rd_offset = 0; + uint32_t cq_offset = 0; + uint64_t data; + AddressSpace *as = &s->gicv3->dma_as; + MemTxResult res = MEMTX_OK; + bool result = true; + uint8_t cmd; + int i; + + if (!(s->ctlr & ITS_CTLR_ENABLED)) { + return; + } + + wr_offset = FIELD_EX64(s->cwriter, GITS_CWRITER, OFFSET); + + if (wr_offset > s->cq.max_entries) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid write offset " + "%d\n", __func__, wr_offset); + return; + } + + rd_offset = FIELD_EX64(s->creadr, GITS_CREADR, OFFSET); + + if (rd_offset > s->cq.max_entries) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid read offset " + "%d\n", __func__, rd_offset); + return; + } + + while (wr_offset != rd_offset) { + cq_offset = (rd_offset * GITS_CMDQ_ENTRY_SIZE); + data = address_space_ldq_le(as, s->cq.base_addr + cq_offset, + MEMTXATTRS_UNSPECIFIED, &res); + if (res != MEMTX_OK) { + result = false; + } + cmd = (data & CMD_MASK); + + switch (cmd) { + case GITS_CMD_INT: + res = process_its_cmd(s, data, cq_offset, INTERRUPT); + break; + case GITS_CMD_CLEAR: + res = process_its_cmd(s, data, cq_offset, CLEAR); + break; + case GITS_CMD_SYNC: + /* + * Current implementation makes a blocking synchronous call + * for every command issued earlier, hence the internal state + * is already consistent by the time SYNC command is executed. + * Hence no further processing is required for SYNC command. 
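 *
 * For reference, each command slot in the queue is GITS_CMDQ_ENTRY_SIZE
 * (32) bytes, i.e. four doublewords, and the handlers above fetch the
 * remaining doublewords of a command in NUM_BYTES_IN_DW steps from the
 * same slot. A worked sizing example, assuming GITS_CBASER.Size = 0
 * (a single 4KB page):
 *
 *   num_pages   = FIELD_EX64(cbaser, GITS_CBASER, SIZE) + 1;  /* 1           */
 *   max_entries = (num_pages * GITS_PAGE_SIZE_4K) /
 *                 GITS_CMDQ_ENTRY_SIZE;                       /* 128 slots   */
 *   cq_offset   = rd_offset * GITS_CMDQ_ENTRY_SIZE;           /* byte offset */
 *
 * with rd_offset wrapping back to 0 once it reaches max_entries.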
+ */ + break; + case GITS_CMD_MAPD: + result = process_mapd(s, data, cq_offset); + break; + case GITS_CMD_MAPC: + result = process_mapc(s, cq_offset); + break; + case GITS_CMD_MAPTI: + result = process_mapti(s, data, cq_offset, false); + break; + case GITS_CMD_MAPI: + result = process_mapti(s, data, cq_offset, true); + break; + case GITS_CMD_DISCARD: + result = process_its_cmd(s, data, cq_offset, DISCARD); + break; + case GITS_CMD_INV: + case GITS_CMD_INVALL: + /* + * Current implementation doesn't cache any ITS tables, + * but the calculated lpi priority information. We only + * need to trigger lpi priority re-calculation to be in + * sync with LPI config table or pending table changes. + */ + for (i = 0; i < s->gicv3->num_cpu; i++) { + gicv3_redist_update_lpi(&s->gicv3->cpu[i]); + } + break; + default: + break; + } + if (result) { + rd_offset++; + rd_offset %= s->cq.max_entries; + s->creadr = FIELD_DP64(s->creadr, GITS_CREADR, OFFSET, rd_offset); + } else { + /* + * in this implementation, in case of dma read/write error + * we stall the command processing + */ + s->creadr = FIELD_DP64(s->creadr, GITS_CREADR, STALLED, 1); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: %x cmd processing failed\n", __func__, cmd); + break; + } + } +} + +/* + * This function extracts the ITS Device and Collection table specific + * parameters (like base_addr, size etc) from GITS_BASER register. + * It is called during ITS enable and also during post_load migration + */ +static void extract_table_params(GICv3ITSState *s) +{ + uint16_t num_pages = 0; + uint8_t page_sz_type; + uint8_t type; + uint32_t page_sz = 0; + uint64_t value; + + for (int i = 0; i < 8; i++) { + value = s->baser[i]; + + if (!value) { + continue; + } + + page_sz_type = FIELD_EX64(value, GITS_BASER, PAGESIZE); + + switch (page_sz_type) { + case 0: + page_sz = GITS_PAGE_SIZE_4K; + break; + + case 1: + page_sz = GITS_PAGE_SIZE_16K; + break; + + case 2: + case 3: + page_sz = GITS_PAGE_SIZE_64K; + break; + + default: + g_assert_not_reached(); + } + + num_pages = FIELD_EX64(value, GITS_BASER, SIZE) + 1; + + type = FIELD_EX64(value, GITS_BASER, TYPE); + + switch (type) { + + case GITS_BASER_TYPE_DEVICE: + memset(&s->dt, 0 , sizeof(s->dt)); + s->dt.valid = FIELD_EX64(value, GITS_BASER, VALID); + + if (!s->dt.valid) { + return; + } + + s->dt.page_sz = page_sz; + s->dt.indirect = FIELD_EX64(value, GITS_BASER, INDIRECT); + s->dt.entry_sz = FIELD_EX64(value, GITS_BASER, ENTRYSIZE); + + if (!s->dt.indirect) { + s->dt.max_entries = (num_pages * page_sz) / s->dt.entry_sz; + } else { + s->dt.max_entries = (((num_pages * page_sz) / + L1TABLE_ENTRY_SIZE) * + (page_sz / s->dt.entry_sz)); + } + + s->dt.maxids.max_devids = (1UL << (FIELD_EX64(s->typer, GITS_TYPER, + DEVBITS) + 1)); + + s->dt.base_addr = baser_base_addr(value, page_sz); + + break; + + case GITS_BASER_TYPE_COLLECTION: + memset(&s->ct, 0 , sizeof(s->ct)); + s->ct.valid = FIELD_EX64(value, GITS_BASER, VALID); + + /* + * GITS_TYPER.HCC is 0 for this implementation + * hence writes are discarded if ct.valid is 0 + */ + if (!s->ct.valid) { + return; + } + + s->ct.page_sz = page_sz; + s->ct.indirect = FIELD_EX64(value, GITS_BASER, INDIRECT); + s->ct.entry_sz = FIELD_EX64(value, GITS_BASER, ENTRYSIZE); + + if (!s->ct.indirect) { + s->ct.max_entries = (num_pages * page_sz) / s->ct.entry_sz; + } else { + s->ct.max_entries = (((num_pages * page_sz) / + L1TABLE_ENTRY_SIZE) * + (page_sz / s->ct.entry_sz)); + } + + if (FIELD_EX64(s->typer, GITS_TYPER, CIL)) { + s->ct.maxids.max_collids = (1UL << 
(FIELD_EX64(s->typer, + GITS_TYPER, CIDBITS) + 1)); + } else { + /* 16-bit CollectionId supported when CIL == 0 */ + s->ct.maxids.max_collids = (1UL << 16); + } + + s->ct.base_addr = baser_base_addr(value, page_sz); + + break; + + default: + break; + } + } +} + +static void extract_cmdq_params(GICv3ITSState *s) +{ + uint16_t num_pages = 0; + uint64_t value = s->cbaser; + + num_pages = FIELD_EX64(value, GITS_CBASER, SIZE) + 1; + + memset(&s->cq, 0 , sizeof(s->cq)); + s->cq.valid = FIELD_EX64(value, GITS_CBASER, VALID); + + if (s->cq.valid) { + s->cq.max_entries = (num_pages * GITS_PAGE_SIZE_4K) / + GITS_CMDQ_ENTRY_SIZE; + s->cq.base_addr = FIELD_EX64(value, GITS_CBASER, PHYADDR); + s->cq.base_addr <<= R_GITS_CBASER_PHYADDR_SHIFT; + } +} + +static MemTxResult gicv3_its_translation_write(void *opaque, hwaddr offset, + uint64_t data, unsigned size, + MemTxAttrs attrs) +{ + GICv3ITSState *s = (GICv3ITSState *)opaque; + bool result = true; + uint32_t devid = 0; + + switch (offset) { + case GITS_TRANSLATER: + if (s->ctlr & ITS_CTLR_ENABLED) { + devid = attrs.requester_id; + result = process_its_cmd(s, data, devid, NONE); + } + break; + default: + break; + } + + if (result) { + return MEMTX_OK; + } else { + return MEMTX_ERROR; + } +} + +static bool its_writel(GICv3ITSState *s, hwaddr offset, + uint64_t value, MemTxAttrs attrs) +{ + bool result = true; + int index; + + switch (offset) { + case GITS_CTLR: + if (value & R_GITS_CTLR_ENABLED_MASK) { + s->ctlr |= ITS_CTLR_ENABLED; + extract_table_params(s); + extract_cmdq_params(s); + s->creadr = 0; + process_cmdq(s); + } else { + s->ctlr &= ~ITS_CTLR_ENABLED; + } + break; + case GITS_CBASER: + /* + * IMPDEF choice:- GITS_CBASER register becomes RO if ITS is + * already enabled + */ + if (!(s->ctlr & ITS_CTLR_ENABLED)) { + s->cbaser = deposit64(s->cbaser, 0, 32, value); + s->creadr = 0; + s->cwriter = s->creadr; + } + break; + case GITS_CBASER + 4: + /* + * IMPDEF choice:- GITS_CBASER register becomes RO if ITS is + * already enabled + */ + if (!(s->ctlr & ITS_CTLR_ENABLED)) { + s->cbaser = deposit64(s->cbaser, 32, 32, value); + s->creadr = 0; + s->cwriter = s->creadr; + } + break; + case GITS_CWRITER: + s->cwriter = deposit64(s->cwriter, 0, 32, + (value & ~R_GITS_CWRITER_RETRY_MASK)); + if (s->cwriter != s->creadr) { + process_cmdq(s); + } + break; + case GITS_CWRITER + 4: + s->cwriter = deposit64(s->cwriter, 32, 32, value); + break; + case GITS_CREADR: + if (s->gicv3->gicd_ctlr & GICD_CTLR_DS) { + s->creadr = deposit64(s->creadr, 0, 32, + (value & ~R_GITS_CREADR_STALLED_MASK)); + } else { + /* RO register, ignore the write */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write to RO register at offset " + TARGET_FMT_plx "\n", __func__, offset); + } + break; + case GITS_CREADR + 4: + if (s->gicv3->gicd_ctlr & GICD_CTLR_DS) { + s->creadr = deposit64(s->creadr, 32, 32, value); + } else { + /* RO register, ignore the write */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write to RO register at offset " + TARGET_FMT_plx "\n", __func__, offset); + } + break; + case GITS_BASER ... 
GITS_BASER + 0x3f: + /* + * IMPDEF choice:- GITS_BASERn register becomes RO if ITS is + * already enabled + */ + if (!(s->ctlr & ITS_CTLR_ENABLED)) { + index = (offset - GITS_BASER) / 8; + + if (offset & 7) { + value <<= 32; + value &= ~GITS_BASER_RO_MASK; + s->baser[index] &= GITS_BASER_RO_MASK | MAKE_64BIT_MASK(0, 32); + s->baser[index] |= value; + } else { + value &= ~GITS_BASER_RO_MASK; + s->baser[index] &= GITS_BASER_RO_MASK | MAKE_64BIT_MASK(32, 32); + s->baser[index] |= value; + } + } + break; + case GITS_IIDR: + case GITS_IDREGS ... GITS_IDREGS + 0x2f: + /* RO registers, ignore the write */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write to RO register at offset " + TARGET_FMT_plx "\n", __func__, offset); + break; + default: + result = false; + break; + } + return result; +} + +static bool its_readl(GICv3ITSState *s, hwaddr offset, + uint64_t *data, MemTxAttrs attrs) +{ + bool result = true; + int index; + + switch (offset) { + case GITS_CTLR: + *data = s->ctlr; + break; + case GITS_IIDR: + *data = gicv3_iidr(); + break; + case GITS_IDREGS ... GITS_IDREGS + 0x2f: + /* ID registers */ + *data = gicv3_idreg(offset - GITS_IDREGS); + break; + case GITS_TYPER: + *data = extract64(s->typer, 0, 32); + break; + case GITS_TYPER + 4: + *data = extract64(s->typer, 32, 32); + break; + case GITS_CBASER: + *data = extract64(s->cbaser, 0, 32); + break; + case GITS_CBASER + 4: + *data = extract64(s->cbaser, 32, 32); + break; + case GITS_CREADR: + *data = extract64(s->creadr, 0, 32); + break; + case GITS_CREADR + 4: + *data = extract64(s->creadr, 32, 32); + break; + case GITS_CWRITER: + *data = extract64(s->cwriter, 0, 32); + break; + case GITS_CWRITER + 4: + *data = extract64(s->cwriter, 32, 32); + break; + case GITS_BASER ... GITS_BASER + 0x3f: + index = (offset - GITS_BASER) / 8; + if (offset & 7) { + *data = extract64(s->baser[index], 32, 32); + } else { + *data = extract64(s->baser[index], 0, 32); + } + break; + default: + result = false; + break; + } + return result; +} + +static bool its_writell(GICv3ITSState *s, hwaddr offset, + uint64_t value, MemTxAttrs attrs) +{ + bool result = true; + int index; + + switch (offset) { + case GITS_BASER ... 
GITS_BASER + 0x3f: + /* + * IMPDEF choice:- GITS_BASERn register becomes RO if ITS is + * already enabled + */ + if (!(s->ctlr & ITS_CTLR_ENABLED)) { + index = (offset - GITS_BASER) / 8; + s->baser[index] &= GITS_BASER_RO_MASK; + s->baser[index] |= (value & ~GITS_BASER_RO_MASK); + } + break; + case GITS_CBASER: + /* + * IMPDEF choice:- GITS_CBASER register becomes RO if ITS is + * already enabled + */ + if (!(s->ctlr & ITS_CTLR_ENABLED)) { + s->cbaser = value; + s->creadr = 0; + s->cwriter = s->creadr; + } + break; + case GITS_CWRITER: + s->cwriter = value & ~R_GITS_CWRITER_RETRY_MASK; + if (s->cwriter != s->creadr) { + process_cmdq(s); + } + break; + case GITS_CREADR: + if (s->gicv3->gicd_ctlr & GICD_CTLR_DS) { + s->creadr = value & ~R_GITS_CREADR_STALLED_MASK; + } else { + /* RO register, ignore the write */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write to RO register at offset " + TARGET_FMT_plx "\n", __func__, offset); + } + break; + case GITS_TYPER: + /* RO registers, ignore the write */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write to RO register at offset " + TARGET_FMT_plx "\n", __func__, offset); + break; + default: + result = false; + break; + } + return result; +} + +static bool its_readll(GICv3ITSState *s, hwaddr offset, + uint64_t *data, MemTxAttrs attrs) +{ + bool result = true; + int index; + + switch (offset) { + case GITS_TYPER: + *data = s->typer; + break; + case GITS_BASER ... GITS_BASER + 0x3f: + index = (offset - GITS_BASER) / 8; + *data = s->baser[index]; + break; + case GITS_CBASER: + *data = s->cbaser; + break; + case GITS_CREADR: + *data = s->creadr; + break; + case GITS_CWRITER: + *data = s->cwriter; + break; + default: + result = false; + break; + } + return result; +} + +static MemTxResult gicv3_its_read(void *opaque, hwaddr offset, uint64_t *data, + unsigned size, MemTxAttrs attrs) +{ + GICv3ITSState *s = (GICv3ITSState *)opaque; + bool result; + + switch (size) { + case 4: + result = its_readl(s, offset, data, attrs); + break; + case 8: + result = its_readll(s, offset, data, attrs); + break; + default: + result = false; + break; + } + + if (!result) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest read at offset " TARGET_FMT_plx + "size %u\n", __func__, offset, size); + /* + * The spec requires that reserved registers are RAZ/WI; + * so use false returns from leaf functions as a way to + * trigger the guest-error logging but don't return it to + * the caller, or we'll cause a spurious guest data abort. + */ + *data = 0; + } + return MEMTX_OK; +} + +static MemTxResult gicv3_its_write(void *opaque, hwaddr offset, uint64_t data, + unsigned size, MemTxAttrs attrs) +{ + GICv3ITSState *s = (GICv3ITSState *)opaque; + bool result; + + switch (size) { + case 4: + result = its_writel(s, offset, data, attrs); + break; + case 8: + result = its_writell(s, offset, data, attrs); + break; + default: + result = false; + break; + } + + if (!result) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write at offset " TARGET_FMT_plx + "size %u\n", __func__, offset, size); + /* + * The spec requires that reserved registers are RAZ/WI; + * so use false returns from leaf functions as a way to + * trigger the guest-error logging but don't return it to + * the caller, or we'll cause a spurious guest data abort. 
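 *
 * Note that the 64-bit ITS registers are also guest-accessible as two
 * 32-bit halves; for example, programming GITS_CBASER word by word goes
 * through the deposit64()/extract64() pairs in its_writel()/its_readl()
 * roughly as follows (lo/hi are illustrative names for the two guest
 * stores/loads):
 *
 *   s->cbaser = deposit64(s->cbaser, 0, 32, lo);   /* write GITS_CBASER     */
 *   s->cbaser = deposit64(s->cbaser, 32, 32, hi);  /* write GITS_CBASER + 4 */
 *   lo = extract64(s->cbaser, 0, 32);              /* read  GITS_CBASER     */
 *   hi = extract64(s->cbaser, 32, 32);             /* read  GITS_CBASER + 4 */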
+ */ + } + return MEMTX_OK; +} + +static const MemoryRegionOps gicv3_its_control_ops = { + .read_with_attrs = gicv3_its_read, + .write_with_attrs = gicv3_its_write, + .valid.min_access_size = 4, + .valid.max_access_size = 8, + .impl.min_access_size = 4, + .impl.max_access_size = 8, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const MemoryRegionOps gicv3_its_translation_ops = { + .write_with_attrs = gicv3_its_translation_write, + .valid.min_access_size = 2, + .valid.max_access_size = 4, + .impl.min_access_size = 2, + .impl.max_access_size = 4, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void gicv3_arm_its_realize(DeviceState *dev, Error **errp) +{ + GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev); + int i; + + for (i = 0; i < s->gicv3->num_cpu; i++) { + if (!(s->gicv3->cpu[i].gicr_typer & GICR_TYPER_PLPIS)) { + error_setg(errp, "Physical LPI not supported by CPU %d", i); + return; + } + } + + gicv3_its_init_mmio(s, &gicv3_its_control_ops, &gicv3_its_translation_ops); + + address_space_init(&s->gicv3->dma_as, s->gicv3->dma, + "gicv3-its-sysmem"); + + /* set the ITS default features supported */ + s->typer = FIELD_DP64(s->typer, GITS_TYPER, PHYSICAL, + GITS_TYPE_PHYSICAL); + s->typer = FIELD_DP64(s->typer, GITS_TYPER, ITT_ENTRY_SIZE, + ITS_ITT_ENTRY_SIZE - 1); + s->typer = FIELD_DP64(s->typer, GITS_TYPER, IDBITS, ITS_IDBITS); + s->typer = FIELD_DP64(s->typer, GITS_TYPER, DEVBITS, ITS_DEVBITS); + s->typer = FIELD_DP64(s->typer, GITS_TYPER, CIL, 1); + s->typer = FIELD_DP64(s->typer, GITS_TYPER, CIDBITS, ITS_CIDBITS); +} + +static void gicv3_its_reset(DeviceState *dev) +{ + GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev); + GICv3ITSClass *c = ARM_GICV3_ITS_GET_CLASS(s); + + c->parent_reset(dev); + + /* Quiescent bit reset to 1 */ + s->ctlr = FIELD_DP32(s->ctlr, GITS_CTLR, QUIESCENT, 1); + + /* + * setting GITS_BASER0.Type = 0b001 (Device) + * GITS_BASER1.Type = 0b100 (Collection Table) + * GITS_BASER.Type,where n = 3 to 7 are 0b00 (Unimplemented) + * GITS_BASER<0,1>.Page_Size = 64KB + * and default translation table entry size to 16 bytes + */ + s->baser[0] = FIELD_DP64(s->baser[0], GITS_BASER, TYPE, + GITS_BASER_TYPE_DEVICE); + s->baser[0] = FIELD_DP64(s->baser[0], GITS_BASER, PAGESIZE, + GITS_BASER_PAGESIZE_64K); + s->baser[0] = FIELD_DP64(s->baser[0], GITS_BASER, ENTRYSIZE, + GITS_DTE_SIZE - 1); + + s->baser[1] = FIELD_DP64(s->baser[1], GITS_BASER, TYPE, + GITS_BASER_TYPE_COLLECTION); + s->baser[1] = FIELD_DP64(s->baser[1], GITS_BASER, PAGESIZE, + GITS_BASER_PAGESIZE_64K); + s->baser[1] = FIELD_DP64(s->baser[1], GITS_BASER, ENTRYSIZE, + GITS_CTE_SIZE - 1); +} + +static void gicv3_its_post_load(GICv3ITSState *s) +{ + if (s->ctlr & ITS_CTLR_ENABLED) { + extract_table_params(s); + extract_cmdq_params(s); + } +} + +static Property gicv3_its_props[] = { + DEFINE_PROP_LINK("parent-gicv3", GICv3ITSState, gicv3, "arm-gicv3", + GICv3State *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void gicv3_its_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + GICv3ITSClass *ic = ARM_GICV3_ITS_CLASS(klass); + GICv3ITSCommonClass *icc = ARM_GICV3_ITS_COMMON_CLASS(klass); + + dc->realize = gicv3_arm_its_realize; + device_class_set_props(dc, gicv3_its_props); + device_class_set_parent_reset(dc, gicv3_its_reset, &ic->parent_reset); + icc->post_load = gicv3_its_post_load; +} + +static const TypeInfo gicv3_its_info = { + .name = TYPE_ARM_GICV3_ITS, + .parent = TYPE_ARM_GICV3_ITS_COMMON, + .instance_size = sizeof(GICv3ITSState), + .class_init = gicv3_its_class_init, + 
.class_size = sizeof(GICv3ITSClass), +}; + +static void gicv3_its_register_types(void) +{ + type_register_static(&gicv3_its_info); +} + +type_init(gicv3_its_register_types) diff --git a/hw/intc/arm_gicv3_its_common.c b/hw/intc/arm_gicv3_its_common.c index 66c4c6a1888..90b85f1e25c 100644 --- a/hw/intc/arm_gicv3_its_common.c +++ b/hw/intc/arm_gicv3_its_common.c @@ -99,14 +99,15 @@ static const MemoryRegionOps gicv3_its_trans_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops) +void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops, + const MemoryRegionOps *tops) { SysBusDevice *sbd = SYS_BUS_DEVICE(s); memory_region_init_io(&s->iomem_its_cntrl, OBJECT(s), ops, s, "control", ITS_CONTROL_SIZE); memory_region_init_io(&s->iomem_its_translation, OBJECT(s), - &gicv3_its_trans_ops, s, + tops ? tops : &gicv3_its_trans_ops, s, "translation", ITS_TRANS_SIZE); /* Our two regions are always adjacent, therefore we now combine them diff --git a/hw/intc/arm_gicv3_its_kvm.c b/hw/intc/arm_gicv3_its_kvm.c index b554d2ede0a..0b4cbed28b3 100644 --- a/hw/intc/arm_gicv3_its_kvm.c +++ b/hw/intc/arm_gicv3_its_kvm.c @@ -106,7 +106,7 @@ static void kvm_arm_its_realize(DeviceState *dev, Error **errp) kvm_arm_register_device(&s->iomem_its_cntrl, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_ITS_ADDR_TYPE, s->dev_fd, 0); - gicv3_its_init_mmio(s, NULL); + gicv3_its_init_mmio(s, NULL, NULL); if (!kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS, GITS_CTLR)) { diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index 65a4c880a35..5ec5ff9ef6e 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -22,7 +22,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "hw/intc/arm_gicv3_common.h" -#include "hw/sysbus.h" #include "qemu/error-report.h" #include "qemu/module.h" #include "sysemu/kvm.h" @@ -788,11 +787,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) return; } - gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL); for (i = 0; i < s->num_cpu; i++) { ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); @@ -830,7 +825,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) KVM_VGIC_V3_ADDR_TYPE_DIST, s->dev_fd, 0); if (!multiple_redist_region_allowed) { - kvm_arm_register_device(&s->iomem_redist[0], -1, + kvm_arm_register_device(&s->redist_regions[0].iomem, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST, s->dev_fd, 0); } else { @@ -843,7 +838,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) uint64_t addr_ormask = i | ((uint64_t)s->redist_region_count[i] << 52); - kvm_arm_register_device(&s->iomem_redist[i], -1, + kvm_arm_register_device(&s->redist_regions[i].iomem, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, s->dev_fd, addr_ormask); diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c index 8645220d618..c8ff3eca085 100644 --- a/hw/intc/arm_gicv3_redist.c +++ b/hw/intc/arm_gicv3_redist.c @@ -248,10 +248,20 @@ static MemTxResult gicr_writel(GICv3CPUState *cs, hwaddr offset, case GICR_CTLR: /* For our implementation, GICR_TYPER.DPGS is 0 and so all * the DPG bits are RAZ/WI. We don't do anything asynchronously, - * so UWP and RWP are RAZ/WI. And GICR_TYPER.LPIS is 0 (we don't - * implement LPIs) so Enable_LPIs is RES0. So there are no writable - * bits for us. 
+ * so UWP and RWP are RAZ/WI. GICR_TYPER.LPIS is 1 (we + * implement LPIs) so Enable_LPIs is programmable. */ + if (cs->gicr_typer & GICR_TYPER_PLPIS) { + if (value & GICR_CTLR_ENABLE_LPIS) { + cs->gicr_ctlr |= GICR_CTLR_ENABLE_LPIS; + /* Check for any pending interr in pending table */ + gicv3_redist_update_lpi(cs); + } else { + cs->gicr_ctlr &= ~GICR_CTLR_ENABLE_LPIS; + /* cs->hppi might have been an LPI; recalculate */ + gicv3_redist_update(cs); + } + } return MEMTX_OK; case GICR_STATUSR: /* RAZ/WI for our implementation */ @@ -416,22 +426,24 @@ static MemTxResult gicr_writell(GICv3CPUState *cs, hwaddr offset, MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data, unsigned size, MemTxAttrs attrs) { - GICv3State *s = opaque; + GICv3RedistRegion *region = opaque; + GICv3State *s = region->gic; GICv3CPUState *cs; MemTxResult r; int cpuidx; assert((offset & (size - 1)) == 0); - /* This region covers all the redistributor pages; there are - * (for GICv3) two 64K pages per CPU. At the moment they are - * all contiguous (ie in this one region), though we might later - * want to allow splitting of redistributor pages into several - * blocks so we can support more CPUs. + /* + * There are (for GICv3) two 64K redistributor pages per CPU. + * In some cases the redistributor pages for all CPUs are not + * contiguous (eg on the virt board they are split into two + * parts if there are too many CPUs to all fit in the same place + * in the memory map); if so then the GIC has multiple MemoryRegions + * for the redistributors. */ - cpuidx = offset / 0x20000; - offset %= 0x20000; - assert(cpuidx < s->num_cpu); + cpuidx = region->cpuidx + offset / GICV3_REDIST_SIZE; + offset %= GICV3_REDIST_SIZE; cs = &s->cpu[cpuidx]; @@ -453,7 +465,7 @@ MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data, if (r == MEMTX_ERROR) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest read at offset " TARGET_FMT_plx - "size %u\n", __func__, offset, size); + " size %u\n", __func__, offset, size); trace_gicv3_redist_badread(gicv3_redist_affid(cs), offset, size, attrs.secure); /* The spec requires that reserved registers are RAZ/WI; @@ -473,22 +485,24 @@ MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data, MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, unsigned size, MemTxAttrs attrs) { - GICv3State *s = opaque; + GICv3RedistRegion *region = opaque; + GICv3State *s = region->gic; GICv3CPUState *cs; MemTxResult r; int cpuidx; assert((offset & (size - 1)) == 0); - /* This region covers all the redistributor pages; there are - * (for GICv3) two 64K pages per CPU. At the moment they are - * all contiguous (ie in this one region), though we might later - * want to allow splitting of redistributor pages into several - * blocks so we can support more CPUs. + /* + * There are (for GICv3) two 64K redistributor pages per CPU. + * In some cases the redistributor pages for all CPUs are not + * contiguous (eg on the virt board they are split into two + * parts if there are too many CPUs to all fit in the same place + * in the memory map); if so then the GIC has multiple MemoryRegions + * for the redistributors. 
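 *
 * The opaque pointer is therefore a GICv3RedistRegion rather than the
 * GICv3State itself, and the target CPU is recovered from the region's
 * first CPU plus the offset into the region. A worked example, assuming
 * the usual two-frame 0x20000 per-CPU stride (GICV3_REDIST_SIZE):
 *
 *   cpuidx = region->cpuidx + offset / GICV3_REDIST_SIZE;
 *   offset %= GICV3_REDIST_SIZE;
 *
 * so an access at offset 0x60000 into a region whose cpuidx is 8 is
 * routed to CPU 11, at offset 0 within that CPU's frames.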
*/ - cpuidx = offset / 0x20000; - offset %= 0x20000; - assert(cpuidx < s->num_cpu); + cpuidx = region->cpuidx + offset / GICV3_REDIST_SIZE; + offset %= GICV3_REDIST_SIZE; cs = &s->cpu[cpuidx]; @@ -510,7 +524,7 @@ MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, if (r == MEMTX_ERROR) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest write at offset " TARGET_FMT_plx - "size %u\n", __func__, offset, size); + " size %u\n", __func__, offset, size); trace_gicv3_redist_badwrite(gicv3_redist_affid(cs), offset, data, size, attrs.secure); /* The spec requires that reserved registers are RAZ/WI; @@ -526,6 +540,149 @@ MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, return r; } +static void gicv3_redist_check_lpi_priority(GICv3CPUState *cs, int irq) +{ + AddressSpace *as = &cs->gic->dma_as; + uint64_t lpict_baddr; + uint8_t lpite; + uint8_t prio; + + lpict_baddr = cs->gicr_propbaser & R_GICR_PROPBASER_PHYADDR_MASK; + + address_space_read(as, lpict_baddr + ((irq - GICV3_LPI_INTID_START) * + sizeof(lpite)), MEMTXATTRS_UNSPECIFIED, &lpite, + sizeof(lpite)); + + if (!(lpite & LPI_CTE_ENABLED)) { + return; + } + + if (cs->gic->gicd_ctlr & GICD_CTLR_DS) { + prio = lpite & LPI_PRIORITY_MASK; + } else { + prio = ((lpite & LPI_PRIORITY_MASK) >> 1) | 0x80; + } + + if ((prio < cs->hpplpi.prio) || + ((prio == cs->hpplpi.prio) && (irq <= cs->hpplpi.irq))) { + cs->hpplpi.irq = irq; + cs->hpplpi.prio = prio; + /* LPIs are always non-secure Grp1 interrupts */ + cs->hpplpi.grp = GICV3_G1NS; + } +} + +void gicv3_redist_update_lpi_only(GICv3CPUState *cs) +{ + /* + * This function scans the LPI pending table and for each pending + * LPI, reads the corresponding entry from LPI configuration table + * to extract the priority info and determine if the current LPI + * priority is lower than the last computed high priority lpi interrupt. + * If yes, replace current LPI as the new high priority lpi interrupt. + */ + AddressSpace *as = &cs->gic->dma_as; + uint64_t lpipt_baddr; + uint32_t pendt_size = 0; + uint8_t pend; + int i, bit; + uint64_t idbits; + + idbits = MIN(FIELD_EX64(cs->gicr_propbaser, GICR_PROPBASER, IDBITS), + GICD_TYPER_IDBITS); + + if (!(cs->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) || !cs->gicr_propbaser || + !cs->gicr_pendbaser) { + return; + } + + cs->hpplpi.prio = 0xff; + + lpipt_baddr = cs->gicr_pendbaser & R_GICR_PENDBASER_PHYADDR_MASK; + + /* Determine the highest priority pending interrupt among LPIs */ + pendt_size = (1ULL << (idbits + 1)); + + for (i = GICV3_LPI_INTID_START / 8; i < pendt_size / 8; i++) { + address_space_read(as, lpipt_baddr + i, MEMTXATTRS_UNSPECIFIED, &pend, + sizeof(pend)); + + while (pend) { + bit = ctz32(pend); + gicv3_redist_check_lpi_priority(cs, i * 8 + bit); + pend &= ~(1 << bit); + } + } +} + +void gicv3_redist_update_lpi(GICv3CPUState *cs) +{ + gicv3_redist_update_lpi_only(cs); + gicv3_redist_update(cs); +} + +void gicv3_redist_lpi_pending(GICv3CPUState *cs, int irq, int level) +{ + /* + * This function updates the pending bit in lpi pending table for + * the irq being activated or deactivated. 
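 *
 * The pending table is a bitmap indexed by INTID: the state for a given
 * LPI lives in byte (irq / 8), bit (irq % 8), of the table whose base
 * comes from GICR_PENDBASER. A worked example, assuming LPIs start at
 * INTID 8192 (GICV3_LPI_INTID_START):
 *
 *   byte_off = irq / 8;                 /* LPI 8195 -> byte 1024 */
 *   bit      = irq % 8;                 /* LPI 8195 -> bit 3     */
 *   ispend   = extract32(pend, bit, 1);
 *   pend     = deposit32(pend, bit, 1, level ? 1 : 0);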
+ */ + AddressSpace *as = &cs->gic->dma_as; + uint64_t lpipt_baddr; + bool ispend = false; + uint8_t pend; + + /* + * get the bit value corresponding to this irq in the + * lpi pending table + */ + lpipt_baddr = cs->gicr_pendbaser & R_GICR_PENDBASER_PHYADDR_MASK; + + address_space_read(as, lpipt_baddr + ((irq / 8) * sizeof(pend)), + MEMTXATTRS_UNSPECIFIED, &pend, sizeof(pend)); + + ispend = extract32(pend, irq % 8, 1); + + /* no change in the value of pending bit, return */ + if (ispend == level) { + return; + } + pend = deposit32(pend, irq % 8, 1, level ? 1 : 0); + + address_space_write(as, lpipt_baddr + ((irq / 8) * sizeof(pend)), + MEMTXATTRS_UNSPECIFIED, &pend, sizeof(pend)); + + /* + * check if this LPI is better than the current hpplpi, if yes + * just set hpplpi.prio and .irq without doing a full rescan + */ + if (level) { + gicv3_redist_check_lpi_priority(cs, irq); + gicv3_redist_update(cs); + } else { + if (irq == cs->hpplpi.irq) { + gicv3_redist_update_lpi(cs); + } + } +} + +void gicv3_redist_process_lpi(GICv3CPUState *cs, int irq, int level) +{ + uint64_t idbits; + + idbits = MIN(FIELD_EX64(cs->gicr_propbaser, GICR_PROPBASER, IDBITS), + GICD_TYPER_IDBITS); + + if (!(cs->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) || !cs->gicr_propbaser || + !cs->gicr_pendbaser || (irq > (1ULL << (idbits + 1)) - 1) || + irq < GICV3_LPI_INTID_START) { + return; + } + + /* set/clear the pending bit for this irq */ + gicv3_redist_lpi_pending(cs, irq, level); +} + void gicv3_redist_set_irq(GICv3CPUState *cs, int irq, int level) { /* Update redistributor state for a change in an external PPI input line */ diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index 0d8426dafc9..13df002ce4d 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/sysbus.h" #include "migration/vmstate.h" #include "qemu/timer.h" @@ -128,15 +127,14 @@ static bool nvic_isrpending(NVICState *s) { int irq; - /* We can shortcut if the highest priority pending interrupt - * happens to be external or if there is nothing pending. + /* + * We can shortcut if the highest priority pending interrupt + * happens to be external; if not we need to check the whole + * vectors[] array. 
*/ if (s->vectpending > NVIC_FIRST_IRQ) { return true; } - if (s->vectpending == 0) { - return false; - } for (irq = NVIC_FIRST_IRQ; irq < s->num_irq; irq++) { if (s->vectors[irq].pending) { @@ -806,6 +804,16 @@ void armv7m_nvic_acknowledge_irq(void *opaque) nvic_irq_update(s); } +static bool vectpending_targets_secure(NVICState *s) +{ + /* Return true if s->vectpending targets Secure state */ + if (s->vectpending_is_s_banked) { + return true; + } + return !exc_is_banked(s->vectpending) && + exc_targets_secure(s, s->vectpending); +} + void armv7m_nvic_get_pending_irq_info(void *opaque, int *pirq, bool *ptargets_secure) { @@ -815,12 +823,7 @@ void armv7m_nvic_get_pending_irq_info(void *opaque, assert(pending > ARMV7M_EXCP_RESET && pending < s->num_irq); - if (s->vectpending_is_s_banked) { - targets_secure = true; - } else { - targets_secure = !exc_is_banked(pending) && - exc_targets_secure(s, pending); - } + targets_secure = vectpending_targets_secure(s); trace_nvic_get_pending_irq_info(pending, targets_secure); @@ -1041,7 +1044,19 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) /* VECTACTIVE */ val = cpu->env.v7m.exception; /* VECTPENDING */ - val |= (s->vectpending & 0xff) << 12; + if (s->vectpending) { + /* + * From v8.1M VECTPENDING must read as 1 if accessed as + * NonSecure and the highest priority pending and enabled + * exception targets Secure. + */ + int vp = s->vectpending; + if (!attrs.secure && arm_feature(&cpu->env, ARM_FEATURE_V8_1M) && + vectpending_targets_secure(s)) { + vp = 1; + } + val |= (vp & 0x1ff) << 12; + } /* ISRPENDING - set if any external IRQ is pending */ if (nvic_isrpending(s)) { val |= (1 << 22); @@ -2455,172 +2470,6 @@ static const MemoryRegionOps nvic_sysreg_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -static MemTxResult nvic_sysreg_ns_write(void *opaque, hwaddr addr, - uint64_t value, unsigned size, - MemTxAttrs attrs) -{ - MemoryRegion *mr = opaque; - - if (attrs.secure) { - /* S accesses to the alias act like NS accesses to the real region */ - attrs.secure = 0; - return memory_region_dispatch_write(mr, addr, value, - size_memop(size) | MO_TE, attrs); - } else { - /* NS attrs are RAZ/WI for privileged, and BusFault for user */ - if (attrs.user) { - return MEMTX_ERROR; - } - return MEMTX_OK; - } -} - -static MemTxResult nvic_sysreg_ns_read(void *opaque, hwaddr addr, - uint64_t *data, unsigned size, - MemTxAttrs attrs) -{ - MemoryRegion *mr = opaque; - - if (attrs.secure) { - /* S accesses to the alias act like NS accesses to the real region */ - attrs.secure = 0; - return memory_region_dispatch_read(mr, addr, data, - size_memop(size) | MO_TE, attrs); - } else { - /* NS attrs are RAZ/WI for privileged, and BusFault for user */ - if (attrs.user) { - return MEMTX_ERROR; - } - *data = 0; - return MEMTX_OK; - } -} - -static const MemoryRegionOps nvic_sysreg_ns_ops = { - .read_with_attrs = nvic_sysreg_ns_read, - .write_with_attrs = nvic_sysreg_ns_write, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static MemTxResult nvic_systick_write(void *opaque, hwaddr addr, - uint64_t value, unsigned size, - MemTxAttrs attrs) -{ - NVICState *s = opaque; - MemoryRegion *mr; - - /* Direct the access to the correct systick */ - mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->systick[attrs.secure]), 0); - return memory_region_dispatch_write(mr, addr, value, - size_memop(size) | MO_TE, attrs); -} - -static MemTxResult nvic_systick_read(void *opaque, hwaddr addr, - uint64_t *data, unsigned size, - MemTxAttrs attrs) -{ - NVICState *s = opaque; - 
MemoryRegion *mr; - - /* Direct the access to the correct systick */ - mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->systick[attrs.secure]), 0); - return memory_region_dispatch_read(mr, addr, data, size_memop(size) | MO_TE, - attrs); -} - -static const MemoryRegionOps nvic_systick_ops = { - .read_with_attrs = nvic_systick_read, - .write_with_attrs = nvic_systick_write, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - - -static MemTxResult ras_read(void *opaque, hwaddr addr, - uint64_t *data, unsigned size, - MemTxAttrs attrs) -{ - if (attrs.user) { - return MEMTX_ERROR; - } - - switch (addr) { - case 0xe10: /* ERRIIDR */ - /* architect field = Arm; product/variant/revision 0 */ - *data = 0x43b; - break; - case 0xfc8: /* ERRDEVID */ - /* Minimal RAS: we implement 0 error record indexes */ - *data = 0; - break; - default: - qemu_log_mask(LOG_UNIMP, "Read RAS register offset 0x%x\n", - (uint32_t)addr); - *data = 0; - break; - } - return MEMTX_OK; -} - -static MemTxResult ras_write(void *opaque, hwaddr addr, - uint64_t value, unsigned size, - MemTxAttrs attrs) -{ - if (attrs.user) { - return MEMTX_ERROR; - } - - switch (addr) { - default: - qemu_log_mask(LOG_UNIMP, "Write to RAS register offset 0x%x\n", - (uint32_t)addr); - break; - } - return MEMTX_OK; -} - -static const MemoryRegionOps ras_ops = { - .read_with_attrs = ras_read, - .write_with_attrs = ras_write, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -/* - * Unassigned portions of the PPB space are RAZ/WI for privileged - * accesses, and fault for non-privileged accesses. - */ -static MemTxResult ppb_default_read(void *opaque, hwaddr addr, - uint64_t *data, unsigned size, - MemTxAttrs attrs) -{ - qemu_log_mask(LOG_UNIMP, "Read of unassigned area of PPB: offset 0x%x\n", - (uint32_t)addr); - if (attrs.user) { - return MEMTX_ERROR; - } - *data = 0; - return MEMTX_OK; -} - -static MemTxResult ppb_default_write(void *opaque, hwaddr addr, - uint64_t value, unsigned size, - MemTxAttrs attrs) -{ - qemu_log_mask(LOG_UNIMP, "Write of unassigned area of PPB: offset 0x%x\n", - (uint32_t)addr); - if (attrs.user) { - return MEMTX_ERROR; - } - return MEMTX_OK; -} - -static const MemoryRegionOps ppb_default_ops = { - .read_with_attrs = ppb_default_read, - .write_with_attrs = ppb_default_write, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid.min_access_size = 1, - .valid.max_access_size = 8, -}; - static int nvic_post_load(void *opaque, int version_id) { NVICState *s = opaque; @@ -2836,128 +2685,22 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp) s->num_prio_bits = arm_feature(&s->cpu->env, ARM_FEATURE_V7) ? 8 : 2; - if (!sysbus_realize(SYS_BUS_DEVICE(&s->systick[M_REG_NS]), errp)) { - return; - } - sysbus_connect_irq(SYS_BUS_DEVICE(&s->systick[M_REG_NS]), 0, - qdev_get_gpio_in_named(dev, "systick-trigger", - M_REG_NS)); - - if (arm_feature(&s->cpu->env, ARM_FEATURE_M_SECURITY)) { - /* We couldn't init the secure systick device in instance_init - * as we didn't know then if the CPU had the security extensions; - * so we have to do it here. - */ - object_initialize_child(OBJECT(dev), "systick-reg-s", - &s->systick[M_REG_S], TYPE_SYSTICK); - - if (!sysbus_realize(SYS_BUS_DEVICE(&s->systick[M_REG_S]), errp)) { - return; - } - sysbus_connect_irq(SYS_BUS_DEVICE(&s->systick[M_REG_S]), 0, - qdev_get_gpio_in_named(dev, "systick-trigger", - M_REG_S)); - } - /* - * This device provides a single sysbus memory region which - * represents the whole of the "System PPB" space. 
This is the - * range from 0xe0000000 to 0xe00fffff and includes the NVIC, - * the System Control Space (system registers), the systick timer, - * and for CPUs with the Security extension an NS banked version - * of all of these. - * - * The default behaviour for unimplemented registers/ranges - * (for instance the Data Watchpoint and Trace unit at 0xe0001000) - * is to RAZ/WI for privileged access and BusFault for non-privileged - * access. - * - * The NVIC and System Control Space (SCS) starts at 0xe000e000 - * and looks like this: - * 0x004 - ICTR - * 0x010 - 0xff - systick - * 0x100..0x7ec - NVIC - * 0x7f0..0xcff - Reserved - * 0xd00..0xd3c - SCS registers - * 0xd40..0xeff - Reserved or Not implemented - * 0xf00 - STIR - * - * Some registers within this space are banked between security states. - * In v8M there is a second range 0xe002e000..0xe002efff which is the - * NonSecure alias SCS; secure accesses to this behave like NS accesses - * to the main SCS range, and non-secure accesses (including when - * the security extension is not implemented) are RAZ/WI. - * Note that both the main SCS range and the alias range are defined - * to be exempt from memory attribution (R_BLJT) and so the memory - * transaction attribute always matches the current CPU security - * state (attrs.secure == env->v7m.secure). In the nvic_sysreg_ns_ops - * wrappers we change attrs.secure to indicate the NS access; so - * generally code determining which banked register to use should - * use attrs.secure; code determining actual behaviour of the system - * should use env->v7m.secure. - * - * The container covers the whole PPB space. Within it the priority - * of overlapping regions is: - * - default region (for RAZ/WI and BusFault) : -1 - * - system register regions : 0 - * - systick : 1 - * This is because the systick device is a small block of registers - * in the middle of the other system control registers. + * This device provides a single memory region which covers the + * sysreg/NVIC registers from 0xE000E000 .. 0xE000EFFF, with the + * exception of the systick timer registers 0xE000E010 .. 0xE000E0FF. 
*/ - memory_region_init(&s->container, OBJECT(s), "nvic", 0x100000); - memory_region_init_io(&s->defaultmem, OBJECT(s), &ppb_default_ops, s, - "nvic-default", 0x100000); - memory_region_add_subregion_overlap(&s->container, 0, &s->defaultmem, -1); memory_region_init_io(&s->sysregmem, OBJECT(s), &nvic_sysreg_ops, s, "nvic_sysregs", 0x1000); - memory_region_add_subregion(&s->container, 0xe000, &s->sysregmem); - - memory_region_init_io(&s->systickmem, OBJECT(s), - &nvic_systick_ops, s, - "nvic_systick", 0xe0); - - memory_region_add_subregion_overlap(&s->container, 0xe010, - &s->systickmem, 1); - - if (arm_feature(&s->cpu->env, ARM_FEATURE_V8)) { - memory_region_init_io(&s->sysreg_ns_mem, OBJECT(s), - &nvic_sysreg_ns_ops, &s->sysregmem, - "nvic_sysregs_ns", 0x1000); - memory_region_add_subregion(&s->container, 0x2e000, &s->sysreg_ns_mem); - memory_region_init_io(&s->systick_ns_mem, OBJECT(s), - &nvic_sysreg_ns_ops, &s->systickmem, - "nvic_systick_ns", 0xe0); - memory_region_add_subregion_overlap(&s->container, 0x2e010, - &s->systick_ns_mem, 1); - } - - if (cpu_isar_feature(aa32_ras, s->cpu)) { - memory_region_init_io(&s->ras_mem, OBJECT(s), - &ras_ops, s, "nvic_ras", 0x1000); - memory_region_add_subregion(&s->container, 0x5000, &s->ras_mem); - } - - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->sysregmem); } static void armv7m_nvic_instance_init(Object *obj) { - /* We have a different default value for the num-irq property - * than our superclass. This function runs after qdev init - * has set the defaults from the Property array and before - * any user-specified property setting, so just modify the - * value in the GICState struct. - */ DeviceState *dev = DEVICE(obj); NVICState *nvic = NVIC(obj); SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - object_initialize_child(obj, "systick-reg-ns", &nvic->systick[M_REG_NS], - TYPE_SYSTICK); - /* We can't initialize the secure systick here, as we don't know - * yet if we need it. 
- */ - sysbus_init_irq(sbd, &nvic->excpout); qdev_init_gpio_out_named(dev, &nvic->sysresetreq, "SYSRESETREQ", 1); qdev_init_gpio_in_named(dev, nvic_systick_trigger, "systick-trigger", diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h index 05303a55c88..b9c37453b04 100644 --- a/hw/intc/gicv3_internal.h +++ b/hw/intc/gicv3_internal.h @@ -24,6 +24,7 @@ #ifndef QEMU_ARM_GICV3_INTERNAL_H #define QEMU_ARM_GICV3_INTERNAL_H +#include "hw/registerfields.h" #include "hw/intc/arm_gicv3_common.h" /* Distributor registers, as offsets from the distributor base address */ @@ -67,6 +68,11 @@ #define GICD_CTLR_E1NWF (1U << 7) #define GICD_CTLR_RWP (1U << 31) +#define GICD_TYPER_LPIS_SHIFT 17 + +/* 16 bits EventId */ +#define GICD_TYPER_IDBITS 0xf + /* * Redistributor frame offsets from RD_base */ @@ -122,17 +128,19 @@ #define GICR_WAKER_ProcessorSleep (1U << 1) #define GICR_WAKER_ChildrenAsleep (1U << 2) -#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK (7ULL << 56) -#define GICR_PROPBASER_ADDR_MASK (0xfffffffffULL << 12) -#define GICR_PROPBASER_SHAREABILITY_MASK (3U << 10) -#define GICR_PROPBASER_CACHEABILITY_MASK (7U << 7) -#define GICR_PROPBASER_IDBITS_MASK (0x1f) +FIELD(GICR_PROPBASER, IDBITS, 0, 5) +FIELD(GICR_PROPBASER, INNERCACHE, 7, 3) +FIELD(GICR_PROPBASER, SHAREABILITY, 10, 2) +FIELD(GICR_PROPBASER, PHYADDR, 12, 40) +FIELD(GICR_PROPBASER, OUTERCACHE, 56, 3) -#define GICR_PENDBASER_PTZ (1ULL << 62) -#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK (7ULL << 56) -#define GICR_PENDBASER_ADDR_MASK (0xffffffffULL << 16) -#define GICR_PENDBASER_SHAREABILITY_MASK (3U << 10) -#define GICR_PENDBASER_CACHEABILITY_MASK (7U << 7) +FIELD(GICR_PENDBASER, INNERCACHE, 7, 3) +FIELD(GICR_PENDBASER, SHAREABILITY, 10, 2) +FIELD(GICR_PENDBASER, PHYADDR, 16, 36) +FIELD(GICR_PENDBASER, OUTERCACHE, 56, 3) +FIELD(GICR_PENDBASER, PTZ, 62, 1) + +#define GICR_PROPBASER_IDBITS_THRESHOLD 0xd #define ICC_CTLR_EL1_CBPR (1U << 0) #define ICC_CTLR_EL1_EOIMODE (1U << 1) @@ -239,6 +247,163 @@ #define ICH_VTR_EL2_PREBITS_SHIFT 26 #define ICH_VTR_EL2_PRIBITS_SHIFT 29 +/* ITS Registers */ + +FIELD(GITS_BASER, SIZE, 0, 8) +FIELD(GITS_BASER, PAGESIZE, 8, 2) +FIELD(GITS_BASER, SHAREABILITY, 10, 2) +FIELD(GITS_BASER, PHYADDR, 12, 36) +FIELD(GITS_BASER, PHYADDRL_64K, 16, 32) +FIELD(GITS_BASER, PHYADDRH_64K, 12, 4) +FIELD(GITS_BASER, ENTRYSIZE, 48, 5) +FIELD(GITS_BASER, OUTERCACHE, 53, 3) +FIELD(GITS_BASER, TYPE, 56, 3) +FIELD(GITS_BASER, INNERCACHE, 59, 3) +FIELD(GITS_BASER, INDIRECT, 62, 1) +FIELD(GITS_BASER, VALID, 63, 1) + +FIELD(GITS_CBASER, SIZE, 0, 8) +FIELD(GITS_CBASER, SHAREABILITY, 10, 2) +FIELD(GITS_CBASER, PHYADDR, 12, 40) +FIELD(GITS_CBASER, OUTERCACHE, 53, 3) +FIELD(GITS_CBASER, INNERCACHE, 59, 3) +FIELD(GITS_CBASER, VALID, 63, 1) + +FIELD(GITS_CREADR, STALLED, 0, 1) +FIELD(GITS_CREADR, OFFSET, 5, 15) + +FIELD(GITS_CWRITER, RETRY, 0, 1) +FIELD(GITS_CWRITER, OFFSET, 5, 15) + +FIELD(GITS_CTLR, ENABLED, 0, 1) +FIELD(GITS_CTLR, QUIESCENT, 31, 1) + +FIELD(GITS_TYPER, PHYSICAL, 0, 1) +FIELD(GITS_TYPER, ITT_ENTRY_SIZE, 4, 4) +FIELD(GITS_TYPER, IDBITS, 8, 5) +FIELD(GITS_TYPER, DEVBITS, 13, 5) +FIELD(GITS_TYPER, SEIS, 18, 1) +FIELD(GITS_TYPER, PTA, 19, 1) +FIELD(GITS_TYPER, CIDBITS, 32, 4) +FIELD(GITS_TYPER, CIL, 36, 1) + +#define GITS_IDREGS 0xFFD0 + +#define ITS_CTLR_ENABLED (1U) /* ITS Enabled */ + +#define GITS_BASER_RO_MASK (R_GITS_BASER_ENTRYSIZE_MASK | \ + R_GITS_BASER_TYPE_MASK) + +#define GITS_BASER_PAGESIZE_4K 0 +#define GITS_BASER_PAGESIZE_16K 1 +#define GITS_BASER_PAGESIZE_64K 2 + +#define GITS_BASER_TYPE_DEVICE 
1ULL +#define GITS_BASER_TYPE_COLLECTION 4ULL + +#define GITS_PAGE_SIZE_4K 0x1000 +#define GITS_PAGE_SIZE_16K 0x4000 +#define GITS_PAGE_SIZE_64K 0x10000 + +#define L1TABLE_ENTRY_SIZE 8 + +#define LPI_CTE_ENABLED TABLE_ENTRY_VALID_MASK +#define LPI_PRIORITY_MASK 0xfc + +#define GITS_CMDQ_ENTRY_SIZE 32 +#define NUM_BYTES_IN_DW 8 + +#define CMD_MASK 0xff + +/* ITS Commands */ +#define GITS_CMD_CLEAR 0x04 +#define GITS_CMD_DISCARD 0x0F +#define GITS_CMD_INT 0x03 +#define GITS_CMD_MAPC 0x09 +#define GITS_CMD_MAPD 0x08 +#define GITS_CMD_MAPI 0x0B +#define GITS_CMD_MAPTI 0x0A +#define GITS_CMD_INV 0x0C +#define GITS_CMD_INVALL 0x0D +#define GITS_CMD_SYNC 0x05 + +/* MAPC command fields */ +#define ICID_LENGTH 16 +#define ICID_MASK ((1U << ICID_LENGTH) - 1) +FIELD(MAPC, RDBASE, 16, 32) + +#define RDBASE_PROCNUM_LENGTH 16 +#define RDBASE_PROCNUM_MASK ((1ULL << RDBASE_PROCNUM_LENGTH) - 1) + +/* MAPD command fields */ +#define ITTADDR_LENGTH 44 +#define ITTADDR_SHIFT 8 +#define ITTADDR_MASK MAKE_64BIT_MASK(ITTADDR_SHIFT, ITTADDR_LENGTH) +#define SIZE_MASK 0x1f + +/* MAPI command fields */ +#define EVENTID_MASK ((1ULL << 32) - 1) + +/* MAPTI command fields */ +#define pINTID_SHIFT 32 +#define pINTID_MASK MAKE_64BIT_MASK(32, 32) + +#define DEVID_SHIFT 32 +#define DEVID_MASK MAKE_64BIT_MASK(32, 32) + +#define VALID_SHIFT 63 +#define CMD_FIELD_VALID_MASK (1ULL << VALID_SHIFT) +#define L2_TABLE_VALID_MASK CMD_FIELD_VALID_MASK +#define TABLE_ENTRY_VALID_MASK (1ULL << 0) + +/** + * Default features advertised by this version of ITS + */ +/* Physical LPIs supported */ +#define GITS_TYPE_PHYSICAL (1U << 0) + +/* + * 12 bytes Interrupt translation Table Entry size + * as per Table 5.3 in GICv3 spec + * ITE Lower 8 Bytes + * Bits: | 49 ... 26 | 25 ... 2 | 1 | 0 | + * Values: | 1023 | IntNum | IntType | Valid | + * ITE Higher 4 Bytes + * Bits: | 31 ... 16 | 15 ...0 | + * Values: | vPEID | ICID | + */ +#define ITS_ITT_ENTRY_SIZE 0xC +#define ITE_ENTRY_INTTYPE_SHIFT 1 +#define ITE_ENTRY_INTID_SHIFT 2 +#define ITE_ENTRY_INTID_MASK MAKE_64BIT_MASK(2, 24) +#define ITE_ENTRY_INTSP_SHIFT 26 +#define ITE_ENTRY_ICID_MASK MAKE_64BIT_MASK(0, 16) + +/* 16 bits EventId */ +#define ITS_IDBITS GICD_TYPER_IDBITS + +/* 16 bits DeviceId */ +#define ITS_DEVBITS 0xF + +/* 16 bits CollectionId */ +#define ITS_CIDBITS 0xF + +/* + * 8 bytes Device Table Entry size + * Valid = 1 bit,ITTAddr = 44 bits,Size = 5 bits + */ +#define GITS_DTE_SIZE (0x8ULL) +#define GITS_DTE_ITTADDR_SHIFT 6 +#define GITS_DTE_ITTADDR_MASK MAKE_64BIT_MASK(GITS_DTE_ITTADDR_SHIFT, \ + ITTADDR_LENGTH) + +/* + * 8 bytes Collection Table Entry size + * Valid = 1 bit,RDBase = 36 bits(considering max RDBASE) + */ +#define GITS_CTE_SIZE (0x8ULL) +#define GITS_CTE_RDBASE_PROCNUM_MASK MAKE_64BIT_MASK(1, RDBASE_PROCNUM_LENGTH) + /* Special interrupt IDs */ #define INTID_SECURE 1020 #define INTID_NONSECURE 1021 @@ -246,6 +411,19 @@ /* Functions internal to the emulated GICv3 */ +/** + * gicv3_intid_is_special: + * @intid: interrupt ID + * + * Return true if @intid is a special interrupt ID (1020 to + * 1023 inclusive). This corresponds to the GIC spec pseudocode + * IsSpecial() function. 
+ */ +static inline bool gicv3_intid_is_special(int intid) +{ + return intid >= INTID_SECURE && intid <= INTID_SPURIOUS; +} + /** * gicv3_redist_update: * @cs: GICv3CPUState for this redistributor @@ -296,6 +474,26 @@ MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, unsigned size, MemTxAttrs attrs); void gicv3_dist_set_irq(GICv3State *s, int irq, int level); void gicv3_redist_set_irq(GICv3CPUState *cs, int irq, int level); +void gicv3_redist_process_lpi(GICv3CPUState *cs, int irq, int level); +void gicv3_redist_lpi_pending(GICv3CPUState *cs, int irq, int level); +/** + * gicv3_redist_update_lpi: + * @cs: GICv3CPUState + * + * Scan the LPI pending table and recalculate the highest priority + * pending LPI and also the overall highest priority pending interrupt. + */ +void gicv3_redist_update_lpi(GICv3CPUState *cs); +/** + * gicv3_redist_update_lpi_only: + * @cs: GICv3CPUState + * + * Scan the LPI pending table and recalculate cs->hpplpi only, + * without calling gicv3_redist_update() to recalculate the overall + * highest priority pending interrupt. This should be called after + * an incoming migration has loaded new state. + */ +void gicv3_redist_update_lpi_only(GICv3CPUState *cs); void gicv3_redist_send_sgi(GICv3CPUState *cs, int grp, int irq, bool ns); void gicv3_init_cpuif(GICv3State *s); diff --git a/hw/intc/goldfish_pic.c b/hw/intc/goldfish_pic.c index e3b43a69f16..dfd53275f69 100644 --- a/hw/intc/goldfish_pic.c +++ b/hw/intc/goldfish_pic.c @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * Goldfish PIC * diff --git a/hw/intc/grlib_irqmp.c b/hw/intc/grlib_irqmp.c index 984334fa7bf..3bfe2544b7c 100644 --- a/hw/intc/grlib_irqmp.c +++ b/hw/intc/grlib_irqmp.c @@ -27,7 +27,6 @@ #include "qemu/osdep.h" #include "hw/irq.h" #include "hw/sysbus.h" -#include "cpu.h" #include "hw/qdev-properties.h" #include "hw/sparc/grlib.h" diff --git a/hw/intc/ibex_plic.c b/hw/intc/ibex_plic.c deleted file mode 100644 index c1b72fcab0b..00000000000 --- a/hw/intc/ibex_plic.c +++ /dev/null @@ -1,312 +0,0 @@ -/* - * QEMU RISC-V lowRISC Ibex PLIC - * - * Copyright (c) 2020 Western Digital - * - * Documentation avaliable: https://docs.opentitan.org/hw/ip/rv_plic/doc/ - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2 or later, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ - -#include "qemu/osdep.h" -#include "qemu/log.h" -#include "hw/qdev-properties.h" -#include "hw/core/cpu.h" -#include "hw/boards.h" -#include "hw/pci/msi.h" -#include "target/riscv/cpu_bits.h" -#include "target/riscv/cpu.h" -#include "hw/intc/ibex_plic.h" - -static bool addr_between(uint32_t addr, uint32_t base, uint32_t num) -{ - uint32_t end = base + (num * 0x04); - - if (addr >= base && addr < end) { - return true; - } - - return false; -} - -static void ibex_plic_irqs_set_pending(IbexPlicState *s, int irq, bool level) -{ - int pending_num = irq / 32; - - if (!level) { - /* - * If the level is low make sure we clear the hidden_pending. 
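/*
 * Editor's sketch (not part of the patch): the byte/bit layout that the
 * redistributor LPI helpers declared above (gicv3_redist_update_lpi() and
 * friends) scan.  Per the GICv3 architecture, LPI INTIDs start at 8192, the
 * configuration table holds one byte per LPI (priority in bits [7:2], enable
 * in bit 0 -- cf. LPI_PRIORITY_MASK / LPI_CTE_ENABLED) and the pending table
 * holds one bit per INTID.  Table contents below are invented.
 */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define LPI_MIN_INTID 8192

static bool lpi_is_pending(const uint8_t *pend_tbl, const uint8_t *cfg_tbl,
                           uint32_t intid, uint8_t *prio)
{
    uint8_t cfg  = cfg_tbl[intid - LPI_MIN_INTID];            /* one byte per LPI   */
    bool pending = pend_tbl[intid / 8] & (1 << (intid % 8));  /* one bit per INTID  */

    *prio = cfg & 0xfc;                                       /* priority field     */
    return pending && (cfg & 1);                              /* enabled + pending  */
}

int main(void)
{
    static uint8_t pend[2048], cfg[256];
    uint8_t prio;

    cfg[0] = 0xa0 | 1;                    /* LPI 8192: priority 0xa0, enabled */
    pend[8192 / 8] |= 1 << (8192 % 8);    /* mark it pending                  */

    printf("LPI 8192 fires: %d, priority 0x%02x\n",
           lpi_is_pending(pend, cfg, 8192, &prio), prio);
    return 0;
}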
- */ - s->hidden_pending[pending_num] &= ~(1 << (irq % 32)); - } - - if (s->claimed[pending_num] & 1 << (irq % 32)) { - /* - * The interrupt has been claimed, but not completed. - * The pending bit can't be set. - * Save the pending level for after the interrupt is completed. - */ - s->hidden_pending[pending_num] |= level << (irq % 32); - } else { - s->pending[pending_num] |= level << (irq % 32); - } -} - -static bool ibex_plic_irqs_pending(IbexPlicState *s, uint32_t context) -{ - int i; - uint32_t max_irq = 0; - uint32_t max_prio = s->threshold; - - for (i = 0; i < s->pending_num; i++) { - uint32_t irq_num = ctz64(s->pending[i]) + (i * 32); - - if (!(s->pending[i] & s->enable[i])) { - /* No pending and enabled IRQ */ - continue; - } - - if (s->priority[irq_num] > max_prio) { - max_irq = irq_num; - max_prio = s->priority[irq_num]; - } - } - - if (max_irq) { - s->claim = max_irq; - return true; - } - - return false; -} - -static void ibex_plic_update(IbexPlicState *s) -{ - CPUState *cpu; - int level, i; - - for (i = 0; i < s->num_cpus; i++) { - cpu = qemu_get_cpu(i); - - if (!cpu) { - continue; - } - - level = ibex_plic_irqs_pending(s, 0); - - riscv_cpu_update_mip(RISCV_CPU(cpu), MIP_MEIP, BOOL_TO_MASK(level)); - } -} - -static void ibex_plic_reset(DeviceState *dev) -{ - IbexPlicState *s = IBEX_PLIC(dev); - - s->threshold = 0x00000000; - s->claim = 0x00000000; -} - -static uint64_t ibex_plic_read(void *opaque, hwaddr addr, - unsigned int size) -{ - IbexPlicState *s = opaque; - int offset; - uint32_t ret = 0; - - if (addr_between(addr, s->pending_base, s->pending_num)) { - offset = (addr - s->pending_base) / 4; - ret = s->pending[offset]; - } else if (addr_between(addr, s->source_base, s->source_num)) { - qemu_log_mask(LOG_UNIMP, - "%s: Interrupt source mode not supported\n", __func__); - } else if (addr_between(addr, s->priority_base, s->priority_num)) { - offset = (addr - s->priority_base) / 4; - ret = s->priority[offset]; - } else if (addr_between(addr, s->enable_base, s->enable_num)) { - offset = (addr - s->enable_base) / 4; - ret = s->enable[offset]; - } else if (addr_between(addr, s->threshold_base, 1)) { - ret = s->threshold; - } else if (addr_between(addr, s->claim_base, 1)) { - int pending_num = s->claim / 32; - s->pending[pending_num] &= ~(1 << (s->claim % 32)); - - /* Set the interrupt as claimed, but not completed */ - s->claimed[pending_num] |= 1 << (s->claim % 32); - - /* Return the current claimed interrupt */ - ret = s->claim; - - /* Clear the claimed interrupt */ - s->claim = 0x00000000; - - /* Update the interrupt status after the claim */ - ibex_plic_update(s); - } - - return ret; -} - -static void ibex_plic_write(void *opaque, hwaddr addr, - uint64_t value, unsigned int size) -{ - IbexPlicState *s = opaque; - - if (addr_between(addr, s->pending_base, s->pending_num)) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Pending registers are read only\n", __func__); - } else if (addr_between(addr, s->source_base, s->source_num)) { - qemu_log_mask(LOG_UNIMP, - "%s: Interrupt source mode not supported\n", __func__); - } else if (addr_between(addr, s->priority_base, s->priority_num)) { - uint32_t irq = ((addr - s->priority_base) >> 2) + 1; - s->priority[irq] = value & 7; - ibex_plic_update(s); - } else if (addr_between(addr, s->enable_base, s->enable_num)) { - uint32_t enable_reg = (addr - s->enable_base) / 4; - - s->enable[enable_reg] = value; - } else if (addr_between(addr, s->threshold_base, 1)) { - s->threshold = value & 3; - } else if (addr_between(addr, s->claim_base, 1)) { - if 
(s->claim == value) { - /* Interrupt was completed */ - s->claim = 0; - } - if (s->claimed[value / 32] & 1 << (value % 32)) { - int pending_num = value / 32; - - /* This value was already claimed, clear it. */ - s->claimed[pending_num] &= ~(1 << (value % 32)); - - if (s->hidden_pending[pending_num] & (1 << (value % 32))) { - /* - * If the bit in hidden_pending is set then that means we - * received an interrupt between claiming and completing - * the interrupt that hasn't since been de-asserted. - * On hardware this would trigger an interrupt, so let's - * trigger one here as well. - */ - s->pending[pending_num] |= 1 << (value % 32); - } - } - } - - ibex_plic_update(s); -} - -static const MemoryRegionOps ibex_plic_ops = { - .read = ibex_plic_read, - .write = ibex_plic_write, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 4, - .max_access_size = 4 - } -}; - -static void ibex_plic_irq_request(void *opaque, int irq, int level) -{ - IbexPlicState *s = opaque; - - ibex_plic_irqs_set_pending(s, irq, level > 0); - ibex_plic_update(s); -} - -static Property ibex_plic_properties[] = { - DEFINE_PROP_UINT32("num-cpus", IbexPlicState, num_cpus, 1), - DEFINE_PROP_UINT32("num-sources", IbexPlicState, num_sources, 80), - - DEFINE_PROP_UINT32("pending-base", IbexPlicState, pending_base, 0), - DEFINE_PROP_UINT32("pending-num", IbexPlicState, pending_num, 3), - - DEFINE_PROP_UINT32("source-base", IbexPlicState, source_base, 0x0c), - DEFINE_PROP_UINT32("source-num", IbexPlicState, source_num, 3), - - DEFINE_PROP_UINT32("priority-base", IbexPlicState, priority_base, 0x18), - DEFINE_PROP_UINT32("priority-num", IbexPlicState, priority_num, 80), - - DEFINE_PROP_UINT32("enable-base", IbexPlicState, enable_base, 0x200), - DEFINE_PROP_UINT32("enable-num", IbexPlicState, enable_num, 3), - - DEFINE_PROP_UINT32("threshold-base", IbexPlicState, threshold_base, 0x20c), - - DEFINE_PROP_UINT32("claim-base", IbexPlicState, claim_base, 0x210), - DEFINE_PROP_END_OF_LIST(), -}; - -static void ibex_plic_init(Object *obj) -{ - IbexPlicState *s = IBEX_PLIC(obj); - - memory_region_init_io(&s->mmio, obj, &ibex_plic_ops, s, - TYPE_IBEX_PLIC, 0x400); - sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); -} - -static void ibex_plic_realize(DeviceState *dev, Error **errp) -{ - IbexPlicState *s = IBEX_PLIC(dev); - int i; - - s->pending = g_new0(uint32_t, s->pending_num); - s->hidden_pending = g_new0(uint32_t, s->pending_num); - s->claimed = g_new0(uint32_t, s->pending_num); - s->source = g_new0(uint32_t, s->source_num); - s->priority = g_new0(uint32_t, s->priority_num); - s->enable = g_new0(uint32_t, s->enable_num); - - qdev_init_gpio_in(dev, ibex_plic_irq_request, s->num_sources); - - /* - * We can't allow the supervisor to control SEIP as this would allow the - * supervisor to clear a pending external interrupt which will result in - * a lost interrupt in the case a PLIC is attached. The SEIP bit must be - * hardware controlled when a PLIC is attached. 
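/*
 * Editor's sketch (not part of the patch): the claim/complete bookkeeping the
 * removed Ibex PLIC model implements, reduced to a single interrupt line.  A
 * level that arrives between claim and completion is parked in hidden_pending
 * and replayed when the handler writes the completion, which is what the
 * comment in the write handler above describes.
 */
#include <stdbool.h>
#include <stdio.h>

struct irq_state { bool pending, claimed, hidden_pending; };

static void set_level(struct irq_state *s, bool level)
{
    if (!level) {
        s->hidden_pending = false;
    } else if (s->claimed) {
        s->hidden_pending = true;   /* claimed but not completed: park it */
    } else {
        s->pending = true;
    }
}

static void claim(struct irq_state *s)    { s->pending = false; s->claimed = true; }

static void complete(struct irq_state *s)
{
    s->claimed = false;
    if (s->hidden_pending) {        /* line re-asserted while claimed */
        s->pending = true;
    }
}

int main(void)
{
    struct irq_state s = {0};

    set_level(&s, true);            /* interrupt fires                 */
    claim(&s);                      /* handler claims it               */
    set_level(&s, true);            /* fires again before completion   */
    complete(&s);                   /* completion replays the level    */
    printf("pending after complete: %d\n", s.pending);  /* 1 */
    return 0;
}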
- */ - MachineState *ms = MACHINE(qdev_get_machine()); - unsigned int smp_cpus = ms->smp.cpus; - for (i = 0; i < smp_cpus; i++) { - RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(i)); - if (riscv_cpu_claim_interrupts(cpu, MIP_SEIP) < 0) { - error_report("SEIP already claimed"); - exit(1); - } - } - - msi_nonbroken = true; -} - -static void ibex_plic_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->reset = ibex_plic_reset; - device_class_set_props(dc, ibex_plic_properties); - dc->realize = ibex_plic_realize; -} - -static const TypeInfo ibex_plic_info = { - .name = TYPE_IBEX_PLIC, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(IbexPlicState), - .instance_init = ibex_plic_init, - .class_init = ibex_plic_class_init, -}; - -static void ibex_plic_register_types(void) -{ - type_register_static(&ibex_plic_info); -} - -type_init(ibex_plic_register_types) diff --git a/hw/intc/imx_gpcv2.c b/hw/intc/imx_gpcv2.c index 17007a40780..237d5f97eba 100644 --- a/hw/intc/imx_gpcv2.c +++ b/hw/intc/imx_gpcv2.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "hw/intc/imx_gpcv2.h" #include "migration/vmstate.h" -#include "qemu/log.h" #include "qemu/module.h" #define GPC_PU_PGC_SW_PUP_REQ 0x0f8 diff --git a/hw/intc/lm32_pic.c b/hw/intc/lm32_pic.c deleted file mode 100644 index 991a90bc99e..00000000000 --- a/hw/intc/lm32_pic.c +++ /dev/null @@ -1,195 +0,0 @@ -/* - * LatticeMico32 CPU interrupt controller logic. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- */ - -#include "qemu/osdep.h" - -#include "migration/vmstate.h" -#include "monitor/monitor.h" -#include "qemu/module.h" -#include "hw/sysbus.h" -#include "trace.h" -#include "hw/lm32/lm32_pic.h" -#include "hw/intc/intc.h" -#include "hw/irq.h" -#include "qom/object.h" - -#define TYPE_LM32_PIC "lm32-pic" -OBJECT_DECLARE_SIMPLE_TYPE(LM32PicState, LM32_PIC) - -struct LM32PicState { - SysBusDevice parent_obj; - - qemu_irq parent_irq; - uint32_t im; /* interrupt mask */ - uint32_t ip; /* interrupt pending */ - uint32_t irq_state; - - /* statistics */ - uint64_t stats_irq_count[32]; -}; - -static void update_irq(LM32PicState *s) -{ - s->ip |= s->irq_state; - - if (s->ip & s->im) { - trace_lm32_pic_raise_irq(); - qemu_irq_raise(s->parent_irq); - } else { - trace_lm32_pic_lower_irq(); - qemu_irq_lower(s->parent_irq); - } -} - -static void irq_handler(void *opaque, int irq, int level) -{ - LM32PicState *s = opaque; - - assert(irq < 32); - trace_lm32_pic_interrupt(irq, level); - - if (level) { - s->irq_state |= (1 << irq); - s->stats_irq_count[irq]++; - } else { - s->irq_state &= ~(1 << irq); - } - - update_irq(s); -} - -void lm32_pic_set_im(DeviceState *d, uint32_t im) -{ - LM32PicState *s = LM32_PIC(d); - - trace_lm32_pic_set_im(im); - s->im = im; - - update_irq(s); -} - -void lm32_pic_set_ip(DeviceState *d, uint32_t ip) -{ - LM32PicState *s = LM32_PIC(d); - - trace_lm32_pic_set_ip(ip); - - /* ack interrupt */ - s->ip &= ~ip; - - update_irq(s); -} - -uint32_t lm32_pic_get_im(DeviceState *d) -{ - LM32PicState *s = LM32_PIC(d); - - trace_lm32_pic_get_im(s->im); - return s->im; -} - -uint32_t lm32_pic_get_ip(DeviceState *d) -{ - LM32PicState *s = LM32_PIC(d); - - trace_lm32_pic_get_ip(s->ip); - return s->ip; -} - -static void pic_reset(DeviceState *d) -{ - LM32PicState *s = LM32_PIC(d); - int i; - - s->im = 0; - s->ip = 0; - s->irq_state = 0; - for (i = 0; i < 32; i++) { - s->stats_irq_count[i] = 0; - } -} - -static bool lm32_get_statistics(InterruptStatsProvider *obj, - uint64_t **irq_counts, unsigned int *nb_irqs) -{ - LM32PicState *s = LM32_PIC(obj); - *irq_counts = s->stats_irq_count; - *nb_irqs = ARRAY_SIZE(s->stats_irq_count); - return true; -} - -static void lm32_print_info(InterruptStatsProvider *obj, Monitor *mon) -{ - LM32PicState *s = LM32_PIC(obj); - monitor_printf(mon, "lm32-pic: im=%08x ip=%08x irq_state=%08x\n", - s->im, s->ip, s->irq_state); -} - -static void lm32_pic_init(Object *obj) -{ - DeviceState *dev = DEVICE(obj); - LM32PicState *s = LM32_PIC(obj); - SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - - qdev_init_gpio_in(dev, irq_handler, 32); - sysbus_init_irq(sbd, &s->parent_irq); -} - -static const VMStateDescription vmstate_lm32_pic = { - .name = "lm32-pic", - .version_id = 2, - .minimum_version_id = 2, - .fields = (VMStateField[]) { - VMSTATE_UINT32(im, LM32PicState), - VMSTATE_UINT32(ip, LM32PicState), - VMSTATE_UINT32(irq_state, LM32PicState), - VMSTATE_UINT64_ARRAY(stats_irq_count, LM32PicState, 32), - VMSTATE_END_OF_LIST() - } -}; - -static void lm32_pic_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(klass); - - dc->reset = pic_reset; - dc->vmsd = &vmstate_lm32_pic; - ic->get_statistics = lm32_get_statistics; - ic->print_info = lm32_print_info; -} - -static const TypeInfo lm32_pic_info = { - .name = TYPE_LM32_PIC, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(LM32PicState), - .instance_init = lm32_pic_init, - .class_init = lm32_pic_class_init, - .interfaces 
= (InterfaceInfo[]) { - { TYPE_INTERRUPT_STATS_PROVIDER }, - { } - }, -}; - -static void lm32_pic_register_types(void) -{ - type_register_static(&lm32_pic_info); -} - -type_init(lm32_pic_register_types) diff --git a/hw/intc/m68k_irqc.c b/hw/intc/m68k_irqc.c index 2133d2a698a..0c515e4ecb7 100644 --- a/hw/intc/m68k_irqc.c +++ b/hw/intc/m68k_irqc.c @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * QEMU Motorola 680x0 IRQ Controller * diff --git a/hw/intc/meson.build b/hw/intc/meson.build index 1c299039f65..c89d2ca180e 100644 --- a/hw/intc/meson.build +++ b/hw/intc/meson.build @@ -8,16 +8,15 @@ softmmu_ss.add(when: 'CONFIG_ARM_GIC', if_true: files( 'arm_gicv3_dist.c', 'arm_gicv3_its_common.c', 'arm_gicv3_redist.c', + 'arm_gicv3_its.c', )) softmmu_ss.add(when: 'CONFIG_ETRAXFS', if_true: files('etraxfs_pic.c')) softmmu_ss.add(when: 'CONFIG_HEATHROW_PIC', if_true: files('heathrow_pic.c')) softmmu_ss.add(when: 'CONFIG_I8259', if_true: files('i8259_common.c', 'i8259.c')) softmmu_ss.add(when: 'CONFIG_IMX', if_true: files('imx_avic.c', 'imx_gpcv2.c')) softmmu_ss.add(when: 'CONFIG_IOAPIC', if_true: files('ioapic_common.c')) -softmmu_ss.add(when: 'CONFIG_LM32_DEVICES', if_true: files('lm32_pic.c')) softmmu_ss.add(when: 'CONFIG_OPENPIC', if_true: files('openpic.c')) softmmu_ss.add(when: 'CONFIG_PL190', if_true: files('pl190.c')) -softmmu_ss.add(when: 'CONFIG_PUV3', if_true: files('puv3_intc.c')) softmmu_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_gic.c')) softmmu_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_intctl.c')) softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_intc.c')) @@ -33,7 +32,6 @@ specific_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m_nvic.c')) specific_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('aspeed_vic.c')) specific_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210_gic.c', 'exynos4210_combiner.c')) specific_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_irqmp.c')) -specific_ss.add(when: 'CONFIG_IBEX', if_true: files('ibex_plic.c')) specific_ss.add(when: 'CONFIG_IOAPIC', if_true: files('ioapic.c')) specific_ss.add(when: 'CONFIG_LOONGSON_LIOINTC', if_true: files('loongson_liointc.c')) specific_ss.add(when: 'CONFIG_MIPS_CPS', if_true: files('mips_gic.c')) @@ -48,7 +46,7 @@ specific_ss.add(when: 'CONFIG_RX_ICU', if_true: files('rx_icu.c')) specific_ss.add(when: 'CONFIG_S390_FLIC', if_true: files('s390_flic.c')) specific_ss.add(when: 'CONFIG_S390_FLIC_KVM', if_true: files('s390_flic_kvm.c')) specific_ss.add(when: 'CONFIG_SH_INTC', if_true: files('sh_intc.c')) -specific_ss.add(when: 'CONFIG_SIFIVE_CLINT', if_true: files('sifive_clint.c')) +specific_ss.add(when: 'CONFIG_RISCV_ACLINT', if_true: files('riscv_aclint.c')) specific_ss.add(when: 'CONFIG_SIFIVE_PLIC', if_true: files('sifive_plic.c')) specific_ss.add(when: 'CONFIG_XICS', if_true: files('xics.c')) specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_XICS'], diff --git a/hw/intc/ompic.c b/hw/intc/ompic.c index 1731a106838..1f10314807d 100644 --- a/hw/intc/ompic.c +++ b/hw/intc/ompic.c @@ -7,7 +7,6 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/module.h" #include "qapi/error.h" #include "hw/irq.h" diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c index 65970e1b378..49504e740f3 100644 --- a/hw/intc/openpic.c +++ b/hw/intc/openpic.c @@ -25,12 +25,8 @@ /* * * Based on OpenPic implementations: - * - Intel GW80314 I/O companion chip developer's manual * - Motorola MPC8245 & MPC8540 user manuals. 
- * - Motorola MCP750 (aka Raven) programmer manual. - * - Motorola Harrier programmer manuel - * - * Serial interrupts, as implemented in Raven chipset are not supported yet. + * - Motorola Harrier programmer manual * */ @@ -47,12 +43,11 @@ #include "qapi/error.h" #include "qemu/bitops.h" #include "qapi/qmp/qerror.h" -#include "qemu/log.h" #include "qemu/module.h" #include "qemu/timer.h" #include "qemu/error-report.h" -//#define DEBUG_OPENPIC +/* #define DEBUG_OPENPIC */ #ifdef DEBUG_OPENPIC static const int debug_openpic = 1; @@ -123,7 +118,8 @@ static FslMpicInfo fsl_mpic_42 = { #define ILR_INTTGT_CINT 0x01 /* critical */ #define ILR_INTTGT_MCP 0x02 /* machine check */ -/* The currently supported INTTGT values happen to be the same as QEMU's +/* + * The currently supported INTTGT values happen to be the same as QEMU's * openpic output codes, but don't depend on this. The output codes * could change (unlikely, but...) or support could be added for * more INTTGT values. @@ -182,10 +178,11 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr, uint32_t val, int idx); static void openpic_reset(DeviceState *d); -/* Convert between openpic clock ticks and nanosecs. In the hardware the clock - frequency is driven by board inputs to the PIC which the PIC would then - divide by 4 or 8. For now hard code to 25MZ. -*/ +/* + * Convert between openpic clock ticks and nanosecs. In the hardware the clock + * frequency is driven by board inputs to the PIC which the PIC would then + * divide by 4 or 8. For now hard code to 25MZ. + */ #define OPENPIC_TIMER_FREQ_MHZ 25 #define OPENPIC_TIMER_NS_PER_TICK (1000 / OPENPIC_TIMER_FREQ_MHZ) static inline uint64_t ns_to_ticks(uint64_t ns) @@ -258,7 +255,8 @@ static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ, __func__, src->output, n_IRQ, active, was_active, dst->outputs_active[src->output]); - /* On Freescale MPIC, critical interrupts ignore priority, + /* + * On Freescale MPIC, critical interrupts ignore priority, * IACK, EOI, etc. Before MPIC v4.1 they also ignore * masking. */ @@ -281,7 +279,8 @@ static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ, priority = IVPR_PRIORITY(src->ivpr); - /* Even if the interrupt doesn't have enough priority, + /* + * Even if the interrupt doesn't have enough priority, * it is still raised, in case ctpr is lowered later. */ if (active) { @@ -413,7 +412,8 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level) } if (src->output != OPENPIC_OUTPUT_INT) { - /* Edge-triggered interrupts shouldn't be used + /* + * Edge-triggered interrupts shouldn't be used * with non-INT delivery, but just in case, * try to make it do something sane rather than * cause an interrupt storm. This is close to @@ -506,7 +506,8 @@ static inline void write_IRQreg_ivpr(OpenPICState *opp, int n_IRQ, uint32_t val) { uint32_t mask; - /* NOTE when implementing newer FSL MPIC models: starting with v4.0, + /* + * NOTE when implementing newer FSL MPIC models: starting with v4.0, * the polarity bit is read-only on internal interrupts. */ mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK | @@ -516,7 +517,8 @@ static inline void write_IRQreg_ivpr(OpenPICState *opp, int n_IRQ, uint32_t val) opp->src[n_IRQ].ivpr = (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask); - /* For FSL internal interrupts, The sense bit is reserved and zero, + /* + * For FSL internal interrupts, The sense bit is reserved and zero, * and the interrupt is always level-triggered. 
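/*
 * Editor's sketch (not part of the patch): the tick <-> nanosecond conversion
 * behind OPENPIC_TIMER_FREQ_MHZ / ns_to_ticks() above.  With the clock
 * hard-coded to 25 MHz (the "25MZ" in the comment), one timer tick is
 * 1000 / 25 = 40 ns, so a TCCR count of 250000 is a 10 ms QEMU timer
 * deadline.  The sample values are made up.
 */
#include <stdint.h>
#include <stdio.h>

#define TIMER_FREQ_MHZ 25
#define NS_PER_TICK    (1000 / TIMER_FREQ_MHZ)   /* 40 ns per tick */

static uint64_t ticks_to_ns(uint64_t ticks) { return ticks * NS_PER_TICK; }
static uint64_t ns_to_ticks(uint64_t ns)    { return ns / NS_PER_TICK; }

int main(void)
{
    printf("%llu ns, %llu ticks\n",
           (unsigned long long)ticks_to_ns(250000),    /* 10000000 ns  */
           (unsigned long long)ns_to_ticks(1000000));  /* 25000 ticks  */
    return 0;
}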
Timers and IPIs * have no sense or polarity bits, and are edge-triggered. */ @@ -700,16 +702,20 @@ static void qemu_timer_cb(void *opaque) openpic_set_irq(opp, n_IRQ, 0); } -/* If enabled is true, arranges for an interrupt to be raised val clocks into - the future, if enabled is false cancels the timer. */ +/* + * If enabled is true, arranges for an interrupt to be raised val clocks into + * the future, if enabled is false cancels the timer. + */ static void openpic_tmr_set_tmr(OpenPICTimer *tmr, uint32_t val, bool enabled) { uint64_t ns = ticks_to_ns(val & ~TCCR_TOG); - /* A count of zero causes a timer to be set to expire immediately. This - effectively stops the simulation since the timer is constantly expiring - which prevents guest code execution, so we don't honor that - configuration. On real hardware, this situation would generate an - interrupt on every clock cycle if the interrupt was unmasked. */ + /* + * A count of zero causes a timer to be set to expire immediately. This + * effectively stops the simulation since the timer is constantly expiring + * which prevents guest code execution, so we don't honor that + * configuration. On real hardware, this situation would generate an + * interrupt on every clock cycle if the interrupt was unmasked. + */ if ((ns == 0) || !enabled) { tmr->qemu_timer_active = false; tmr->tccr = tmr->tccr & TCCR_TOG; @@ -722,8 +728,10 @@ static void openpic_tmr_set_tmr(OpenPICTimer *tmr, uint32_t val, bool enabled) } } -/* Returns the currrent tccr value, i.e., timer value (in clocks) with - appropriate TOG. */ +/* + * Returns the currrent tccr value, i.e., timer value (in clocks) with + * appropriate TOG. + */ static uint64_t openpic_tmr_get_timer(OpenPICTimer *tmr) { uint64_t retval; @@ -1277,6 +1285,15 @@ static void openpic_reset(DeviceState *d) break; } + /* Mask all IPI interrupts for Freescale OpenPIC */ + if ((opp->model == OPENPIC_MODEL_FSL_MPIC_20) || + (opp->model == OPENPIC_MODEL_FSL_MPIC_42)) { + if (i >= opp->irq_ipi0 && i < opp->irq_tim0) { + write_IRQreg_idr(opp, i, 0); + continue; + } + } + write_IRQreg_idr(opp, i, opp->idr_reset); } /* Initialise IRQ destinations */ @@ -1305,7 +1322,7 @@ static void openpic_reset(DeviceState *d) typedef struct MemReg { const char *name; MemoryRegionOps const *ops; - hwaddr start_addr; + hwaddr start_addr; ram_addr_t size; } MemReg; @@ -1556,28 +1573,6 @@ static void openpic_realize(DeviceState *dev, Error **errp) break; - case OPENPIC_MODEL_RAVEN: - opp->nb_irqs = RAVEN_MAX_EXT; - opp->vid = VID_REVISION_1_3; - opp->vir = VIR_GENERIC; - opp->vector_mask = 0xFF; - opp->tfrr_reset = 4160000; - opp->ivpr_reset = IVPR_MASK_MASK | IVPR_MODE_MASK; - opp->idr_reset = 0; - opp->max_irq = RAVEN_MAX_IRQ; - opp->irq_ipi0 = RAVEN_IPI_IRQ; - opp->irq_tim0 = RAVEN_TMR_IRQ; - opp->brr1 = -1; - opp->mpic_mode_mask = GCR_MODE_MIXED; - - if (opp->nb_cpus != 1) { - error_setg(errp, "Only UP supported today"); - return; - } - - map_list(opp, list_le, &list_count); - break; - case OPENPIC_MODEL_KEYLARGO: opp->nb_irqs = KEYLARGO_MAX_EXT; opp->vid = VID_REVISION_1_2; diff --git a/hw/intc/openpic_kvm.c b/hw/intc/openpic_kvm.c index e1a39e33cb1..557dd0c2bf5 100644 --- a/hw/intc/openpic_kvm.c +++ b/hw/intc/openpic_kvm.c @@ -24,9 +24,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include -#include "exec/address-spaces.h" #include "hw/ppc/openpic.h" #include "hw/ppc/openpic_kvm.h" #include "hw/pci/msi.h" @@ -236,6 +234,7 @@ static void kvm_openpic_realize(DeviceState *dev, Error **errp) 
opp->mem_listener.region_add = kvm_openpic_region_add; opp->mem_listener.region_del = kvm_openpic_region_del; + opp->mem_listener.name = "openpic-kvm"; memory_listener_register(&opp->mem_listener, &address_space_memory); /* indicate pic capabilities */ diff --git a/hw/intc/ppc-uic.c b/hw/intc/ppc-uic.c index 7171de7b355..60013f2dde3 100644 --- a/hw/intc/ppc-uic.c +++ b/hw/intc/ppc-uic.c @@ -23,7 +23,7 @@ */ #include "qemu/osdep.h" -#include "include/hw/intc/ppc-uic.h" +#include "hw/intc/ppc-uic.h" #include "hw/irq.h" #include "cpu.h" #include "hw/ppc/ppc.h" diff --git a/hw/intc/puv3_intc.c b/hw/intc/puv3_intc.c deleted file mode 100644 index 65226f5e7c4..00000000000 --- a/hw/intc/puv3_intc.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - * INTC device simulation in PKUnity SoC - * - * Copyright (C) 2010-2012 Guan Xuetao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation, or any later version. - * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/sysbus.h" -#include "qom/object.h" - -#undef DEBUG_PUV3 -#include "hw/unicore32/puv3.h" -#include "qemu/module.h" -#include "qemu/log.h" - -#define TYPE_PUV3_INTC "puv3_intc" -OBJECT_DECLARE_SIMPLE_TYPE(PUV3INTCState, PUV3_INTC) - -struct PUV3INTCState { - SysBusDevice parent_obj; - - MemoryRegion iomem; - qemu_irq parent_irq; - - uint32_t reg_ICMR; - uint32_t reg_ICPR; -}; - -/* Update interrupt status after enabled or pending bits have been changed. */ -static void puv3_intc_update(PUV3INTCState *s) -{ - if (s->reg_ICMR & s->reg_ICPR) { - qemu_irq_raise(s->parent_irq); - } else { - qemu_irq_lower(s->parent_irq); - } -} - -/* Process a change in an external INTC input. 
*/ -static void puv3_intc_handler(void *opaque, int irq, int level) -{ - PUV3INTCState *s = opaque; - - DPRINTF("irq 0x%x, level 0x%x\n", irq, level); - if (level) { - s->reg_ICPR |= (1 << irq); - } else { - s->reg_ICPR &= ~(1 << irq); - } - puv3_intc_update(s); -} - -static uint64_t puv3_intc_read(void *opaque, hwaddr offset, - unsigned size) -{ - PUV3INTCState *s = opaque; - uint32_t ret = 0; - - switch (offset) { - case 0x04: /* INTC_ICMR */ - ret = s->reg_ICMR; - break; - case 0x0c: /* INTC_ICIP */ - ret = s->reg_ICPR; /* the same value with ICPR */ - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad read offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } - DPRINTF("offset 0x%x, value 0x%x\n", offset, ret); - return ret; -} - -static void puv3_intc_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) -{ - PUV3INTCState *s = opaque; - - DPRINTF("offset 0x%x, value 0x%x\n", offset, value); - switch (offset) { - case 0x00: /* INTC_ICLR */ - case 0x14: /* INTC_ICCR */ - break; - case 0x04: /* INTC_ICMR */ - s->reg_ICMR = value; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad write offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - return; - } - puv3_intc_update(s); -} - -static const MemoryRegionOps puv3_intc_ops = { - .read = puv3_intc_read, - .write = puv3_intc_write, - .impl = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void puv3_intc_realize(DeviceState *dev, Error **errp) -{ - PUV3INTCState *s = PUV3_INTC(dev); - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - - qdev_init_gpio_in(dev, puv3_intc_handler, PUV3_IRQS_NR); - sysbus_init_irq(sbd, &s->parent_irq); - - s->reg_ICMR = 0; - s->reg_ICPR = 0; - - memory_region_init_io(&s->iomem, OBJECT(s), &puv3_intc_ops, s, "puv3_intc", - PUV3_REGS_OFFSET); - sysbus_init_mmio(sbd, &s->iomem); -} - -static void puv3_intc_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - dc->realize = puv3_intc_realize; -} - -static const TypeInfo puv3_intc_info = { - .name = TYPE_PUV3_INTC, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(PUV3INTCState), - .class_init = puv3_intc_class_init, -}; - -static void puv3_intc_register_type(void) -{ - type_register_static(&puv3_intc_info); -} - -type_init(puv3_intc_register_type) diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c new file mode 100644 index 00000000000..f1a5d3d284f --- /dev/null +++ b/hw/intc/riscv_aclint.c @@ -0,0 +1,460 @@ +/* + * RISC-V ACLINT (Advanced Core Local Interruptor) + * URL: https://github.com/riscv/riscv-aclint + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * Copyright (c) 2017 SiFive, Inc. + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * + * This provides real-time clock, timer and interprocessor interrupts. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "hw/sysbus.h" +#include "target/riscv/cpu.h" +#include "hw/qdev-properties.h" +#include "hw/intc/riscv_aclint.h" +#include "qemu/timer.h" +#include "hw/irq.h" + +typedef struct riscv_aclint_mtimer_callback { + RISCVAclintMTimerState *s; + int num; +} riscv_aclint_mtimer_callback; + +static uint64_t cpu_riscv_read_rtc(uint32_t timebase_freq) +{ + return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + timebase_freq, NANOSECONDS_PER_SECOND); +} + +/* + * Called when timecmp is written to update the QEMU timer or immediately + * trigger timer interrupt if mtimecmp <= current timer value. + */ +static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer, + RISCVCPU *cpu, + int hartid, + uint64_t value, + uint32_t timebase_freq) +{ + uint64_t next; + uint64_t diff; + + uint64_t rtc_r = cpu_riscv_read_rtc(timebase_freq); + + cpu->env.timecmp = value; + if (cpu->env.timecmp <= rtc_r) { + /* + * If we're setting an MTIMECMP value in the "past", + * immediately raise the timer interrupt + */ + qemu_irq_raise(mtimer->timer_irqs[hartid - mtimer->hartid_base]); + return; + } + + /* otherwise, set up the future timer interrupt */ + qemu_irq_lower(mtimer->timer_irqs[hartid - mtimer->hartid_base]); + diff = cpu->env.timecmp - rtc_r; + /* back to ns (note args switched in muldiv64) */ + uint64_t ns_diff = muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq); + + /* + * check if ns_diff overflowed and check if the addition would potentially + * overflow + */ + if ((NANOSECONDS_PER_SECOND > timebase_freq && ns_diff < diff) || + ns_diff > INT64_MAX) { + next = INT64_MAX; + } else { + /* + * as it is very unlikely qemu_clock_get_ns will return a value + * greater than INT64_MAX, no additional check is needed for an + * unsigned integer overflow. + */ + next = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ns_diff; + /* + * if ns_diff is INT64_MAX next may still be outside the range + * of a signed integer. + */ + next = MIN(next, INT64_MAX); + } + + timer_mod(cpu->env.timer, next); +} + +/* + * Callback used when the timer set using timer_mod expires. + * Should raise the timer interrupt line + */ +static void riscv_aclint_mtimer_cb(void *opaque) +{ + riscv_aclint_mtimer_callback *state = opaque; + + qemu_irq_raise(state->s->timer_irqs[state->num]); +} + +/* CPU read MTIMER register */ +static uint64_t riscv_aclint_mtimer_read(void *opaque, hwaddr addr, + unsigned size) +{ + RISCVAclintMTimerState *mtimer = opaque; + + if (addr >= mtimer->timecmp_base && + addr < (mtimer->timecmp_base + (mtimer->num_harts << 3))) { + size_t hartid = mtimer->hartid_base + + ((addr - mtimer->timecmp_base) >> 3); + CPUState *cpu = qemu_get_cpu(hartid); + CPURISCVState *env = cpu ? 
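/*
 * Editor's sketch (not part of the patch): the arithmetic
 * riscv_aclint_mtimer_write_timecmp() above performs when the guest programs
 * a future mtimecmp.  The 10 MHz timebase and the sample values are assumed
 * for illustration only, and muldiv64() is re-implemented with a GCC/Clang
 * __int128 so the fragment compiles on its own.
 */
#include <stdint.h>
#include <stdio.h>

#define NANOSECONDS_PER_SECOND 1000000000ULL

static uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    return (uint64_t)(((unsigned __int128)a * b) / c);
}

int main(void)
{
    uint32_t freq     = 10000000;         /* 10 MHz timebase (assumed)      */
    uint64_t now_ns   = 5000000000ULL;    /* current QEMU_CLOCK_VIRTUAL     */
    uint64_t timecmp  = 50010000ULL;      /* value the guest just wrote     */

    uint64_t rtc = muldiv64(now_ns, freq, NANOSECONDS_PER_SECOND); /* 50000000 ticks */
    if (timecmp <= rtc) {
        puts("comparand is in the past: raise MTIP immediately");
        return 0;
    }
    uint64_t ns_diff = muldiv64(timecmp - rtc, NANOSECONDS_PER_SECOND, freq);
    printf("arm the QEMU timer for now + %llu ns\n",   /* 1000000 ns = 1 ms */
           (unsigned long long)ns_diff);
    return 0;
}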
cpu->env_ptr : NULL; + if (!env) { + qemu_log_mask(LOG_GUEST_ERROR, + "aclint-mtimer: invalid hartid: %zu", hartid); + } else if ((addr & 0x7) == 0) { + /* timecmp_lo */ + uint64_t timecmp = env->timecmp; + return timecmp & 0xFFFFFFFF; + } else if ((addr & 0x7) == 4) { + /* timecmp_hi */ + uint64_t timecmp = env->timecmp; + return (timecmp >> 32) & 0xFFFFFFFF; + } else { + qemu_log_mask(LOG_UNIMP, + "aclint-mtimer: invalid read: %08x", (uint32_t)addr); + return 0; + } + } else if (addr == mtimer->time_base) { + /* time_lo */ + return cpu_riscv_read_rtc(mtimer->timebase_freq) & 0xFFFFFFFF; + } else if (addr == mtimer->time_base + 4) { + /* time_hi */ + return (cpu_riscv_read_rtc(mtimer->timebase_freq) >> 32) & 0xFFFFFFFF; + } + + qemu_log_mask(LOG_UNIMP, + "aclint-mtimer: invalid read: %08x", (uint32_t)addr); + return 0; +} + +/* CPU write MTIMER register */ +static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + RISCVAclintMTimerState *mtimer = opaque; + + if (addr >= mtimer->timecmp_base && + addr < (mtimer->timecmp_base + (mtimer->num_harts << 3))) { + size_t hartid = mtimer->hartid_base + + ((addr - mtimer->timecmp_base) >> 3); + CPUState *cpu = qemu_get_cpu(hartid); + CPURISCVState *env = cpu ? cpu->env_ptr : NULL; + if (!env) { + qemu_log_mask(LOG_GUEST_ERROR, + "aclint-mtimer: invalid hartid: %zu", hartid); + } else if ((addr & 0x7) == 0) { + /* timecmp_lo */ + uint64_t timecmp_hi = env->timecmp >> 32; + riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu), hartid, + timecmp_hi << 32 | (value & 0xFFFFFFFF), + mtimer->timebase_freq); + return; + } else if ((addr & 0x7) == 4) { + /* timecmp_hi */ + uint64_t timecmp_lo = env->timecmp; + riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu), hartid, + value << 32 | (timecmp_lo & 0xFFFFFFFF), + mtimer->timebase_freq); + } else { + qemu_log_mask(LOG_UNIMP, + "aclint-mtimer: invalid timecmp write: %08x", + (uint32_t)addr); + } + return; + } else if (addr == mtimer->time_base) { + /* time_lo */ + qemu_log_mask(LOG_UNIMP, + "aclint-mtimer: time_lo write not implemented"); + return; + } else if (addr == mtimer->time_base + 4) { + /* time_hi */ + qemu_log_mask(LOG_UNIMP, + "aclint-mtimer: time_hi write not implemented"); + return; + } + + qemu_log_mask(LOG_UNIMP, + "aclint-mtimer: invalid write: %08x", (uint32_t)addr); +} + +static const MemoryRegionOps riscv_aclint_mtimer_ops = { + .read = riscv_aclint_mtimer_read, + .write = riscv_aclint_mtimer_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8 + } +}; + +static Property riscv_aclint_mtimer_properties[] = { + DEFINE_PROP_UINT32("hartid-base", RISCVAclintMTimerState, + hartid_base, 0), + DEFINE_PROP_UINT32("num-harts", RISCVAclintMTimerState, num_harts, 1), + DEFINE_PROP_UINT32("timecmp-base", RISCVAclintMTimerState, + timecmp_base, RISCV_ACLINT_DEFAULT_MTIMECMP), + DEFINE_PROP_UINT32("time-base", RISCVAclintMTimerState, + time_base, RISCV_ACLINT_DEFAULT_MTIME), + DEFINE_PROP_UINT32("aperture-size", RISCVAclintMTimerState, + aperture_size, RISCV_ACLINT_DEFAULT_MTIMER_SIZE), + DEFINE_PROP_UINT32("timebase-freq", RISCVAclintMTimerState, + timebase_freq, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp) +{ + RISCVAclintMTimerState *s = RISCV_ACLINT_MTIMER(dev); + int i; + + memory_region_init_io(&s->mmio, OBJECT(dev), &riscv_aclint_mtimer_ops, + s, TYPE_RISCV_ACLINT_MTIMER, s->aperture_size); + 
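/*
 * Editor's sketch (not part of the patch): how an RV32 guest is expected to
 * drive the timecmp_lo/timecmp_hi halves handled above.  The write ordering
 * follows the sequence recommended by the RISC-V privileged specification so
 * that no intermediate 64-bit value is smaller than either the old or the
 * new comparand (which would trigger a spurious MTIP).  MTIMECMP_BASE is a
 * placeholder address, not something defined by this patch.
 */
#include <stdint.h>

#define MTIMECMP_BASE 0x02004000UL   /* hypothetical per-hart mtimecmp */

static void set_mtimecmp_rv32(uint64_t new_cmp)
{
    volatile uint32_t *lo = (volatile uint32_t *)(uintptr_t)MTIMECMP_BASE;
    volatile uint32_t *hi = (volatile uint32_t *)(uintptr_t)(MTIMECMP_BASE + 4);

    *lo = 0xffffffff;                 /* no smaller than the old value */
    *hi = (uint32_t)(new_cmp >> 32);  /* no smaller than the new value */
    *lo = (uint32_t)new_cmp;          /* final 64-bit value in place   */
}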
sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio); + + s->timer_irqs = g_malloc(sizeof(qemu_irq) * s->num_harts); + qdev_init_gpio_out(dev, s->timer_irqs, s->num_harts); + + /* Claim timer interrupt bits */ + for (i = 0; i < s->num_harts; i++) { + RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(s->hartid_base + i)); + if (riscv_cpu_claim_interrupts(cpu, MIP_MTIP) < 0) { + error_report("MTIP already claimed"); + exit(1); + } + } +} + +static void riscv_aclint_mtimer_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = riscv_aclint_mtimer_realize; + device_class_set_props(dc, riscv_aclint_mtimer_properties); +} + +static const TypeInfo riscv_aclint_mtimer_info = { + .name = TYPE_RISCV_ACLINT_MTIMER, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(RISCVAclintMTimerState), + .class_init = riscv_aclint_mtimer_class_init, +}; + +/* + * Create ACLINT MTIMER device. + */ +DeviceState *riscv_aclint_mtimer_create(hwaddr addr, hwaddr size, + uint32_t hartid_base, uint32_t num_harts, + uint32_t timecmp_base, uint32_t time_base, uint32_t timebase_freq, + bool provide_rdtime) +{ + int i; + DeviceState *dev = qdev_new(TYPE_RISCV_ACLINT_MTIMER); + + assert(num_harts <= RISCV_ACLINT_MAX_HARTS); + assert(!(addr & 0x7)); + assert(!(timecmp_base & 0x7)); + assert(!(time_base & 0x7)); + + qdev_prop_set_uint32(dev, "hartid-base", hartid_base); + qdev_prop_set_uint32(dev, "num-harts", num_harts); + qdev_prop_set_uint32(dev, "timecmp-base", timecmp_base); + qdev_prop_set_uint32(dev, "time-base", time_base); + qdev_prop_set_uint32(dev, "aperture-size", size); + qdev_prop_set_uint32(dev, "timebase-freq", timebase_freq); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); + + for (i = 0; i < num_harts; i++) { + CPUState *cpu = qemu_get_cpu(hartid_base + i); + RISCVCPU *rvcpu = RISCV_CPU(cpu); + CPURISCVState *env = cpu ? cpu->env_ptr : NULL; + riscv_aclint_mtimer_callback *cb = + g_malloc0(sizeof(riscv_aclint_mtimer_callback)); + + if (!env) { + g_free(cb); + continue; + } + if (provide_rdtime) { + riscv_cpu_set_rdtime_fn(env, cpu_riscv_read_rtc, timebase_freq); + } + + cb->s = RISCV_ACLINT_MTIMER(dev); + cb->num = i; + env->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + &riscv_aclint_mtimer_cb, cb); + env->timecmp = 0; + + qdev_connect_gpio_out(dev, i, + qdev_get_gpio_in(DEVICE(rvcpu), IRQ_M_TIMER)); + } + + return dev; +} + +/* CPU read [M|S]SWI register */ +static uint64_t riscv_aclint_swi_read(void *opaque, hwaddr addr, + unsigned size) +{ + RISCVAclintSwiState *swi = opaque; + + if (addr < (swi->num_harts << 2)) { + size_t hartid = swi->hartid_base + (addr >> 2); + CPUState *cpu = qemu_get_cpu(hartid); + CPURISCVState *env = cpu ? cpu->env_ptr : NULL; + if (!env) { + qemu_log_mask(LOG_GUEST_ERROR, + "aclint-swi: invalid hartid: %zu", hartid); + } else if ((addr & 0x3) == 0) { + return (swi->sswi) ? 0 : ((env->mip & MIP_MSIP) > 0); + } + } + + qemu_log_mask(LOG_UNIMP, + "aclint-swi: invalid read: %08x", (uint32_t)addr); + return 0; +} + +/* CPU write [M|S]SWI register */ +static void riscv_aclint_swi_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + RISCVAclintSwiState *swi = opaque; + + if (addr < (swi->num_harts << 2)) { + size_t hartid = swi->hartid_base + (addr >> 2); + CPUState *cpu = qemu_get_cpu(hartid); + CPURISCVState *env = cpu ? 
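/*
 * Editor's sketch (not part of the patch): the guest-visible view of the MSWI
 * registers handled by the [M|S]SWI read/write callbacks around here -- one
 * 32-bit word per hart at offset 4 * hartid, where bit 0 raises or clears
 * that hart's machine software interrupt.  MSWI_BASE and the hart numbering
 * are placeholders for illustration.
 */
#include <stdint.h>

#define MSWI_BASE 0x02000000UL        /* hypothetical MSWI aperture */

static void send_ipi(unsigned hartid)
{
    volatile uint32_t *msip =
        (volatile uint32_t *)(uintptr_t)(MSWI_BASE + 4 * hartid);
    *msip = 1;                        /* raise MSIP on the target hart */
}

static void clear_ipi(unsigned hartid)
{
    volatile uint32_t *msip =
        (volatile uint32_t *)(uintptr_t)(MSWI_BASE + 4 * hartid);
    *msip = 0;                        /* target hart acks its own IPI  */
}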
cpu->env_ptr : NULL; + if (!env) { + qemu_log_mask(LOG_GUEST_ERROR, + "aclint-swi: invalid hartid: %zu", hartid); + } else if ((addr & 0x3) == 0) { + if (value & 0x1) { + qemu_irq_raise(swi->soft_irqs[hartid - swi->hartid_base]); + } else { + if (!swi->sswi) { + qemu_irq_lower(swi->soft_irqs[hartid - swi->hartid_base]); + } + } + return; + } + } + + qemu_log_mask(LOG_UNIMP, + "aclint-swi: invalid write: %08x", (uint32_t)addr); +} + +static const MemoryRegionOps riscv_aclint_swi_ops = { + .read = riscv_aclint_swi_read, + .write = riscv_aclint_swi_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4 + } +}; + +static Property riscv_aclint_swi_properties[] = { + DEFINE_PROP_UINT32("hartid-base", RISCVAclintSwiState, hartid_base, 0), + DEFINE_PROP_UINT32("num-harts", RISCVAclintSwiState, num_harts, 1), + DEFINE_PROP_UINT32("sswi", RISCVAclintSwiState, sswi, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void riscv_aclint_swi_realize(DeviceState *dev, Error **errp) +{ + RISCVAclintSwiState *swi = RISCV_ACLINT_SWI(dev); + int i; + + memory_region_init_io(&swi->mmio, OBJECT(dev), &riscv_aclint_swi_ops, swi, + TYPE_RISCV_ACLINT_SWI, RISCV_ACLINT_SWI_SIZE); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &swi->mmio); + + swi->soft_irqs = g_malloc(sizeof(qemu_irq) * swi->num_harts); + qdev_init_gpio_out(dev, swi->soft_irqs, swi->num_harts); + + /* Claim software interrupt bits */ + for (i = 0; i < swi->num_harts; i++) { + RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(swi->hartid_base + i)); + /* We don't claim mip.SSIP because it is writeable by software */ + if (riscv_cpu_claim_interrupts(cpu, swi->sswi ? 0 : MIP_MSIP) < 0) { + error_report("MSIP already claimed"); + exit(1); + } + } +} + +static void riscv_aclint_swi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = riscv_aclint_swi_realize; + device_class_set_props(dc, riscv_aclint_swi_properties); +} + +static const TypeInfo riscv_aclint_swi_info = { + .name = TYPE_RISCV_ACLINT_SWI, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(RISCVAclintSwiState), + .class_init = riscv_aclint_swi_class_init, +}; + +/* + * Create ACLINT [M|S]SWI device. + */ +DeviceState *riscv_aclint_swi_create(hwaddr addr, uint32_t hartid_base, + uint32_t num_harts, bool sswi) +{ + int i; + DeviceState *dev = qdev_new(TYPE_RISCV_ACLINT_SWI); + + assert(num_harts <= RISCV_ACLINT_MAX_HARTS); + assert(!(addr & 0x3)); + + qdev_prop_set_uint32(dev, "hartid-base", hartid_base); + qdev_prop_set_uint32(dev, "num-harts", num_harts); + qdev_prop_set_uint32(dev, "sswi", sswi ? true : false); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); + + for (i = 0; i < num_harts; i++) { + CPUState *cpu = qemu_get_cpu(hartid_base + i); + RISCVCPU *rvcpu = RISCV_CPU(cpu); + + qdev_connect_gpio_out(dev, i, + qdev_get_gpio_in(DEVICE(rvcpu), + (sswi) ? 
IRQ_S_SOFT : IRQ_M_SOFT)); + } + + return dev; +} + +static void riscv_aclint_register_types(void) +{ + type_register_static(&riscv_aclint_mtimer_info); + type_register_static(&riscv_aclint_swi_info); +} + +type_init(riscv_aclint_register_types) diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c index aacdb1bbc26..74e02858d43 100644 --- a/hw/intc/s390_flic.c +++ b/hw/intc/s390_flic.c @@ -20,7 +20,6 @@ #include "hw/qdev-properties.h" #include "hw/s390x/css.h" #include "trace.h" -#include "cpu.h" #include "qapi/error.h" #include "hw/s390x/s390-virtio-ccw.h" diff --git a/hw/intc/s390_flic_kvm.c b/hw/intc/s390_flic_kvm.c index b3fb9f83952..efe5054182c 100644 --- a/hw/intc/s390_flic_kvm.c +++ b/hw/intc/s390_flic_kvm.c @@ -11,13 +11,11 @@ */ #include "qemu/osdep.h" -#include "cpu.h" -#include "kvm_s390x.h" +#include "kvm/kvm_s390x.h" #include #include "qemu/error-report.h" #include "qemu/module.h" #include "qapi/error.h" -#include "hw/sysbus.h" #include "sysemu/kvm.h" #include "hw/s390x/s390_flic.h" #include "hw/s390x/adapter.h" diff --git a/hw/intc/sh_intc.c b/hw/intc/sh_intc.c index 72a55e32dd4..c9b0b0c1ecc 100644 --- a/hw/intc/sh_intc.c +++ b/hw/intc/sh_intc.c @@ -9,40 +9,37 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "cpu.h" #include "hw/sh4/sh_intc.h" #include "hw/irq.h" #include "hw/sh4/sh.h" - -//#define DEBUG_INTC -//#define DEBUG_INTC_SOURCES - -#define INTC_A7(x) ((x) & 0x1fffffff) +#include "trace.h" void sh_intc_toggle_source(struct intc_source *source, - int enable_adj, int assert_adj) + int enable_adj, int assert_adj) { int enable_changed = 0; int pending_changed = 0; int old_pending; - if ((source->enable_count == source->enable_max) && (enable_adj == -1)) + if (source->enable_count == source->enable_max && enable_adj == -1) { enable_changed = -1; - + } source->enable_count += enable_adj; - if (source->enable_count == source->enable_max) + if (source->enable_count == source->enable_max) { enable_changed = 1; - + } source->asserted += assert_adj; old_pending = source->pending; source->pending = source->asserted && (source->enable_count == source->enable_max); - if (old_pending != source->pending) + if (old_pending != source->pending) { pending_changed = 1; - + } if (pending_changed) { if (source->pending) { source->parent->pending++; @@ -54,35 +51,30 @@ void sh_intc_toggle_source(struct intc_source *source, if (source->parent->pending == 0) { cpu_reset_interrupt(first_cpu, CPU_INTERRUPT_HARD); } - } + } } - if (enable_changed || assert_adj || pending_changed) { -#ifdef DEBUG_INTC_SOURCES - printf("sh_intc: (%d/%d/%d/%d) interrupt source 0x%x %s%s%s\n", - source->parent->pending, - source->asserted, - source->enable_count, - source->enable_max, - source->vect, - source->asserted ? "asserted " : - assert_adj ? "deasserted" : "", - enable_changed == 1 ? "enabled " : - enable_changed == -1 ? "disabled " : "", - source->pending ? "pending" : ""); -#endif - } + if (enable_changed || assert_adj || pending_changed) { + trace_sh_intc_sources(source->parent->pending, source->asserted, + source->enable_count, source->enable_max, + source->vect, source->asserted ? "asserted " : + assert_adj ? "deasserted" : "", + enable_changed == 1 ? "enabled " : + enable_changed == -1 ? "disabled " : "", + source->pending ? 
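/*
 * Editor's sketch (not part of the patch): the enable_count/enable_max
 * refcounting that sh_intc_toggle_source() above relies on.  A source
 * referenced from several mask/priority registers only becomes deliverable
 * once every one of them has enabled it, i.e. once enable_count has reached
 * enable_max while the line is asserted.
 */
#include <stdbool.h>
#include <stdio.h>

struct src { int enable_count, enable_max; bool asserted; };

static bool deliverable(const struct src *s)
{
    return s->asserted && s->enable_count == s->enable_max;
}

int main(void)
{
    struct src s = { .enable_max = 2, .asserted = true };

    s.enable_count++;                   /* first register enables it   */
    printf("%d\n", deliverable(&s));    /* 0: still masked elsewhere   */
    s.enable_count++;                   /* second register enables it  */
    printf("%d\n", deliverable(&s));    /* 1: now pending              */
    return 0;
}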
"pending" : ""); + } } -static void sh_intc_set_irq (void *opaque, int n, int level) +static void sh_intc_set_irq(void *opaque, int n, int level) { - struct intc_desc *desc = opaque; - struct intc_source *source = &(desc->sources[n]); + struct intc_desc *desc = opaque; + struct intc_source *source = &desc->sources[n]; - if (level && !source->asserted) - sh_intc_toggle_source(source, 0, 1); - else if (!level && source->asserted) - sh_intc_toggle_source(source, 0, -1); + if (level && !source->asserted) { + sh_intc_toggle_source(source, 0, 1); + } else if (!level && source->asserted) { + sh_intc_toggle_source(source, 0, -1); + } } int sh_intc_get_pending_vector(struct intc_desc *desc, int imask) @@ -97,147 +89,124 @@ int sh_intc_get_pending_vector(struct intc_desc *desc, int imask) } for (i = 0; i < desc->nr_sources; i++) { - struct intc_source *source = desc->sources + i; + struct intc_source *source = &desc->sources[i]; - if (source->pending) { -#ifdef DEBUG_INTC_SOURCES - printf("sh_intc: (%d) returning interrupt source 0x%x\n", - desc->pending, source->vect); -#endif + if (source->pending) { + trace_sh_intc_pending(desc->pending, source->vect); return source->vect; - } + } } - - abort(); + g_assert_not_reached(); } -#define INTC_MODE_NONE 0 -#define INTC_MODE_DUAL_SET 1 -#define INTC_MODE_DUAL_CLR 2 -#define INTC_MODE_ENABLE_REG 3 -#define INTC_MODE_MASK_REG 4 -#define INTC_MODE_IS_PRIO 8 - -static unsigned int sh_intc_mode(unsigned long address, - unsigned long set_reg, unsigned long clr_reg) +typedef enum { + INTC_MODE_NONE, + INTC_MODE_DUAL_SET, + INTC_MODE_DUAL_CLR, + INTC_MODE_ENABLE_REG, + INTC_MODE_MASK_REG, +} SHIntCMode; +#define INTC_MODE_IS_PRIO 0x80 + +static SHIntCMode sh_intc_mode(unsigned long address, unsigned long set_reg, + unsigned long clr_reg) { - if ((address != INTC_A7(set_reg)) && - (address != INTC_A7(clr_reg))) + if (address != A7ADDR(set_reg) && address != A7ADDR(clr_reg)) { return INTC_MODE_NONE; - + } if (set_reg && clr_reg) { - if (address == INTC_A7(set_reg)) - return INTC_MODE_DUAL_SET; - else - return INTC_MODE_DUAL_CLR; + return address == A7ADDR(set_reg) ? + INTC_MODE_DUAL_SET : INTC_MODE_DUAL_CLR; } - - if (set_reg) - return INTC_MODE_ENABLE_REG; - else - return INTC_MODE_MASK_REG; + return set_reg ? 
INTC_MODE_ENABLE_REG : INTC_MODE_MASK_REG; } static void sh_intc_locate(struct intc_desc *desc, - unsigned long address, - unsigned long **datap, - intc_enum **enums, - unsigned int *first, - unsigned int *width, - unsigned int *modep) + unsigned long address, + unsigned long **datap, + intc_enum **enums, + unsigned int *first, + unsigned int *width, + unsigned int *modep) { - unsigned int i, mode; + SHIntCMode mode; + unsigned int i; /* this is slow but works for now */ if (desc->mask_regs) { for (i = 0; i < desc->nr_mask_regs; i++) { - struct intc_mask_reg *mr = desc->mask_regs + i; - - mode = sh_intc_mode(address, mr->set_reg, mr->clr_reg); - if (mode == INTC_MODE_NONE) - continue; - - *modep = mode; - *datap = &mr->value; - *enums = mr->enum_ids; - *first = mr->reg_width - 1; - *width = 1; - return; - } + struct intc_mask_reg *mr = &desc->mask_regs[i]; + + mode = sh_intc_mode(address, mr->set_reg, mr->clr_reg); + if (mode != INTC_MODE_NONE) { + *modep = mode; + *datap = &mr->value; + *enums = mr->enum_ids; + *first = mr->reg_width - 1; + *width = 1; + return; + } + } } if (desc->prio_regs) { for (i = 0; i < desc->nr_prio_regs; i++) { - struct intc_prio_reg *pr = desc->prio_regs + i; - - mode = sh_intc_mode(address, pr->set_reg, pr->clr_reg); - if (mode == INTC_MODE_NONE) - continue; - - *modep = mode | INTC_MODE_IS_PRIO; - *datap = &pr->value; - *enums = pr->enum_ids; - *first = (pr->reg_width / pr->field_width) - 1; - *width = pr->field_width; - return; - } + struct intc_prio_reg *pr = &desc->prio_regs[i]; + + mode = sh_intc_mode(address, pr->set_reg, pr->clr_reg); + if (mode != INTC_MODE_NONE) { + *modep = mode | INTC_MODE_IS_PRIO; + *datap = &pr->value; + *enums = pr->enum_ids; + *first = pr->reg_width / pr->field_width - 1; + *width = pr->field_width; + return; + } + } } - - abort(); + g_assert_not_reached(); } static void sh_intc_toggle_mask(struct intc_desc *desc, intc_enum id, - int enable, int is_group) + int enable, int is_group) { - struct intc_source *source = desc->sources + id; - - if (!id) - return; + struct intc_source *source = &desc->sources[id]; + if (!id) { + return; + } if (!source->next_enum_id && (!source->enable_max || !source->vect)) { -#ifdef DEBUG_INTC_SOURCES - printf("sh_intc: reserved interrupt source %d modified\n", id); -#endif - return; + qemu_log_mask(LOG_UNIMP, + "sh_intc: reserved interrupt source %d modified\n", id); + return; } - if (source->vect) + if (source->vect) { sh_intc_toggle_source(source, enable ? 
1 : -1, 0); - -#ifdef DEBUG_INTC - else { - printf("setting interrupt group %d to %d\n", id, !!enable); } -#endif if ((is_group || !source->vect) && source->next_enum_id) { sh_intc_toggle_mask(desc, source->next_enum_id, enable, 1); } -#ifdef DEBUG_INTC if (!source->vect) { - printf("setting interrupt group %d to %d - done\n", id, !!enable); + trace_sh_intc_set(id, !!enable); } -#endif } -static uint64_t sh_intc_read(void *opaque, hwaddr offset, - unsigned size) +static uint64_t sh_intc_read(void *opaque, hwaddr offset, unsigned size) { struct intc_desc *desc = opaque; - intc_enum *enum_ids = NULL; - unsigned int first = 0; - unsigned int width = 0; - unsigned int mode = 0; + intc_enum *enum_ids; + unsigned int first; + unsigned int width; + unsigned int mode; unsigned long *valuep; -#ifdef DEBUG_INTC - printf("sh_intc_read 0x%lx\n", (unsigned long) offset); -#endif - - sh_intc_locate(desc, (unsigned long)offset, &valuep, - &enum_ids, &first, &width, &mode); + sh_intc_locate(desc, (unsigned long)offset, &valuep, + &enum_ids, &first, &width, &mode); + trace_sh_intc_read(size, (uint64_t)offset, *valuep); return *valuep; } @@ -245,45 +214,40 @@ static void sh_intc_write(void *opaque, hwaddr offset, uint64_t value, unsigned size) { struct intc_desc *desc = opaque; - intc_enum *enum_ids = NULL; - unsigned int first = 0; - unsigned int width = 0; - unsigned int mode = 0; - unsigned int k; + intc_enum *enum_ids; + unsigned int first; + unsigned int width; + unsigned int mode; unsigned long *valuep; + unsigned int k; unsigned long mask; -#ifdef DEBUG_INTC - printf("sh_intc_write 0x%lx 0x%08x\n", (unsigned long) offset, value); -#endif - - sh_intc_locate(desc, (unsigned long)offset, &valuep, - &enum_ids, &first, &width, &mode); - + trace_sh_intc_write(size, (uint64_t)offset, value); + sh_intc_locate(desc, (unsigned long)offset, &valuep, + &enum_ids, &first, &width, &mode); switch (mode) { - case INTC_MODE_ENABLE_REG | INTC_MODE_IS_PRIO: break; - case INTC_MODE_DUAL_SET: value |= *valuep; break; - case INTC_MODE_DUAL_CLR: value = *valuep & ~value; break; - default: abort(); + case INTC_MODE_ENABLE_REG | INTC_MODE_IS_PRIO: + break; + case INTC_MODE_DUAL_SET: + value |= *valuep; + break; + case INTC_MODE_DUAL_CLR: + value = *valuep & ~value; + break; + default: + g_assert_not_reached(); } for (k = 0; k <= first; k++) { - mask = ((1 << width) - 1) << ((first - k) * width); - - if ((*valuep & mask) == (value & mask)) - continue; -#if 0 - printf("k = %d, first = %d, enum = %d, mask = 0x%08x\n", - k, first, enum_ids[k], (unsigned int)mask); -#endif - sh_intc_toggle_mask(desc, enum_ids[k], value & mask, 0); + mask = (1 << width) - 1; + mask <<= (first - k) * width; + + if ((*valuep & mask) != (value & mask)) { + sh_intc_toggle_mask(desc, enum_ids[k], value & mask, 0); + } } *valuep = value; - -#ifdef DEBUG_INTC - printf("sh_intc_write 0x%lx -> 0x%08x\n", (unsigned long) offset, value); -#endif } static const MemoryRegionOps sh_intc_ops = { @@ -292,155 +256,136 @@ static const MemoryRegionOps sh_intc_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -struct intc_source *sh_intc_source(struct intc_desc *desc, intc_enum id) -{ - if (id) - return desc->sources + id; - - return NULL; -} - -static unsigned int sh_intc_register(MemoryRegion *sysmem, - struct intc_desc *desc, - const unsigned long address, - const char *type, - const char *action, - const unsigned int index) -{ - char name[60]; - MemoryRegion *iomem, *iomem_p4, *iomem_a7; - - if (!address) { - return 0; - } - - iomem = &desc->iomem; - iomem_p4 = 
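/*
 * Editor's sketch (not part of the patch): the set/clear register pairing
 * resolved by sh_intc_mode() and applied in sh_intc_write() above.  When a
 * register has both a set and a clear address, a write through the set
 * address can only turn bits on and a write through the clear address can
 * only turn bits off; the stored value keeps whatever survives.  Values are
 * invented.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t reg = 0x000000f0;       /* current register contents        */

    uint32_t set_write = 0x0000000f;
    reg |= set_write;                /* INTC_MODE_DUAL_SET  -> 0xff      */

    uint32_t clr_write = 0x00000030;
    reg &= ~clr_write;               /* INTC_MODE_DUAL_CLR  -> 0xcf      */

    printf("0x%08x\n", reg);
    return 0;
}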
desc->iomem_aliases + index; - iomem_a7 = iomem_p4 + 1; - -#define SH_INTC_IOMEM_FORMAT "interrupt-controller-%s-%s-%s" - snprintf(name, sizeof(name), SH_INTC_IOMEM_FORMAT, type, action, "p4"); - memory_region_init_alias(iomem_p4, NULL, name, iomem, INTC_A7(address), 4); - memory_region_add_subregion(sysmem, P4ADDR(address), iomem_p4); - - snprintf(name, sizeof(name), SH_INTC_IOMEM_FORMAT, type, action, "a7"); - memory_region_init_alias(iomem_a7, NULL, name, iomem, INTC_A7(address), 4); - memory_region_add_subregion(sysmem, A7ADDR(address), iomem_a7); -#undef SH_INTC_IOMEM_FORMAT - - /* used to increment aliases index */ - return 2; -} - static void sh_intc_register_source(struct intc_desc *desc, - intc_enum source, - struct intc_group *groups, - int nr_groups) + intc_enum source, + struct intc_group *groups, + int nr_groups) { unsigned int i, k; - struct intc_source *s; + intc_enum id; if (desc->mask_regs) { for (i = 0; i < desc->nr_mask_regs; i++) { - struct intc_mask_reg *mr = desc->mask_regs + i; - - for (k = 0; k < ARRAY_SIZE(mr->enum_ids); k++) { - if (mr->enum_ids[k] != source) - continue; + struct intc_mask_reg *mr = &desc->mask_regs[i]; - s = sh_intc_source(desc, mr->enum_ids[k]); - if (s) - s->enable_max++; - } - } + for (k = 0; k < ARRAY_SIZE(mr->enum_ids); k++) { + id = mr->enum_ids[k]; + if (id && id == source) { + desc->sources[id].enable_max++; + } + } + } } if (desc->prio_regs) { for (i = 0; i < desc->nr_prio_regs; i++) { - struct intc_prio_reg *pr = desc->prio_regs + i; - - for (k = 0; k < ARRAY_SIZE(pr->enum_ids); k++) { - if (pr->enum_ids[k] != source) - continue; + struct intc_prio_reg *pr = &desc->prio_regs[i]; - s = sh_intc_source(desc, pr->enum_ids[k]); - if (s) - s->enable_max++; - } - } + for (k = 0; k < ARRAY_SIZE(pr->enum_ids); k++) { + id = pr->enum_ids[k]; + if (id && id == source) { + desc->sources[id].enable_max++; + } + } + } } if (groups) { for (i = 0; i < nr_groups; i++) { - struct intc_group *gr = groups + i; + struct intc_group *gr = &groups[i]; - for (k = 0; k < ARRAY_SIZE(gr->enum_ids); k++) { - if (gr->enum_ids[k] != source) - continue; - - s = sh_intc_source(desc, gr->enum_ids[k]); - if (s) - s->enable_max++; - } - } + for (k = 0; k < ARRAY_SIZE(gr->enum_ids); k++) { + id = gr->enum_ids[k]; + if (id && id == source) { + desc->sources[id].enable_max++; + } + } + } } } void sh_intc_register_sources(struct intc_desc *desc, - struct intc_vect *vectors, - int nr_vectors, - struct intc_group *groups, - int nr_groups) + struct intc_vect *vectors, + int nr_vectors, + struct intc_group *groups, + int nr_groups) { unsigned int i, k; + intc_enum id; struct intc_source *s; for (i = 0; i < nr_vectors; i++) { - struct intc_vect *vect = vectors + i; + struct intc_vect *vect = &vectors[i]; - sh_intc_register_source(desc, vect->enum_id, groups, nr_groups); - s = sh_intc_source(desc, vect->enum_id); - if (s) { + sh_intc_register_source(desc, vect->enum_id, groups, nr_groups); + id = vect->enum_id; + if (id) { + s = &desc->sources[id]; s->vect = vect->vect; - -#ifdef DEBUG_INTC_SOURCES - printf("sh_intc: registered source %d -> 0x%04x (%d/%d)\n", - vect->enum_id, s->vect, s->enable_count, s->enable_max); -#endif + trace_sh_intc_register("source", vect->enum_id, s->vect, + s->enable_count, s->enable_max); } } if (groups) { for (i = 0; i < nr_groups; i++) { - struct intc_group *gr = groups + i; + struct intc_group *gr = &groups[i]; + + id = gr->enum_id; + s = &desc->sources[id]; + s->next_enum_id = gr->enum_ids[0]; + + for (k = 1; k < ARRAY_SIZE(gr->enum_ids); k++) { + 
if (gr->enum_ids[k]) { + id = gr->enum_ids[k - 1]; + s = &desc->sources[id]; + s->next_enum_id = gr->enum_ids[k]; + } + } + trace_sh_intc_register("group", gr->enum_id, 0xffff, + s->enable_count, s->enable_max); + } + } +} + +static unsigned int sh_intc_register(MemoryRegion *sysmem, + struct intc_desc *desc, + const unsigned long address, + const char *type, + const char *action, + const unsigned int index) +{ + char name[60]; + MemoryRegion *iomem, *iomem_p4, *iomem_a7; - s = sh_intc_source(desc, gr->enum_id); - s->next_enum_id = gr->enum_ids[0]; + if (!address) { + return 0; + } - for (k = 1; k < ARRAY_SIZE(gr->enum_ids); k++) { - if (!gr->enum_ids[k]) - continue; + iomem = &desc->iomem; + iomem_p4 = &desc->iomem_aliases[index]; + iomem_a7 = iomem_p4 + 1; - s = sh_intc_source(desc, gr->enum_ids[k - 1]); - s->next_enum_id = gr->enum_ids[k]; - } + snprintf(name, sizeof(name), "intc-%s-%s-%s", type, action, "p4"); + memory_region_init_alias(iomem_p4, NULL, name, iomem, A7ADDR(address), 4); + memory_region_add_subregion(sysmem, P4ADDR(address), iomem_p4); -#ifdef DEBUG_INTC_SOURCES - printf("sh_intc: registered group %d (%d/%d)\n", - gr->enum_id, s->enable_count, s->enable_max); -#endif - } - } + snprintf(name, sizeof(name), "intc-%s-%s-%s", type, action, "a7"); + memory_region_init_alias(iomem_a7, NULL, name, iomem, A7ADDR(address), 4); + memory_region_add_subregion(sysmem, A7ADDR(address), iomem_a7); + + /* used to increment aliases index */ + return 2; } int sh_intc_init(MemoryRegion *sysmem, - struct intc_desc *desc, - int nr_sources, - struct intc_mask_reg *mask_regs, - int nr_mask_regs, - struct intc_prio_reg *prio_regs, - int nr_prio_regs) + struct intc_desc *desc, + int nr_sources, + struct intc_mask_reg *mask_regs, + int nr_mask_regs, + struct intc_prio_reg *prio_regs, + int nr_prio_regs) { unsigned int i, j; @@ -450,65 +395,55 @@ int sh_intc_init(MemoryRegion *sysmem, desc->nr_mask_regs = nr_mask_regs; desc->prio_regs = prio_regs; desc->nr_prio_regs = nr_prio_regs; - /* Allocate 4 MemoryRegions per register (2 actions * 2 aliases). 
- **/ + /* Allocate 4 MemoryRegions per register (2 actions * 2 aliases) */ desc->iomem_aliases = g_new0(MemoryRegion, (nr_mask_regs + nr_prio_regs) * 4); - - j = 0; - i = sizeof(struct intc_source) * nr_sources; - desc->sources = g_malloc0(i); - - for (i = 0; i < desc->nr_sources; i++) { - struct intc_source *source = desc->sources + i; - - source->parent = desc; + desc->sources = g_new0(struct intc_source, nr_sources); + for (i = 0; i < nr_sources; i++) { + desc->sources[i].parent = desc; } - desc->irqs = qemu_allocate_irqs(sh_intc_set_irq, desc, nr_sources); - - memory_region_init_io(&desc->iomem, NULL, &sh_intc_ops, desc, - "interrupt-controller", 0x100000000ULL); - -#define INT_REG_PARAMS(reg_struct, type, action, j) \ - reg_struct->action##_reg, #type, #action, j + memory_region_init_io(&desc->iomem, NULL, &sh_intc_ops, desc, "intc", + 0x100000000ULL); + j = 0; if (desc->mask_regs) { for (i = 0; i < desc->nr_mask_regs; i++) { - struct intc_mask_reg *mr = desc->mask_regs + i; + struct intc_mask_reg *mr = &desc->mask_regs[i]; - j += sh_intc_register(sysmem, desc, - INT_REG_PARAMS(mr, mask, set, j)); - j += sh_intc_register(sysmem, desc, - INT_REG_PARAMS(mr, mask, clr, j)); - } + j += sh_intc_register(sysmem, desc, mr->set_reg, "mask", "set", j); + j += sh_intc_register(sysmem, desc, mr->clr_reg, "mask", "clr", j); + } } if (desc->prio_regs) { for (i = 0; i < desc->nr_prio_regs; i++) { - struct intc_prio_reg *pr = desc->prio_regs + i; + struct intc_prio_reg *pr = &desc->prio_regs[i]; - j += sh_intc_register(sysmem, desc, - INT_REG_PARAMS(pr, prio, set, j)); - j += sh_intc_register(sysmem, desc, - INT_REG_PARAMS(pr, prio, clr, j)); - } + j += sh_intc_register(sysmem, desc, pr->set_reg, "prio", "set", j); + j += sh_intc_register(sysmem, desc, pr->clr_reg, "prio", "clr", j); + } } -#undef INT_REG_PARAMS return 0; } -/* Assert level IRL interrupt. - 0:deassert. 1:lowest priority,... 15:highest priority. */ +/* + * Assert level IRL interrupt. + * 0:deassert. 1:lowest priority,... 15:highest priority + */ void sh_intc_set_irl(void *opaque, int n, int level) { struct intc_source *s = opaque; int i, irl = level ^ 15; - for (i = 0; (s = sh_intc_source(s->parent, s->next_enum_id)); i++) { - if (i == irl) - sh_intc_toggle_source(s, s->enable_count?0:1, s->asserted?0:1); - else - if (s->asserted) - sh_intc_toggle_source(s, 0, -1); + intc_enum id = s->next_enum_id; + + for (i = 0; id; id = s->next_enum_id, i++) { + s = &s->parent->sources[id]; + if (i == irl) { + sh_intc_toggle_source(s, s->enable_count ? 0 : 1, + s->asserted ? 0 : 1); + } else if (s->asserted) { + sh_intc_toggle_source(s, 0, -1); + } } } diff --git a/hw/intc/sifive_clint.c b/hw/intc/sifive_clint.c deleted file mode 100644 index 0f41e5ea1ce..00000000000 --- a/hw/intc/sifive_clint.c +++ /dev/null @@ -1,266 +0,0 @@ -/* - * SiFive CLINT (Core Local Interruptor) - * - * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu - * Copyright (c) 2017 SiFive, Inc. - * - * This provides real-time clock, timer and interprocessor interrupts. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2 or later, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
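The sh_intc_set_irl() rewrite above keeps the original IRL encoding: the incoming level is inverted with irl = level ^ 15, so level 15 (highest priority) selects the first member of the group's enum chain and level 0 falls past the end of the chain and only deasserts. Below is a standalone sketch of that mapping, purely illustrative and not part of the patch; the 15-entry chain length is an assumption, while the "level 0 deasserts" behaviour follows from the function's own comment.

/* illustration only: how an IRL level picks a member of the interrupt group */
#include <stdio.h>

int main(void)
{
    const int chain_len = 15;          /* assumed number of IRL group members */

    for (int level = 0; level <= 15; level++) {
        int irl = level ^ 15;          /* index into the group's enum chain */

        if (irl >= chain_len) {
            printf("level %2d -> deassert any asserted member\n", level);
        } else {
            printf("level %2d -> assert chain member %2d\n", level, irl);
        }
    }
    return 0;
}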
- * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "hw/sysbus.h" -#include "target/riscv/cpu.h" -#include "hw/qdev-properties.h" -#include "hw/intc/sifive_clint.h" -#include "qemu/timer.h" - -static uint64_t cpu_riscv_read_rtc(uint32_t timebase_freq) -{ - return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - timebase_freq, NANOSECONDS_PER_SECOND); -} - -/* - * Called when timecmp is written to update the QEMU timer or immediately - * trigger timer interrupt if mtimecmp <= current timer value. - */ -static void sifive_clint_write_timecmp(RISCVCPU *cpu, uint64_t value, - uint32_t timebase_freq) -{ - uint64_t next; - uint64_t diff; - - uint64_t rtc_r = cpu_riscv_read_rtc(timebase_freq); - - cpu->env.timecmp = value; - if (cpu->env.timecmp <= rtc_r) { - /* if we're setting an MTIMECMP value in the "past", - immediately raise the timer interrupt */ - riscv_cpu_update_mip(cpu, MIP_MTIP, BOOL_TO_MASK(1)); - return; - } - - /* otherwise, set up the future timer interrupt */ - riscv_cpu_update_mip(cpu, MIP_MTIP, BOOL_TO_MASK(0)); - diff = cpu->env.timecmp - rtc_r; - /* back to ns (note args switched in muldiv64) */ - next = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + - muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq); - timer_mod(cpu->env.timer, next); -} - -/* - * Callback used when the timer set using timer_mod expires. - * Should raise the timer interrupt line - */ -static void sifive_clint_timer_cb(void *opaque) -{ - RISCVCPU *cpu = opaque; - riscv_cpu_update_mip(cpu, MIP_MTIP, BOOL_TO_MASK(1)); -} - -/* CPU wants to read rtc or timecmp register */ -static uint64_t sifive_clint_read(void *opaque, hwaddr addr, unsigned size) -{ - SiFiveCLINTState *clint = opaque; - if (addr >= clint->sip_base && - addr < clint->sip_base + (clint->num_harts << 2)) { - size_t hartid = clint->hartid_base + ((addr - clint->sip_base) >> 2); - CPUState *cpu = qemu_get_cpu(hartid); - CPURISCVState *env = cpu ? cpu->env_ptr : NULL; - if (!env) { - error_report("clint: invalid timecmp hartid: %zu", hartid); - } else if ((addr & 0x3) == 0) { - return (env->mip & MIP_MSIP) > 0; - } else { - error_report("clint: invalid read: %08x", (uint32_t)addr); - return 0; - } - } else if (addr >= clint->timecmp_base && - addr < clint->timecmp_base + (clint->num_harts << 3)) { - size_t hartid = clint->hartid_base + - ((addr - clint->timecmp_base) >> 3); - CPUState *cpu = qemu_get_cpu(hartid); - CPURISCVState *env = cpu ? 
cpu->env_ptr : NULL; - if (!env) { - error_report("clint: invalid timecmp hartid: %zu", hartid); - } else if ((addr & 0x7) == 0) { - /* timecmp_lo */ - uint64_t timecmp = env->timecmp; - return timecmp & 0xFFFFFFFF; - } else if ((addr & 0x7) == 4) { - /* timecmp_hi */ - uint64_t timecmp = env->timecmp; - return (timecmp >> 32) & 0xFFFFFFFF; - } else { - error_report("clint: invalid read: %08x", (uint32_t)addr); - return 0; - } - } else if (addr == clint->time_base) { - /* time_lo */ - return cpu_riscv_read_rtc(clint->timebase_freq) & 0xFFFFFFFF; - } else if (addr == clint->time_base + 4) { - /* time_hi */ - return (cpu_riscv_read_rtc(clint->timebase_freq) >> 32) & 0xFFFFFFFF; - } - - error_report("clint: invalid read: %08x", (uint32_t)addr); - return 0; -} - -/* CPU wrote to rtc or timecmp register */ -static void sifive_clint_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - SiFiveCLINTState *clint = opaque; - - if (addr >= clint->sip_base && - addr < clint->sip_base + (clint->num_harts << 2)) { - size_t hartid = clint->hartid_base + ((addr - clint->sip_base) >> 2); - CPUState *cpu = qemu_get_cpu(hartid); - CPURISCVState *env = cpu ? cpu->env_ptr : NULL; - if (!env) { - error_report("clint: invalid timecmp hartid: %zu", hartid); - } else if ((addr & 0x3) == 0) { - riscv_cpu_update_mip(RISCV_CPU(cpu), MIP_MSIP, BOOL_TO_MASK(value)); - } else { - error_report("clint: invalid sip write: %08x", (uint32_t)addr); - } - return; - } else if (addr >= clint->timecmp_base && - addr < clint->timecmp_base + (clint->num_harts << 3)) { - size_t hartid = clint->hartid_base + - ((addr - clint->timecmp_base) >> 3); - CPUState *cpu = qemu_get_cpu(hartid); - CPURISCVState *env = cpu ? cpu->env_ptr : NULL; - if (!env) { - error_report("clint: invalid timecmp hartid: %zu", hartid); - } else if ((addr & 0x7) == 0) { - /* timecmp_lo */ - uint64_t timecmp_hi = env->timecmp >> 32; - sifive_clint_write_timecmp(RISCV_CPU(cpu), - timecmp_hi << 32 | (value & 0xFFFFFFFF), clint->timebase_freq); - return; - } else if ((addr & 0x7) == 4) { - /* timecmp_hi */ - uint64_t timecmp_lo = env->timecmp; - sifive_clint_write_timecmp(RISCV_CPU(cpu), - value << 32 | (timecmp_lo & 0xFFFFFFFF), clint->timebase_freq); - } else { - error_report("clint: invalid timecmp write: %08x", (uint32_t)addr); - } - return; - } else if (addr == clint->time_base) { - /* time_lo */ - error_report("clint: time_lo write not implemented"); - return; - } else if (addr == clint->time_base + 4) { - /* time_hi */ - error_report("clint: time_hi write not implemented"); - return; - } - - error_report("clint: invalid write: %08x", (uint32_t)addr); -} - -static const MemoryRegionOps sifive_clint_ops = { - .read = sifive_clint_read, - .write = sifive_clint_write, - .endianness = DEVICE_LITTLE_ENDIAN, - .valid = { - .min_access_size = 4, - .max_access_size = 8 - } -}; - -static Property sifive_clint_properties[] = { - DEFINE_PROP_UINT32("hartid-base", SiFiveCLINTState, hartid_base, 0), - DEFINE_PROP_UINT32("num-harts", SiFiveCLINTState, num_harts, 0), - DEFINE_PROP_UINT32("sip-base", SiFiveCLINTState, sip_base, 0), - DEFINE_PROP_UINT32("timecmp-base", SiFiveCLINTState, timecmp_base, 0), - DEFINE_PROP_UINT32("time-base", SiFiveCLINTState, time_base, 0), - DEFINE_PROP_UINT32("aperture-size", SiFiveCLINTState, aperture_size, 0), - DEFINE_PROP_UINT32("timebase-freq", SiFiveCLINTState, timebase_freq, 0), - DEFINE_PROP_END_OF_LIST(), -}; - -static void sifive_clint_realize(DeviceState *dev, Error **errp) -{ - SiFiveCLINTState *s = 
SIFIVE_CLINT(dev); - memory_region_init_io(&s->mmio, OBJECT(dev), &sifive_clint_ops, s, - TYPE_SIFIVE_CLINT, s->aperture_size); - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio); -} - -static void sifive_clint_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - dc->realize = sifive_clint_realize; - device_class_set_props(dc, sifive_clint_properties); -} - -static const TypeInfo sifive_clint_info = { - .name = TYPE_SIFIVE_CLINT, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(SiFiveCLINTState), - .class_init = sifive_clint_class_init, -}; - -static void sifive_clint_register_types(void) -{ - type_register_static(&sifive_clint_info); -} - -type_init(sifive_clint_register_types) - - -/* - * Create CLINT device. - */ -DeviceState *sifive_clint_create(hwaddr addr, hwaddr size, - uint32_t hartid_base, uint32_t num_harts, uint32_t sip_base, - uint32_t timecmp_base, uint32_t time_base, uint32_t timebase_freq, - bool provide_rdtime) -{ - int i; - for (i = 0; i < num_harts; i++) { - CPUState *cpu = qemu_get_cpu(hartid_base + i); - CPURISCVState *env = cpu ? cpu->env_ptr : NULL; - if (!env) { - continue; - } - if (provide_rdtime) { - riscv_cpu_set_rdtime_fn(env, cpu_riscv_read_rtc, timebase_freq); - } - env->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, - &sifive_clint_timer_cb, cpu); - env->timecmp = 0; - } - - DeviceState *dev = qdev_new(TYPE_SIFIVE_CLINT); - qdev_prop_set_uint32(dev, "hartid-base", hartid_base); - qdev_prop_set_uint32(dev, "num-harts", num_harts); - qdev_prop_set_uint32(dev, "sip-base", sip_base); - qdev_prop_set_uint32(dev, "timecmp-base", timecmp_base); - qdev_prop_set_uint32(dev, "time-base", time_base); - qdev_prop_set_uint32(dev, "aperture-size", size); - qdev_prop_set_uint32(dev, "timebase-freq", timebase_freq); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); - return dev; -} diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c index 97a1a27a9ac..877e76877cc 100644 --- a/hw/intc/sifive_plic.c +++ b/hw/intc/sifive_plic.c @@ -25,12 +25,11 @@ #include "qemu/error-report.h" #include "hw/sysbus.h" #include "hw/pci/msi.h" -#include "hw/boards.h" #include "hw/qdev-properties.h" #include "hw/intc/sifive_plic.h" #include "target/riscv/cpu.h" -#include "sysemu/sysemu.h" #include "migration/vmstate.h" +#include "hw/irq.h" #define RISCV_DEBUG_PLIC 0 @@ -141,18 +140,14 @@ static void sifive_plic_update(SiFivePLICState *plic) for (addrid = 0; addrid < plic->num_addrs; addrid++) { uint32_t hartid = plic->addr_config[addrid].hartid; PLICMode mode = plic->addr_config[addrid].mode; - CPUState *cpu = qemu_get_cpu(hartid); - CPURISCVState *env = cpu ? 
cpu->env_ptr : NULL; - if (!env) { - continue; - } int level = sifive_plic_irqs_pending(plic, addrid); + switch (mode) { case PLICMode_M: - riscv_cpu_update_mip(RISCV_CPU(cpu), MIP_MEIP, BOOL_TO_MASK(level)); + qemu_set_irq(plic->m_external_irqs[hartid - plic->hartid_base], level); break; case PLICMode_S: - riscv_cpu_update_mip(RISCV_CPU(cpu), MIP_SEIP, BOOL_TO_MASK(level)); + qemu_set_irq(plic->s_external_irqs[hartid - plic->hartid_base], level); break; default: break; @@ -360,21 +355,6 @@ static const MemoryRegionOps sifive_plic_ops = { } }; -static Property sifive_plic_properties[] = { - DEFINE_PROP_STRING("hart-config", SiFivePLICState, hart_config), - DEFINE_PROP_UINT32("hartid-base", SiFivePLICState, hartid_base, 0), - DEFINE_PROP_UINT32("num-sources", SiFivePLICState, num_sources, 0), - DEFINE_PROP_UINT32("num-priorities", SiFivePLICState, num_priorities, 0), - DEFINE_PROP_UINT32("priority-base", SiFivePLICState, priority_base, 0), - DEFINE_PROP_UINT32("pending-base", SiFivePLICState, pending_base, 0), - DEFINE_PROP_UINT32("enable-base", SiFivePLICState, enable_base, 0), - DEFINE_PROP_UINT32("enable-stride", SiFivePLICState, enable_stride, 0), - DEFINE_PROP_UINT32("context-base", SiFivePLICState, context_base, 0), - DEFINE_PROP_UINT32("context-stride", SiFivePLICState, context_stride, 0), - DEFINE_PROP_UINT32("aperture-size", SiFivePLICState, aperture_size, 0), - DEFINE_PROP_END_OF_LIST(), -}; - /* * parse PLIC hart/mode address offset config * @@ -432,39 +412,46 @@ static void parse_hart_config(SiFivePLICState *plic) static void sifive_plic_irq_request(void *opaque, int irq, int level) { - SiFivePLICState *plic = opaque; - if (RISCV_DEBUG_PLIC) { - qemu_log("sifive_plic_irq_request: irq=%d level=%d\n", irq, level); - } - sifive_plic_set_pending(plic, irq, level > 0); - sifive_plic_update(plic); + SiFivePLICState *s = opaque; + + sifive_plic_set_pending(s, irq, level > 0); + sifive_plic_update(s); } static void sifive_plic_realize(DeviceState *dev, Error **errp) { - SiFivePLICState *plic = SIFIVE_PLIC(dev); + SiFivePLICState *s = SIFIVE_PLIC(dev); int i; - memory_region_init_io(&plic->mmio, OBJECT(dev), &sifive_plic_ops, plic, - TYPE_SIFIVE_PLIC, plic->aperture_size); - parse_hart_config(plic); - plic->bitfield_words = (plic->num_sources + 31) >> 5; - plic->num_enables = plic->bitfield_words * plic->num_addrs; - plic->source_priority = g_new0(uint32_t, plic->num_sources); - plic->target_priority = g_new(uint32_t, plic->num_addrs); - plic->pending = g_new0(uint32_t, plic->bitfield_words); - plic->claimed = g_new0(uint32_t, plic->bitfield_words); - plic->enable = g_new0(uint32_t, plic->num_enables); - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &plic->mmio); - qdev_init_gpio_in(dev, sifive_plic_irq_request, plic->num_sources); + memory_region_init_io(&s->mmio, OBJECT(dev), &sifive_plic_ops, s, + TYPE_SIFIVE_PLIC, s->aperture_size); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio); + + parse_hart_config(s); + + s->bitfield_words = (s->num_sources + 31) >> 5; + s->num_enables = s->bitfield_words * s->num_addrs; + s->source_priority = g_new0(uint32_t, s->num_sources); + s->target_priority = g_new(uint32_t, s->num_addrs); + s->pending = g_new0(uint32_t, s->bitfield_words); + s->claimed = g_new0(uint32_t, s->bitfield_words); + s->enable = g_new0(uint32_t, s->num_enables); + + qdev_init_gpio_in(dev, sifive_plic_irq_request, s->num_sources); + + s->s_external_irqs = g_malloc(sizeof(qemu_irq) * s->num_harts); + qdev_init_gpio_out(dev, s->s_external_irqs, s->num_harts); + + s->m_external_irqs = 
g_malloc(sizeof(qemu_irq) * s->num_harts); + qdev_init_gpio_out(dev, s->m_external_irqs, s->num_harts); /* We can't allow the supervisor to control SEIP as this would allow the * supervisor to clear a pending external interrupt which will result in * lost a interrupt in the case a PLIC is attached. The SEIP bit must be * hardware controlled when a PLIC is attached. */ - for (i = 0; i < plic->num_harts; i++) { - RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(plic->hartid_base + i)); + for (i = 0; i < s->num_harts; i++) { + RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(s->hartid_base + i)); if (riscv_cpu_claim_interrupts(cpu, MIP_SEIP) < 0) { error_report("SEIP already claimed"); exit(1); @@ -495,6 +482,21 @@ static const VMStateDescription vmstate_sifive_plic = { } }; +static Property sifive_plic_properties[] = { + DEFINE_PROP_STRING("hart-config", SiFivePLICState, hart_config), + DEFINE_PROP_UINT32("hartid-base", SiFivePLICState, hartid_base, 0), + DEFINE_PROP_UINT32("num-sources", SiFivePLICState, num_sources, 0), + DEFINE_PROP_UINT32("num-priorities", SiFivePLICState, num_priorities, 0), + DEFINE_PROP_UINT32("priority-base", SiFivePLICState, priority_base, 0), + DEFINE_PROP_UINT32("pending-base", SiFivePLICState, pending_base, 0), + DEFINE_PROP_UINT32("enable-base", SiFivePLICState, enable_base, 0), + DEFINE_PROP_UINT32("enable-stride", SiFivePLICState, enable_stride, 0), + DEFINE_PROP_UINT32("context-base", SiFivePLICState, context_base, 0), + DEFINE_PROP_UINT32("context-stride", SiFivePLICState, context_stride, 0), + DEFINE_PROP_UINT32("aperture-size", SiFivePLICState, aperture_size, 0), + DEFINE_PROP_END_OF_LIST(), +}; + static void sifive_plic_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -522,6 +524,7 @@ type_init(sifive_plic_register_types) * Create PLIC device. */ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, + uint32_t num_harts, uint32_t hartid_base, uint32_t num_sources, uint32_t num_priorities, uint32_t priority_base, uint32_t pending_base, uint32_t enable_base, @@ -529,6 +532,8 @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, uint32_t context_stride, uint32_t aperture_size) { DeviceState *dev = qdev_new(TYPE_SIFIVE_PLIC); + int i; + assert(enable_stride == (enable_stride & -enable_stride)); assert(context_stride == (context_stride & -context_stride)); qdev_prop_set_string(dev, "hart-config", hart_config); @@ -544,5 +549,15 @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, qdev_prop_set_uint32(dev, "aperture-size", aperture_size); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); + + for (i = 0; i < num_harts; i++) { + CPUState *cpu = qemu_get_cpu(hartid_base + i); + + qdev_connect_gpio_out(dev, i, + qdev_get_gpio_in(DEVICE(cpu), IRQ_S_EXT)); + qdev_connect_gpio_out(dev, num_harts + i, + qdev_get_gpio_in(DEVICE(cpu), IRQ_M_EXT)); + } + return dev; } diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index 801bc193416..4ec659b93e1 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -185,7 +185,7 @@ static void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon) xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI", pq & XIVE_ESB_VAL_P ? 'P' : '-', pq & XIVE_ESB_VAL_Q ? 'Q' : '-', - xsrc->status[i] & XIVE_STATUS_ASSERTED ? 'A' : ' ', + xive_source_is_asserted(xsrc, i) ? 'A' : ' ', xive_eas_is_masked(eas) ? 
"M" : " ", (int) xive_get_field64(EAS_END_DATA, eas->w)); @@ -1798,7 +1798,7 @@ static target_ulong h_int_reset(PowerPCCPU *cpu, return H_PARAMETER; } - device_legacy_reset(DEVICE(xive)); + device_cold_reset(DEVICE(xive)); if (spapr_xive_in_kernel(xive)) { Error *local_err = NULL; diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c index c0083311607..61fe7bd2d32 100644 --- a/hw/intc/spapr_xive_kvm.c +++ b/hw/intc/spapr_xive_kvm.c @@ -236,11 +236,13 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp) SpaprXive *xive = SPAPR_XIVE(xsrc->xive); uint64_t state = 0; + trace_kvm_xive_source_reset(srcno); + assert(xive->fd != -1); if (xive_source_irq_is_lsi(xsrc, srcno)) { state |= KVM_XIVE_LEVEL_SENSITIVE; - if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) { + if (xive_source_is_asserted(xsrc, srcno)) { state |= KVM_XIVE_LEVEL_ASSERTED; } } @@ -297,11 +299,9 @@ static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset) return xive_esb_rw(xsrc, srcno, offset, 0, 0) & 0x3; } -static void xive_esb_trigger(XiveSource *xsrc, int srcno) +static void kvmppc_xive_esb_trigger(XiveSource *xsrc, int srcno) { - uint64_t *addr = xsrc->esb_mmap + xive_source_esb_page(xsrc, srcno); - - *addr = 0x0; + xive_esb_rw(xsrc, srcno, 0, 0, true); } uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, @@ -311,8 +311,6 @@ uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, return xive_esb_rw(xsrc, srcno, offset, data, 1); } - trace_kvm_xive_source_reset(srcno); - /* * Special Load EOI handling for LSI sources. Q bit is never set * and the interrupt should be re-triggered if the level is still @@ -321,8 +319,8 @@ uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, if (xive_source_irq_is_lsi(xsrc, srcno) && offset == XIVE_ESB_LOAD_EOI) { xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00); - if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) { - xive_esb_trigger(xsrc, srcno); + if (xive_source_is_asserted(xsrc, srcno)) { + kvmppc_xive_esb_trigger(xsrc, srcno); } return 0; } else { @@ -359,14 +357,10 @@ void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val) return; } } else { - if (val) { - xsrc->status[srcno] |= XIVE_STATUS_ASSERTED; - } else { - xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED; - } + xive_source_set_asserted(xsrc, srcno, val); } - xive_esb_trigger(xsrc, srcno); + kvmppc_xive_esb_trigger(xsrc, srcno); } /* @@ -533,7 +527,7 @@ static void kvmppc_xive_change_state_handler(void *opaque, bool running, * generate a trigger. */ if (pq == XIVE_ESB_RESET && old_pq == XIVE_ESB_QUEUED) { - xive_esb_trigger(xsrc, i); + kvmppc_xive_esb_trigger(xsrc, i); } } diff --git a/hw/intc/trace-events b/hw/intc/trace-events index c9ab17234b4..9aba7e3a7a4 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# i8259.c pic_update_irq(bool master, uint8_t imr, uint8_t irr, uint8_t padd) "master %d imr %"PRIu8" irr %"PRIu8" padd %"PRIu8 @@ -51,15 +51,6 @@ grlib_irqmp_set_irq(int irq) "Raise CPU IRQ %d" grlib_irqmp_readl_unknown(uint64_t addr) "addr 0x%"PRIx64 grlib_irqmp_writel_unknown(uint64_t addr, uint32_t value) "addr 0x%"PRIx64" value 0x%x" -# lm32_pic.c -lm32_pic_raise_irq(void) "Raise CPU interrupt" -lm32_pic_lower_irq(void) "Lower CPU interrupt" -lm32_pic_interrupt(int irq, int level) "Set IRQ%d %d" -lm32_pic_set_im(uint32_t im) "im 0x%08x" -lm32_pic_set_ip(uint32_t ip) "ip 0x%08x" -lm32_pic_get_im(uint32_t im) "im 0x%08x" -lm32_pic_get_ip(uint32_t ip) "ip 0x%08x" - # xics.c xics_icp_check_ipi(int server, uint8_t mfrr) "CPU %d can take IPI mfrr=0x%x" xics_icp_accept(uint32_t old_xirr, uint32_t new_xirr) "icp_accept: XIRR 0x%"PRIx32"->0x%"PRIx32 @@ -228,14 +219,14 @@ kvm_xive_source_reset(uint32_t srcno) "IRQ 0x%x" xive_tctx_accept(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x ACK" xive_tctx_notify(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x raise !" xive_tctx_set_cppr(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x new CPPR=0x%02x NSR=0x%02x" -xive_source_esb_read(uint64_t addr, uint32_t srcno, uint64_t value) "@0x0x%"PRIx64" IRQ 0x%x val=0x0x%"PRIx64 -xive_source_esb_write(uint64_t addr, uint32_t srcno, uint64_t value) "@0x0x%"PRIx64" IRQ 0x%x val=0x0x%"PRIx64 +xive_source_esb_read(uint64_t addr, uint32_t srcno, uint64_t value) "@0x%"PRIx64" IRQ 0x%x val=0x%"PRIx64 +xive_source_esb_write(uint64_t addr, uint32_t srcno, uint64_t value) "@0x%"PRIx64" IRQ 0x%x val=0x%"PRIx64 xive_router_end_notify(uint8_t end_blk, uint32_t end_idx, uint32_t end_data) "END 0x%02x/0x%04x -> enqueue 0x%08x" xive_router_end_escalate(uint8_t end_blk, uint32_t end_idx, uint8_t esc_blk, uint32_t esc_idx, uint32_t end_data) "END 0x%02x/0x%04x -> escalate END 0x%02x/0x%04x data 0x%08x" -xive_tctx_tm_write(uint64_t offset, unsigned int size, uint64_t value) "@0x0x%"PRIx64" sz=%d val=0x%" PRIx64 -xive_tctx_tm_read(uint64_t offset, unsigned int size, uint64_t value) "@0x0x%"PRIx64" sz=%d val=0x%" PRIx64 +xive_tctx_tm_write(uint64_t offset, unsigned int size, uint64_t value) "@0x%"PRIx64" sz=%d val=0x%" PRIx64 +xive_tctx_tm_read(uint64_t offset, unsigned int size, uint64_t value) "@0x%"PRIx64" sz=%d val=0x%" PRIx64 xive_presenter_notify(uint8_t nvt_blk, uint32_t nvt_idx, uint8_t ring) "found NVT 0x%x/0x%x ring=0x%x" -xive_end_source_read(uint8_t end_blk, uint32_t end_idx, uint64_t addr) "END 0x%x/0x%x @0x0x%"PRIx64 +xive_end_source_read(uint8_t end_blk, uint32_t end_idx, uint64_t addr) "END 0x%x/0x%x @0x%"PRIx64 # pnv_xive.c pnv_xive_ic_hw_trigger(uint64_t addr, uint64_t val) "@0x%"PRIx64" val=0x%"PRIx64 @@ -247,3 +238,11 @@ goldfish_pic_write(void *dev, int idx, unsigned int addr, unsigned int size, uin goldfish_pic_reset(void *dev, int idx) "pic: %p goldfish-irq.%d" goldfish_pic_realize(void *dev, int idx) "pic: %p goldfish-irq.%d" goldfish_pic_instance_init(void *dev) "pic: %p goldfish-irq" + +# sh_intc.c +sh_intc_sources(int p, int a, int c, int m, unsigned short v, const char *s1, const char *s2, const char *s3) "(%d/%d/%d/%d) interrupt source 0x%x %s%s%s" +sh_intc_pending(int p, unsigned short v) "(%d) returning interrupt source 
0x%x" +sh_intc_register(const char *s, int id, unsigned short v, int c, int m) "%s %u -> 0x%04x (%d/%d)" +sh_intc_read(unsigned size, uint64_t offset, unsigned long val) "size %u 0x%" PRIx64 " -> 0x%lx" +sh_intc_write(unsigned size, uint64_t offset, unsigned long val) "size %u 0x%" PRIx64 " <- 0x%lx" +sh_intc_set(int id, int enable) "setting interrupt group %d to %d" diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 68f9d44feb4..48a835eab7c 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -27,7 +27,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "trace.h" #include "qemu/timer.h" #include "hw/ppc/xics.h" diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index 570d635bcc0..f5bfc501bc1 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -28,7 +28,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" -#include "cpu.h" #include "trace.h" #include "sysemu/kvm.h" #include "hw/ppc/spapr.h" diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index 8ae4f41459c..37b2d99977a 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -26,7 +26,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "trace.h" #include "qemu/timer.h" #include "hw/ppc/spapr.h" diff --git a/hw/intc/xive.c b/hw/intc/xive.c index eeb4e62ba95..190194d27f8 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -27,17 +27,6 @@ * XIVE Thread Interrupt Management context */ -/* - * Convert a priority number to an Interrupt Pending Buffer (IPB) - * register, which indicates a pending interrupt at the priority - * corresponding to the bit number - */ -static uint8_t priority_to_ipb(uint8_t priority) -{ - return priority > XIVE_PRIORITY_MAX ? - 0 : 1 << (XIVE_PRIORITY_MAX - priority); -} - /* * Convert an Interrupt Pending Buffer (IPB) register to a Pending * Interrupt Priority Register (PIPR), which contains the priority of @@ -89,7 +78,7 @@ static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring) regs[TM_CPPR] = cppr; /* Reset the pending buffer bit */ - regs[TM_IPB] &= ~priority_to_ipb(cppr); + regs[TM_IPB] &= ~xive_priority_to_ipb(cppr); regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]); /* Drop Exception bit */ @@ -152,11 +141,6 @@ void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb) xive_tctx_notify(tctx, ring); } -static inline uint32_t xive_tctx_word2(uint8_t *ring) -{ - return *((uint32_t *) &ring[TM_WORD2]); -} - /* * XIVE Thread Interrupt Management Area (TIMA) */ @@ -353,7 +337,7 @@ static void xive_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx, static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, uint64_t value, unsigned size) { - xive_tctx_ipb_update(tctx, TM_QW1_OS, priority_to_ipb(value & 0xff)); + xive_tctx_ipb_update(tctx, TM_QW1_OS, xive_priority_to_ipb(value & 0xff)); } static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk, @@ -816,7 +800,7 @@ void xive_tctx_destroy(XiveTCTX *tctx) * XIVE ESB helpers */ -static uint8_t xive_esb_set(uint8_t *pq, uint8_t value) +uint8_t xive_esb_set(uint8_t *pq, uint8_t value) { uint8_t old_pq = *pq & 0x3; @@ -826,7 +810,7 @@ static uint8_t xive_esb_set(uint8_t *pq, uint8_t value) return old_pq; } -static bool xive_esb_trigger(uint8_t *pq) +bool xive_esb_trigger(uint8_t *pq) { uint8_t old_pq = *pq & 0x3; @@ -846,7 +830,7 @@ static bool xive_esb_trigger(uint8_t *pq) } } -static bool xive_esb_eoi(uint8_t *pq) +bool xive_esb_eoi(uint8_t *pq) { uint8_t old_pq = *pq & 0x3; @@ -891,7 +875,7 @@ static bool xive_source_lsi_trigger(XiveSource *xsrc, 
uint32_t srcno) { uint8_t old_pq = xive_source_esb_get(xsrc, srcno); - xsrc->status[srcno] |= XIVE_STATUS_ASSERTED; + xive_source_set_asserted(xsrc, srcno, true); switch (old_pq) { case XIVE_ESB_RESET: @@ -939,7 +923,7 @@ static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno) * notification */ if (xive_source_irq_is_lsi(xsrc, srcno) && - xsrc->status[srcno] & XIVE_STATUS_ASSERTED) { + xive_source_is_asserted(xsrc, srcno)) { ret = xive_source_lsi_trigger(xsrc, srcno); } @@ -1120,7 +1104,7 @@ void xive_source_set_irq(void *opaque, int srcno, int val) if (val) { notify = xive_source_lsi_trigger(xsrc, srcno); } else { - xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED; + xive_source_set_asserted(xsrc, srcno, false); } } else { if (val) { @@ -1149,7 +1133,7 @@ void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon) xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI", pq & XIVE_ESB_VAL_P ? 'P' : '-', pq & XIVE_ESB_VAL_Q ? 'Q' : '-', - xsrc->status[i] & XIVE_STATUS_ASSERTED ? 'A' : ' '); + xive_source_is_asserted(xsrc, i) ? 'A' : ' '); } } @@ -1514,10 +1498,10 @@ int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, * * The parameters represent what is sent on the PowerBus */ -static bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, - uint8_t nvt_blk, uint32_t nvt_idx, - bool cam_ignore, uint8_t priority, - uint32_t logic_serv) +bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool cam_ignore, uint8_t priority, + uint32_t logic_serv) { XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xfb); XiveTCTXMatch match = { .tctx = NULL, .ring = 0 }; @@ -1535,7 +1519,8 @@ static bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, /* handle CPU exception delivery */ if (count) { trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring); - xive_tctx_ipb_update(match.tctx, match.ring, priority_to_ipb(priority)); + xive_tctx_ipb_update(match.tctx, match.ring, + xive_priority_to_ipb(priority)); } return !!count; @@ -1682,7 +1667,8 @@ static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk, * use. The presenter will resend the interrupt when the vCPU * is dispatched again on a HW thread. 
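The hunks above drop the file-local priority_to_ipb() helper in favour of a shared xive_priority_to_ipb(), which the router uses below when it stashes a pending priority into the NVT's IPB field. The conversion is a one-hot encoding: priority 0 (most favoured) sets the most significant bit of the 8-bit IPB, priority 7 the least significant, and anything above the maximum maps to 0. A small self-contained illustration follows; the conversion body is the one removed from xive.c above, and the XIVE_PRIORITY_MAX value of 7 is an assumption taken from the usual QEMU definition.

#include <stdio.h>
#include <stdint.h>

#define XIVE_PRIORITY_MAX 7            /* assumed, as used by the XIVE model */

static uint8_t priority_to_ipb(uint8_t priority)
{
    return priority > XIVE_PRIORITY_MAX ? 0 : 1 << (XIVE_PRIORITY_MAX - priority);
}

int main(void)
{
    for (int prio = 0; prio <= 8; prio++) {
        printf("priority %d -> IPB 0x%02x\n", prio, priority_to_ipb((uint8_t)prio));
    }
    return 0;
}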
*/ - ipb = xive_get_field32(NVT_W4_IPB, nvt.w4) | priority_to_ipb(priority); + ipb = xive_get_field32(NVT_W4_IPB, nvt.w4) | + xive_priority_to_ipb(priority); nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, ipb); xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4); diff --git a/hw/ipack/ipack.c b/hw/ipack/ipack.c index f19ecaeb1cf..ae20f36da68 100644 --- a/hw/ipack/ipack.c +++ b/hw/ipack/ipack.c @@ -30,12 +30,12 @@ IPackDevice *ipack_device_find(IPackBus *bus, int32_t slot) return NULL; } -void ipack_bus_new_inplace(IPackBus *bus, size_t bus_size, - DeviceState *parent, - const char *name, uint8_t n_slots, - qemu_irq_handler handler) +void ipack_bus_init(IPackBus *bus, size_t bus_size, + DeviceState *parent, + uint8_t n_slots, + qemu_irq_handler handler) { - qbus_create_inplace(bus, bus_size, TYPE_IPACK_BUS, parent, name); + qbus_init(bus, bus_size, TYPE_IPACK_BUS, parent, NULL); bus->n_slots = n_slots; bus->set_irq = handler; } diff --git a/hw/ipack/tpci200.c b/hw/ipack/tpci200.c index d107e134c4e..1f764fc85ba 100644 --- a/hw/ipack/tpci200.c +++ b/hw/ipack/tpci200.c @@ -611,8 +611,8 @@ static void tpci200_realize(PCIDevice *pci_dev, Error **errp) pci_register_bar(&s->dev, 4, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->las2); pci_register_bar(&s->dev, 5, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->las3); - ipack_bus_new_inplace(&s->bus, sizeof(s->bus), DEVICE(pci_dev), NULL, - N_MODULES, tpci200_set_irq); + ipack_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), + N_MODULES, tpci200_set_irq); } static const VMStateDescription vmstate_tpci200 = { diff --git a/hw/ipmi/ipmi_bmc_sim.c b/hw/ipmi/ipmi_bmc_sim.c index 55fb81fa5a9..905e091094b 100644 --- a/hw/ipmi/ipmi_bmc_sim.c +++ b/hw/ipmi/ipmi_bmc_sim.c @@ -189,7 +189,7 @@ struct IPMIBmcSim { uint8_t watchdog_use; uint8_t watchdog_action; uint8_t watchdog_pretimeout; /* In seconds */ - bool watchdog_expired; + uint8_t watchdog_expired; uint16_t watchdog_timeout; /* in 100's of milliseconds */ bool watchdog_running; @@ -2110,7 +2110,7 @@ static const VMStateDescription vmstate_ipmi_sim = { VMSTATE_UINT8(watchdog_use, IPMIBmcSim), VMSTATE_UINT8(watchdog_action, IPMIBmcSim), VMSTATE_UINT8(watchdog_pretimeout, IPMIBmcSim), - VMSTATE_BOOL(watchdog_expired, IPMIBmcSim), + VMSTATE_UINT8(watchdog_expired, IPMIBmcSim), VMSTATE_UINT16(watchdog_timeout, IPMIBmcSim), VMSTATE_BOOL(watchdog_running, IPMIBmcSim), VMSTATE_BOOL(watchdog_preaction_ran, IPMIBmcSim), diff --git a/hw/ipmi/isa_ipmi_bt.c b/hw/ipmi/isa_ipmi_bt.c index b7c2ad557b2..02625eb94ed 100644 --- a/hw/ipmi/isa_ipmi_bt.c +++ b/hw/ipmi/isa_ipmi_bt.c @@ -23,7 +23,6 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/module.h" #include "qapi/error.h" #include "hw/irq.h" diff --git a/hw/ipmi/isa_ipmi_kcs.c b/hw/ipmi/isa_ipmi_kcs.c index 7dd6bf0040a..3b23ad08b34 100644 --- a/hw/ipmi/isa_ipmi_kcs.c +++ b/hw/ipmi/isa_ipmi_kcs.c @@ -23,7 +23,6 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/module.h" #include "qapi/error.h" #include "hw/irq.h" diff --git a/hw/isa/Kconfig b/hw/isa/Kconfig index 55e0003ce40..d42143a991e 100644 --- a/hw/isa/Kconfig +++ b/hw/isa/Kconfig @@ -17,6 +17,7 @@ config ISA_SUPERIO bool select ISA_BUS select PCKBD + select FDC_ISA config PC87312 bool @@ -27,7 +28,7 @@ config PC87312 select MC146818RTC select SERIAL_ISA select PARALLEL - select FDC + select FDC_ISA select IDE_ISA config PIIX3 @@ -46,16 +47,21 @@ config VT82C686 select ISA_SUPERIO select ACPI_SMBUS select SERIAL_ISA - select FDC + select FDC_ISA select USB_UHCI select APM + select I8254 + select 
I8257 + select I8259 + select MC146818RTC + select PARALLEL config SMC37C669 bool select ISA_SUPERIO select SERIAL_ISA select PARALLEL - select FDC + select FDC_ISA config LPC_ICH9 bool diff --git a/hw/isa/isa-bus.c b/hw/isa/isa-bus.c index 7820068e6e1..6c31398dda6 100644 --- a/hw/isa/isa-bus.c +++ b/hw/isa/isa-bus.c @@ -64,7 +64,7 @@ ISABus *isa_bus_new(DeviceState *dev, MemoryRegion* address_space, sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); } - isabus = ISA_BUS(qbus_create(TYPE_ISA_BUS, dev, NULL)); + isabus = ISA_BUS(qbus_new(TYPE_ISA_BUS, dev, NULL)); isabus->address_space = address_space; isabus->address_space_io = address_space_io; return isabus; @@ -131,13 +131,17 @@ void isa_register_ioport(ISADevice *dev, MemoryRegion *io, uint16_t start) isa_init_ioport(dev, start); } -void isa_register_portio_list(ISADevice *dev, - PortioList *piolist, uint16_t start, - const MemoryRegionPortio *pio_start, - void *opaque, const char *name) +int isa_register_portio_list(ISADevice *dev, + PortioList *piolist, uint16_t start, + const MemoryRegionPortio *pio_start, + void *opaque, const char *name) { assert(piolist && !piolist->owner); + if (!isabus) { + return -ENODEV; + } + /* START is how we should treat DEV, regardless of the actual contents of the portio array. This is how the old code actually handled e.g. the FDC device. */ @@ -145,6 +149,8 @@ void isa_register_portio_list(ISADevice *dev, portio_list_init(piolist, OBJECT(dev), pio_start, opaque, name); portio_list_add(piolist, isabus->address_space_io, start); + + return 0; } static void isa_device_init(Object *obj) diff --git a/hw/isa/isa-superio.c b/hw/isa/isa-superio.c index 179c1856956..c81bfe58ef8 100644 --- a/hw/isa/isa-superio.c +++ b/hw/isa/isa-superio.c @@ -14,7 +14,6 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "qapi/error.h" -#include "sysemu/sysemu.h" #include "sysemu/blockdev.h" #include "chardev/char.h" #include "hw/block/fdc.h" diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 3963b735207..5f143dca17a 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -31,10 +31,10 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "cpu.h" +#include "qapi/error.h" #include "qapi/visitor.h" #include "qemu/range.h" #include "hw/isa/isa.h" -#include "hw/sysbus.h" #include "migration/vmstate.h" #include "hw/irq.h" #include "hw/isa/apm.h" @@ -45,7 +45,6 @@ #include "hw/acpi/ich9.h" #include "hw/pci/pci_bus.h" #include "hw/qdev-properties.h" -#include "exec/address-spaces.h" #include "sysemu/runstate.h" #include "sysemu/sysemu.h" #include "hw/core/cpu.h" @@ -678,6 +677,18 @@ static void ich9_lpc_realize(PCIDevice *d, Error **errp) DeviceState *dev = DEVICE(d); ISABus *isa_bus; + if ((lpc->smi_host_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT)) && + !(lpc->smi_host_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT))) { + /* + * smi_features_ok_callback() throws an error on this. + * + * So bail out here instead of advertizing the invalid + * configuration and get obscure firmware failures from that. 
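The new check in ich9_lpc_realize() above enforces a dependency between the negotiated SMI features: advertising CPU hot-unplug without CPU hot-plug is rejected at realize time instead of surfacing later as an obscure firmware failure from smi_features_ok_callback(). The rule reduces to "hot-unplug implies hot-plug"; the sketch below uses hypothetical bit positions, whereas the real code tests ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT and ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT as shown in the hunk.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { HOTPLUG_BIT = 1, HOT_UNPLUG_BIT = 2 };   /* hypothetical positions */

static bool smi_features_valid(uint64_t features)
{
    bool plug   = features & (1ULL << HOTPLUG_BIT);
    bool unplug = features & (1ULL << HOT_UNPLUG_BIT);

    return !unplug || plug;            /* hot-unplug implies hot-plug */
}

int main(void)
{
    printf("unplug only : %s\n",
           smi_features_valid(1ULL << HOT_UNPLUG_BIT) ? "ok" : "reject");
    printf("plug+unplug : %s\n",
           smi_features_valid((1ULL << HOTPLUG_BIT) | (1ULL << HOT_UNPLUG_BIT))
           ? "ok" : "reject");
    return 0;
}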
+ */ + error_setg(errp, "cpu hot-unplug requires cpu hot-plug"); + return; + } + isa_bus = isa_bus_new(DEVICE(d), get_system_memory(), get_system_io(), errp); if (!isa_bus) { diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c index f46ccae25cf..dab901c9ad9 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix3.c @@ -29,7 +29,6 @@ #include "hw/isa/isa.h" #include "hw/xen/xen.h" #include "sysemu/xen.h" -#include "sysemu/sysemu.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" #include "migration/vmstate.h" diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c index b3b6a4378a3..0fe7b69bc4c 100644 --- a/hw/isa/piix4.c +++ b/hw/isa/piix4.c @@ -29,7 +29,6 @@ #include "hw/southbridge/piix.h" #include "hw/pci/pci.h" #include "hw/isa/isa.h" -#include "hw/sysbus.h" #include "hw/intc/i8259.h" #include "hw/dma/i8257.h" #include "hw/timer/i8254.h" @@ -268,8 +267,9 @@ DeviceState *piix4_create(PCIBus *pci_bus, ISABus **isa_bus, I2CBus **smbus) pci_create_simple(pci_bus, devfn + 2, "piix4-usb-uhci"); if (smbus) { *smbus = piix4_pm_init(pci_bus, devfn + 3, 0x1100, - isa_get_irq(NULL, 9), NULL, 0, NULL); - } + qdev_get_gpio_in_named(dev, "isa", 9), + NULL, 0, NULL); + } return dev; } diff --git a/hw/isa/trace-events b/hw/isa/trace-events index 641d69eedf7..b8f877e1ed8 100644 --- a/hw/isa/trace-events +++ b/hw/isa/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # isa-superio.c superio_create_parallel(int id, uint16_t base, unsigned int irq) "id=%d, base 0x%03x, irq %u" diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c index 98325bb32b3..8f656251b8d 100644 --- a/hw/isa/vt82c686.c +++ b/hw/isa/vt82c686.c @@ -8,6 +8,9 @@ * * Contributions after 2012-01-13 are licensed under the terms of the * GNU GPL, version 2 or (at your option) any later version. 
+ * + * VT8231 south bridge support and general clean up to allow it + * Copyright (c) 2018-2020 BALATON Zoltan */ #include "qemu/osdep.h" @@ -30,7 +33,6 @@ #include "qemu/module.h" #include "qemu/range.h" #include "qemu/timer.h" -#include "exec/address-spaces.h" #include "trace.h" #define TYPE_VIA_PM "via-pm" @@ -265,15 +267,80 @@ static const TypeInfo vt8231_pm_info = { }; -typedef struct SuperIOConfig { +#define TYPE_VIA_SUPERIO "via-superio" +OBJECT_DECLARE_SIMPLE_TYPE(ViaSuperIOState, VIA_SUPERIO) + +struct ViaSuperIOState { + ISASuperIODevice superio; uint8_t regs[0x100]; + const MemoryRegionOps *io_ops; MemoryRegion io; -} SuperIOConfig; +}; + +static inline void via_superio_io_enable(ViaSuperIOState *s, bool enable) +{ + memory_region_set_enabled(&s->io, enable); +} + +static void via_superio_realize(DeviceState *d, Error **errp) +{ + ViaSuperIOState *s = VIA_SUPERIO(d); + ISASuperIOClass *ic = ISA_SUPERIO_GET_CLASS(s); + Error *local_err = NULL; + + assert(s->io_ops); + ic->parent_realize(d, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + memory_region_init_io(&s->io, OBJECT(d), s->io_ops, s, "via-superio", 2); + memory_region_set_enabled(&s->io, false); + /* The floppy also uses 0x3f0 and 0x3f1 but this seems to work anyway */ + memory_region_add_subregion(isa_address_space_io(ISA_DEVICE(s)), 0x3f0, + &s->io); +} + +static uint64_t via_superio_cfg_read(void *opaque, hwaddr addr, unsigned size) +{ + ViaSuperIOState *sc = opaque; + uint8_t idx = sc->regs[0]; + uint8_t val = sc->regs[idx]; + + if (addr == 0) { + return idx; + } + if (addr == 1 && idx == 0) { + val = 0; /* reading reg 0 where we store index value */ + } + trace_via_superio_read(idx, val); + return val; +} + +static void via_superio_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ISASuperIOClass *sc = ISA_SUPERIO_CLASS(klass); + + sc->parent_realize = dc->realize; + dc->realize = via_superio_realize; +} + +static const TypeInfo via_superio_info = { + .name = TYPE_VIA_SUPERIO, + .parent = TYPE_ISA_SUPERIO, + .instance_size = sizeof(ViaSuperIOState), + .class_size = sizeof(ISASuperIOClass), + .class_init = via_superio_class_init, + .abstract = true, +}; -static void superio_cfg_write(void *opaque, hwaddr addr, uint64_t data, - unsigned size) +#define TYPE_VT82C686B_SUPERIO "vt82c686b-superio" + +static void vt82c686b_superio_cfg_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) { - SuperIOConfig *sc = opaque; + ViaSuperIOState *sc = opaque; uint8_t idx = sc->regs[0]; if (addr == 0) { /* config index register */ @@ -304,25 +371,105 @@ static void superio_cfg_write(void *opaque, hwaddr addr, uint64_t data, sc->regs[idx] = data; } -static uint64_t superio_cfg_read(void *opaque, hwaddr addr, unsigned size) +static const MemoryRegionOps vt82c686b_superio_cfg_ops = { + .read = via_superio_cfg_read, + .write = vt82c686b_superio_cfg_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 1, + }, +}; + +static void vt82c686b_superio_reset(DeviceState *dev) { - SuperIOConfig *sc = opaque; + ViaSuperIOState *s = VIA_SUPERIO(dev); + + memset(s->regs, 0, sizeof(s->regs)); + /* Device ID */ + vt82c686b_superio_cfg_write(s, 0, 0xe0, 1); + vt82c686b_superio_cfg_write(s, 1, 0x3c, 1); + /* Function select - all disabled */ + vt82c686b_superio_cfg_write(s, 0, 0xe2, 1); + vt82c686b_superio_cfg_write(s, 1, 0x03, 1); + /* Floppy ctrl base addr 0x3f0-7 */ + vt82c686b_superio_cfg_write(s, 0, 0xe3, 1); + 
vt82c686b_superio_cfg_write(s, 1, 0xfc, 1); + /* Parallel port base addr 0x378-f */ + vt82c686b_superio_cfg_write(s, 0, 0xe6, 1); + vt82c686b_superio_cfg_write(s, 1, 0xde, 1); + /* Serial port 1 base addr 0x3f8-f */ + vt82c686b_superio_cfg_write(s, 0, 0xe7, 1); + vt82c686b_superio_cfg_write(s, 1, 0xfe, 1); + /* Serial port 2 base addr 0x2f8-f */ + vt82c686b_superio_cfg_write(s, 0, 0xe8, 1); + vt82c686b_superio_cfg_write(s, 1, 0xbe, 1); + + vt82c686b_superio_cfg_write(s, 0, 0, 1); +} + +static void vt82c686b_superio_init(Object *obj) +{ + VIA_SUPERIO(obj)->io_ops = &vt82c686b_superio_cfg_ops; +} + +static void vt82c686b_superio_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ISASuperIOClass *sc = ISA_SUPERIO_CLASS(klass); + + dc->reset = vt82c686b_superio_reset; + sc->serial.count = 2; + sc->parallel.count = 1; + sc->ide.count = 0; /* emulated by via-ide */ + sc->floppy.count = 1; +} + +static const TypeInfo vt82c686b_superio_info = { + .name = TYPE_VT82C686B_SUPERIO, + .parent = TYPE_VIA_SUPERIO, + .instance_size = sizeof(ViaSuperIOState), + .instance_init = vt82c686b_superio_init, + .class_size = sizeof(ISASuperIOClass), + .class_init = vt82c686b_superio_class_init, +}; + + +#define TYPE_VT8231_SUPERIO "vt8231-superio" + +static void vt8231_superio_cfg_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + ViaSuperIOState *sc = opaque; uint8_t idx = sc->regs[0]; - uint8_t val = sc->regs[idx]; - if (addr == 0) { - return idx; + if (addr == 0) { /* config index register */ + sc->regs[0] = data; + return; } - if (addr == 1 && idx == 0) { - val = 0; /* reading reg 0 where we store index value */ + + /* config data register */ + trace_via_superio_write(idx, data); + switch (idx) { + case 0x00 ... 0xdf: + case 0xe7 ... 0xef: + case 0xf0 ... 
0xf1: + case 0xf5: + case 0xf8: + case 0xfd: + /* ignore write to read only registers */ + return; + default: + qemu_log_mask(LOG_UNIMP, + "via_superio_cfg: unimplemented register 0x%x\n", idx); + break; } - trace_via_superio_read(idx, val); - return val; + sc->regs[idx] = data; } -static const MemoryRegionOps superio_cfg_ops = { - .read = superio_cfg_read, - .write = superio_cfg_write, +static const MemoryRegionOps vt8231_superio_cfg_ops = { + .read = via_superio_cfg_read, + .write = vt8231_superio_cfg_write, .endianness = DEVICE_NATIVE_ENDIAN, .impl = { .min_access_size = 1, @@ -330,47 +477,151 @@ static const MemoryRegionOps superio_cfg_ops = { }, }; +static void vt8231_superio_reset(DeviceState *dev) +{ + ViaSuperIOState *s = VIA_SUPERIO(dev); + + memset(s->regs, 0, sizeof(s->regs)); + /* Device ID */ + s->regs[0xf0] = 0x3c; + /* Device revision */ + s->regs[0xf1] = 0x01; + /* Function select - all disabled */ + vt8231_superio_cfg_write(s, 0, 0xf2, 1); + vt8231_superio_cfg_write(s, 1, 0x03, 1); + /* Serial port base addr */ + vt8231_superio_cfg_write(s, 0, 0xf4, 1); + vt8231_superio_cfg_write(s, 1, 0xfe, 1); + /* Parallel port base addr */ + vt8231_superio_cfg_write(s, 0, 0xf6, 1); + vt8231_superio_cfg_write(s, 1, 0xde, 1); + /* Floppy ctrl base addr */ + vt8231_superio_cfg_write(s, 0, 0xf7, 1); + vt8231_superio_cfg_write(s, 1, 0xfc, 1); + + vt8231_superio_cfg_write(s, 0, 0, 1); +} + +static void vt8231_superio_init(Object *obj) +{ + VIA_SUPERIO(obj)->io_ops = &vt8231_superio_cfg_ops; +} + +static uint16_t vt8231_superio_serial_iobase(ISASuperIODevice *sio, + uint8_t index) +{ + return 0x2f8; /* FIXME: This should be settable via registers f2-f4 */ +} + +static void vt8231_superio_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ISASuperIOClass *sc = ISA_SUPERIO_CLASS(klass); + + dc->reset = vt8231_superio_reset; + sc->serial.count = 1; + sc->serial.get_iobase = vt8231_superio_serial_iobase; + sc->parallel.count = 1; + sc->ide.count = 0; /* emulated by via-ide */ + sc->floppy.count = 1; +} + +static const TypeInfo vt8231_superio_info = { + .name = TYPE_VT8231_SUPERIO, + .parent = TYPE_VIA_SUPERIO, + .instance_size = sizeof(ViaSuperIOState), + .instance_init = vt8231_superio_init, + .class_size = sizeof(ISASuperIOClass), + .class_init = vt8231_superio_class_init, +}; + -OBJECT_DECLARE_SIMPLE_TYPE(VT82C686BISAState, VT82C686B_ISA) +#define TYPE_VIA_ISA "via-isa" +OBJECT_DECLARE_SIMPLE_TYPE(ViaISAState, VIA_ISA) -struct VT82C686BISAState { +struct ViaISAState { PCIDevice dev; qemu_irq cpu_intr; - SuperIOConfig superio_cfg; + qemu_irq *isa_irqs; + ISABus *isa_bus; + ViaSuperIOState *via_sio; +}; + +static const VMStateDescription vmstate_via = { + .name = "via-isa", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(dev, ViaISAState), + VMSTATE_END_OF_LIST() + } }; +static const TypeInfo via_isa_info = { + .name = TYPE_VIA_ISA, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(ViaISAState), + .abstract = true, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { }, + }, +}; + +void via_isa_set_irq(PCIDevice *d, int n, int level) +{ + ViaISAState *s = VIA_ISA(d); + qemu_set_irq(s->isa_irqs[n], level); +} + static void via_isa_request_i8259_irq(void *opaque, int irq, int level) { - VT82C686BISAState *s = opaque; + ViaISAState *s = opaque; qemu_set_irq(s->cpu_intr, level); } +static void via_isa_realize(PCIDevice *d, Error **errp) +{ + ViaISAState *s = VIA_ISA(d); + 
DeviceState *dev = DEVICE(d); + qemu_irq *isa_irq; + int i; + + qdev_init_gpio_out(dev, &s->cpu_intr, 1); + isa_irq = qemu_allocate_irqs(via_isa_request_i8259_irq, s, 1); + s->isa_bus = isa_bus_new(dev, get_system_memory(), pci_address_space_io(d), + &error_fatal); + s->isa_irqs = i8259_init(s->isa_bus, *isa_irq); + isa_bus_irqs(s->isa_bus, s->isa_irqs); + i8254_pit_init(s->isa_bus, 0x40, 0, NULL); + i8257_dma_init(s->isa_bus, 0); + mc146818_rtc_init(s->isa_bus, 2000, NULL); + + for (i = 0; i < PCI_CONFIG_HEADER_SIZE; i++) { + if (i < PCI_COMMAND || i >= PCI_REVISION_ID) { + d->wmask[i] = 0; + } + } +} + +/* TYPE_VT82C686B_ISA */ + static void vt82c686b_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int len) { - VT82C686BISAState *s = VT82C686B_ISA(d); + ViaISAState *s = VIA_ISA(d); trace_via_isa_write(addr, val, len); pci_default_write_config(d, addr, val, len); if (addr == 0x85) { /* BIT(1): enable or disable superio config io ports */ - memory_region_set_enabled(&s->superio_cfg.io, val & BIT(1)); + via_superio_io_enable(s->via_sio, val & BIT(1)); } } -static const VMStateDescription vmstate_via = { - .name = "vt82c686b", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_PCI_DEVICE(dev, VT82C686BISAState), - VMSTATE_END_OF_LIST() - } -}; - static void vt82c686b_isa_reset(DeviceState *dev) { - VT82C686BISAState *s = VT82C686B_ISA(dev); + ViaISAState *s = VIA_ISA(dev); uint8_t *pci_conf = s->dev.config; pci_set_long(pci_conf + PCI_CAPABILITY_LIST, 0x000000c0); @@ -386,51 +637,18 @@ static void vt82c686b_isa_reset(DeviceState *dev) pci_conf[0x5a] = 0x04; /* KBC/RTC Control*/ pci_conf[0x5f] = 0x04; pci_conf[0x77] = 0x10; /* GPIO Control 1/2/3/4 */ - - s->superio_cfg.regs[0xe0] = 0x3c; /* Device ID */ - s->superio_cfg.regs[0xe2] = 0x03; /* Function select */ - s->superio_cfg.regs[0xe3] = 0xfc; /* Floppy ctrl base addr */ - s->superio_cfg.regs[0xe6] = 0xde; /* Parallel port base addr */ - s->superio_cfg.regs[0xe7] = 0xfe; /* Serial port 1 base addr */ - s->superio_cfg.regs[0xe8] = 0xbe; /* Serial port 2 base addr */ } static void vt82c686b_realize(PCIDevice *d, Error **errp) { - VT82C686BISAState *s = VT82C686B_ISA(d); - DeviceState *dev = DEVICE(d); - ISABus *isa_bus; - qemu_irq *isa_irq; - int i; + ViaISAState *s = VIA_ISA(d); - qdev_init_gpio_out(dev, &s->cpu_intr, 1); - isa_irq = qemu_allocate_irqs(via_isa_request_i8259_irq, s, 1); - isa_bus = isa_bus_new(dev, get_system_memory(), pci_address_space_io(d), - &error_fatal); - isa_bus_irqs(isa_bus, i8259_init(isa_bus, *isa_irq)); - i8254_pit_init(isa_bus, 0x40, 0, NULL); - i8257_dma_init(isa_bus, 0); - isa_create_simple(isa_bus, TYPE_VT82C686B_SUPERIO); - mc146818_rtc_init(isa_bus, 2000, NULL); - - for (i = 0; i < PCI_CONFIG_HEADER_SIZE; i++) { - if (i < PCI_COMMAND || i >= PCI_REVISION_ID) { - d->wmask[i] = 0; - } - } - - memory_region_init_io(&s->superio_cfg.io, OBJECT(d), &superio_cfg_ops, - &s->superio_cfg, "superio_cfg", 2); - memory_region_set_enabled(&s->superio_cfg.io, false); - /* - * The floppy also uses 0x3f0 and 0x3f1. - * But we do not emulate a floppy, so just set it here. 
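The via-superio code introduced above models the classic index/data configuration interface at I/O ports 0x3f0/0x3f1: the guest writes a register index to the first port, then accesses that register through the second, and the whole window is switched on and off from PCI config space (bit 1 of register 0x85 on the 686B). The simplified, self-contained model below shows only the access pattern; it ignores the per-register read-only handling of the real device, and the sample values (register 0xe3 set to 0xfc, the floppy base address) are taken from the 686B reset defaults in the hunk above.

#include <stdint.h>
#include <stdio.h>

static uint8_t regs[0x100];

static void cfg_write(int port, uint8_t data)
{
    if (port == 0) {                   /* index port (0x3f0) */
        regs[0] = data;
    } else {                           /* data port (0x3f1) */
        regs[regs[0]] = data;
    }
}

static uint8_t cfg_read(int port)
{
    uint8_t idx = regs[0];

    if (port == 0) {
        return idx;
    }
    return idx == 0 ? 0 : regs[idx];   /* register 0 itself holds the index */
}

int main(void)
{
    cfg_write(0, 0xe3);                /* select the floppy base address register */
    cfg_write(1, 0xfc);                /* program its 686B reset default */
    printf("reg 0xe3 = 0x%02x\n", cfg_read(1));   /* prints 0xfc */
    return 0;
}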
- */ - memory_region_add_subregion(isa_bus->address_space_io, 0x3f0, - &s->superio_cfg.io); + via_isa_realize(d, errp); + s->via_sio = VIA_SUPERIO(isa_create_simple(s->isa_bus, + TYPE_VT82C686B_SUPERIO)); } -static void via_class_init(ObjectClass *klass, void *data) +static void vt82c686b_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); @@ -438,47 +656,85 @@ static void via_class_init(ObjectClass *klass, void *data) k->realize = vt82c686b_realize; k->config_write = vt82c686b_write_config; k->vendor_id = PCI_VENDOR_ID_VIA; - k->device_id = PCI_DEVICE_ID_VIA_ISA_BRIDGE; + k->device_id = PCI_DEVICE_ID_VIA_82C686B_ISA; k->class_id = PCI_CLASS_BRIDGE_ISA; k->revision = 0x40; dc->reset = vt82c686b_isa_reset; dc->desc = "ISA bridge"; dc->vmsd = &vmstate_via; - /* - * Reason: part of VIA VT82C686 southbridge, needs to be wired up, - * e.g. by mips_fuloong2e_init() - */ + /* Reason: part of VIA VT82C686 southbridge, needs to be wired up */ dc->user_creatable = false; } -static const TypeInfo via_info = { +static const TypeInfo vt82c686b_isa_info = { .name = TYPE_VT82C686B_ISA, - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(VT82C686BISAState), - .class_init = via_class_init, - .interfaces = (InterfaceInfo[]) { - { INTERFACE_CONVENTIONAL_PCI_DEVICE }, - { }, - }, + .parent = TYPE_VIA_ISA, + .instance_size = sizeof(ViaISAState), + .class_init = vt82c686b_class_init, }; +/* TYPE_VT8231_ISA */ -static void vt82c686b_superio_class_init(ObjectClass *klass, void *data) +static void vt8231_write_config(PCIDevice *d, uint32_t addr, + uint32_t val, int len) { - ISASuperIOClass *sc = ISA_SUPERIO_CLASS(klass); + ViaISAState *s = VIA_ISA(d); - sc->serial.count = 2; - sc->parallel.count = 1; - sc->ide.count = 0; - sc->floppy.count = 1; + trace_via_isa_write(addr, val, len); + pci_default_write_config(d, addr, val, len); + if (addr == 0x50) { + /* BIT(2): enable or disable superio config io ports */ + via_superio_io_enable(s->via_sio, val & BIT(2)); + } } -static const TypeInfo via_superio_info = { - .name = TYPE_VT82C686B_SUPERIO, - .parent = TYPE_ISA_SUPERIO, - .instance_size = sizeof(ISASuperIODevice), - .class_size = sizeof(ISASuperIOClass), - .class_init = vt82c686b_superio_class_init, +static void vt8231_isa_reset(DeviceState *dev) +{ + ViaISAState *s = VIA_ISA(dev); + uint8_t *pci_conf = s->dev.config; + + pci_set_long(pci_conf + PCI_CAPABILITY_LIST, 0x000000c0); + pci_set_word(pci_conf + PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY | + PCI_COMMAND_MASTER | PCI_COMMAND_SPECIAL); + pci_set_word(pci_conf + PCI_STATUS, PCI_STATUS_DEVSEL_MEDIUM); + + pci_conf[0x58] = 0x40; /* Miscellaneous Control 0 */ + pci_conf[0x67] = 0x08; /* Fast IR Config */ + pci_conf[0x6b] = 0x01; /* Fast IR I/O Base */ +} + +static void vt8231_realize(PCIDevice *d, Error **errp) +{ + ViaISAState *s = VIA_ISA(d); + + via_isa_realize(d, errp); + s->via_sio = VIA_SUPERIO(isa_create_simple(s->isa_bus, + TYPE_VT8231_SUPERIO)); +} + +static void vt8231_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = vt8231_realize; + k->config_write = vt8231_write_config; + k->vendor_id = PCI_VENDOR_ID_VIA; + k->device_id = PCI_DEVICE_ID_VIA_8231_ISA; + k->class_id = PCI_CLASS_BRIDGE_ISA; + k->revision = 0x10; + dc->reset = vt8231_isa_reset; + dc->desc = "ISA bridge"; + dc->vmsd = &vmstate_via; + /* Reason: part of VIA VT8231 southbridge, needs to be wired up */ + 
dc->user_creatable = false; +} + +static const TypeInfo vt8231_isa_info = { + .name = TYPE_VT8231_ISA, + .parent = TYPE_VIA_ISA, + .instance_size = sizeof(ViaISAState), + .class_init = vt8231_class_init, }; @@ -487,8 +743,12 @@ static void vt82c686b_register_types(void) type_register_static(&via_pm_info); type_register_static(&vt82c686b_pm_info); type_register_static(&vt8231_pm_info); - type_register_static(&via_info); type_register_static(&via_superio_info); + type_register_static(&vt82c686b_superio_info); + type_register_static(&vt8231_superio_info); + type_register_static(&via_isa_info); + type_register_static(&vt82c686b_isa_info); + type_register_static(&vt8231_isa_info); } type_init(vt82c686b_register_types) diff --git a/hw/lm32/Kconfig b/hw/lm32/Kconfig deleted file mode 100644 index 8ac94205d71..00000000000 --- a/hw/lm32/Kconfig +++ /dev/null @@ -1,18 +0,0 @@ -config LM32_DEVICES - bool - select PTIMER - -config MILKYMIST - bool - # FIXME: disabling it results in compile-time errors - select MILKYMIST_TMU2 if OPENGL && X11 - select PFLASH_CFI01 - select FRAMEBUFFER - select SD - select USB_OHCI - select LM32_DEVICES - -config LM32_EVR - bool - select LM32_DEVICES - select PFLASH_CFI02 diff --git a/hw/lm32/lm32.h b/hw/lm32/lm32.h deleted file mode 100644 index 7b4f6255b91..00000000000 --- a/hw/lm32/lm32.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef HW_LM32_H -#define HW_LM32_H - -#include "hw/char/lm32_juart.h" -#include "hw/qdev-properties.h" -#include "qapi/error.h" - -static inline DeviceState *lm32_pic_init(qemu_irq cpu_irq) -{ - DeviceState *dev; - SysBusDevice *d; - - dev = qdev_new("lm32-pic"); - d = SYS_BUS_DEVICE(dev); - sysbus_realize_and_unref(d, &error_fatal); - sysbus_connect_irq(d, 0, cpu_irq); - - return dev; -} - -static inline DeviceState *lm32_juart_init(Chardev *chr) -{ - DeviceState *dev; - - dev = qdev_new(TYPE_LM32_JUART); - qdev_prop_set_chr(dev, "chardev", chr); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - - return dev; -} - -static inline DeviceState *lm32_uart_create(hwaddr addr, - qemu_irq irq, - Chardev *chr) -{ - DeviceState *dev; - SysBusDevice *s; - - dev = qdev_new("lm32-uart"); - s = SYS_BUS_DEVICE(dev); - qdev_prop_set_chr(dev, "chardev", chr); - sysbus_realize_and_unref(s, &error_fatal); - sysbus_mmio_map(s, 0, addr); - sysbus_connect_irq(s, 0, irq); - return dev; -} - -#endif diff --git a/hw/lm32/lm32_boards.c b/hw/lm32/lm32_boards.c deleted file mode 100644 index b5d97dd53ed..00000000000 --- a/hw/lm32/lm32_boards.c +++ /dev/null @@ -1,333 +0,0 @@ -/* - * QEMU models for LatticeMico32 uclinux and evr32 boards. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- */ - -#include "qemu/osdep.h" -#include "qemu/units.h" -#include "qemu/cutils.h" -#include "qemu/error-report.h" -#include "cpu.h" -#include "hw/sysbus.h" -#include "hw/irq.h" -#include "hw/block/flash.h" -#include "hw/boards.h" -#include "hw/loader.h" -#include "elf.h" -#include "lm32_hwsetup.h" -#include "lm32.h" -#include "exec/address-spaces.h" -#include "sysemu/reset.h" -#include "sysemu/sysemu.h" - -typedef struct { - LM32CPU *cpu; - hwaddr bootstrap_pc; - hwaddr flash_base; - hwaddr hwsetup_base; - hwaddr initrd_base; - size_t initrd_size; - hwaddr cmdline_base; -} ResetInfo; - -static void cpu_irq_handler(void *opaque, int irq, int level) -{ - LM32CPU *cpu = opaque; - CPUState *cs = CPU(cpu); - - if (level) { - cpu_interrupt(cs, CPU_INTERRUPT_HARD); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } -} - -static void main_cpu_reset(void *opaque) -{ - ResetInfo *reset_info = opaque; - CPULM32State *env = &reset_info->cpu->env; - - cpu_reset(CPU(reset_info->cpu)); - - /* init defaults */ - env->pc = (uint32_t)reset_info->bootstrap_pc; - env->regs[R_R1] = (uint32_t)reset_info->hwsetup_base; - env->regs[R_R2] = (uint32_t)reset_info->cmdline_base; - env->regs[R_R3] = (uint32_t)reset_info->initrd_base; - env->regs[R_R4] = (uint32_t)(reset_info->initrd_base + - reset_info->initrd_size); - env->eba = reset_info->flash_base; - env->deba = reset_info->flash_base; -} - -static void lm32_evr_init(MachineState *machine) -{ - MachineClass *mc = MACHINE_GET_CLASS(machine); - const char *kernel_filename = machine->kernel_filename; - LM32CPU *cpu; - CPULM32State *env; - DriveInfo *dinfo; - MemoryRegion *address_space_mem = get_system_memory(); - qemu_irq irq[32]; - ResetInfo *reset_info; - int i; - - if (machine->ram_size != mc->default_ram_size) { - char *sz = size_to_str(mc->default_ram_size); - error_report("Invalid RAM size, should be %s", sz); - g_free(sz); - exit(EXIT_FAILURE); - } - - /* memory map */ - hwaddr flash_base = 0x04000000; - size_t flash_sector_size = 256 * KiB; - size_t flash_size = 32 * MiB; - hwaddr ram_base = 0x08000000; - hwaddr timer0_base = 0x80002000; - hwaddr uart0_base = 0x80006000; - hwaddr timer1_base = 0x8000a000; - int uart0_irq = 0; - int timer0_irq = 1; - int timer1_irq = 3; - - reset_info = g_malloc0(sizeof(ResetInfo)); - - cpu = LM32_CPU(cpu_create(machine->cpu_type)); - - env = &cpu->env; - reset_info->cpu = cpu; - - reset_info->flash_base = flash_base; - - memory_region_add_subregion(address_space_mem, ram_base, machine->ram); - - dinfo = drive_get(IF_PFLASH, 0, 0); - /* Spansion S29NS128P */ - pflash_cfi02_register(flash_base, "lm32_evr.flash", flash_size, - dinfo ? 
blk_by_legacy_dinfo(dinfo) : NULL, - flash_sector_size, - 1, 2, 0x01, 0x7e, 0x43, 0x00, 0x555, 0x2aa, 1); - - /* create irq lines */ - env->pic_state = lm32_pic_init(qemu_allocate_irq(cpu_irq_handler, cpu, 0)); - for (i = 0; i < 32; i++) { - irq[i] = qdev_get_gpio_in(env->pic_state, i); - } - - lm32_uart_create(uart0_base, irq[uart0_irq], serial_hd(0)); - sysbus_create_simple("lm32-timer", timer0_base, irq[timer0_irq]); - sysbus_create_simple("lm32-timer", timer1_base, irq[timer1_irq]); - - /* make sure juart isn't the first chardev */ - env->juart_state = lm32_juart_init(serial_hd(1)); - - reset_info->bootstrap_pc = flash_base; - - if (kernel_filename) { - uint64_t entry; - int kernel_size; - - kernel_size = load_elf(kernel_filename, NULL, NULL, NULL, - &entry, NULL, NULL, NULL, - 1, EM_LATTICEMICO32, 0, 0); - reset_info->bootstrap_pc = entry; - - if (kernel_size < 0) { - kernel_size = load_image_targphys(kernel_filename, ram_base, - machine->ram_size); - reset_info->bootstrap_pc = ram_base; - } - - if (kernel_size < 0) { - error_report("could not load kernel '%s'", kernel_filename); - exit(1); - } - } - - qemu_register_reset(main_cpu_reset, reset_info); -} - -static void lm32_uclinux_init(MachineState *machine) -{ - MachineClass *mc = MACHINE_GET_CLASS(machine); - const char *kernel_filename = machine->kernel_filename; - const char *kernel_cmdline = machine->kernel_cmdline; - const char *initrd_filename = machine->initrd_filename; - LM32CPU *cpu; - CPULM32State *env; - DriveInfo *dinfo; - MemoryRegion *address_space_mem = get_system_memory(); - qemu_irq irq[32]; - HWSetup *hw; - ResetInfo *reset_info; - int i; - - if (machine->ram_size != mc->default_ram_size) { - char *sz = size_to_str(mc->default_ram_size); - error_report("Invalid RAM size, should be %s", sz); - g_free(sz); - exit(EXIT_FAILURE); - } - - /* memory map */ - hwaddr flash_base = 0x04000000; - size_t flash_sector_size = 256 * KiB; - size_t flash_size = 32 * MiB; - hwaddr ram_base = 0x08000000; - hwaddr uart0_base = 0x80000000; - hwaddr timer0_base = 0x80002000; - hwaddr timer1_base = 0x80010000; - hwaddr timer2_base = 0x80012000; - int uart0_irq = 0; - int timer0_irq = 1; - int timer1_irq = 20; - int timer2_irq = 21; - hwaddr hwsetup_base = 0x0bffe000; - hwaddr cmdline_base = 0x0bfff000; - hwaddr initrd_base = 0x08400000; - size_t initrd_max = 0x01000000; - - reset_info = g_malloc0(sizeof(ResetInfo)); - - cpu = LM32_CPU(cpu_create(machine->cpu_type)); - - env = &cpu->env; - reset_info->cpu = cpu; - - reset_info->flash_base = flash_base; - - memory_region_add_subregion(address_space_mem, ram_base, machine->ram); - - dinfo = drive_get(IF_PFLASH, 0, 0); - /* Spansion S29NS128P */ - pflash_cfi02_register(flash_base, "lm32_uclinux.flash", flash_size, - dinfo ? 
blk_by_legacy_dinfo(dinfo) : NULL, - flash_sector_size, - 1, 2, 0x01, 0x7e, 0x43, 0x00, 0x555, 0x2aa, 1); - - /* create irq lines */ - env->pic_state = lm32_pic_init(qemu_allocate_irq(cpu_irq_handler, env, 0)); - for (i = 0; i < 32; i++) { - irq[i] = qdev_get_gpio_in(env->pic_state, i); - } - - lm32_uart_create(uart0_base, irq[uart0_irq], serial_hd(0)); - sysbus_create_simple("lm32-timer", timer0_base, irq[timer0_irq]); - sysbus_create_simple("lm32-timer", timer1_base, irq[timer1_irq]); - sysbus_create_simple("lm32-timer", timer2_base, irq[timer2_irq]); - - /* make sure juart isn't the first chardev */ - env->juart_state = lm32_juart_init(serial_hd(1)); - - reset_info->bootstrap_pc = flash_base; - - if (kernel_filename) { - uint64_t entry; - int kernel_size; - - kernel_size = load_elf(kernel_filename, NULL, NULL, NULL, - &entry, NULL, NULL, NULL, - 1, EM_LATTICEMICO32, 0, 0); - reset_info->bootstrap_pc = entry; - - if (kernel_size < 0) { - kernel_size = load_image_targphys(kernel_filename, ram_base, - machine->ram_size); - reset_info->bootstrap_pc = ram_base; - } - - if (kernel_size < 0) { - error_report("could not load kernel '%s'", kernel_filename); - exit(1); - } - } - - /* generate a rom with the hardware description */ - hw = hwsetup_init(); - hwsetup_add_cpu(hw, "LM32", 75000000); - hwsetup_add_flash(hw, "flash", flash_base, flash_size); - hwsetup_add_ddr_sdram(hw, "ddr_sdram", ram_base, machine->ram_size); - hwsetup_add_timer(hw, "timer0", timer0_base, timer0_irq); - hwsetup_add_timer(hw, "timer1_dev_only", timer1_base, timer1_irq); - hwsetup_add_timer(hw, "timer2_dev_only", timer2_base, timer2_irq); - hwsetup_add_uart(hw, "uart", uart0_base, uart0_irq); - hwsetup_add_trailer(hw); - hwsetup_create_rom(hw, hwsetup_base); - hwsetup_free(hw); - - reset_info->hwsetup_base = hwsetup_base; - - if (kernel_cmdline && strlen(kernel_cmdline)) { - pstrcpy_targphys("cmdline", cmdline_base, TARGET_PAGE_SIZE, - kernel_cmdline); - reset_info->cmdline_base = cmdline_base; - } - - if (initrd_filename) { - size_t initrd_size; - initrd_size = load_image_targphys(initrd_filename, initrd_base, - initrd_max); - reset_info->initrd_base = initrd_base; - reset_info->initrd_size = initrd_size; - } - - qemu_register_reset(main_cpu_reset, reset_info); -} - -static void lm32_evr_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "LatticeMico32 EVR32 eval system"; - mc->init = lm32_evr_init; - mc->is_default = true; - mc->default_cpu_type = LM32_CPU_TYPE_NAME("lm32-full"); - mc->default_ram_size = 64 * MiB; - mc->default_ram_id = "lm32_evr.sdram"; -} - -static const TypeInfo lm32_evr_type = { - .name = MACHINE_TYPE_NAME("lm32-evr"), - .parent = TYPE_MACHINE, - .class_init = lm32_evr_class_init, -}; - -static void lm32_uclinux_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "lm32 platform for uClinux and u-boot by Theobroma Systems"; - mc->init = lm32_uclinux_init; - mc->default_cpu_type = LM32_CPU_TYPE_NAME("lm32-full"); - mc->default_ram_size = 64 * MiB; - mc->default_ram_id = "lm32_uclinux.sdram"; -} - -static const TypeInfo lm32_uclinux_type = { - .name = MACHINE_TYPE_NAME("lm32-uclinux"), - .parent = TYPE_MACHINE, - .class_init = lm32_uclinux_class_init, -}; - -static void lm32_machine_init(void) -{ - type_register_static(&lm32_evr_type); - type_register_static(&lm32_uclinux_type); -} - -type_init(lm32_machine_init) diff --git a/hw/lm32/lm32_hwsetup.h b/hw/lm32/lm32_hwsetup.h deleted file mode 100644 index 
e6cd30ad686..00000000000 --- a/hw/lm32/lm32_hwsetup.h +++ /dev/null @@ -1,179 +0,0 @@ -/* - * LatticeMico32 hwsetup helper functions. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - */ - -/* - * These are helper functions for creating the hardware description blob used - * in the Theobroma's uClinux port. - */ - -#ifndef QEMU_HW_LM32_HWSETUP_H -#define QEMU_HW_LM32_HWSETUP_H - -#include "qemu/cutils.h" -#include "hw/loader.h" - -typedef struct { - void *data; - void *ptr; -} HWSetup; - -enum hwsetup_tag { - HWSETUP_TAG_EOL = 0, - HWSETUP_TAG_CPU = 1, - HWSETUP_TAG_ASRAM = 2, - HWSETUP_TAG_FLASH = 3, - HWSETUP_TAG_SDRAM = 4, - HWSETUP_TAG_OCM = 5, - HWSETUP_TAG_DDR_SDRAM = 6, - HWSETUP_TAG_DDR2_SDRAM = 7, - HWSETUP_TAG_TIMER = 8, - HWSETUP_TAG_UART = 9, - HWSETUP_TAG_GPIO = 10, - HWSETUP_TAG_TRISPEEDMAC = 11, - HWSETUP_TAG_I2CM = 12, - HWSETUP_TAG_LEDS = 13, - HWSETUP_TAG_7SEG = 14, - HWSETUP_TAG_SPI_S = 15, - HWSETUP_TAG_SPI_M = 16, -}; - -static inline HWSetup *hwsetup_init(void) -{ - HWSetup *hw; - - hw = g_malloc(sizeof(HWSetup)); - hw->data = g_malloc0(TARGET_PAGE_SIZE); - hw->ptr = hw->data; - - return hw; -} - -static inline void hwsetup_free(HWSetup *hw) -{ - g_free(hw->data); - g_free(hw); -} - -static inline void hwsetup_create_rom(HWSetup *hw, - hwaddr base) -{ - rom_add_blob("hwsetup", hw->data, TARGET_PAGE_SIZE, - TARGET_PAGE_SIZE, base, NULL, NULL, NULL, NULL, true); -} - -static inline void hwsetup_add_u8(HWSetup *hw, uint8_t u) -{ - stb_p(hw->ptr, u); - hw->ptr += 1; -} - -static inline void hwsetup_add_u32(HWSetup *hw, uint32_t u) -{ - stl_p(hw->ptr, u); - hw->ptr += 4; -} - -static inline void hwsetup_add_tag(HWSetup *hw, enum hwsetup_tag t) -{ - stl_p(hw->ptr, t); - hw->ptr += 4; -} - -static inline void hwsetup_add_str(HWSetup *hw, const char *str) -{ - pstrcpy(hw->ptr, 32, str); - hw->ptr += 32; -} - -static inline void hwsetup_add_trailer(HWSetup *hw) -{ - hwsetup_add_u32(hw, 8); /* size */ - hwsetup_add_tag(hw, HWSETUP_TAG_EOL); -} - -static inline void hwsetup_add_cpu(HWSetup *hw, - const char *name, uint32_t frequency) -{ - hwsetup_add_u32(hw, 44); /* size */ - hwsetup_add_tag(hw, HWSETUP_TAG_CPU); - hwsetup_add_str(hw, name); - hwsetup_add_u32(hw, frequency); -} - -static inline void hwsetup_add_flash(HWSetup *hw, - const char *name, uint32_t base, uint32_t size) -{ - hwsetup_add_u32(hw, 52); /* size */ - hwsetup_add_tag(hw, HWSETUP_TAG_FLASH); - hwsetup_add_str(hw, name); - hwsetup_add_u32(hw, base); - hwsetup_add_u32(hw, size); - hwsetup_add_u8(hw, 8); /* read latency */ - hwsetup_add_u8(hw, 8); /* write latency */ - hwsetup_add_u8(hw, 25); /* address width */ - hwsetup_add_u8(hw, 32); /* data width */ -} - -static inline void hwsetup_add_ddr_sdram(HWSetup *hw, - const char *name, uint32_t base, uint32_t size) -{ - hwsetup_add_u32(hw, 48); /* size */ - hwsetup_add_tag(hw, HWSETUP_TAG_DDR_SDRAM); - hwsetup_add_str(hw, name); - 
hwsetup_add_u32(hw, base); - hwsetup_add_u32(hw, size); -} - -static inline void hwsetup_add_timer(HWSetup *hw, - const char *name, uint32_t base, uint32_t irq) -{ - hwsetup_add_u32(hw, 56); /* size */ - hwsetup_add_tag(hw, HWSETUP_TAG_TIMER); - hwsetup_add_str(hw, name); - hwsetup_add_u32(hw, base); - hwsetup_add_u8(hw, 1); /* wr_tickcount */ - hwsetup_add_u8(hw, 1); /* rd_tickcount */ - hwsetup_add_u8(hw, 1); /* start_stop_control */ - hwsetup_add_u8(hw, 32); /* counter_width */ - hwsetup_add_u32(hw, 20); /* reload_ticks */ - hwsetup_add_u8(hw, irq); - hwsetup_add_u8(hw, 0); /* padding */ - hwsetup_add_u8(hw, 0); /* padding */ - hwsetup_add_u8(hw, 0); /* padding */ -} - -static inline void hwsetup_add_uart(HWSetup *hw, - const char *name, uint32_t base, uint32_t irq) -{ - hwsetup_add_u32(hw, 56); /* size */ - hwsetup_add_tag(hw, HWSETUP_TAG_UART); - hwsetup_add_str(hw, name); - hwsetup_add_u32(hw, base); - hwsetup_add_u32(hw, 115200); /* baudrate */ - hwsetup_add_u8(hw, 8); /* databits */ - hwsetup_add_u8(hw, 1); /* stopbits */ - hwsetup_add_u8(hw, 1); /* use_interrupt */ - hwsetup_add_u8(hw, 1); /* block_on_transmit */ - hwsetup_add_u8(hw, 1); /* block_on_receive */ - hwsetup_add_u8(hw, 4); /* rx_buffer_size */ - hwsetup_add_u8(hw, 4); /* tx_buffer_size */ - hwsetup_add_u8(hw, irq); -} - -#endif /* QEMU_HW_LM32_HWSETUP_H */ diff --git a/hw/lm32/meson.build b/hw/lm32/meson.build deleted file mode 100644 index 42d6f8db3d9..00000000000 --- a/hw/lm32/meson.build +++ /dev/null @@ -1,6 +0,0 @@ -lm32_ss = ss.source_set() -# LM32 boards -lm32_ss.add(when: 'CONFIG_LM32_EVR', if_true: files('lm32_boards.c')) -lm32_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist.c')) - -hw_arch += {'lm32': lm32_ss} diff --git a/hw/lm32/milkymist-hw.h b/hw/lm32/milkymist-hw.h deleted file mode 100644 index 5dca5d52f57..00000000000 --- a/hw/lm32/milkymist-hw.h +++ /dev/null @@ -1,133 +0,0 @@ -#ifndef QEMU_HW_MILKYMIST_HW_H -#define QEMU_HW_MILKYMIST_HW_H - -#include "hw/qdev-core.h" -#include "net/net.h" -#include "qapi/error.h" - -static inline DeviceState *milkymist_uart_create(hwaddr base, - qemu_irq irq, - Chardev *chr) -{ - DeviceState *dev; - - dev = qdev_new("milkymist-uart"); - qdev_prop_set_chr(dev, "chardev", chr); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq); - - return dev; -} - -static inline DeviceState *milkymist_hpdmc_create(hwaddr base) -{ - DeviceState *dev; - - dev = qdev_new("milkymist-hpdmc"); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - - return dev; -} - -static inline DeviceState *milkymist_vgafb_create(hwaddr base, - uint32_t fb_offset, uint32_t fb_mask) -{ - DeviceState *dev; - - dev = qdev_new("milkymist-vgafb"); - qdev_prop_set_uint32(dev, "fb_offset", fb_offset); - qdev_prop_set_uint32(dev, "fb_mask", fb_mask); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - - return dev; -} - -static inline DeviceState *milkymist_sysctl_create(hwaddr base, - qemu_irq gpio_irq, qemu_irq timer0_irq, qemu_irq timer1_irq, - uint32_t freq_hz, uint32_t system_id, uint32_t capabilities, - uint32_t gpio_strappings) -{ - DeviceState *dev; - - dev = qdev_new("milkymist-sysctl"); - qdev_prop_set_uint32(dev, "frequency", freq_hz); - qdev_prop_set_uint32(dev, "systemid", system_id); - qdev_prop_set_uint32(dev, "capabilities", capabilities); - 
qdev_prop_set_uint32(dev, "gpio_strappings", gpio_strappings); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, gpio_irq); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 1, timer0_irq); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 2, timer1_irq); - - return dev; -} - -static inline DeviceState *milkymist_pfpu_create(hwaddr base, - qemu_irq irq) -{ - DeviceState *dev; - - dev = qdev_new("milkymist-pfpu"); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq); - return dev; -} - -static inline DeviceState *milkymist_ac97_create(hwaddr base, - qemu_irq crrequest_irq, qemu_irq crreply_irq, qemu_irq dmar_irq, - qemu_irq dmaw_irq) -{ - DeviceState *dev; - - dev = qdev_new("milkymist-ac97"); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, crrequest_irq); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 1, crreply_irq); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 2, dmar_irq); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 3, dmaw_irq); - - return dev; -} - -static inline DeviceState *milkymist_minimac2_create(hwaddr base, - hwaddr buffers_base, qemu_irq rx_irq, qemu_irq tx_irq) -{ - DeviceState *dev; - - qemu_check_nic_model(&nd_table[0], "minimac2"); - dev = qdev_new("milkymist-minimac2"); - qdev_set_nic_properties(dev, &nd_table[0]); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, buffers_base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, rx_irq); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 1, tx_irq); - - return dev; -} - -static inline DeviceState *milkymist_softusb_create(hwaddr base, - qemu_irq irq, uint32_t pmem_base, uint32_t pmem_size, - uint32_t dmem_base, uint32_t dmem_size) -{ - DeviceState *dev; - - dev = qdev_new("milkymist-softusb"); - qdev_prop_set_uint32(dev, "pmem_size", pmem_size); - qdev_prop_set_uint32(dev, "dmem_size", dmem_size); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, pmem_base); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, dmem_base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq); - - return dev; -} - -#endif /* QEMU_HW_MILKYMIST_HW_H */ diff --git a/hw/lm32/milkymist.c b/hw/lm32/milkymist.c deleted file mode 100644 index 72d1326531a..00000000000 --- a/hw/lm32/milkymist.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * QEMU model for the Milkymist board. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- */ - -#include "qemu/osdep.h" -#include "qemu/units.h" -#include "qemu/error-report.h" -#include "qemu-common.h" -#include "qemu/datadir.h" -#include "cpu.h" -#include "hw/sysbus.h" -#include "hw/irq.h" -#include "hw/block/flash.h" -#include "sysemu/sysemu.h" -#include "sysemu/qtest.h" -#include "sysemu/reset.h" -#include "hw/boards.h" -#include "hw/loader.h" -#include "hw/qdev-properties.h" -#include "elf.h" -#include "milkymist-hw.h" -#include "hw/display/milkymist_tmu2.h" -#include "hw/sd/sd.h" -#include "lm32.h" -#include "exec/address-spaces.h" -#include "qemu/cutils.h" - -#define BIOS_FILENAME "mmone-bios.bin" -#define BIOS_OFFSET 0x00860000 -#define BIOS_SIZE (512 * KiB) -#define KERNEL_LOAD_ADDR 0x40000000 - -typedef struct { - LM32CPU *cpu; - hwaddr bootstrap_pc; - hwaddr flash_base; - hwaddr initrd_base; - size_t initrd_size; - hwaddr cmdline_base; -} ResetInfo; - -static void cpu_irq_handler(void *opaque, int irq, int level) -{ - LM32CPU *cpu = opaque; - CPUState *cs = CPU(cpu); - - if (level) { - cpu_interrupt(cs, CPU_INTERRUPT_HARD); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } -} - -static void main_cpu_reset(void *opaque) -{ - ResetInfo *reset_info = opaque; - CPULM32State *env = &reset_info->cpu->env; - - cpu_reset(CPU(reset_info->cpu)); - - /* init defaults */ - env->pc = reset_info->bootstrap_pc; - env->regs[R_R1] = reset_info->cmdline_base; - env->regs[R_R2] = reset_info->initrd_base; - env->regs[R_R3] = reset_info->initrd_base + reset_info->initrd_size; - env->eba = reset_info->flash_base; - env->deba = reset_info->flash_base; -} - -static DeviceState *milkymist_memcard_create(hwaddr base) -{ - DeviceState *dev; - DriveInfo *dinfo; - - dev = qdev_new("milkymist-memcard"); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - - dinfo = drive_get_next(IF_SD); - if (dinfo) { - DeviceState *card; - - card = qdev_new(TYPE_SD_CARD); - qdev_prop_set_drive_err(card, "drive", blk_by_legacy_dinfo(dinfo), - &error_fatal); - qdev_realize_and_unref(card, qdev_get_child_bus(dev, "sd-bus"), - &error_fatal); - } - - return dev; -} - -static void -milkymist_init(MachineState *machine) -{ - MachineClass *mc = MACHINE_GET_CLASS(machine); - const char *bios_name = machine->firmware ?: BIOS_FILENAME; - const char *kernel_filename = machine->kernel_filename; - const char *kernel_cmdline = machine->kernel_cmdline; - const char *initrd_filename = machine->initrd_filename; - LM32CPU *cpu; - CPULM32State *env; - int kernel_size; - DriveInfo *dinfo; - MemoryRegion *address_space_mem = get_system_memory(); - qemu_irq irq[32]; - int i; - char *bios_filename; - ResetInfo *reset_info; - - if (machine->ram_size != mc->default_ram_size) { - char *sz = size_to_str(mc->default_ram_size); - error_report("Invalid RAM size, should be %s", sz); - g_free(sz); - exit(EXIT_FAILURE); - } - - /* memory map */ - hwaddr flash_base = 0x00000000; - size_t flash_sector_size = 128 * KiB; - size_t flash_size = 32 * MiB; - hwaddr sdram_base = 0x40000000; - - hwaddr initrd_base = sdram_base + 0x1002000; - hwaddr cmdline_base = sdram_base + 0x1000000; - size_t initrd_max = machine->ram_size - 0x1002000; - - reset_info = g_malloc0(sizeof(ResetInfo)); - - cpu = LM32_CPU(cpu_create(machine->cpu_type)); - - env = &cpu->env; - reset_info->cpu = cpu; - - cpu_lm32_set_phys_msb_ignore(env, 1); - - memory_region_add_subregion(address_space_mem, sdram_base, machine->ram); - - dinfo = drive_get(IF_PFLASH, 0, 0); - /* Numonyx JS28F256J3F105 */ - 
pflash_cfi01_register(flash_base, "milkymist.flash", flash_size, - dinfo ? blk_by_legacy_dinfo(dinfo) : NULL, - flash_sector_size, 2, 0x00, 0x89, 0x00, 0x1d, 1); - - /* create irq lines */ - env->pic_state = lm32_pic_init(qemu_allocate_irq(cpu_irq_handler, cpu, 0)); - for (i = 0; i < 32; i++) { - irq[i] = qdev_get_gpio_in(env->pic_state, i); - } - - /* load bios rom */ - bios_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); - - if (bios_filename) { - if (load_image_targphys(bios_filename, BIOS_OFFSET, BIOS_SIZE) < 0) { - error_report("could not load bios '%s'", bios_filename); - exit(1); - } - } - - reset_info->bootstrap_pc = BIOS_OFFSET; - - /* if no kernel is given no valid bios rom is a fatal error */ - if (!kernel_filename && !dinfo && !bios_filename && !qtest_enabled()) { - error_report("could not load Milkymist One bios '%s'", bios_name); - exit(1); - } - g_free(bios_filename); - - milkymist_uart_create(0x60000000, irq[0], serial_hd(0)); - milkymist_sysctl_create(0x60001000, irq[1], irq[2], irq[3], - 80000000, 0x10014d31, 0x0000041f, 0x00000001); - milkymist_hpdmc_create(0x60002000); - milkymist_vgafb_create(0x60003000, 0x40000000, 0x0fffffff); - milkymist_memcard_create(0x60004000); - milkymist_ac97_create(0x60005000, irq[4], irq[5], irq[6], irq[7]); - milkymist_pfpu_create(0x60006000, irq[8]); - if (machine->enable_graphics) { - milkymist_tmu2_create(0x60007000, irq[9]); - } - milkymist_minimac2_create(0x60008000, 0x30000000, irq[10], irq[11]); - milkymist_softusb_create(0x6000f000, irq[15], - 0x20000000, 0x1000, 0x20020000, 0x2000); - - /* make sure juart isn't the first chardev */ - env->juart_state = lm32_juart_init(serial_hd(1)); - - if (kernel_filename) { - uint64_t entry; - - /* Boots a kernel elf binary. */ - kernel_size = load_elf(kernel_filename, NULL, NULL, NULL, - &entry, NULL, NULL, NULL, - 1, EM_LATTICEMICO32, 0, 0); - reset_info->bootstrap_pc = entry; - - if (kernel_size < 0) { - kernel_size = load_image_targphys(kernel_filename, sdram_base, - machine->ram_size); - reset_info->bootstrap_pc = sdram_base; - } - - if (kernel_size < 0) { - error_report("could not load kernel '%s'", kernel_filename); - exit(1); - } - } - - if (kernel_cmdline && strlen(kernel_cmdline)) { - pstrcpy_targphys("cmdline", cmdline_base, TARGET_PAGE_SIZE, - kernel_cmdline); - reset_info->cmdline_base = (uint32_t)cmdline_base; - } - - if (initrd_filename) { - size_t initrd_size; - initrd_size = load_image_targphys(initrd_filename, initrd_base, - initrd_max); - reset_info->initrd_base = (uint32_t)initrd_base; - reset_info->initrd_size = (uint32_t)initrd_size; - } - - qemu_register_reset(main_cpu_reset, reset_info); -} - -static void milkymist_machine_init(MachineClass *mc) -{ - mc->desc = "Milkymist One"; - mc->init = milkymist_init; - mc->default_cpu_type = LM32_CPU_TYPE_NAME("lm32-full"); - mc->default_ram_size = 128 * MiB; - mc->default_ram_id = "milkymist.sdram"; -} - -DEFINE_MACHINE("milkymist", milkymist_machine_init) diff --git a/hw/m68k/an5206.c b/hw/m68k/an5206.c index 673898b0eac..11ae4c9795b 100644 --- a/hw/m68k/an5206.c +++ b/hw/m68k/an5206.c @@ -13,7 +13,6 @@ #include "hw/boards.h" #include "hw/loader.h" #include "elf.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "sysemu/qtest.h" diff --git a/hw/m68k/mcf5208.c b/hw/m68k/mcf5208.c index 7a03c71059b..93812ee206e 100644 --- a/hw/m68k/mcf5208.c +++ b/hw/m68k/mcf5208.c @@ -26,7 +26,6 @@ #include "hw/loader.h" #include "hw/sysbus.h" #include "elf.h" -#include "exec/address-spaces.h" #define SYS_FREQ 
166666666 diff --git a/hw/m68k/mcf_intc.c b/hw/m68k/mcf_intc.c index cf02f57a711..4cd30188c05 100644 --- a/hw/m68k/mcf_intc.c +++ b/hw/m68k/mcf_intc.c @@ -11,7 +11,6 @@ #include "qemu/module.h" #include "qemu/log.h" #include "cpu.h" -#include "hw/hw.h" #include "hw/irq.h" #include "hw/sysbus.h" #include "hw/m68k/mcf.h" diff --git a/hw/m68k/next-cube.c b/hw/m68k/next-cube.c index 92b45d760f1..e0d4a94f9db 100644 --- a/hw/m68k/next-cube.c +++ b/hw/m68k/next-cube.c @@ -10,9 +10,7 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "exec/hwaddr.h" -#include "exec/address-spaces.h" #include "sysemu/sysemu.h" #include "sysemu/qtest.h" #include "hw/irq.h" @@ -986,8 +984,8 @@ static void next_cube_init(MachineState *machine) sysbus_mmio_map(SYS_BUS_DEVICE(pcdev), 1, 0x02100000); /* BMAP memory */ - memory_region_init_ram_shared_nomigrate(bmapm1, NULL, "next.bmapmem", 64, - true, &error_fatal); + memory_region_init_ram_flags_nomigrate(bmapm1, NULL, "next.bmapmem", 64, + RAM_SHARED, &error_fatal); memory_region_add_subregion(sysmem, 0x020c0000, bmapm1); /* The Rev_2.5_v66.bin firmware accesses it at 0x820c0020, too */ memory_region_init_alias(bmapm2, NULL, "next.bmapmem2", bmapm1, 0x0, 64); diff --git a/hw/m68k/next-kbd.c b/hw/m68k/next-kbd.c index c11b5281f19..0544160e916 100644 --- a/hw/m68k/next-kbd.c +++ b/hw/m68k/next-kbd.c @@ -29,12 +29,9 @@ #include "qemu/osdep.h" #include "qemu/log.h" -#include "exec/address-spaces.h" -#include "hw/hw.h" #include "hw/sysbus.h" #include "hw/m68k/next-cube.h" #include "ui/console.h" -#include "sysemu/sysemu.h" #include "migration/vmstate.h" #include "qom/object.h" diff --git a/hw/m68k/q800.c b/hw/m68k/q800.c index 4d2e866eec7..e4c7c9b88ad 100644 --- a/hw/m68k/q800.c +++ b/hw/m68k/q800.c @@ -26,14 +26,12 @@ #include "qemu/datadir.h" #include "sysemu/sysemu.h" #include "cpu.h" -#include "hw/hw.h" #include "hw/boards.h" -#include "hw/irq.h" #include "hw/or-irq.h" +#include "hw/nmi.h" #include "elf.h" #include "hw/loader.h" #include "ui/console.h" -#include "exec/address-spaces.h" #include "hw/char/escc.h" #include "hw/sysbus.h" #include "hw/scsi/esp.h" @@ -70,18 +68,24 @@ #define ASC_BASE (IO_BASE + 0x14000) #define SWIM_BASE (IO_BASE + 0x1E000) -#define NUBUS_SUPER_SLOT_BASE 0x60000000 -#define NUBUS_SLOT_BASE 0xf0000000 +#define SONIC_PROM_SIZE 0x1000 /* * the video base, whereas it a Nubus address, * is needed by the kernel to have early display and * thus provided by the bootloader */ -#define VIDEO_BASE 0xf9001000 +#define VIDEO_BASE 0xf9000000 #define MAC_CLOCK 3686418 +/* + * Slot 0x9 is reserved for use by the in-built framebuffer whilst only + * slots 0xc, 0xd and 0xe physically exist on the Quadra 800 + */ +#define Q800_NUBUS_SLOTS_AVAILABLE (BIT(0x9) | BIT(0xc) | BIT(0xd) | \ + BIT(0xe)) + /* * The GLUE (General Logic Unit) is an Apple custom integrated circuit chip * that performs a variety of functions (RAM management, clock generation, ...). 
@@ -97,13 +101,110 @@ struct GLUEState { SysBusDevice parent_obj; M68kCPU *cpu; uint8_t ipr; + uint8_t auxmode; + qemu_irq irqs[1]; + QEMUTimer *nmi_release; }; +#define GLUE_IRQ_IN_VIA1 0 +#define GLUE_IRQ_IN_VIA2 1 +#define GLUE_IRQ_IN_SONIC 2 +#define GLUE_IRQ_IN_ESCC 3 +#define GLUE_IRQ_IN_NMI 4 + +#define GLUE_IRQ_NUBUS_9 0 + +/* + * The GLUE logic on the Quadra 800 supports 2 different IRQ routing modes + * controlled from the VIA1 auxmode GPIO (port B bit 6) which are documented + * in NetBSD as follows: + * + * A/UX mode (Linux, NetBSD, auxmode GPIO low) + * + * Level 0: Spurious: ignored + * Level 1: Software + * Level 2: VIA2 (except ethernet, sound) + * Level 3: Ethernet + * Level 4: Serial (SCC) + * Level 5: Sound + * Level 6: VIA1 + * Level 7: NMIs: parity errors, RESET button, YANCC error + * + * Classic mode (default: used by MacOS, A/UX 3.0.1, auxmode GPIO high) + * + * Level 0: Spurious: ignored + * Level 1: VIA1 (clock, ADB) + * Level 2: VIA2 (NuBus, SCSI) + * Level 3: + * Level 4: Serial (SCC) + * Level 5: + * Level 6: + * Level 7: Non-maskable: parity errors, RESET button + * + * Note that despite references to A/UX mode in Linux and NetBSD, at least + * A/UX 3.0.1 still uses Classic mode. + */ + static void GLUE_set_irq(void *opaque, int irq, int level) { GLUEState *s = opaque; int i; + if (s->auxmode) { + /* Classic mode */ + switch (irq) { + case GLUE_IRQ_IN_VIA1: + irq = 0; + break; + + case GLUE_IRQ_IN_VIA2: + irq = 1; + break; + + case GLUE_IRQ_IN_SONIC: + /* Route to VIA2 instead */ + qemu_set_irq(s->irqs[GLUE_IRQ_NUBUS_9], level); + return; + + case GLUE_IRQ_IN_ESCC: + irq = 3; + break; + + case GLUE_IRQ_IN_NMI: + irq = 6; + break; + + default: + g_assert_not_reached(); + } + } else { + /* A/UX mode */ + switch (irq) { + case GLUE_IRQ_IN_VIA1: + irq = 5; + break; + + case GLUE_IRQ_IN_VIA2: + irq = 1; + break; + + case GLUE_IRQ_IN_SONIC: + irq = 2; + break; + + case GLUE_IRQ_IN_ESCC: + irq = 3; + break; + + case GLUE_IRQ_IN_NMI: + irq = 6; + break; + + default: + g_assert_not_reached(); + } + } + if (level) { s->ipr |= 1 << irq; } else { @@ -119,11 +220,37 @@ static void GLUE_set_irq(void *opaque, int irq, int level) m68k_set_irq_level(s->cpu, 0, 0); } +static void glue_auxmode_set_irq(void *opaque, int irq, int level) +{ + GLUEState *s = GLUE(opaque); + + s->auxmode = level; +} + +static void glue_nmi(NMIState *n, int cpu_index, Error **errp) +{ + GLUEState *s = GLUE(n); + + /* Hold NMI active for 100ms */ + GLUE_set_irq(s, GLUE_IRQ_IN_NMI, 1); + timer_mod(s->nmi_release, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 100); +} + +static void glue_nmi_release(void *opaque) +{ + GLUEState *s = GLUE(opaque); + + GLUE_set_irq(s, GLUE_IRQ_IN_NMI, 0); +} + static void glue_reset(DeviceState *dev) { GLUEState *s = GLUE(dev); s->ipr = 0; + s->auxmode = 0; + + timer_del(s->nmi_release); } static const VMStateDescription vmstate_glue = { @@ -132,6 +259,8 @@ static const VMStateDescription vmstate_glue = { .minimum_version_id = 0, .fields = (VMStateField[]) { VMSTATE_UINT8(ipr, GLUEState), + VMSTATE_UINT8(auxmode, GLUEState), + VMSTATE_TIMER_PTR(nmi_release, GLUEState), VMSTATE_END_OF_LIST(), }, }; @@ -147,20 +276,36 @@ static Property glue_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +static void glue_finalize(Object *obj) +{ + GLUEState *s = GLUE(obj); + + timer_free(s->nmi_release); +} + static void glue_init(Object *obj) { DeviceState *dev = DEVICE(obj); + GLUEState *s = GLUE(dev); qdev_init_gpio_in(dev, GLUE_set_irq, 8); + qdev_init_gpio_in_named(dev, glue_auxmode_set_irq, 
"auxmode", 1); + + qdev_init_gpio_out(dev, s->irqs, 1); + + /* NMI release timer */ + s->nmi_release = timer_new_ms(QEMU_CLOCK_VIRTUAL, glue_nmi_release, s); } static void glue_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + NMIClass *nc = NMI_CLASS(klass); dc->vmsd = &vmstate_glue; dc->reset = glue_reset; device_class_set_props(dc, glue_properties); + nc->nmi_monitor_handler = glue_nmi; } static const TypeInfo glue_info = { @@ -168,7 +313,12 @@ static const TypeInfo glue_info = { .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(GLUEState), .instance_init = glue_init, + .instance_finalize = glue_finalize, .class_init = glue_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_NMI }, + { } + }, }; static void main_cpu_reset(void *opaque) @@ -214,8 +364,11 @@ static void q800_init(MachineState *machine) int32_t initrd_size; MemoryRegion *rom; MemoryRegion *io; + MemoryRegion *dp8393x_prom = g_new(MemoryRegion, 1); + uint8_t *prom; const int io_slice_nb = (IO_SIZE / IO_SLICE) - 1; - int i; + int i, checksum; + MacFbMode *macfb_mode; ram_addr_t ram_size = machine->ram_size; const char *kernel_filename = machine->kernel_filename; const char *initrd_filename = machine->initrd_filename; @@ -224,7 +377,7 @@ static void q800_init(MachineState *machine) hwaddr parameters_base; CPUState *cs; DeviceState *dev; - DeviceState *via_dev; + DeviceState *via1_dev, *via2_dev; DeviceState *escc_orgate; SysBusESPState *sysbus_esp; ESPState *esp; @@ -269,28 +422,33 @@ static void q800_init(MachineState *machine) object_property_set_link(OBJECT(glue), "cpu", OBJECT(cpu), &error_abort); sysbus_realize_and_unref(SYS_BUS_DEVICE(glue), &error_fatal); - /* VIA */ - - via_dev = qdev_new(TYPE_MAC_VIA); + /* VIA 1 */ + via1_dev = qdev_new(TYPE_MOS6522_Q800_VIA1); dinfo = drive_get(IF_MTD, 0, 0); if (dinfo) { - qdev_prop_set_drive(via_dev, "drive", blk_by_legacy_dinfo(dinfo)); + qdev_prop_set_drive(via1_dev, "drive", blk_by_legacy_dinfo(dinfo)); } - sysbus = SYS_BUS_DEVICE(via_dev); + sysbus = SYS_BUS_DEVICE(via1_dev); sysbus_realize_and_unref(sysbus, &error_fatal); - sysbus_mmio_map(sysbus, 0, VIA_BASE); - qdev_connect_gpio_out_named(DEVICE(sysbus), "irq", 0, - qdev_get_gpio_in(glue, 0)); - qdev_connect_gpio_out_named(DEVICE(sysbus), "irq", 1, - qdev_get_gpio_in(glue, 1)); - + sysbus_mmio_map(sysbus, 1, VIA_BASE); + sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(glue, GLUE_IRQ_IN_VIA1)); + /* A/UX mode */ + qdev_connect_gpio_out(via1_dev, 0, + qdev_get_gpio_in_named(glue, "auxmode", 0)); - adb_bus = qdev_get_child_bus(via_dev, "adb.0"); + adb_bus = qdev_get_child_bus(via1_dev, "adb.0"); dev = qdev_new(TYPE_ADB_KEYBOARD); qdev_realize_and_unref(dev, adb_bus, &error_fatal); dev = qdev_new(TYPE_ADB_MOUSE); qdev_realize_and_unref(dev, adb_bus, &error_fatal); + /* VIA 2 */ + via2_dev = qdev_new(TYPE_MOS6522_Q800_VIA2); + sysbus = SYS_BUS_DEVICE(via2_dev); + sysbus_realize_and_unref(sysbus, &error_fatal); + sysbus_mmio_map(sysbus, 1, VIA_BASE + VIA_SIZE); + sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(glue, GLUE_IRQ_IN_VIA2)); + /* MACSONIC */ if (nb_nics > 1) { @@ -322,8 +480,21 @@ static void q800_init(MachineState *machine) sysbus = SYS_BUS_DEVICE(dev); sysbus_realize_and_unref(sysbus, &error_fatal); sysbus_mmio_map(sysbus, 0, SONIC_BASE); - sysbus_mmio_map(sysbus, 1, SONIC_PROM_BASE); - sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(glue, 2)); + sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(glue, GLUE_IRQ_IN_SONIC)); + + memory_region_init_rom(dp8393x_prom, NULL, 
"dp8393x-q800.prom", + SONIC_PROM_SIZE, &error_fatal); + memory_region_add_subregion(get_system_memory(), SONIC_PROM_BASE, + dp8393x_prom); + + /* Add MAC address with valid checksum to PROM */ + prom = memory_region_get_ram_ptr(dp8393x_prom); + checksum = 0; + for (i = 0; i < 6; i++) { + prom[i] = revbit8(nd_table[0].macaddr.a[i]); + checksum ^= prom[i]; + } + prom[7] = 0xff - checksum; /* SCC */ @@ -345,7 +516,8 @@ static void q800_init(MachineState *machine) qdev_realize_and_unref(escc_orgate, NULL, &error_fatal); sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(escc_orgate, 0)); sysbus_connect_irq(sysbus, 1, qdev_get_gpio_in(escc_orgate, 1)); - qdev_connect_gpio_out(DEVICE(escc_orgate), 0, qdev_get_gpio_in(glue, 3)); + qdev_connect_gpio_out(DEVICE(escc_orgate), 0, + qdev_get_gpio_in(glue, GLUE_IRQ_IN_ESCC)); sysbus_mmio_map(sysbus, 0, SCC_BASE); /* SCSI */ @@ -361,12 +533,10 @@ static void q800_init(MachineState *machine) sysbus = SYS_BUS_DEVICE(dev); sysbus_realize_and_unref(sysbus, &error_fatal); - sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in_named(via_dev, - "via2-irq", - VIA2_IRQ_SCSI_BIT)); - sysbus_connect_irq(sysbus, 1, - qdev_get_gpio_in_named(via_dev, "via2-irq", - VIA2_IRQ_SCSI_DATA_BIT)); + sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(via2_dev, + VIA2_IRQ_SCSI_BIT)); + sysbus_connect_irq(sysbus, 1, qdev_get_gpio_in(via2_dev, + VIA2_IRQ_SCSI_DATA_BIT)); sysbus_mmio_map(sysbus, 0, ESP_BASE); sysbus_mmio_map(sysbus, 1, ESP_PDMA); @@ -381,20 +551,48 @@ static void q800_init(MachineState *machine) /* NuBus */ dev = qdev_new(TYPE_MAC_NUBUS_BRIDGE); + qdev_prop_set_uint32(dev, "slot-available-mask", + Q800_NUBUS_SLOTS_AVAILABLE); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, NUBUS_SUPER_SLOT_BASE); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, NUBUS_SLOT_BASE); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, + MAC_NUBUS_FIRST_SLOT * NUBUS_SUPER_SLOT_SIZE); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, NUBUS_SLOT_BASE + + MAC_NUBUS_FIRST_SLOT * NUBUS_SLOT_SIZE); + qdev_connect_gpio_out(dev, 9, + qdev_get_gpio_in_named(via2_dev, "nubus-irq", + VIA2_NUBUS_IRQ_INTVIDEO)); + for (i = 1; i < VIA2_NUBUS_IRQ_NB; i++) { + qdev_connect_gpio_out(dev, 9 + i, + qdev_get_gpio_in_named(via2_dev, "nubus-irq", + VIA2_NUBUS_IRQ_9 + i)); + } + + /* + * Since the framebuffer in slot 0x9 uses a separate IRQ, wire the unused + * IRQ via GLUE for use by SONIC Ethernet in classic mode + */ + qdev_connect_gpio_out(glue, GLUE_IRQ_NUBUS_9, + qdev_get_gpio_in_named(via2_dev, "nubus-irq", + VIA2_NUBUS_IRQ_9)); - nubus = MAC_NUBUS_BRIDGE(dev)->bus; + nubus = &NUBUS_BRIDGE(dev)->bus; /* framebuffer in nubus slot #9 */ dev = qdev_new(TYPE_NUBUS_MACFB); + qdev_prop_set_uint32(dev, "slot", 9); qdev_prop_set_uint32(dev, "width", graphic_width); qdev_prop_set_uint32(dev, "height", graphic_height); qdev_prop_set_uint8(dev, "depth", graphic_depth); + if (graphic_width == 1152 && graphic_height == 870) { + qdev_prop_set_uint8(dev, "display", MACFB_DISPLAY_APPLE_21_COLOR); + } else { + qdev_prop_set_uint8(dev, "display", MACFB_DISPLAY_VGA); + } qdev_realize_and_unref(dev, BUS(nubus), &error_fatal); + macfb_mode = (NUBUS_MACFB(dev)->macfb).mode; + cs = CPU(cpu); if (linux_boot) { uint64_t high; @@ -417,12 +615,12 @@ static void q800_init(MachineState *machine) BOOTINFO1(cs->as, parameters_base, BI_MAC_MEMSIZE, ram_size >> 20); /* in MB */ BOOTINFO2(cs->as, parameters_base, BI_MEMCHUNK, 0, ram_size); - BOOTINFO1(cs->as, parameters_base, BI_MAC_VADDR, VIDEO_BASE); + 
BOOTINFO1(cs->as, parameters_base, BI_MAC_VADDR, + VIDEO_BASE + macfb_mode->offset); BOOTINFO1(cs->as, parameters_base, BI_MAC_VDEPTH, graphic_depth); BOOTINFO1(cs->as, parameters_base, BI_MAC_VDIM, (graphic_height << 16) | graphic_width); - BOOTINFO1(cs->as, parameters_base, BI_MAC_VROW, - (graphic_width * graphic_depth + 7) / 8); + BOOTINFO1(cs->as, parameters_base, BI_MAC_VROW, macfb_mode->stride); BOOTINFO1(cs->as, parameters_base, BI_MAC_SCCBASE, SCC_BASE); rom = g_malloc(sizeof(*rom)); diff --git a/hw/m68k/virt.c b/hw/m68k/virt.c index e9a5d4c69b9..0efa4a45c7f 100644 --- a/hw/m68k/virt.c +++ b/hw/m68k/virt.c @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * QEMU Vitual M68K Machine * @@ -12,14 +12,11 @@ #include "qemu-common.h" #include "sysemu/sysemu.h" #include "cpu.h" -#include "hw/hw.h" #include "hw/boards.h" -#include "hw/irq.h" #include "hw/qdev-properties.h" #include "elf.h" #include "hw/loader.h" #include "ui/console.h" -#include "exec/address-spaces.h" #include "hw/sysbus.h" #include "standard-headers/asm-m68k/bootinfo.h" #include "standard-headers/asm-m68k/bootinfo-virt.h" @@ -307,7 +304,21 @@ type_init(virt_machine_register_types) } \ type_init(machvirt_machine_##major##_##minor##_init); +static void virt_machine_6_2_options(MachineClass *mc) +{ +} +DEFINE_VIRT_MACHINE(6, 2, true) + +static void virt_machine_6_1_options(MachineClass *mc) +{ + virt_machine_6_2_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len); +} +DEFINE_VIRT_MACHINE(6, 1, false) + static void virt_machine_6_0_options(MachineClass *mc) { + virt_machine_6_1_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len); } -DEFINE_VIRT_MACHINE(6, 0, true) +DEFINE_VIRT_MACHINE(6, 0, false) diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig index a0ef2cf648e..03dbb3c7df5 100644 --- a/hw/mem/Kconfig +++ b/hw/mem/Kconfig @@ -7,6 +7,7 @@ config MEM_DEVICE config NVDIMM bool - default y - depends on (PC || PSERIES || ARM_VIRT) select MEM_DEVICE + +config SPARSE_MEM + bool diff --git a/hw/mem/meson.build b/hw/mem/meson.build index ef79e046787..82f86d117e6 100644 --- a/hw/mem/meson.build +++ b/hw/mem/meson.build @@ -1,8 +1,9 @@ mem_ss = ss.source_set() mem_ss.add(files('memory-device.c')) -mem_ss.add(when: 'CONFIG_FUZZ', if_true: files('sparse-mem.c')) mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c')) mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c')) mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c')) softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss) + +softmmu_ss.add(when: 'CONFIG_SPARSE_MEM', if_true: files('sparse-mem.c')) diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index 12b655eda8b..48b913aba67 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -34,6 +34,16 @@ static int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp); +static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm, Error **errp) +{ + if (!dimm->hostmem) { + error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property must be set"); + return NULL; + } + + return host_memory_backend_get_memory(dimm->hostmem); +} + void pc_dimm_pre_plug(PCDIMMDevice *dimm, MachineState *machine, const uint64_t *legacy_align, Error **errp) { @@ -66,9 +76,8 @@ void pc_dimm_pre_plug(PCDIMMDevice *dimm, MachineState *machine, void pc_dimm_plug(PCDIMMDevice *dimm, MachineState *machine) { - PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); - MemoryRegion *vmstate_mr = 
ddc->get_vmstate_memory_region(dimm, - &error_abort); + MemoryRegion *vmstate_mr = pc_dimm_get_memory_region(dimm, + &error_abort); memory_device_plug(MEMORY_DEVICE(dimm), machine); vmstate_register_ram(vmstate_mr, DEVICE(dimm)); @@ -76,9 +85,8 @@ void pc_dimm_plug(PCDIMMDevice *dimm, MachineState *machine) void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine) { - PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); - MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm, - &error_abort); + MemoryRegion *vmstate_mr = pc_dimm_get_memory_region(dimm, + &error_abort); memory_device_unplug(MEMORY_DEVICE(dimm), machine); vmstate_unregister_ram(vmstate_mr, DEVICE(dimm)); @@ -173,7 +181,21 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp) PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MachineState *ms = MACHINE(qdev_get_machine()); - int nb_numa_nodes = ms->numa_state->num_nodes; + + if (ms->numa_state) { + int nb_numa_nodes = ms->numa_state->num_nodes; + + if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) || + (!nb_numa_nodes && dimm->node)) { + error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %" + PRIu32 "' which exceeds the number of numa nodes: %d", + dimm->node, nb_numa_nodes ? nb_numa_nodes : 1); + return; + } + } else if (dimm->node > 0) { + error_setg(errp, "machine doesn't support NUMA"); + return; + } if (!dimm->hostmem) { error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set"); @@ -183,13 +205,6 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp) object_get_canonical_path_component(OBJECT(dimm->hostmem))); return; } - if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) || - (!nb_numa_nodes && dimm->node)) { - error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %" - PRIu32 "' which exceeds the number of numa nodes: %d", - dimm->node, nb_numa_nodes ? 
nb_numa_nodes : 1); - return; - } if (ddc->realize) { ddc->realize(dimm, errp); @@ -205,16 +220,6 @@ static void pc_dimm_unrealize(DeviceState *dev) host_memory_backend_set_mapped(dimm->hostmem, false); } -static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm, Error **errp) -{ - if (!dimm->hostmem) { - error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property must be set"); - return NULL; - } - - return host_memory_backend_get_memory(dimm->hostmem); -} - static uint64_t pc_dimm_md_get_addr(const MemoryDeviceState *md) { return object_property_get_uint(OBJECT(md), PC_DIMM_ADDR_PROP, @@ -266,7 +271,6 @@ static void pc_dimm_md_fill_device_info(const MemoryDeviceState *md, static void pc_dimm_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); - PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc); MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(oc); dc->realize = pc_dimm_realize; @@ -274,8 +278,6 @@ static void pc_dimm_class_init(ObjectClass *oc, void *data) device_class_set_props(dc, pc_dimm_properties); dc->desc = "DIMM memory module"; - ddc->get_vmstate_memory_region = pc_dimm_get_memory_region; - mdc->get_addr = pc_dimm_md_get_addr; mdc->set_addr = pc_dimm_md_set_addr; /* for a dimm plugged_size == region_size */ diff --git a/hw/mem/sparse-mem.c b/hw/mem/sparse-mem.c index a13ac74dd9f..e6640eb8e72 100644 --- a/hw/mem/sparse-mem.c +++ b/hw/mem/sparse-mem.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" -#include "exec/address-spaces.h" #include "hw/qdev-properties.h" #include "hw/sysbus.h" #include "qapi/error.h" diff --git a/hw/mem/trace-events b/hw/mem/trace-events index 9f6b52acd7e..8b6b02b5bf2 100644 --- a/hw/mem/trace-events +++ b/hw/mem/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# pc-dimm.c mhp_pc_dimm_assigned_slot(int slot) "%d" diff --git a/hw/meson.build b/hw/meson.build index 8ba79b1a528..b3366c888ef 100644 --- a/hw/meson.build +++ b/hw/meson.build @@ -21,6 +21,7 @@ subdir('mem') subdir('misc') subdir('net') subdir('nubus') +subdir('nvme') subdir('nvram') subdir('pci') subdir('pci-bridge') @@ -30,6 +31,7 @@ subdir('rdma') subdir('rtc') subdir('scsi') subdir('sd') +subdir('sensor') subdir('smbios') subdir('ssi') subdir('timer') @@ -47,11 +49,9 @@ subdir('avr') subdir('cris') subdir('hppa') subdir('i386') -subdir('lm32') subdir('m68k') subdir('microblaze') subdir('mips') -subdir('moxie') subdir('nios2') subdir('openrisc') subdir('ppc') @@ -63,5 +63,4 @@ subdir('sh4') subdir('sparc') subdir('sparc64') subdir('tricore') -subdir('unicore32') subdir('xtensa') diff --git a/hw/microblaze/boot.c b/hw/microblaze/boot.c index caaba1aa4c1..8821d009f1a 100644 --- a/hw/microblaze/boot.c +++ b/hw/microblaze/boot.c @@ -33,7 +33,6 @@ #include "qemu/error-report.h" #include "sysemu/device_tree.h" #include "sysemu/reset.h" -#include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/loader.h" #include "elf.h" diff --git a/hw/mips/Kconfig b/hw/mips/Kconfig index aadd436bf4e..b4c5549ce84 100644 --- a/hw/mips/Kconfig +++ b/hw/mips/Kconfig @@ -20,7 +20,7 @@ config JAZZ select G364FB select DP8393X select ESP - select FDC + select FDC_SYSBUS select MC146818RTC select PCKBD select SERIAL @@ -47,9 +47,15 @@ config LOONGSON3V config MIPS_CPS bool select PTIMER + select MIPS_ITU config MIPS_BOSTON bool + select FITLOADER + select MIPS_CPS + select PCI_EXPRESS_XILINX + select AHCI_ICH9 + select SERIAL config FW_CFG_MIPS bool diff --git a/hw/mips/bootloader.c b/hw/mips/bootloader.c index 6ec83144909..99991f8b2b5 100644 --- a/hw/mips/bootloader.c +++ b/hw/mips/bootloader.c @@ -182,7 +182,11 @@ void bl_gen_write_ulong(uint32_t **p, target_ulong addr, target_ulong val) { bl_gen_load_ulong(p, BL_REG_K0, val); bl_gen_load_ulong(p, BL_REG_K1, addr); - bl_gen_sd(p, BL_REG_K0, BL_REG_K1, 0x0); + if (bootcpu_supports_isa(ISA_MIPS3)) { + bl_gen_sd(p, BL_REG_K0, BL_REG_K1, 0x0); + } else { + bl_gen_sw(p, BL_REG_K0, BL_REG_K1, 0x0); + } } void bl_gen_write_u32(uint32_t **p, target_ulong addr, uint32_t val) diff --git a/hw/mips/boston.c b/hw/mips/boston.c index ac2e93a05aa..59ca08b93a9 100644 --- a/hw/mips/boston.c +++ b/hw/mips/boston.c @@ -20,7 +20,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" -#include "exec/address-spaces.h" +#include "elf.h" #include "hw/boards.h" #include "hw/char/serial.h" #include "hw/ide/pci.h" @@ -49,6 +49,15 @@ typedef struct BostonState BostonState; DECLARE_INSTANCE_CHECKER(BostonState, BOSTON, TYPE_BOSTON) +#define FDT_IRQ_TYPE_NONE 0 +#define FDT_IRQ_TYPE_LEVEL_HIGH 4 +#define FDT_GIC_SHARED 0 +#define FDT_GIC_LOCAL 1 +#define FDT_BOSTON_CLK_SYS 1 +#define FDT_BOSTON_CLK_CPU 2 +#define FDT_PCI_IRQ_MAP_PINS 4 +#define FDT_PCI_IRQ_MAP_DESCS 6 + struct BostonState { SysBusDevice parent_obj; @@ -65,6 +74,44 @@ struct BostonState { hwaddr fdt_base; }; +enum { + BOSTON_LOWDDR, + BOSTON_PCIE0, + BOSTON_PCIE1, + BOSTON_PCIE2, + BOSTON_PCIE2_MMIO, + BOSTON_CM, + BOSTON_GIC, + BOSTON_CDMM, + BOSTON_CPC, + BOSTON_PLATREG, + BOSTON_UART, + BOSTON_LCD, + BOSTON_FLASH, + BOSTON_PCIE1_MMIO, + BOSTON_PCIE0_MMIO, + BOSTON_HIGHDDR, +}; + +static const MemMapEntry boston_memmap[] = { + [BOSTON_LOWDDR] = { 0x0, 0x10000000 }, + [BOSTON_PCIE0] = { 0x10000000, 0x2000000 }, + [BOSTON_PCIE1] = { 0x12000000, 0x2000000 }, + [BOSTON_PCIE2] = { 0x14000000, 0x2000000 }, + 
[BOSTON_PCIE2_MMIO] = { 0x16000000, 0x100000 }, + [BOSTON_CM] = { 0x16100000, 0x20000 }, + [BOSTON_GIC] = { 0x16120000, 0x20000 }, + [BOSTON_CDMM] = { 0x16140000, 0x8000 }, + [BOSTON_CPC] = { 0x16200000, 0x8000 }, + [BOSTON_PLATREG] = { 0x17ffd000, 0x1000 }, + [BOSTON_UART] = { 0x17ffe000, 0x20 }, + [BOSTON_LCD] = { 0x17fff000, 0x8 }, + [BOSTON_FLASH] = { 0x18000000, 0x8000000 }, + [BOSTON_PCIE1_MMIO] = { 0x20000000, 0x20000000 }, + [BOSTON_PCIE0_MMIO] = { 0x40000000, 0x40000000 }, + [BOSTON_HIGHDDR] = { 0x80000000, 0x0 }, +}; + enum boston_plat_reg { PLAT_FPGA_BUILD = 0x00, PLAT_CORE_CL = 0x04, @@ -276,24 +323,24 @@ type_init(boston_register_types) static void gen_firmware(uint32_t *p, hwaddr kernel_entry, hwaddr fdt_addr) { - const uint32_t cm_base = 0x16100000; - const uint32_t gic_base = 0x16120000; - const uint32_t cpc_base = 0x16200000; + uint64_t regaddr; /* Move CM GCRs */ - bl_gen_write_ulong(&p, - cpu_mips_phys_to_kseg1(NULL, GCR_BASE_ADDR + GCR_BASE_OFS), - cm_base); + regaddr = cpu_mips_phys_to_kseg1(NULL, GCR_BASE_ADDR + GCR_BASE_OFS), + bl_gen_write_ulong(&p, regaddr, + boston_memmap[BOSTON_CM].base); /* Move & enable GIC GCRs */ - bl_gen_write_ulong(&p, - cpu_mips_phys_to_kseg1(NULL, cm_base + GCR_GIC_BASE_OFS), - gic_base | GCR_GIC_BASE_GICEN_MSK); + regaddr = cpu_mips_phys_to_kseg1(NULL, boston_memmap[BOSTON_CM].base + + GCR_GIC_BASE_OFS), + bl_gen_write_ulong(&p, regaddr, + boston_memmap[BOSTON_GIC].base | GCR_GIC_BASE_GICEN_MSK); /* Move & enable CPC GCRs */ - bl_gen_write_ulong(&p, - cpu_mips_phys_to_kseg1(NULL, cm_base + GCR_CPC_BASE_OFS), - cpc_base | GCR_CPC_BASE_CPCEN_MSK); + regaddr = cpu_mips_phys_to_kseg1(NULL, boston_memmap[BOSTON_CM].base + + GCR_CPC_BASE_OFS), + bl_gen_write_ulong(&p, regaddr, + boston_memmap[BOSTON_CPC].base | GCR_CPC_BASE_CPCEN_MSK); /* * Setup argument registers to follow the UHI boot protocol: @@ -334,8 +381,9 @@ static const void *boston_fdt_filter(void *opaque, const void *fdt_orig, ram_low_sz = MIN(256 * MiB, machine->ram_size); ram_high_sz = machine->ram_size - ram_low_sz; qemu_fdt_setprop_sized_cells(fdt, "/memory@0", "reg", - 1, 0x00000000, 1, ram_low_sz, - 1, 0x90000000, 1, ram_high_sz); + 1, boston_memmap[BOSTON_LOWDDR].base, 1, ram_low_sz, + 1, boston_memmap[BOSTON_HIGHDDR].base + ram_low_sz, + 1, ram_high_sz); fdt = g_realloc(fdt, fdt_totalsize(fdt)); qemu_fdt_dumpdtb(fdt, fdt_sz); @@ -398,6 +446,222 @@ xilinx_pcie_init(MemoryRegion *sys_mem, uint32_t bus_nr, return XILINX_PCIE_HOST(dev); } + +static void fdt_create_pcie(void *fdt, int gic_ph, int irq, hwaddr reg_base, + hwaddr reg_size, hwaddr mmio_base, hwaddr mmio_size) +{ + int i; + char *name, *intc_name; + uint32_t intc_ph; + uint32_t interrupt_map[FDT_PCI_IRQ_MAP_PINS][FDT_PCI_IRQ_MAP_DESCS]; + + intc_ph = qemu_fdt_alloc_phandle(fdt); + name = g_strdup_printf("/soc/pci@%" HWADDR_PRIx, reg_base); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", + "xlnx,axi-pcie-host-1.00.a"); + qemu_fdt_setprop_string(fdt, name, "device_type", "pci"); + qemu_fdt_setprop_cells(fdt, name, "reg", reg_base, reg_size); + + qemu_fdt_setprop_cell(fdt, name, "#address-cells", 3); + qemu_fdt_setprop_cell(fdt, name, "#size-cells", 2); + qemu_fdt_setprop_cell(fdt, name, "#interrupt-cells", 1); + + qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", gic_ph); + qemu_fdt_setprop_cells(fdt, name, "interrupts", FDT_GIC_SHARED, irq, + FDT_IRQ_TYPE_LEVEL_HIGH); + + qemu_fdt_setprop_cells(fdt, name, "ranges", 0x02000000, 0, mmio_base, + mmio_base, 0, mmio_size); + 
qemu_fdt_setprop_cells(fdt, name, "bus-range", 0x00, 0xff); + + + + intc_name = g_strdup_printf("%s/interrupt-controller", name); + qemu_fdt_add_subnode(fdt, intc_name); + qemu_fdt_setprop(fdt, intc_name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(fdt, intc_name, "#address-cells", 0); + qemu_fdt_setprop_cell(fdt, intc_name, "#interrupt-cells", 1); + qemu_fdt_setprop_cell(fdt, intc_name, "phandle", intc_ph); + + qemu_fdt_setprop_cells(fdt, name, "interrupt-map-mask", 0, 0, 0, 7); + for (i = 0; i < FDT_PCI_IRQ_MAP_PINS; i++) { + uint32_t *irqmap = interrupt_map[i]; + + irqmap[0] = cpu_to_be32(0); + irqmap[1] = cpu_to_be32(0); + irqmap[2] = cpu_to_be32(0); + irqmap[3] = cpu_to_be32(i + 1); + irqmap[4] = cpu_to_be32(intc_ph); + irqmap[5] = cpu_to_be32(i + 1); + } + qemu_fdt_setprop(fdt, name, "interrupt-map", + &interrupt_map, sizeof(interrupt_map)); + + g_free(intc_name); + g_free(name); +} + +static const void *create_fdt(BostonState *s, + const MemMapEntry *memmap, int *dt_size) +{ + void *fdt; + int cpu; + MachineState *mc = s->mach; + uint32_t platreg_ph, gic_ph, clk_ph; + char *name, *gic_name, *platreg_name, *stdout_name; + static const char * const syscon_compat[2] = { + "img,boston-platform-regs", "syscon" + }; + + fdt = create_device_tree(dt_size); + if (!fdt) { + error_report("create_device_tree() failed"); + exit(1); + } + + platreg_ph = qemu_fdt_alloc_phandle(fdt); + gic_ph = qemu_fdt_alloc_phandle(fdt); + clk_ph = qemu_fdt_alloc_phandle(fdt); + + qemu_fdt_setprop_string(fdt, "/", "model", "img,boston"); + qemu_fdt_setprop_string(fdt, "/", "compatible", "img,boston"); + qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 0x1); + qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 0x1); + + + qemu_fdt_add_subnode(fdt, "/cpus"); + qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0x0); + qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 0x1); + + for (cpu = 0; cpu < mc->smp.cpus; cpu++) { + name = g_strdup_printf("/cpus/cpu@%d", cpu); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "img,mips"); + qemu_fdt_setprop_string(fdt, name, "status", "okay"); + qemu_fdt_setprop_cell(fdt, name, "reg", cpu); + qemu_fdt_setprop_string(fdt, name, "device_type", "cpu"); + qemu_fdt_setprop_cells(fdt, name, "clocks", clk_ph, FDT_BOSTON_CLK_CPU); + g_free(name); + } + + qemu_fdt_add_subnode(fdt, "/soc"); + qemu_fdt_setprop(fdt, "/soc", "ranges", NULL, 0); + qemu_fdt_setprop_string(fdt, "/soc", "compatible", "simple-bus"); + qemu_fdt_setprop_cell(fdt, "/soc", "#size-cells", 0x1); + qemu_fdt_setprop_cell(fdt, "/soc", "#address-cells", 0x1); + + fdt_create_pcie(fdt, gic_ph, 2, + memmap[BOSTON_PCIE0].base, memmap[BOSTON_PCIE0].size, + memmap[BOSTON_PCIE0_MMIO].base, memmap[BOSTON_PCIE0_MMIO].size); + + fdt_create_pcie(fdt, gic_ph, 1, + memmap[BOSTON_PCIE1].base, memmap[BOSTON_PCIE1].size, + memmap[BOSTON_PCIE1_MMIO].base, memmap[BOSTON_PCIE1_MMIO].size); + + fdt_create_pcie(fdt, gic_ph, 0, + memmap[BOSTON_PCIE2].base, memmap[BOSTON_PCIE2].size, + memmap[BOSTON_PCIE2_MMIO].base, memmap[BOSTON_PCIE2_MMIO].size); + + /* GIC with it's timer node */ + gic_name = g_strdup_printf("/soc/interrupt-controller@%" HWADDR_PRIx, + memmap[BOSTON_GIC].base); + qemu_fdt_add_subnode(fdt, gic_name); + qemu_fdt_setprop_string(fdt, gic_name, "compatible", "mti,gic"); + qemu_fdt_setprop_cells(fdt, gic_name, "reg", memmap[BOSTON_GIC].base, + memmap[BOSTON_GIC].size); + qemu_fdt_setprop(fdt, gic_name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(fdt, gic_name, 
"#interrupt-cells", 3); + qemu_fdt_setprop_cell(fdt, gic_name, "phandle", gic_ph); + + name = g_strdup_printf("%s/timer", gic_name); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "mti,gic-timer"); + qemu_fdt_setprop_cells(fdt, name, "interrupts", FDT_GIC_LOCAL, 1, + FDT_IRQ_TYPE_NONE); + qemu_fdt_setprop_cells(fdt, name, "clocks", clk_ph, FDT_BOSTON_CLK_CPU); + g_free(name); + g_free(gic_name); + + /* CDMM node */ + name = g_strdup_printf("/soc/cdmm@%" HWADDR_PRIx, memmap[BOSTON_CDMM].base); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "mti,mips-cdmm"); + qemu_fdt_setprop_cells(fdt, name, "reg", memmap[BOSTON_CDMM].base, + memmap[BOSTON_CDMM].size); + g_free(name); + + /* CPC node */ + name = g_strdup_printf("/soc/cpc@%" HWADDR_PRIx, memmap[BOSTON_CPC].base); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "mti,mips-cpc"); + qemu_fdt_setprop_cells(fdt, name, "reg", memmap[BOSTON_CPC].base, + memmap[BOSTON_CPC].size); + g_free(name); + + /* platreg and it's clk node */ + platreg_name = g_strdup_printf("/soc/system-controller@%" HWADDR_PRIx, + memmap[BOSTON_PLATREG].base); + qemu_fdt_add_subnode(fdt, platreg_name); + qemu_fdt_setprop_string_array(fdt, platreg_name, "compatible", + (char **)&syscon_compat, + ARRAY_SIZE(syscon_compat)); + qemu_fdt_setprop_cells(fdt, platreg_name, "reg", + memmap[BOSTON_PLATREG].base, + memmap[BOSTON_PLATREG].size); + qemu_fdt_setprop_cell(fdt, platreg_name, "phandle", platreg_ph); + + name = g_strdup_printf("%s/clock", platreg_name); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "img,boston-clock"); + qemu_fdt_setprop_cell(fdt, name, "#clock-cells", 1); + qemu_fdt_setprop_cell(fdt, name, "phandle", clk_ph); + g_free(name); + g_free(platreg_name); + + /* reboot node */ + name = g_strdup_printf("/soc/reboot"); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "syscon-reboot"); + qemu_fdt_setprop_cell(fdt, name, "regmap", platreg_ph); + qemu_fdt_setprop_cell(fdt, name, "offset", 0x10); + qemu_fdt_setprop_cell(fdt, name, "mask", 0x10); + g_free(name); + + /* uart node */ + name = g_strdup_printf("/soc/uart@%" HWADDR_PRIx, memmap[BOSTON_UART].base); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "ns16550a"); + qemu_fdt_setprop_cells(fdt, name, "reg", memmap[BOSTON_UART].base, + memmap[BOSTON_UART].size); + qemu_fdt_setprop_cell(fdt, name, "reg-shift", 0x2); + qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", gic_ph); + qemu_fdt_setprop_cells(fdt, name, "interrupts", FDT_GIC_SHARED, 3, + FDT_IRQ_TYPE_LEVEL_HIGH); + qemu_fdt_setprop_cells(fdt, name, "clocks", clk_ph, FDT_BOSTON_CLK_SYS); + + qemu_fdt_add_subnode(fdt, "/chosen"); + stdout_name = g_strdup_printf("%s:115200", name); + qemu_fdt_setprop_string(fdt, "/chosen", "stdout-path", stdout_name); + g_free(stdout_name); + g_free(name); + + /* lcd node */ + name = g_strdup_printf("/soc/lcd@%" HWADDR_PRIx, memmap[BOSTON_LCD].base); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "img,boston-lcd"); + qemu_fdt_setprop_cells(fdt, name, "reg", memmap[BOSTON_LCD].base, + memmap[BOSTON_LCD].size); + g_free(name); + + name = g_strdup_printf("/memory@0"); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "device_type", "memory"); + g_free(name); + + return fdt; +} + static void boston_mach_init(MachineState *machine) { DeviceState 
*dev; @@ -439,11 +703,15 @@ static void boston_mach_init(MachineState *machine) sysbus_mmio_map_overlap(SYS_BUS_DEVICE(&s->cps), 0, 0, 1); flash = g_new(MemoryRegion, 1); - memory_region_init_rom(flash, NULL, "boston.flash", 128 * MiB, - &error_fatal); - memory_region_add_subregion_overlap(sys_mem, 0x18000000, flash, 0); + memory_region_init_rom(flash, NULL, "boston.flash", + boston_memmap[BOSTON_FLASH].size, &error_fatal); + memory_region_add_subregion_overlap(sys_mem, + boston_memmap[BOSTON_FLASH].base, + flash, 0); - memory_region_add_subregion_overlap(sys_mem, 0x80000000, machine->ram, 0); + memory_region_add_subregion_overlap(sys_mem, + boston_memmap[BOSTON_HIGHDDR].base, + machine->ram, 0); ddr_low_alias = g_new(MemoryRegion, 1); memory_region_init_alias(ddr_low_alias, NULL, "boston_low.ddr", @@ -452,32 +720,41 @@ static void boston_mach_init(MachineState *machine) memory_region_add_subregion_overlap(sys_mem, 0, ddr_low_alias, 0); xilinx_pcie_init(sys_mem, 0, - 0x10000000, 32 * MiB, - 0x40000000, 1 * GiB, + boston_memmap[BOSTON_PCIE0].base, + boston_memmap[BOSTON_PCIE0].size, + boston_memmap[BOSTON_PCIE0_MMIO].base, + boston_memmap[BOSTON_PCIE0_MMIO].size, get_cps_irq(&s->cps, 2), false); xilinx_pcie_init(sys_mem, 1, - 0x12000000, 32 * MiB, - 0x20000000, 512 * MiB, + boston_memmap[BOSTON_PCIE1].base, + boston_memmap[BOSTON_PCIE1].size, + boston_memmap[BOSTON_PCIE1_MMIO].base, + boston_memmap[BOSTON_PCIE1_MMIO].size, get_cps_irq(&s->cps, 1), false); pcie2 = xilinx_pcie_init(sys_mem, 2, - 0x14000000, 32 * MiB, - 0x16000000, 1 * MiB, + boston_memmap[BOSTON_PCIE2].base, + boston_memmap[BOSTON_PCIE2].size, + boston_memmap[BOSTON_PCIE2_MMIO].base, + boston_memmap[BOSTON_PCIE2_MMIO].size, get_cps_irq(&s->cps, 0), true); platreg = g_new(MemoryRegion, 1); memory_region_init_io(platreg, NULL, &boston_platreg_ops, s, - "boston-platregs", 0x1000); - memory_region_add_subregion_overlap(sys_mem, 0x17ffd000, platreg, 0); + "boston-platregs", + boston_memmap[BOSTON_PLATREG].size); + memory_region_add_subregion_overlap(sys_mem, + boston_memmap[BOSTON_PLATREG].base, platreg, 0); - s->uart = serial_mm_init(sys_mem, 0x17ffe000, 2, + s->uart = serial_mm_init(sys_mem, boston_memmap[BOSTON_UART].base, 2, get_cps_irq(&s->cps, 3), 10000000, serial_hd(0), DEVICE_NATIVE_ENDIAN); lcd = g_new(MemoryRegion, 1); memory_region_init_io(lcd, NULL, &boston_lcd_ops, s, "boston-lcd", 0x8); - memory_region_add_subregion_overlap(sys_mem, 0x17fff000, lcd, 0); + memory_region_add_subregion_overlap(sys_mem, + boston_memmap[BOSTON_LCD].base, lcd, 0); chr = qemu_chr_new("lcd", "vc:320x240", NULL); qemu_chr_fe_init(&s->lcd_display, chr, NULL); @@ -500,10 +777,40 @@ static void boston_mach_init(MachineState *machine) exit(1); } } else if (machine->kernel_filename) { - fit_err = load_fit(&boston_fit_loader, machine->kernel_filename, s); - if (fit_err) { - error_report("unable to load FIT image"); - exit(1); + uint64_t kernel_entry, kernel_high; + ssize_t kernel_size; + + kernel_size = load_elf(machine->kernel_filename, NULL, + cpu_mips_kseg0_to_phys, NULL, + &kernel_entry, NULL, &kernel_high, + NULL, 0, EM_MIPS, 1, 0); + + if (kernel_size > 0) { + int dt_size; + g_autofree const void *dtb_file_data, *dtb_load_data; + hwaddr dtb_paddr = QEMU_ALIGN_UP(kernel_high, 64 * KiB); + hwaddr dtb_vaddr = cpu_mips_phys_to_kseg0(NULL, dtb_paddr); + + s->kernel_entry = kernel_entry; + if (machine->dtb) { + dtb_file_data = load_device_tree(machine->dtb, &dt_size); + } else { + dtb_file_data = create_fdt(s, boston_memmap, &dt_size); + } + + 
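The address conversions in this load path rely on the fixed MIPS KSEG0 window: an unmapped, cached 512 MiB segment at virtual 0x80000000 (sign-extended on 64-bit CPUs) that aliases physical addresses 0x00000000-0x1FFFFFFF, so the DTB copied to dtb_paddr is reachable by the kernel at dtb_vaddr with no TLB setup. A minimal sketch of the two directions, under that assumption (the helper names below are illustrative and are not the QEMU cpu_mips_*() API):

    #include <stdint.h>

    /* KSEG0 is a fixed window; translation is pure bit manipulation, no TLB. */
    static uint64_t phys_to_kseg0(uint64_t paddr)
    {
        /* equivalent to 0xffffffff80000000 + paddr for paddr below 512 MiB */
        return paddr | ~UINT64_C(0x7fffffff);
    }

    static uint64_t kseg0_to_phys(uint64_t vaddr)
    {
        return vaddr & UINT64_C(0x1fffffff);   /* strip the segment bits */
    }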
dtb_load_data = boston_fdt_filter(s, dtb_file_data, + NULL, &dtb_vaddr); + + /* Calculate real fdt size after filter */ + dt_size = fdt_totalsize(dtb_load_data); + rom_add_blob_fixed("dtb", dtb_load_data, dt_size, dtb_paddr); + } else { + /* Try to load file as FIT */ + fit_err = load_fit(&boston_fit_loader, machine->kernel_filename, s); + if (fit_err) { + error_report("unable to load kernel image"); + exit(1); + } } gen_firmware(memory_region_get_ram_ptr(flash) + 0x7c00000, diff --git a/hw/mips/fuloong2e.c b/hw/mips/fuloong2e.c index 4f61f2c873b..c1b8066a13b 100644 --- a/hw/mips/fuloong2e.c +++ b/hw/mips/fuloong2e.c @@ -33,13 +33,11 @@ #include "hw/mips/bootloader.h" #include "hw/mips/cpudevs.h" #include "hw/pci/pci.h" -#include "qemu/log.h" #include "hw/loader.h" #include "hw/ide/pci.h" #include "hw/qdev-properties.h" #include "elf.h" #include "hw/isa/vt82c686.h" -#include "exec/address-spaces.h" #include "sysemu/qtest.h" #include "sysemu/reset.h" #include "sysemu/sysemu.h" diff --git a/hw/mips/gt64xxx_pci.c b/hw/mips/gt64xxx_pci.c index 43349d6837d..c7480bd0196 100644 --- a/hw/mips/gt64xxx_pci.c +++ b/hw/mips/gt64xxx_pci.c @@ -33,7 +33,6 @@ #include "migration/vmstate.h" #include "hw/intc/i8259.h" #include "hw/irq.h" -#include "exec/address-spaces.h" #include "trace.h" #include "qom/object.h" diff --git a/hw/mips/jazz.c b/hw/mips/jazz.c index 1a0888a0fd5..f5a26e174d5 100644 --- a/hw/mips/jazz.c +++ b/hw/mips/jazz.c @@ -35,7 +35,6 @@ #include "hw/isa/isa.h" #include "hw/block/fdc.h" #include "sysemu/sysemu.h" -#include "sysemu/arch_init.h" #include "hw/boards.h" #include "net/net.h" #include "hw/scsi/esp.h" @@ -47,7 +46,6 @@ #include "hw/audio/pcspk.h" #include "hw/input/i8042.h" #include "hw/sysbus.h" -#include "exec/address-spaces.h" #include "sysemu/qtest.h" #include "sysemu/reset.h" #include "qapi/error.h" @@ -120,39 +118,17 @@ static const MemoryRegionOps dma_dummy_ops = { #define MAGNUM_BIOS_SIZE \ (BIOS_SIZE < MAGNUM_BIOS_SIZE_MAX ? 
BIOS_SIZE : MAGNUM_BIOS_SIZE_MAX) -#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) -static void (*real_do_transaction_failed)(CPUState *cpu, hwaddr physaddr, - vaddr addr, unsigned size, - MMUAccessType access_type, - int mmu_idx, MemTxAttrs attrs, - MemTxResult response, - uintptr_t retaddr); - -static void mips_jazz_do_transaction_failed(CPUState *cs, hwaddr physaddr, - vaddr addr, unsigned size, - MMUAccessType access_type, - int mmu_idx, MemTxAttrs attrs, - MemTxResult response, - uintptr_t retaddr) -{ - if (access_type != MMU_INST_FETCH) { - /* ignore invalid access (ie do not raise exception) */ - return; - } - (*real_do_transaction_failed)(cs, physaddr, addr, size, access_type, - mmu_idx, attrs, response, retaddr); -} -#endif /* CONFIG_TCG && !CONFIG_USER_ONLY */ +#define SONIC_PROM_SIZE 0x1000 static void mips_jazz_init(MachineState *machine, enum jazz_model_e jazz_model) { MemoryRegion *address_space = get_system_memory(); char *filename; - int bios_size, n; + int bios_size, n, big_endian; Clock *cpuclk; MIPSCPU *cpu; - CPUClass *cc; + MIPSCPUClass *mcc; CPUMIPSState *env; qemu_irq *i8259; rc4030_dma *dmas; @@ -162,6 +138,7 @@ static void mips_jazz_init(MachineState *machine, MemoryRegion *rtc = g_new(MemoryRegion, 1); MemoryRegion *i8042 = g_new(MemoryRegion, 1); MemoryRegion *dma_dummy = g_new(MemoryRegion, 1); + MemoryRegion *dp8393x_prom = g_new(MemoryRegion, 1); NICInfo *nd; DeviceState *dev, *rc4030; SysBusDevice *sysbus; @@ -180,6 +157,12 @@ static void mips_jazz_init(MachineState *machine, [JAZZ_PICA61] = {33333333, 4}, }; +#ifdef TARGET_WORDS_BIGENDIAN + big_endian = 1; +#else + big_endian = 0; +#endif + if (machine->ram_size > 256 * MiB) { error_report("RAM size more than 256Mb is not supported"); exit(EXIT_FAILURE); @@ -199,8 +182,6 @@ static void mips_jazz_init(MachineState *machine, * However, we can't simply add a global memory region to catch * everything, as this would make all accesses including instruction * accesses be ignored and not raise exceptions. - * So instead we hijack the do_transaction_failed method on the CPU, and - * do not raise exceptions for data access. * * NOTE: this behaviour of raising exceptions for bad instruction * fetches but not bad data accesses was added in commit 54e755588cf1e9 @@ -210,11 +191,8 @@ static void mips_jazz_init(MachineState *machine, * we could replace this hijacking of CPU methods with a simple global * memory region that catches all memory accesses, as we do on Malta. 
*/ - cc = CPU_GET_CLASS(cpu); -#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) - real_do_transaction_failed = cc->tcg_ops->do_transaction_failed; - cc->tcg_ops->do_transaction_failed = mips_jazz_do_transaction_failed; -#endif /* CONFIG_TCG && !CONFIG_USER_ONLY */ + mcc = MIPS_CPU_GET_CLASS(cpu); + mcc->no_data_aborts = true; /* allocate RAM */ memory_region_add_subregion(address_space, 0, machine->ram); @@ -258,6 +236,10 @@ static void mips_jazz_init(MachineState *machine, NULL, "dummy_dma", 0x1000); memory_region_add_subregion(address_space, 0x8000d000, dma_dummy); + memory_region_init_rom(dp8393x_prom, NULL, "dp8393x-jazz.prom", + SONIC_PROM_SIZE, &error_fatal); + memory_region_add_subregion(address_space, 0x8000b000, dp8393x_prom); + /* ISA bus: IO space at 0x90000000, mem space at 0x91000000 */ memory_region_init(isa_io, NULL, "isa-io", 0x00010000); memory_region_init(isa_mem, NULL, "isa-mem", 0x01000000); @@ -305,18 +287,33 @@ static void mips_jazz_init(MachineState *machine, nd->model = g_strdup("dp83932"); } if (strcmp(nd->model, "dp83932") == 0) { + int checksum, i; + uint8_t *prom; + qemu_check_nic_model(nd, "dp83932"); dev = qdev_new("dp8393x"); qdev_set_nic_properties(dev, nd); qdev_prop_set_uint8(dev, "it_shift", 2); + qdev_prop_set_bit(dev, "big_endian", big_endian > 0); object_property_set_link(OBJECT(dev), "dma_mr", OBJECT(rc4030_dma_mr), &error_abort); sysbus = SYS_BUS_DEVICE(dev); sysbus_realize_and_unref(sysbus, &error_fatal); sysbus_mmio_map(sysbus, 0, 0x80001000); - sysbus_mmio_map(sysbus, 1, 0x8000b000); sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(rc4030, 4)); + + /* Add MAC address with valid checksum to PROM */ + prom = memory_region_get_ram_ptr(dp8393x_prom); + checksum = 0; + for (i = 0; i < 6; i++) { + prom[i] = nd->macaddr.a[i]; + checksum += prom[i]; + if (checksum > 0xff) { + checksum = (checksum + 1) & 0xff; + } + } + prom[7] = 0xff - checksum; break; } else if (is_help_option(nd->model)) { error_report("Supported NICs: dp83932"); @@ -363,16 +360,12 @@ static void mips_jazz_init(MachineState *machine, memory_region_add_subregion(address_space, 0x80005000, i8042); /* Serial ports */ - if (serial_hd(0)) { - serial_mm_init(address_space, 0x80006000, 0, - qdev_get_gpio_in(rc4030, 8), 8000000 / 16, - serial_hd(0), DEVICE_NATIVE_ENDIAN); - } - if (serial_hd(1)) { - serial_mm_init(address_space, 0x80007000, 0, - qdev_get_gpio_in(rc4030, 9), 8000000 / 16, - serial_hd(1), DEVICE_NATIVE_ENDIAN); - } + serial_mm_init(address_space, 0x80006000, 0, + qdev_get_gpio_in(rc4030, 8), 8000000 / 16, + serial_hd(0), DEVICE_NATIVE_ENDIAN); + serial_mm_init(address_space, 0x80007000, 0, + qdev_get_gpio_in(rc4030, 9), 8000000 / 16, + serial_hd(1), DEVICE_NATIVE_ENDIAN); /* Parallel port */ if (parallel_hds[0]) diff --git a/hw/mips/loongson3_virt.c b/hw/mips/loongson3_virt.c index b15071defc6..ae192db0c8b 100644 --- a/hw/mips/loongson3_virt.c +++ b/hw/mips/loongson3_virt.c @@ -29,10 +29,8 @@ #include "qemu/cutils.h" #include "qemu/datadir.h" #include "qapi/error.h" -#include "cpu.h" #include "elf.h" #include "kvm_mips.h" -#include "hw/boards.h" #include "hw/char/serial.h" #include "hw/intc/loongson_liointc.h" #include "hw/mips/mips.h" @@ -49,12 +47,10 @@ #include "hw/pci-host/gpex.h" #include "hw/usb.h" #include "net/net.h" -#include "exec/address-spaces.h" #include "sysemu/kvm.h" #include "sysemu/qtest.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" -#include "qemu/log.h" #include "qemu/error-report.h" #define PM_CNTL_MODE 0x10 diff --git a/hw/mips/malta.c 
b/hw/mips/malta.c index fba13dda5c9..981d4a8cb93 100644 --- a/hw/mips/malta.c +++ b/hw/mips/malta.c @@ -27,7 +27,6 @@ #include "qemu/bitops.h" #include "qemu-common.h" #include "qemu/datadir.h" -#include "cpu.h" #include "hw/clock.h" #include "hw/southbridge/piix.h" #include "hw/isa/superio.h" @@ -39,15 +38,12 @@ #include "hw/mips/mips.h" #include "hw/mips/cpudevs.h" #include "hw/pci/pci.h" -#include "sysemu/sysemu.h" -#include "sysemu/arch_init.h" #include "qemu/log.h" #include "hw/mips/bios.h" #include "hw/ide.h" #include "hw/irq.h" #include "hw/loader.h" #include "elf.h" -#include "exec/address-spaces.h" #include "qom/object.h" #include "hw/sysbus.h" /* SysBusDevice */ #include "qemu/host-utils.h" diff --git a/hw/mips/meson.build b/hw/mips/meson.build index 1195716dc73..dd0101ad4d8 100644 --- a/hw/mips/meson.build +++ b/hw/mips/meson.build @@ -1,12 +1,15 @@ mips_ss = ss.source_set() mips_ss.add(files('bootloader.c', 'mips_int.c')) mips_ss.add(when: 'CONFIG_FW_CFG_MIPS', if_true: files('fw_cfg.c')) -mips_ss.add(when: 'CONFIG_FULOONG', if_true: files('fuloong2e.c')) mips_ss.add(when: 'CONFIG_LOONGSON3V', if_true: files('loongson3_bootp.c', 'loongson3_virt.c')) -mips_ss.add(when: 'CONFIG_JAZZ', if_true: files('jazz.c')) mips_ss.add(when: 'CONFIG_MALTA', if_true: files('gt64xxx_pci.c', 'malta.c')) +mips_ss.add(when: 'CONFIG_MIPS_CPS', if_true: files('cps.c')) + +if 'CONFIG_TCG' in config_all +mips_ss.add(when: 'CONFIG_JAZZ', if_true: files('jazz.c')) mips_ss.add(when: 'CONFIG_MIPSSIM', if_true: files('mipssim.c')) +mips_ss.add(when: 'CONFIG_FULOONG', if_true: files('fuloong2e.c')) mips_ss.add(when: 'CONFIG_MIPS_BOSTON', if_true: [files('boston.c'), fdt]) -mips_ss.add(when: 'CONFIG_MIPS_CPS', if_true: files('cps.c')) +endif hw_arch += {'mips': mips_ss} diff --git a/hw/mips/mips_int.c b/hw/mips/mips_int.c index 0f9c6f07c1c..2db5e10fe0b 100644 --- a/hw/mips/mips_int.c +++ b/hw/mips/mips_int.c @@ -24,7 +24,6 @@ #include "qemu/main-loop.h" #include "hw/irq.h" #include "hw/mips/cpudevs.h" -#include "cpu.h" #include "sysemu/kvm.h" #include "kvm_mips.h" diff --git a/hw/mips/mipssim.c b/hw/mips/mipssim.c index f9212826a9b..a9525a1d351 100644 --- a/hw/mips/mipssim.c +++ b/hw/mips/mipssim.c @@ -29,7 +29,6 @@ #include "qapi/error.h" #include "qemu-common.h" #include "qemu/datadir.h" -#include "cpu.h" #include "hw/clock.h" #include "hw/mips/mips.h" #include "hw/mips/cpudevs.h" @@ -43,7 +42,6 @@ #include "elf.h" #include "hw/sysbus.h" #include "hw/qdev-properties.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "sysemu/qtest.h" #include "sysemu/reset.h" diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig index c71ed258204..507058d8bff 100644 --- a/hw/misc/Kconfig +++ b/hw/misc/Kconfig @@ -11,21 +11,6 @@ config ARMSSE_MHU config ARMSSE_CPU_PWRCTRL bool -config MAX111X - bool - -config TMP105 - bool - depends on I2C - -config TMP421 - bool - depends on I2C - -config EMC141X - bool - depends on I2C - config ISA_DEBUG bool depends on ISA_BUS diff --git a/hw/misc/armv7m_ras.c b/hw/misc/armv7m_ras.c new file mode 100644 index 00000000000..de24922c944 --- /dev/null +++ b/hw/misc/armv7m_ras.c @@ -0,0 +1,93 @@ +/* + * Arm M-profile RAS (Reliability, Availability and Serviceability) block + * + * Copyright (c) 2021 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. 
+ */ + +#include "qemu/osdep.h" +#include "hw/misc/armv7m_ras.h" +#include "qemu/log.h" + +static MemTxResult ras_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + if (attrs.user) { + return MEMTX_ERROR; + } + + switch (addr) { + case 0xe10: /* ERRIIDR */ + /* architect field = Arm; product/variant/revision 0 */ + *data = 0x43b; + break; + case 0xfc8: /* ERRDEVID */ + /* Minimal RAS: we implement 0 error record indexes */ + *data = 0; + break; + default: + qemu_log_mask(LOG_UNIMP, "Read RAS register offset 0x%x\n", + (uint32_t)addr); + *data = 0; + break; + } + return MEMTX_OK; +} + +static MemTxResult ras_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size, + MemTxAttrs attrs) +{ + if (attrs.user) { + return MEMTX_ERROR; + } + + switch (addr) { + default: + qemu_log_mask(LOG_UNIMP, "Write to RAS register offset 0x%x\n", + (uint32_t)addr); + break; + } + return MEMTX_OK; +} + +static const MemoryRegionOps ras_ops = { + .read_with_attrs = ras_read, + .write_with_attrs = ras_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + + +static void armv7m_ras_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + ARMv7MRAS *s = ARMV7M_RAS(obj); + + memory_region_init_io(&s->iomem, obj, &ras_ops, + s, "armv7m-ras", 0x1000); + sysbus_init_mmio(sbd, &s->iomem); +} + +static void armv7m_ras_class_init(ObjectClass *klass, void *data) +{ + /* This device has no state: no need for vmstate or reset */ +} + +static const TypeInfo armv7m_ras_info = { + .name = TYPE_ARMV7M_RAS, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(ARMv7MRAS), + .instance_init = armv7m_ras_init, + .class_init = armv7m_ras_class_init, +}; + +static void armv7m_ras_register_types(void) +{ + type_register_static(&armv7m_ras_info); +} + +type_init(armv7m_ras_register_types); diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c new file mode 100644 index 00000000000..10f00e65f4e --- /dev/null +++ b/hw/misc/aspeed_hace.c @@ -0,0 +1,389 @@ +/* + * ASPEED Hash and Crypto Engine + * + * Copyright (C) 2021 IBM Corp. 
+ * + * Joel Stanley + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "hw/misc/aspeed_hace.h" +#include "qapi/error.h" +#include "migration/vmstate.h" +#include "crypto/hash.h" +#include "hw/qdev-properties.h" +#include "hw/irq.h" + +#define R_CRYPT_CMD (0x10 / 4) + +#define R_STATUS (0x1c / 4) +#define HASH_IRQ BIT(9) +#define CRYPT_IRQ BIT(12) +#define TAG_IRQ BIT(15) + +#define R_HASH_SRC (0x20 / 4) +#define R_HASH_DEST (0x24 / 4) +#define R_HASH_SRC_LEN (0x2c / 4) + +#define R_HASH_CMD (0x30 / 4) +/* Hash algorithm selection */ +#define HASH_ALGO_MASK (BIT(4) | BIT(5) | BIT(6)) +#define HASH_ALGO_MD5 0 +#define HASH_ALGO_SHA1 BIT(5) +#define HASH_ALGO_SHA224 BIT(6) +#define HASH_ALGO_SHA256 (BIT(4) | BIT(6)) +#define HASH_ALGO_SHA512_SERIES (BIT(5) | BIT(6)) +/* SHA512 algorithm selection */ +#define SHA512_HASH_ALGO_MASK (BIT(10) | BIT(11) | BIT(12)) +#define HASH_ALGO_SHA512_SHA512 0 +#define HASH_ALGO_SHA512_SHA384 BIT(10) +#define HASH_ALGO_SHA512_SHA256 BIT(11) +#define HASH_ALGO_SHA512_SHA224 (BIT(10) | BIT(11)) +/* HMAC modes */ +#define HASH_HMAC_MASK (BIT(7) | BIT(8)) +#define HASH_DIGEST 0 +#define HASH_DIGEST_HMAC BIT(7) +#define HASH_DIGEST_ACCUM BIT(8) +#define HASH_HMAC_KEY (BIT(7) | BIT(8)) +/* Cascaded operation modes */ +#define HASH_ONLY 0 +#define HASH_ONLY2 BIT(0) +#define HASH_CRYPT_THEN_HASH BIT(1) +#define HASH_HASH_THEN_CRYPT (BIT(0) | BIT(1)) +/* Other cmd bits */ +#define HASH_IRQ_EN BIT(9) +#define HASH_SG_EN BIT(18) +/* Scatter-gather data list */ +#define SG_LIST_LEN_SIZE 4 +#define SG_LIST_LEN_MASK 0x0FFFFFFF +#define SG_LIST_LEN_LAST BIT(31) +#define SG_LIST_ADDR_SIZE 4 +#define SG_LIST_ADDR_MASK 0x7FFFFFFF +#define SG_LIST_ENTRY_SIZE (SG_LIST_LEN_SIZE + SG_LIST_ADDR_SIZE) +#define ASPEED_HACE_MAX_SG 256 /* max number of entries */ + +static const struct { + uint32_t mask; + QCryptoHashAlgorithm algo; +} hash_algo_map[] = { + { HASH_ALGO_MD5, QCRYPTO_HASH_ALG_MD5 }, + { HASH_ALGO_SHA1, QCRYPTO_HASH_ALG_SHA1 }, + { HASH_ALGO_SHA224, QCRYPTO_HASH_ALG_SHA224 }, + { HASH_ALGO_SHA256, QCRYPTO_HASH_ALG_SHA256 }, + { HASH_ALGO_SHA512_SERIES | HASH_ALGO_SHA512_SHA512, QCRYPTO_HASH_ALG_SHA512 }, + { HASH_ALGO_SHA512_SERIES | HASH_ALGO_SHA512_SHA384, QCRYPTO_HASH_ALG_SHA384 }, + { HASH_ALGO_SHA512_SERIES | HASH_ALGO_SHA512_SHA256, QCRYPTO_HASH_ALG_SHA256 }, +}; + +static int hash_algo_lookup(uint32_t reg) +{ + int i; + + reg &= HASH_ALGO_MASK | SHA512_HASH_ALGO_MASK; + + for (i = 0; i < ARRAY_SIZE(hash_algo_map); i++) { + if (reg == hash_algo_map[i].mask) { + return hash_algo_map[i].algo; + } + } + + return -1; +} + +static void do_hash_operation(AspeedHACEState *s, int algo, bool sg_mode) +{ + struct iovec iov[ASPEED_HACE_MAX_SG]; + g_autofree uint8_t *digest_buf; + size_t digest_len = 0; + int i; + + if (sg_mode) { + uint32_t len = 0; + + for (i = 0; !(len & SG_LIST_LEN_LAST); i++) { + uint32_t addr, src; + hwaddr plen; + + if (i == ASPEED_HACE_MAX_SG) { + qemu_log_mask(LOG_GUEST_ERROR, + "aspeed_hace: guest failed to set end of sg list marker\n"); + break; + } + + src = s->regs[R_HASH_SRC] + (i * SG_LIST_ENTRY_SIZE); + + len = address_space_ldl_le(&s->dram_as, src, + MEMTXATTRS_UNSPECIFIED, NULL); + + addr = address_space_ldl_le(&s->dram_as, src + SG_LIST_LEN_SIZE, + MEMTXATTRS_UNSPECIFIED, NULL); + addr &= SG_LIST_ADDR_MASK; + + iov[i].iov_len = len & SG_LIST_LEN_MASK; + plen = iov[i].iov_len; + iov[i].iov_base = address_space_map(&s->dram_as, addr, &plen, false, + 
MEMTXATTRS_UNSPECIFIED); + } + } else { + hwaddr len = s->regs[R_HASH_SRC_LEN]; + + iov[0].iov_len = len; + iov[0].iov_base = address_space_map(&s->dram_as, s->regs[R_HASH_SRC], + &len, false, + MEMTXATTRS_UNSPECIFIED); + i = 1; + } + + if (qcrypto_hash_bytesv(algo, iov, i, &digest_buf, &digest_len, NULL) < 0) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto failed\n", __func__); + return; + } + + if (address_space_write(&s->dram_as, s->regs[R_HASH_DEST], + MEMTXATTRS_UNSPECIFIED, + digest_buf, digest_len)) { + qemu_log_mask(LOG_GUEST_ERROR, + "aspeed_hace: address space write failed\n"); + } + + for (; i > 0; i--) { + address_space_unmap(&s->dram_as, iov[i - 1].iov_base, + iov[i - 1].iov_len, false, + iov[i - 1].iov_len); + } + + /* + * Set status bits to indicate completion. Testing shows hardware sets + * these irrespective of HASH_IRQ_EN. + */ + s->regs[R_STATUS] |= HASH_IRQ; +} + +static uint64_t aspeed_hace_read(void *opaque, hwaddr addr, unsigned int size) +{ + AspeedHACEState *s = ASPEED_HACE(opaque); + + addr >>= 2; + + if (addr >= ASPEED_HACE_NR_REGS) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Out-of-bounds read at offset 0x%" HWADDR_PRIx "\n", + __func__, addr << 2); + return 0; + } + + return s->regs[addr]; +} + +static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data, + unsigned int size) +{ + AspeedHACEState *s = ASPEED_HACE(opaque); + AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s); + + addr >>= 2; + + if (addr >= ASPEED_HACE_NR_REGS) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Out-of-bounds write at offset 0x%" HWADDR_PRIx "\n", + __func__, addr << 2); + return; + } + + switch (addr) { + case R_STATUS: + if (data & HASH_IRQ) { + data &= ~HASH_IRQ; + + if (s->regs[addr] & HASH_IRQ) { + qemu_irq_lower(s->irq); + } + } + break; + case R_HASH_SRC: + data &= ahc->src_mask; + break; + case R_HASH_DEST: + data &= ahc->dest_mask; + break; + case R_HASH_SRC_LEN: + data &= 0x0FFFFFFF; + break; + case R_HASH_CMD: { + int algo; + data &= ahc->hash_mask; + + if ((data & HASH_HMAC_MASK)) { + qemu_log_mask(LOG_UNIMP, + "%s: HMAC engine command mode %"PRIx64" not implemented", + __func__, (data & HASH_HMAC_MASK) >> 8); + } + if (data & BIT(1)) { + qemu_log_mask(LOG_UNIMP, + "%s: Cascaded mode not implemented", + __func__); + } + algo = hash_algo_lookup(data); + if (algo < 0) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Invalid hash algorithm selection 0x%"PRIx64"\n", + __func__, data & ahc->hash_mask); + break; + } + do_hash_operation(s, algo, data & HASH_SG_EN); + + if (data & HASH_IRQ_EN) { + qemu_irq_raise(s->irq); + } + break; + } + case R_CRYPT_CMD: + qemu_log_mask(LOG_UNIMP, "%s: Crypt commands not implemented\n", + __func__); + break; + default: + break; + } + + s->regs[addr] = data; +} + +static const MemoryRegionOps aspeed_hace_ops = { + .read = aspeed_hace_read, + .write = aspeed_hace_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 4, + }, +}; + +static void aspeed_hace_reset(DeviceState *dev) +{ + struct AspeedHACEState *s = ASPEED_HACE(dev); + + memset(s->regs, 0, sizeof(s->regs)); +} + +static void aspeed_hace_realize(DeviceState *dev, Error **errp) +{ + AspeedHACEState *s = ASPEED_HACE(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + + sysbus_init_irq(sbd, &s->irq); + + memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_hace_ops, s, + TYPE_ASPEED_HACE, 0x1000); + + if (!s->dram_mr) { + error_setg(errp, TYPE_ASPEED_HACE ": 'dram' link not set"); + return; + } + + address_space_init(&s->dram_as, s->dram_mr, 
"dram"); + + sysbus_init_mmio(sbd, &s->iomem); +} + +static Property aspeed_hace_properties[] = { + DEFINE_PROP_LINK("dram", AspeedHACEState, dram_mr, + TYPE_MEMORY_REGION, MemoryRegion *), + DEFINE_PROP_END_OF_LIST(), +}; + + +static const VMStateDescription vmstate_aspeed_hace = { + .name = TYPE_ASPEED_HACE, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(regs, AspeedHACEState, ASPEED_HACE_NR_REGS), + VMSTATE_END_OF_LIST(), + } +}; + +static void aspeed_hace_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = aspeed_hace_realize; + dc->reset = aspeed_hace_reset; + device_class_set_props(dc, aspeed_hace_properties); + dc->vmsd = &vmstate_aspeed_hace; +} + +static const TypeInfo aspeed_hace_info = { + .name = TYPE_ASPEED_HACE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(AspeedHACEState), + .class_init = aspeed_hace_class_init, + .class_size = sizeof(AspeedHACEClass) +}; + +static void aspeed_ast2400_hace_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedHACEClass *ahc = ASPEED_HACE_CLASS(klass); + + dc->desc = "AST2400 Hash and Crypto Engine"; + + ahc->src_mask = 0x0FFFFFFF; + ahc->dest_mask = 0x0FFFFFF8; + ahc->hash_mask = 0x000003ff; /* No SG or SHA512 modes */ +} + +static const TypeInfo aspeed_ast2400_hace_info = { + .name = TYPE_ASPEED_AST2400_HACE, + .parent = TYPE_ASPEED_HACE, + .class_init = aspeed_ast2400_hace_class_init, +}; + +static void aspeed_ast2500_hace_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedHACEClass *ahc = ASPEED_HACE_CLASS(klass); + + dc->desc = "AST2500 Hash and Crypto Engine"; + + ahc->src_mask = 0x3fffffff; + ahc->dest_mask = 0x3ffffff8; + ahc->hash_mask = 0x000003ff; /* No SG or SHA512 modes */ +} + +static const TypeInfo aspeed_ast2500_hace_info = { + .name = TYPE_ASPEED_AST2500_HACE, + .parent = TYPE_ASPEED_HACE, + .class_init = aspeed_ast2500_hace_class_init, +}; + +static void aspeed_ast2600_hace_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedHACEClass *ahc = ASPEED_HACE_CLASS(klass); + + dc->desc = "AST2600 Hash and Crypto Engine"; + + ahc->src_mask = 0x7FFFFFFF; + ahc->dest_mask = 0x7FFFFFF8; + ahc->hash_mask = 0x00147FFF; +} + +static const TypeInfo aspeed_ast2600_hace_info = { + .name = TYPE_ASPEED_AST2600_HACE, + .parent = TYPE_ASPEED_HACE, + .class_init = aspeed_ast2600_hace_class_init, +}; + +static void aspeed_hace_register_types(void) +{ + type_register_static(&aspeed_ast2400_hace_info); + type_register_static(&aspeed_ast2500_hace_info); + type_register_static(&aspeed_ast2600_hace_info); + type_register_static(&aspeed_hace_info); +} + +type_init(aspeed_hace_register_types); diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c index 40a38ebd854..d06e179a6e6 100644 --- a/hw/misc/aspeed_scu.c +++ b/hw/misc/aspeed_scu.c @@ -101,14 +101,26 @@ #define AST2600_CLK_STOP_CTRL_CLR TO_REG(0x84) #define AST2600_CLK_STOP_CTRL2 TO_REG(0x90) #define AST2600_CLK_STOP_CTRL2_CLR TO_REG(0x94) +#define AST2600_DEBUG_CTRL TO_REG(0xC8) +#define AST2600_DEBUG_CTRL2 TO_REG(0xD8) #define AST2600_SDRAM_HANDSHAKE TO_REG(0x100) #define AST2600_HPLL_PARAM TO_REG(0x200) #define AST2600_HPLL_EXT TO_REG(0x204) +#define AST2600_APLL_PARAM TO_REG(0x210) +#define AST2600_APLL_EXT TO_REG(0x214) +#define AST2600_MPLL_PARAM TO_REG(0x220) #define AST2600_MPLL_EXT TO_REG(0x224) +#define AST2600_EPLL_PARAM TO_REG(0x240) 
#define AST2600_EPLL_EXT TO_REG(0x244) +#define AST2600_DPLL_PARAM TO_REG(0x260) +#define AST2600_DPLL_EXT TO_REG(0x264) #define AST2600_CLK_SEL TO_REG(0x300) #define AST2600_CLK_SEL2 TO_REG(0x304) -#define AST2600_CLK_SEL3 TO_REG(0x310) +#define AST2600_CLK_SEL3 TO_REG(0x308) +#define AST2600_CLK_SEL4 TO_REG(0x310) +#define AST2600_CLK_SEL5 TO_REG(0x314) +#define AST2600_UARTCLK TO_REG(0x338) +#define AST2600_HUARTCLK TO_REG(0x33C) #define AST2600_HW_STRAP1 TO_REG(0x500) #define AST2600_HW_STRAP1_CLR TO_REG(0x504) #define AST2600_HW_STRAP1_PROT TO_REG(0x508) @@ -433,6 +445,8 @@ static uint32_t aspeed_silicon_revs[] = { AST2500_A1_SILICON_REV, AST2600_A0_SILICON_REV, AST2600_A1_SILICON_REV, + AST2600_A2_SILICON_REV, + AST2600_A3_SILICON_REV, }; bool is_supported_silicon_rev(uint32_t silicon_rev) @@ -651,16 +665,28 @@ static const MemoryRegionOps aspeed_ast2600_scu_ops = { .valid.unaligned = false, }; -static const uint32_t ast2600_a1_resets[ASPEED_AST2600_SCU_NR_REGS] = { +static const uint32_t ast2600_a3_resets[ASPEED_AST2600_SCU_NR_REGS] = { [AST2600_SYS_RST_CTRL] = 0xF7C3FED8, - [AST2600_SYS_RST_CTRL2] = 0xFFFFFFFC, + [AST2600_SYS_RST_CTRL2] = 0x0DFFFFFC, [AST2600_CLK_STOP_CTRL] = 0xFFFF7F8A, [AST2600_CLK_STOP_CTRL2] = 0xFFF0FFF0, + [AST2600_DEBUG_CTRL] = 0x00000FFF, + [AST2600_DEBUG_CTRL2] = 0x000000FF, [AST2600_SDRAM_HANDSHAKE] = 0x00000000, - [AST2600_HPLL_PARAM] = 0x1000405F, + [AST2600_HPLL_PARAM] = 0x1000408F, + [AST2600_APLL_PARAM] = 0x1000405F, + [AST2600_MPLL_PARAM] = 0x1008405F, + [AST2600_EPLL_PARAM] = 0x1004077F, + [AST2600_DPLL_PARAM] = 0x1078405F, + [AST2600_CLK_SEL] = 0xF3940000, + [AST2600_CLK_SEL2] = 0x00700000, + [AST2600_CLK_SEL3] = 0x00000000, + [AST2600_CLK_SEL4] = 0xF3F40000, + [AST2600_CLK_SEL5] = 0x30000000, + [AST2600_UARTCLK] = 0x00014506, + [AST2600_HUARTCLK] = 0x000145C0, [AST2600_CHIP_ID0] = 0x1234ABCD, [AST2600_CHIP_ID1] = 0x88884444, - }; static void aspeed_ast2600_scu_reset(DeviceState *dev) @@ -675,7 +701,7 @@ static void aspeed_ast2600_scu_reset(DeviceState *dev) * of actual revision. QEMU and Linux only support A1 onwards so this is * sufficient. 
*/ - s->regs[AST2600_SILICON_REV] = AST2600_A1_SILICON_REV; + s->regs[AST2600_SILICON_REV] = AST2600_A3_SILICON_REV; s->regs[AST2600_SILICON_REV2] = s->silicon_rev; s->regs[AST2600_HW_STRAP1] = s->hw_strap1; s->regs[AST2600_HW_STRAP2] = s->hw_strap2; @@ -689,7 +715,7 @@ static void aspeed_2600_scu_class_init(ObjectClass *klass, void *data) dc->desc = "ASPEED 2600 System Control Unit"; dc->reset = aspeed_ast2600_scu_reset; - asc->resets = ast2600_a1_resets; + asc->resets = ast2600_a3_resets; asc->calc_hpll = aspeed_2500_scu_calc_hpll; /* No change since AST2500 */ asc->apb_divider = 4; asc->nr_regs = ASPEED_AST2600_SCU_NR_REGS; diff --git a/hw/misc/aspeed_xdma.c b/hw/misc/aspeed_xdma.c index 533d237e3ce..1c21577c98c 100644 --- a/hw/misc/aspeed_xdma.c +++ b/hw/misc/aspeed_xdma.c @@ -30,6 +30,19 @@ #define XDMA_IRQ_ENG_STAT_US_COMP BIT(4) #define XDMA_IRQ_ENG_STAT_DS_COMP BIT(5) #define XDMA_IRQ_ENG_STAT_RESET 0xF8000000 + +#define XDMA_AST2600_BMC_CMDQ_ADDR 0x14 +#define XDMA_AST2600_BMC_CMDQ_ENDP 0x18 +#define XDMA_AST2600_BMC_CMDQ_WRP 0x1c +#define XDMA_AST2600_BMC_CMDQ_RDP 0x20 +#define XDMA_AST2600_IRQ_CTRL 0x38 +#define XDMA_AST2600_IRQ_CTRL_US_COMP BIT(16) +#define XDMA_AST2600_IRQ_CTRL_DS_COMP BIT(17) +#define XDMA_AST2600_IRQ_CTRL_W_MASK 0x017003FF +#define XDMA_AST2600_IRQ_STATUS 0x3c +#define XDMA_AST2600_IRQ_STATUS_US_COMP BIT(16) +#define XDMA_AST2600_IRQ_STATUS_DS_COMP BIT(17) + #define XDMA_MEM_SIZE 0x1000 #define TO_REG(addr) ((addr) / sizeof(uint32_t)) @@ -52,56 +65,48 @@ static void aspeed_xdma_write(void *opaque, hwaddr addr, uint64_t val, unsigned int idx; uint32_t val32 = (uint32_t)val; AspeedXDMAState *xdma = opaque; + AspeedXDMAClass *axc = ASPEED_XDMA_GET_CLASS(xdma); if (addr >= ASPEED_XDMA_REG_SIZE) { return; } - switch (addr) { - case XDMA_BMC_CMDQ_ENDP: + if (addr == axc->cmdq_endp) { xdma->regs[TO_REG(addr)] = val32 & XDMA_BMC_CMDQ_W_MASK; - break; - case XDMA_BMC_CMDQ_WRP: + } else if (addr == axc->cmdq_wrp) { idx = TO_REG(addr); xdma->regs[idx] = val32 & XDMA_BMC_CMDQ_W_MASK; - xdma->regs[TO_REG(XDMA_BMC_CMDQ_RDP)] = xdma->regs[idx]; + xdma->regs[TO_REG(axc->cmdq_rdp)] = xdma->regs[idx]; trace_aspeed_xdma_write(addr, val); if (xdma->bmc_cmdq_readp_set) { xdma->bmc_cmdq_readp_set = 0; } else { - xdma->regs[TO_REG(XDMA_IRQ_ENG_STAT)] |= - XDMA_IRQ_ENG_STAT_US_COMP | XDMA_IRQ_ENG_STAT_DS_COMP; + xdma->regs[TO_REG(axc->intr_status)] |= axc->intr_complete; - if (xdma->regs[TO_REG(XDMA_IRQ_ENG_CTRL)] & - (XDMA_IRQ_ENG_CTRL_US_COMP | XDMA_IRQ_ENG_CTRL_DS_COMP)) + if (xdma->regs[TO_REG(axc->intr_ctrl)] & axc->intr_complete) { qemu_irq_raise(xdma->irq); + } } - break; - case XDMA_BMC_CMDQ_RDP: + } else if (addr == axc->cmdq_rdp) { trace_aspeed_xdma_write(addr, val); if (val32 == XDMA_BMC_CMDQ_RDP_MAGIC) { xdma->bmc_cmdq_readp_set = 1; } - break; - case XDMA_IRQ_ENG_CTRL: - xdma->regs[TO_REG(addr)] = val32 & XDMA_IRQ_ENG_CTRL_W_MASK; - break; - case XDMA_IRQ_ENG_STAT: + } else if (addr == axc->intr_ctrl) { + xdma->regs[TO_REG(addr)] = val32 & axc->intr_ctrl_mask; + } else if (addr == axc->intr_status) { trace_aspeed_xdma_write(addr, val); idx = TO_REG(addr); - if (val32 & (XDMA_IRQ_ENG_STAT_US_COMP | XDMA_IRQ_ENG_STAT_DS_COMP)) { - xdma->regs[idx] &= - ~(XDMA_IRQ_ENG_STAT_US_COMP | XDMA_IRQ_ENG_STAT_DS_COMP); + if (val32 & axc->intr_complete) { + xdma->regs[idx] &= ~axc->intr_complete; qemu_irq_lower(xdma->irq); } - break; - default: + } else { xdma->regs[TO_REG(addr)] = val32; - break; } } @@ -127,10 +132,11 @@ static void aspeed_xdma_realize(DeviceState *dev, Error 
**errp) static void aspeed_xdma_reset(DeviceState *dev) { AspeedXDMAState *xdma = ASPEED_XDMA(dev); + AspeedXDMAClass *axc = ASPEED_XDMA_GET_CLASS(xdma); xdma->bmc_cmdq_readp_set = 0; memset(xdma->regs, 0, ASPEED_XDMA_REG_SIZE); - xdma->regs[TO_REG(XDMA_IRQ_ENG_STAT)] = XDMA_IRQ_ENG_STAT_RESET; + xdma->regs[TO_REG(axc->intr_status)] = XDMA_IRQ_ENG_STAT_RESET; qemu_irq_lower(xdma->irq); } @@ -144,6 +150,73 @@ static const VMStateDescription aspeed_xdma_vmstate = { }, }; +static void aspeed_2600_xdma_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedXDMAClass *axc = ASPEED_XDMA_CLASS(klass); + + dc->desc = "ASPEED 2600 XDMA Controller"; + + axc->cmdq_endp = XDMA_AST2600_BMC_CMDQ_ENDP; + axc->cmdq_wrp = XDMA_AST2600_BMC_CMDQ_WRP; + axc->cmdq_rdp = XDMA_AST2600_BMC_CMDQ_RDP; + axc->intr_ctrl = XDMA_AST2600_IRQ_CTRL; + axc->intr_ctrl_mask = XDMA_AST2600_IRQ_CTRL_W_MASK; + axc->intr_status = XDMA_AST2600_IRQ_STATUS; + axc->intr_complete = XDMA_AST2600_IRQ_STATUS_US_COMP | + XDMA_AST2600_IRQ_STATUS_DS_COMP; +} + +static const TypeInfo aspeed_2600_xdma_info = { + .name = TYPE_ASPEED_2600_XDMA, + .parent = TYPE_ASPEED_XDMA, + .class_init = aspeed_2600_xdma_class_init, +}; + +static void aspeed_2500_xdma_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedXDMAClass *axc = ASPEED_XDMA_CLASS(klass); + + dc->desc = "ASPEED 2500 XDMA Controller"; + + axc->cmdq_endp = XDMA_BMC_CMDQ_ENDP; + axc->cmdq_wrp = XDMA_BMC_CMDQ_WRP; + axc->cmdq_rdp = XDMA_BMC_CMDQ_RDP; + axc->intr_ctrl = XDMA_IRQ_ENG_CTRL; + axc->intr_ctrl_mask = XDMA_IRQ_ENG_CTRL_W_MASK; + axc->intr_status = XDMA_IRQ_ENG_STAT; + axc->intr_complete = XDMA_IRQ_ENG_STAT_US_COMP | XDMA_IRQ_ENG_STAT_DS_COMP; +}; + +static const TypeInfo aspeed_2500_xdma_info = { + .name = TYPE_ASPEED_2500_XDMA, + .parent = TYPE_ASPEED_XDMA, + .class_init = aspeed_2500_xdma_class_init, +}; + +static void aspeed_2400_xdma_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedXDMAClass *axc = ASPEED_XDMA_CLASS(klass); + + dc->desc = "ASPEED 2400 XDMA Controller"; + + axc->cmdq_endp = XDMA_BMC_CMDQ_ENDP; + axc->cmdq_wrp = XDMA_BMC_CMDQ_WRP; + axc->cmdq_rdp = XDMA_BMC_CMDQ_RDP; + axc->intr_ctrl = XDMA_IRQ_ENG_CTRL; + axc->intr_ctrl_mask = XDMA_IRQ_ENG_CTRL_W_MASK; + axc->intr_status = XDMA_IRQ_ENG_STAT; + axc->intr_complete = XDMA_IRQ_ENG_STAT_US_COMP | XDMA_IRQ_ENG_STAT_DS_COMP; +}; + +static const TypeInfo aspeed_2400_xdma_info = { + .name = TYPE_ASPEED_2400_XDMA, + .parent = TYPE_ASPEED_XDMA, + .class_init = aspeed_2400_xdma_class_init, +}; + static void aspeed_xdma_class_init(ObjectClass *classp, void *data) { DeviceClass *dc = DEVICE_CLASS(classp); @@ -158,10 +231,15 @@ static const TypeInfo aspeed_xdma_info = { .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(AspeedXDMAState), .class_init = aspeed_xdma_class_init, + .class_size = sizeof(AspeedXDMAClass), + .abstract = true, }; static void aspeed_xdma_register_type(void) { type_register_static(&aspeed_xdma_info); + type_register_static(&aspeed_2400_xdma_info); + type_register_static(&aspeed_2500_xdma_info); + type_register_static(&aspeed_2600_xdma_info); } type_init(aspeed_xdma_register_type); diff --git a/hw/misc/auxbus.c b/hw/misc/auxbus.c index 6c099ae2a2d..8a8012f5f08 100644 --- a/hw/misc/auxbus.c +++ b/hw/misc/auxbus.c @@ -65,7 +65,7 @@ AUXBus *aux_bus_init(DeviceState *parent, const char *name) AUXBus *bus; Object *auxtoi2c; - bus = AUX_BUS(qbus_create(TYPE_AUX_BUS, parent, 
name)); + bus = AUX_BUS(qbus_new(TYPE_AUX_BUS, parent, name)); auxtoi2c = object_new_with_props(TYPE_AUXTOI2C, OBJECT(bus), "i2c", &error_abort, NULL); @@ -106,7 +106,6 @@ AUXReply aux_request(AUXBus *bus, AUXCommand cmd, uint32_t address, AUXReply ret = AUX_NACK; I2CBus *i2c_bus = aux_get_i2c_bus(bus); size_t i; - bool is_write = false; DPRINTF("request at address 0x%" PRIX32 ", command %u, len %u\n", address, cmd, len); @@ -117,11 +116,10 @@ AUXReply aux_request(AUXBus *bus, AUXCommand cmd, uint32_t address, */ case WRITE_AUX: case READ_AUX: - is_write = cmd == READ_AUX ? false : true; for (i = 0; i < len; i++) { if (!address_space_rw(&bus->aux_addr_space, address++, MEMTXATTRS_UNSPECIFIED, data++, 1, - is_write)) { + cmd == WRITE_AUX)) { ret = AUX_I2C_ACK; } else { ret = AUX_NACK; @@ -133,24 +131,37 @@ AUXReply aux_request(AUXBus *bus, AUXCommand cmd, uint32_t address, * Classic I2C transactions.. */ case READ_I2C: + if (i2c_bus_busy(i2c_bus)) { + i2c_end_transfer(i2c_bus); + } + + if (i2c_start_recv(i2c_bus, address)) { + ret = AUX_I2C_NACK; + break; + } + + ret = AUX_I2C_ACK; + for (i = 0; i < len; i++) { + data[i] = i2c_recv(i2c_bus); + } + i2c_end_transfer(i2c_bus); + break; case WRITE_I2C: - is_write = cmd == READ_I2C ? false : true; if (i2c_bus_busy(i2c_bus)) { i2c_end_transfer(i2c_bus); } - if (i2c_start_transfer(i2c_bus, address, is_write)) { + if (i2c_start_send(i2c_bus, address)) { ret = AUX_I2C_NACK; break; } ret = AUX_I2C_ACK; - while (len > 0) { - if (i2c_send_recv(i2c_bus, data++, is_write) < 0) { + for (i = 0; i < len; i++) { + if (i2c_send(i2c_bus, data[i]) < 0) { ret = AUX_I2C_NACK; break; } - len--; } i2c_end_transfer(i2c_bus); break; @@ -163,14 +174,12 @@ AUXReply aux_request(AUXBus *bus, AUXCommand cmd, uint32_t address, * - We changed the address. */ case WRITE_I2C_MOT: - case READ_I2C_MOT: - is_write = cmd == READ_I2C_MOT ? false : true; ret = AUX_I2C_NACK; if (!i2c_bus_busy(i2c_bus)) { /* * No transactions started.. */ - if (i2c_start_transfer(i2c_bus, address, is_write)) { + if (i2c_start_send(i2c_bus, address)) { break; } } else if ((address != bus->last_i2c_address) || @@ -179,23 +188,48 @@ AUXReply aux_request(AUXBus *bus, AUXCommand cmd, uint32_t address, * Transaction started but we need to restart.. */ i2c_end_transfer(i2c_bus); - if (i2c_start_transfer(i2c_bus, address, is_write)) { + if (i2c_start_send(i2c_bus, address)) { break; } } bus->last_transaction = cmd; bus->last_i2c_address = address; - while (len > 0) { - if (i2c_send_recv(i2c_bus, data++, is_write) < 0) { + ret = AUX_I2C_ACK; + for (i = 0; i < len; i++) { + if (i2c_send(i2c_bus, data[i]) < 0) { i2c_end_transfer(i2c_bus); + ret = AUX_I2C_NACK; break; } - len--; } - if (len == 0) { - ret = AUX_I2C_ACK; + break; + case READ_I2C_MOT: + ret = AUX_I2C_NACK; + if (!i2c_bus_busy(i2c_bus)) { + /* + * No transactions started.. + */ + if (i2c_start_recv(i2c_bus, address)) { + break; + } + } else if ((address != bus->last_i2c_address) || + (bus->last_transaction != cmd)) { + /* + * Transaction started but we need to restart.. 
+ */ + i2c_end_transfer(i2c_bus); + if (i2c_start_recv(i2c_bus, address)) { + break; + } + } + + bus->last_transaction = cmd; + bus->last_i2c_address = address; + for (i = 0; i < len; i++) { + data[i] = i2c_recv(i2c_bus); } + ret = AUX_I2C_ACK; break; default: qemu_log_mask(LOG_UNIMP, "AUX cmd=%u not implemented\n", cmd); diff --git a/hw/misc/bcm2835_powermgt.c b/hw/misc/bcm2835_powermgt.c new file mode 100644 index 00000000000..25fa804cbda --- /dev/null +++ b/hw/misc/bcm2835_powermgt.c @@ -0,0 +1,160 @@ +/* + * BCM2835 Power Management emulation + * + * Copyright (C) 2017 Marcin Chojnacki + * Copyright (C) 2021 Nolan Leake + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "hw/misc/bcm2835_powermgt.h" +#include "migration/vmstate.h" +#include "sysemu/runstate.h" + +#define PASSWORD 0x5a000000 +#define PASSWORD_MASK 0xff000000 + +#define R_RSTC 0x1c +#define V_RSTC_RESET 0x20 +#define R_RSTS 0x20 +#define V_RSTS_POWEROFF 0x555 /* Linux uses partition 63 to indicate halt. */ +#define R_WDOG 0x24 + +static uint64_t bcm2835_powermgt_read(void *opaque, hwaddr offset, + unsigned size) +{ + BCM2835PowerMgtState *s = (BCM2835PowerMgtState *)opaque; + uint32_t res = 0; + + switch (offset) { + case R_RSTC: + res = s->rstc; + break; + case R_RSTS: + res = s->rsts; + break; + case R_WDOG: + res = s->wdog; + break; + + default: + qemu_log_mask(LOG_UNIMP, + "bcm2835_powermgt_read: Unknown offset 0x%08"HWADDR_PRIx + "\n", offset); + res = 0; + break; + } + + return res; +} + +static void bcm2835_powermgt_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + BCM2835PowerMgtState *s = (BCM2835PowerMgtState *)opaque; + + if ((value & PASSWORD_MASK) != PASSWORD) { + qemu_log_mask(LOG_GUEST_ERROR, + "bcm2835_powermgt_write: Bad password 0x%"PRIx64 + " at offset 0x%08"HWADDR_PRIx"\n", + value, offset); + return; + } + + value = value & ~PASSWORD_MASK; + + switch (offset) { + case R_RSTC: + s->rstc = value; + if (value & V_RSTC_RESET) { + if ((s->rsts & 0xfff) == V_RSTS_POWEROFF) { + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + } else { + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + } + } + break; + case R_RSTS: + qemu_log_mask(LOG_UNIMP, + "bcm2835_powermgt_write: RSTS\n"); + s->rsts = value; + break; + case R_WDOG: + qemu_log_mask(LOG_UNIMP, + "bcm2835_powermgt_write: WDOG\n"); + s->wdog = value; + break; + + default: + qemu_log_mask(LOG_UNIMP, + "bcm2835_powermgt_write: Unknown offset 0x%08"HWADDR_PRIx + "\n", offset); + break; + } +} + +static const MemoryRegionOps bcm2835_powermgt_ops = { + .read = bcm2835_powermgt_read, + .write = bcm2835_powermgt_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl.min_access_size = 4, + .impl.max_access_size = 4, +}; + +static const VMStateDescription vmstate_bcm2835_powermgt = { + .name = TYPE_BCM2835_POWERMGT, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(rstc, BCM2835PowerMgtState), + VMSTATE_UINT32(rsts, BCM2835PowerMgtState), + VMSTATE_UINT32(wdog, BCM2835PowerMgtState), + VMSTATE_END_OF_LIST() + } +}; + +static void bcm2835_powermgt_init(Object *obj) +{ + BCM2835PowerMgtState *s = BCM2835_POWERMGT(obj); + + memory_region_init_io(&s->iomem, obj, &bcm2835_powermgt_ops, s, + TYPE_BCM2835_POWERMGT, 0x200); + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); +} + +static void 
bcm2835_powermgt_reset(DeviceState *dev) +{ + BCM2835PowerMgtState *s = BCM2835_POWERMGT(dev); + + /* https://elinux.org/BCM2835_registers#PM */ + s->rstc = 0x00000102; + s->rsts = 0x00001000; + s->wdog = 0x00000000; +} + +static void bcm2835_powermgt_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = bcm2835_powermgt_reset; + dc->vmsd = &vmstate_bcm2835_powermgt; +} + +static TypeInfo bcm2835_powermgt_info = { + .name = TYPE_BCM2835_POWERMGT, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(BCM2835PowerMgtState), + .class_init = bcm2835_powermgt_class_init, + .instance_init = bcm2835_powermgt_init, +}; + +static void bcm2835_powermgt_register_types(void) +{ + type_register_static(&bcm2835_powermgt_info); +} + +type_init(bcm2835_powermgt_register_types) diff --git a/hw/misc/imx7_snvs.c b/hw/misc/imx7_snvs.c index 45972a59202..ee7698bd9cd 100644 --- a/hw/misc/imx7_snvs.c +++ b/hw/misc/imx7_snvs.c @@ -14,7 +14,6 @@ #include "qemu/osdep.h" #include "hw/misc/imx7_snvs.h" -#include "qemu/log.h" #include "qemu/module.h" #include "sysemu/runstate.h" diff --git a/hw/misc/imx_ccm.c b/hw/misc/imx_ccm.c index 08a50ee4c8a..9403c5daa36 100644 --- a/hw/misc/imx_ccm.c +++ b/hw/misc/imx_ccm.c @@ -13,7 +13,6 @@ #include "qemu/osdep.h" #include "hw/misc/imx_ccm.h" -#include "qemu/log.h" #include "qemu/module.h" #ifndef DEBUG_IMX_CCM diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c index 4c270df2db0..632c03779cb 100644 --- a/hw/misc/imx_rngc.c +++ b/hw/misc/imx_rngc.c @@ -14,7 +14,6 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" #include "qemu/module.h" -#include "qemu/log.h" #include "qemu/guest-random.h" #include "hw/irq.h" #include "hw/misc/imx_rngc.h" diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index a1fa4878bef..1ba4a98377c 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -493,9 +493,8 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) size = buf.st_size; /* mmap the region and map into the BAR2 */ - memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), - "ivshmem.bar2", size, true, fd, 0, - &local_err); + memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), "ivshmem.bar2", + size, RAM_SHARED, fd, 0, &local_err); if (local_err) { error_propagate(errp, local_err); return; diff --git a/hw/misc/led.c b/hw/misc/led.c index f552b8b6483..f6d6d68bce5 100644 --- a/hw/misc/led.c +++ b/hw/misc/led.c @@ -10,7 +10,6 @@ #include "migration/vmstate.h" #include "hw/qdev-properties.h" #include "hw/misc/led.h" -#include "hw/irq.h" #include "trace.h" #define LED_INTENSITY_PERCENT_MAX 100 diff --git a/hw/misc/mac_via.c b/hw/misc/mac_via.c index ff0156db76f..b378e6b3055 100644 --- a/hw/misc/mac_via.c +++ b/hw/misc/mac_via.c @@ -34,11 +34,9 @@ #include "qemu/log.h" /* - * VIAs: There are two in every machine, + * VIAs: There are two in every machine */ -#define VIA_SIZE (0x2000) - /* * Not all of these are true post MacII I think. * CSA: probably the ones CHRP marks as 'unused' change purposes @@ -132,6 +130,10 @@ * On SE/30, vertical sync interrupt enable. * 0=enabled. This vSync interrupt shows up * as a slot $E interrupt. + * On Quadra 800 this bit toggles A/UX mode which + * configures the glue logic to deliver some IRQs + * at different levels compared to a classic + * Mac. 
*/ #define VIA1B_vADBS2 0x20 /* ADB state input bit 1 (unused on IIfx) */ #define VIA1B_vADBS1 0x10 /* ADB state input bit 0 (unused on IIfx) */ @@ -374,11 +376,10 @@ static void via2_irq_request(void *opaque, int irq, int level) } -static void pram_update(MacVIAState *m) +static void pram_update(MOS6522Q800VIA1State *v1s) { - if (m->blk) { - if (blk_pwrite(m->blk, 0, m->mos6522_via1.PRAM, - sizeof(m->mos6522_via1.PRAM), 0) < 0) { + if (v1s->blk) { + if (blk_pwrite(v1s->blk, 0, v1s->PRAM, sizeof(v1s->PRAM), 0) < 0) { qemu_log("pram_update: cannot write to file\n"); } } @@ -434,9 +435,8 @@ static int via1_rtc_compact_cmd(uint8_t value) return REG_INVALID; } -static void via1_rtc_update(MacVIAState *m) +static void via1_rtc_update(MOS6522Q800VIA1State *v1s) { - MOS6522Q800VIA1State *v1s = &m->mos6522_via1; MOS6522State *s = MOS6522(v1s); int cmd, sector, addr; uint32_t time; @@ -448,40 +448,40 @@ static void via1_rtc_update(MacVIAState *m) if (s->dirb & VIA1B_vRTCData) { /* send bits to the RTC */ if (!(v1s->last_b & VIA1B_vRTCClk) && (s->b & VIA1B_vRTCClk)) { - m->data_out <<= 1; - m->data_out |= s->b & VIA1B_vRTCData; - m->data_out_cnt++; + v1s->data_out <<= 1; + v1s->data_out |= s->b & VIA1B_vRTCData; + v1s->data_out_cnt++; } - trace_via1_rtc_update_data_out(m->data_out_cnt, m->data_out); + trace_via1_rtc_update_data_out(v1s->data_out_cnt, v1s->data_out); } else { - trace_via1_rtc_update_data_in(m->data_in_cnt, m->data_in); + trace_via1_rtc_update_data_in(v1s->data_in_cnt, v1s->data_in); /* receive bits from the RTC */ if ((v1s->last_b & VIA1B_vRTCClk) && !(s->b & VIA1B_vRTCClk) && - m->data_in_cnt) { + v1s->data_in_cnt) { s->b = (s->b & ~VIA1B_vRTCData) | - ((m->data_in >> 7) & VIA1B_vRTCData); - m->data_in <<= 1; - m->data_in_cnt--; + ((v1s->data_in >> 7) & VIA1B_vRTCData); + v1s->data_in <<= 1; + v1s->data_in_cnt--; } return; } - if (m->data_out_cnt != 8) { + if (v1s->data_out_cnt != 8) { return; } - m->data_out_cnt = 0; + v1s->data_out_cnt = 0; - trace_via1_rtc_internal_status(m->cmd, m->alt, m->data_out); + trace_via1_rtc_internal_status(v1s->cmd, v1s->alt, v1s->data_out); /* first byte: it's a command */ - if (m->cmd == REG_EMPTY) { + if (v1s->cmd == REG_EMPTY) { - cmd = via1_rtc_compact_cmd(m->data_out); + cmd = via1_rtc_compact_cmd(v1s->data_out); trace_via1_rtc_internal_cmd(cmd); if (cmd == REG_INVALID) { - trace_via1_rtc_cmd_invalid(m->data_out); + trace_via1_rtc_cmd_invalid(v1s->data_out); return; } @@ -493,20 +493,20 @@ static void via1_rtc_update(MacVIAState *m) * register 3 is highest-order byte */ - time = m->tick_offset + (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + time = v1s->tick_offset + (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / NANOSECONDS_PER_SECOND); trace_via1_rtc_internal_time(time); - m->data_in = (time >> ((cmd & 0x03) << 3)) & 0xff; - m->data_in_cnt = 8; + v1s->data_in = (time >> ((cmd & 0x03) << 3)) & 0xff; + v1s->data_in_cnt = 8; trace_via1_rtc_cmd_seconds_read((cmd & 0x7f) - REG_0, - m->data_in); + v1s->data_in); break; case REG_PRAM_ADDR...REG_PRAM_ADDR_LAST: /* PRAM address 0x00 -> 0x13 */ - m->data_in = v1s->PRAM[(cmd & 0x7f) - REG_PRAM_ADDR]; - m->data_in_cnt = 8; + v1s->data_in = v1s->PRAM[(cmd & 0x7f) - REG_PRAM_ADDR]; + v1s->data_in_cnt = 8; trace_via1_rtc_cmd_pram_read((cmd & 0x7f) - REG_PRAM_ADDR, - m->data_in); + v1s->data_in); break; case REG_PRAM_SECT...REG_PRAM_SECT_LAST: /* @@ -514,7 +514,7 @@ static void via1_rtc_update(MacVIAState *m) * the only two-byte read command */ trace_via1_rtc_internal_set_cmd(cmd); - m->cmd = cmd; + v1s->cmd = cmd; break; 
default: g_assert_not_reached(); @@ -524,9 +524,9 @@ static void via1_rtc_update(MacVIAState *m) } /* this is a write command, needs a parameter */ - if (cmd == REG_WPROTECT || !m->wprotect) { + if (cmd == REG_WPROTECT || !v1s->wprotect) { trace_via1_rtc_internal_set_cmd(cmd); - m->cmd = cmd; + v1s->cmd = cmd; } else { trace_via1_rtc_internal_ignore_cmd(cmd); } @@ -534,46 +534,47 @@ static void via1_rtc_update(MacVIAState *m) } /* second byte: it's a parameter */ - if (m->alt == REG_EMPTY) { - switch (m->cmd & 0x7f) { + if (v1s->alt == REG_EMPTY) { + switch (v1s->cmd & 0x7f) { case REG_0...REG_3: /* seconds register */ /* FIXME */ - trace_via1_rtc_cmd_seconds_write(m->cmd - REG_0, m->data_out); - m->cmd = REG_EMPTY; + trace_via1_rtc_cmd_seconds_write(v1s->cmd - REG_0, v1s->data_out); + v1s->cmd = REG_EMPTY; break; case REG_TEST: /* device control: nothing to do */ - trace_via1_rtc_cmd_test_write(m->data_out); - m->cmd = REG_EMPTY; + trace_via1_rtc_cmd_test_write(v1s->data_out); + v1s->cmd = REG_EMPTY; break; case REG_WPROTECT: /* Write Protect register */ - trace_via1_rtc_cmd_wprotect_write(m->data_out); - m->wprotect = !!(m->data_out & 0x80); - m->cmd = REG_EMPTY; + trace_via1_rtc_cmd_wprotect_write(v1s->data_out); + v1s->wprotect = !!(v1s->data_out & 0x80); + v1s->cmd = REG_EMPTY; break; case REG_PRAM_ADDR...REG_PRAM_ADDR_LAST: /* PRAM address 0x00 -> 0x13 */ - trace_via1_rtc_cmd_pram_write(m->cmd - REG_PRAM_ADDR, m->data_out); - v1s->PRAM[m->cmd - REG_PRAM_ADDR] = m->data_out; - pram_update(m); - m->cmd = REG_EMPTY; + trace_via1_rtc_cmd_pram_write(v1s->cmd - REG_PRAM_ADDR, + v1s->data_out); + v1s->PRAM[v1s->cmd - REG_PRAM_ADDR] = v1s->data_out; + pram_update(v1s); + v1s->cmd = REG_EMPTY; break; case REG_PRAM_SECT...REG_PRAM_SECT_LAST: - addr = (m->data_out >> 2) & 0x1f; - sector = (m->cmd & 0x7f) - REG_PRAM_SECT; - if (m->cmd & 0x80) { + addr = (v1s->data_out >> 2) & 0x1f; + sector = (v1s->cmd & 0x7f) - REG_PRAM_SECT; + if (v1s->cmd & 0x80) { /* it's a read */ - m->data_in = v1s->PRAM[sector * 32 + addr]; - m->data_in_cnt = 8; + v1s->data_in = v1s->PRAM[sector * 32 + addr]; + v1s->data_in_cnt = 8; trace_via1_rtc_cmd_pram_sect_read(sector, addr, sector * 32 + addr, - m->data_in); - m->cmd = REG_EMPTY; + v1s->data_in); + v1s->cmd = REG_EMPTY; } else { /* it's a write, we need one more parameter */ trace_via1_rtc_internal_set_alt(addr, sector, addr); - m->alt = addr; + v1s->alt = addr; } break; default: @@ -584,22 +585,21 @@ static void via1_rtc_update(MacVIAState *m) } /* third byte: it's the data of a REG_PRAM_SECT write */ - g_assert(REG_PRAM_SECT <= m->cmd && m->cmd <= REG_PRAM_SECT_LAST); - sector = m->cmd - REG_PRAM_SECT; - v1s->PRAM[sector * 32 + m->alt] = m->data_out; - pram_update(m); - trace_via1_rtc_cmd_pram_sect_write(sector, m->alt, sector * 32 + m->alt, - m->data_out); - m->alt = REG_EMPTY; - m->cmd = REG_EMPTY; + g_assert(REG_PRAM_SECT <= v1s->cmd && v1s->cmd <= REG_PRAM_SECT_LAST); + sector = v1s->cmd - REG_PRAM_SECT; + v1s->PRAM[sector * 32 + v1s->alt] = v1s->data_out; + pram_update(v1s); + trace_via1_rtc_cmd_pram_sect_write(sector, v1s->alt, sector * 32 + v1s->alt, + v1s->data_out); + v1s->alt = REG_EMPTY; + v1s->cmd = REG_EMPTY; } static void adb_via_poll(void *opaque) { - MacVIAState *m = opaque; - MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(&m->mos6522_via1); + MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(opaque); MOS6522State *s = MOS6522(v1s); - ADBBusState *adb_bus = &m->adb_bus; + ADBBusState *adb_bus = &v1s->adb_bus; uint8_t obuf[9]; uint8_t *data = &s->sr; int 
olen; @@ -611,50 +611,50 @@ static void adb_via_poll(void *opaque) */ adb_autopoll_block(adb_bus); - if (m->adb_data_in_size > 0 && m->adb_data_in_index == 0) { + if (v1s->adb_data_in_size > 0 && v1s->adb_data_in_index == 0) { /* * For older Linux kernels that switch to IDLE mode after sending the * ADB command, detect if there is an existing response and return that * as a a "fake" autopoll reply or bus timeout accordingly */ - *data = m->adb_data_out[0]; - olen = m->adb_data_in_size; + *data = v1s->adb_data_out[0]; + olen = v1s->adb_data_in_size; s->b &= ~VIA1B_vADBInt; - qemu_irq_raise(m->adb_data_ready); + qemu_irq_raise(v1s->adb_data_ready); } else { /* * Otherwise poll as normal */ - m->adb_data_in_index = 0; - m->adb_data_out_index = 0; + v1s->adb_data_in_index = 0; + v1s->adb_data_out_index = 0; olen = adb_poll(adb_bus, obuf, adb_bus->autopoll_mask); if (olen > 0) { /* Autopoll response */ *data = obuf[0]; olen--; - memcpy(m->adb_data_in, &obuf[1], olen); - m->adb_data_in_size = olen; + memcpy(v1s->adb_data_in, &obuf[1], olen); + v1s->adb_data_in_size = olen; s->b &= ~VIA1B_vADBInt; - qemu_irq_raise(m->adb_data_ready); + qemu_irq_raise(v1s->adb_data_ready); } else { - *data = m->adb_autopoll_cmd; + *data = v1s->adb_autopoll_cmd; obuf[0] = 0xff; obuf[1] = 0xff; olen = 2; - memcpy(m->adb_data_in, obuf, olen); - m->adb_data_in_size = olen; + memcpy(v1s->adb_data_in, obuf, olen); + v1s->adb_data_in_size = olen; s->b &= ~VIA1B_vADBInt; - qemu_irq_raise(m->adb_data_ready); + qemu_irq_raise(v1s->adb_data_ready); } } trace_via1_adb_poll(*data, (s->b & VIA1B_vADBInt) ? "+" : "-", - adb_bus->status, m->adb_data_in_index, olen); + adb_bus->status, v1s->adb_data_in_index, olen); } static int adb_via_send_len(uint8_t data) @@ -687,11 +687,10 @@ static int adb_via_send_len(uint8_t data) } } -static void adb_via_send(MacVIAState *s, int state, uint8_t data) +static void adb_via_send(MOS6522Q800VIA1State *v1s, int state, uint8_t data) { - MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(&s->mos6522_via1); MOS6522State *ms = MOS6522(v1s); - ADBBusState *adb_bus = &s->adb_bus; + ADBBusState *adb_bus = &v1s->adb_bus; uint16_t autopoll_mask; switch (state) { @@ -707,22 +706,22 @@ static void adb_via_send(MacVIAState *s, int state, uint8_t data) ms->b &= ~VIA1B_vADBInt; } else { ms->b |= VIA1B_vADBInt; - s->adb_data_out_index = 0; - s->adb_data_out[s->adb_data_out_index++] = data; + v1s->adb_data_out_index = 0; + v1s->adb_data_out[v1s->adb_data_out_index++] = data; } trace_via1_adb_send(" NEW", data, (ms->b & VIA1B_vADBInt) ? "+" : "-"); - qemu_irq_raise(s->adb_data_ready); + qemu_irq_raise(v1s->adb_data_ready); break; case ADB_STATE_EVEN: case ADB_STATE_ODD: ms->b |= VIA1B_vADBInt; - s->adb_data_out[s->adb_data_out_index++] = data; + v1s->adb_data_out[v1s->adb_data_out_index++] = data; trace_via1_adb_send(state == ADB_STATE_EVEN ? "EVEN" : " ODD", data, (ms->b & VIA1B_vADBInt) ? 
"+" : "-"); - qemu_irq_raise(s->adb_data_ready); + qemu_irq_raise(v1s->adb_data_ready); break; case ADB_STATE_IDLE: @@ -730,40 +729,39 @@ static void adb_via_send(MacVIAState *s, int state, uint8_t data) } /* If the command is complete, execute it */ - if (s->adb_data_out_index == adb_via_send_len(s->adb_data_out[0])) { - s->adb_data_in_size = adb_request(adb_bus, s->adb_data_in, - s->adb_data_out, - s->adb_data_out_index); - s->adb_data_in_index = 0; + if (v1s->adb_data_out_index == adb_via_send_len(v1s->adb_data_out[0])) { + v1s->adb_data_in_size = adb_request(adb_bus, v1s->adb_data_in, + v1s->adb_data_out, + v1s->adb_data_out_index); + v1s->adb_data_in_index = 0; if (adb_bus->status & ADB_STATUS_BUSTIMEOUT) { /* * Bus timeout (but allow first EVEN and ODD byte to indicate * timeout via vADBInt and SRQ status) */ - s->adb_data_in[0] = 0xff; - s->adb_data_in[1] = 0xff; - s->adb_data_in_size = 2; + v1s->adb_data_in[0] = 0xff; + v1s->adb_data_in[1] = 0xff; + v1s->adb_data_in_size = 2; } /* * If last command is TALK, store it for use by autopoll and adjust * the autopoll mask accordingly */ - if ((s->adb_data_out[0] & 0xc) == 0xc) { - s->adb_autopoll_cmd = s->adb_data_out[0]; + if ((v1s->adb_data_out[0] & 0xc) == 0xc) { + v1s->adb_autopoll_cmd = v1s->adb_data_out[0]; - autopoll_mask = 1 << (s->adb_autopoll_cmd >> 4); + autopoll_mask = 1 << (v1s->adb_autopoll_cmd >> 4); adb_set_autopoll_mask(adb_bus, autopoll_mask); } } } -static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) +static void adb_via_receive(MOS6522Q800VIA1State *v1s, int state, uint8_t *data) { - MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(&s->mos6522_via1); MOS6522State *ms = MOS6522(v1s); - ADBBusState *adb_bus = &s->adb_bus; + ADBBusState *adb_bus = &v1s->adb_bus; uint16_t pending; switch (state) { @@ -777,16 +775,16 @@ static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) trace_via1_adb_receive("IDLE", *data, (ms->b & VIA1B_vADBInt) ? "+" : "-", adb_bus->status, - s->adb_data_in_index, s->adb_data_in_size); + v1s->adb_data_in_index, v1s->adb_data_in_size); break; case ADB_STATE_EVEN: case ADB_STATE_ODD: - switch (s->adb_data_in_index) { + switch (v1s->adb_data_in_index) { case 0: /* First EVEN byte: vADBInt indicates bus timeout */ - *data = s->adb_data_in[s->adb_data_in_index]; + *data = v1s->adb_data_in[v1s->adb_data_in_index]; if (adb_bus->status & ADB_STATUS_BUSTIMEOUT) { ms->b &= ~VIA1B_vADBInt; } else { @@ -795,16 +793,16 @@ static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) trace_via1_adb_receive(state == ADB_STATE_EVEN ? "EVEN" : " ODD", *data, (ms->b & VIA1B_vADBInt) ? "+" : "-", - adb_bus->status, s->adb_data_in_index, - s->adb_data_in_size); + adb_bus->status, v1s->adb_data_in_index, + v1s->adb_data_in_size); - s->adb_data_in_index++; + v1s->adb_data_in_index++; break; case 1: /* First ODD byte: vADBInt indicates SRQ */ - *data = s->adb_data_in[s->adb_data_in_index]; - pending = adb_bus->pending & ~(1 << (s->adb_autopoll_cmd >> 4)); + *data = v1s->adb_data_in[v1s->adb_data_in_index]; + pending = adb_bus->pending & ~(1 << (v1s->adb_autopoll_cmd >> 4)); if (pending) { ms->b &= ~VIA1B_vADBInt; } else { @@ -813,10 +811,10 @@ static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) trace_via1_adb_receive(state == ADB_STATE_EVEN ? "EVEN" : " ODD", *data, (ms->b & VIA1B_vADBInt) ? 
"+" : "-", - adb_bus->status, s->adb_data_in_index, - s->adb_data_in_size); + adb_bus->status, v1s->adb_data_in_index, + v1s->adb_data_in_size); - s->adb_data_in_index++; + v1s->adb_data_in_index++; break; default: @@ -826,11 +824,11 @@ static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) * end of the poll reply, so provide these extra bytes below to * keep it happy */ - if (s->adb_data_in_index < s->adb_data_in_size) { + if (v1s->adb_data_in_index < v1s->adb_data_in_size) { /* Next data byte */ - *data = s->adb_data_in[s->adb_data_in_index]; + *data = v1s->adb_data_in[v1s->adb_data_in_index]; ms->b |= VIA1B_vADBInt; - } else if (s->adb_data_in_index == s->adb_data_in_size) { + } else if (v1s->adb_data_in_index == v1s->adb_data_in_size) { if (adb_bus->status & ADB_STATUS_BUSTIMEOUT) { /* Bus timeout (no more data) */ *data = 0xff; @@ -849,23 +847,22 @@ static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) trace_via1_adb_receive(state == ADB_STATE_EVEN ? "EVEN" : " ODD", *data, (ms->b & VIA1B_vADBInt) ? "+" : "-", - adb_bus->status, s->adb_data_in_index, - s->adb_data_in_size); + adb_bus->status, v1s->adb_data_in_index, + v1s->adb_data_in_size); - if (s->adb_data_in_index <= s->adb_data_in_size) { - s->adb_data_in_index++; + if (v1s->adb_data_in_index <= v1s->adb_data_in_size) { + v1s->adb_data_in_index++; } break; } - qemu_irq_raise(s->adb_data_ready); + qemu_irq_raise(v1s->adb_data_ready); break; } } -static void via1_adb_update(MacVIAState *m) +static void via1_adb_update(MOS6522Q800VIA1State *v1s) { - MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(&m->mos6522_via1); MOS6522State *s = MOS6522(v1s); int oldstate, state; @@ -875,14 +872,29 @@ static void via1_adb_update(MacVIAState *m) if (state != oldstate) { if (s->acr & VIA1ACR_vShiftOut) { /* output mode */ - adb_via_send(m, state, s->sr); + adb_via_send(v1s, state, s->sr); } else { /* input mode */ - adb_via_receive(m, state, &s->sr); + adb_via_receive(v1s, state, &s->sr); } } } +static void via1_auxmode_update(MOS6522Q800VIA1State *v1s) +{ + MOS6522State *s = MOS6522(v1s); + int oldirq, irq; + + oldirq = (v1s->last_b & VIA1B_vMystery) ? 1 : 0; + irq = (s->b & VIA1B_vMystery) ? 
1 : 0; + + /* Check to see if the A/UX mode bit has changed */ + if (irq != oldirq) { + trace_via1_auxmode(irq); + qemu_set_irq(v1s->auxmode_irq, irq); + } +} + static uint64_t mos6522_q800_via1_read(void *opaque, hwaddr addr, unsigned size) { MOS6522Q800VIA1State *s = MOS6522_Q800_VIA1(opaque); @@ -896,7 +908,6 @@ static void mos6522_q800_via1_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(opaque); - MacVIAState *m = container_of(v1s, MacVIAState, mos6522_via1); MOS6522State *ms = MOS6522(v1s); addr = (addr >> 9) & 0xf; @@ -904,8 +915,9 @@ static void mos6522_q800_via1_write(void *opaque, hwaddr addr, uint64_t val, switch (addr) { case VIA_REG_B: - via1_rtc_update(m); - via1_adb_update(m); + via1_rtc_update(v1s); + via1_adb_update(v1s); + via1_auxmode_update(v1s); v1s->last_b = ms->b; break; @@ -951,216 +963,158 @@ static const MemoryRegionOps mos6522_q800_via2_ops = { }, }; -static void mac_via_reset(DeviceState *dev) +static void via1_postload_update_cb(void *opaque, bool running, RunState state) { - MacVIAState *m = MAC_VIA(dev); - ADBBusState *adb_bus = &m->adb_bus; + MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(opaque); - adb_set_autopoll_enabled(adb_bus, true); + qemu_del_vm_change_state_handler(v1s->vmstate); + v1s->vmstate = NULL; - m->cmd = REG_EMPTY; - m->alt = REG_EMPTY; + pram_update(v1s); } -static void mac_via_realize(DeviceState *dev, Error **errp) +static int via1_post_load(void *opaque, int version_id) { - MacVIAState *m = MAC_VIA(dev); - MOS6522State *ms; - ADBBusState *adb_bus = &m->adb_bus; + MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(opaque); + + if (v1s->blk) { + v1s->vmstate = qemu_add_vm_change_state_handler( + via1_postload_update_cb, v1s); + } + + return 0; +} + +/* VIA 1 */ +static void mos6522_q800_via1_reset(DeviceState *dev) +{ + MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(dev); + MOS6522State *ms = MOS6522(v1s); + MOS6522DeviceClass *mdc = MOS6522_GET_CLASS(ms); + ADBBusState *adb_bus = &v1s->adb_bus; + + mdc->parent_reset(dev); + + ms->timers[0].frequency = VIA_TIMER_FREQ; + ms->timers[1].frequency = VIA_TIMER_FREQ; + + ms->b = VIA1B_vADB_StateMask | VIA1B_vADBInt | VIA1B_vRTCEnb; + + /* ADB/RTC */ + adb_set_autopoll_enabled(adb_bus, true); + v1s->cmd = REG_EMPTY; + v1s->alt = REG_EMPTY; +} + +static void mos6522_q800_via1_realize(DeviceState *dev, Error **errp) +{ + MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(dev); + ADBBusState *adb_bus = &v1s->adb_bus; struct tm tm; int ret; - /* Init VIAs 1 and 2 */ - object_initialize_child(OBJECT(dev), "via1", &m->mos6522_via1, - TYPE_MOS6522_Q800_VIA1); - - object_initialize_child(OBJECT(dev), "via2", &m->mos6522_via2, - TYPE_MOS6522_Q800_VIA2); - - /* Pass through mos6522 output IRQs */ - ms = MOS6522(&m->mos6522_via1); - object_property_add_alias(OBJECT(dev), "irq[0]", OBJECT(ms), - SYSBUS_DEVICE_GPIO_IRQ "[0]"); - ms = MOS6522(&m->mos6522_via2); - object_property_add_alias(OBJECT(dev), "irq[1]", OBJECT(ms), - SYSBUS_DEVICE_GPIO_IRQ "[0]"); - - sysbus_realize(SYS_BUS_DEVICE(&m->mos6522_via1), &error_abort); - sysbus_realize(SYS_BUS_DEVICE(&m->mos6522_via2), &error_abort); - - /* Pass through mos6522 input IRQs */ - qdev_pass_gpios(DEVICE(&m->mos6522_via1), dev, "via1-irq"); - qdev_pass_gpios(DEVICE(&m->mos6522_via2), dev, "via2-irq"); - - /* VIA 1 */ - m->mos6522_via1.one_second_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, - via1_one_second, - &m->mos6522_via1); - via1_one_second_update(&m->mos6522_via1); - m->mos6522_via1.sixty_hz_timer = 
timer_new_ns(QEMU_CLOCK_VIRTUAL, - via1_sixty_hz, - &m->mos6522_via1); - via1_sixty_hz_update(&m->mos6522_via1); + v1s->one_second_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, via1_one_second, + v1s); + via1_one_second_update(v1s); + v1s->sixty_hz_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, via1_sixty_hz, + v1s); + via1_sixty_hz_update(v1s); qemu_get_timedate(&tm, 0); - m->tick_offset = (uint32_t)mktimegm(&tm) + RTC_OFFSET; + v1s->tick_offset = (uint32_t)mktimegm(&tm) + RTC_OFFSET; - adb_register_autopoll_callback(adb_bus, adb_via_poll, m); - m->adb_data_ready = qdev_get_gpio_in_named(dev, "via1-irq", - VIA1_IRQ_ADB_READY_BIT); + adb_register_autopoll_callback(adb_bus, adb_via_poll, v1s); + v1s->adb_data_ready = qdev_get_gpio_in(dev, VIA1_IRQ_ADB_READY_BIT); - if (m->blk) { - int64_t len = blk_getlength(m->blk); + if (v1s->blk) { + int64_t len = blk_getlength(v1s->blk); if (len < 0) { error_setg_errno(errp, -len, "could not get length of backing image"); return; } - ret = blk_set_perm(m->blk, + ret = blk_set_perm(v1s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, BLK_PERM_ALL, errp); if (ret < 0) { return; } - len = blk_pread(m->blk, 0, m->mos6522_via1.PRAM, - sizeof(m->mos6522_via1.PRAM)); - if (len != sizeof(m->mos6522_via1.PRAM)) { + len = blk_pread(v1s->blk, 0, v1s->PRAM, sizeof(v1s->PRAM)); + if (len != sizeof(v1s->PRAM)) { error_setg(errp, "can't read PRAM contents"); return; } } } -static void mac_via_init(Object *obj) +static void mos6522_q800_via1_init(Object *obj) { - SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - MacVIAState *m = MAC_VIA(obj); - - /* MMIO */ - memory_region_init(&m->mmio, obj, "mac-via", 2 * VIA_SIZE); - sysbus_init_mmio(sbd, &m->mmio); - - memory_region_init_io(&m->via1mem, obj, &mos6522_q800_via1_ops, - &m->mos6522_via1, "via1", VIA_SIZE); - memory_region_add_subregion(&m->mmio, 0x0, &m->via1mem); + MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(v1s); - memory_region_init_io(&m->via2mem, obj, &mos6522_q800_via2_ops, - &m->mos6522_via2, "via2", VIA_SIZE); - memory_region_add_subregion(&m->mmio, VIA_SIZE, &m->via2mem); + memory_region_init_io(&v1s->via_mem, obj, &mos6522_q800_via1_ops, v1s, + "via1", VIA_SIZE); + sysbus_init_mmio(sbd, &v1s->via_mem); /* ADB */ - qbus_create_inplace((BusState *)&m->adb_bus, sizeof(m->adb_bus), - TYPE_ADB_BUS, DEVICE(obj), "adb.0"); -} - -static void postload_update_cb(void *opaque, bool running, RunState state) -{ - MacVIAState *m = MAC_VIA(opaque); + qbus_init((BusState *)&v1s->adb_bus, sizeof(v1s->adb_bus), + TYPE_ADB_BUS, DEVICE(v1s), "adb.0"); - qemu_del_vm_change_state_handler(m->vmstate); - m->vmstate = NULL; + qdev_init_gpio_in(DEVICE(obj), via1_irq_request, VIA1_IRQ_NB); - pram_update(m); + /* A/UX mode */ + qdev_init_gpio_out(DEVICE(obj), &v1s->auxmode_irq, 1); } -static int mac_via_post_load(void *opaque, int version_id) -{ - MacVIAState *m = MAC_VIA(opaque); - - if (m->blk) { - m->vmstate = qemu_add_vm_change_state_handler(postload_update_cb, - m); - } - - return 0; -} - -static const VMStateDescription vmstate_mac_via = { - .name = "mac-via", - .version_id = 2, - .minimum_version_id = 2, - .post_load = mac_via_post_load, +static const VMStateDescription vmstate_q800_via1 = { + .name = "q800-via1", + .version_id = 0, + .minimum_version_id = 0, + .post_load = via1_post_load, .fields = (VMStateField[]) { - /* VIAs */ - VMSTATE_STRUCT(mos6522_via1.parent_obj, MacVIAState, 0, vmstate_mos6522, - MOS6522State), - VMSTATE_UINT8(mos6522_via1.last_b, MacVIAState), - 
VMSTATE_BUFFER(mos6522_via1.PRAM, MacVIAState), - VMSTATE_TIMER_PTR(mos6522_via1.one_second_timer, MacVIAState), - VMSTATE_INT64(mos6522_via1.next_second, MacVIAState), - VMSTATE_TIMER_PTR(mos6522_via1.sixty_hz_timer, MacVIAState), - VMSTATE_INT64(mos6522_via1.next_sixty_hz, MacVIAState), - VMSTATE_STRUCT(mos6522_via2.parent_obj, MacVIAState, 0, vmstate_mos6522, + VMSTATE_STRUCT(parent_obj, MOS6522Q800VIA1State, 0, vmstate_mos6522, MOS6522State), + VMSTATE_UINT8(last_b, MOS6522Q800VIA1State), /* RTC */ - VMSTATE_UINT32(tick_offset, MacVIAState), - VMSTATE_UINT8(data_out, MacVIAState), - VMSTATE_INT32(data_out_cnt, MacVIAState), - VMSTATE_UINT8(data_in, MacVIAState), - VMSTATE_UINT8(data_in_cnt, MacVIAState), - VMSTATE_UINT8(cmd, MacVIAState), - VMSTATE_INT32(wprotect, MacVIAState), - VMSTATE_INT32(alt, MacVIAState), + VMSTATE_BUFFER(PRAM, MOS6522Q800VIA1State), + VMSTATE_UINT32(tick_offset, MOS6522Q800VIA1State), + VMSTATE_UINT8(data_out, MOS6522Q800VIA1State), + VMSTATE_INT32(data_out_cnt, MOS6522Q800VIA1State), + VMSTATE_UINT8(data_in, MOS6522Q800VIA1State), + VMSTATE_UINT8(data_in_cnt, MOS6522Q800VIA1State), + VMSTATE_UINT8(cmd, MOS6522Q800VIA1State), + VMSTATE_INT32(wprotect, MOS6522Q800VIA1State), + VMSTATE_INT32(alt, MOS6522Q800VIA1State), /* ADB */ - VMSTATE_INT32(adb_data_in_size, MacVIAState), - VMSTATE_INT32(adb_data_in_index, MacVIAState), - VMSTATE_INT32(adb_data_out_index, MacVIAState), - VMSTATE_BUFFER(adb_data_in, MacVIAState), - VMSTATE_BUFFER(adb_data_out, MacVIAState), - VMSTATE_UINT8(adb_autopoll_cmd, MacVIAState), + VMSTATE_INT32(adb_data_in_size, MOS6522Q800VIA1State), + VMSTATE_INT32(adb_data_in_index, MOS6522Q800VIA1State), + VMSTATE_INT32(adb_data_out_index, MOS6522Q800VIA1State), + VMSTATE_BUFFER(adb_data_in, MOS6522Q800VIA1State), + VMSTATE_BUFFER(adb_data_out, MOS6522Q800VIA1State), + VMSTATE_UINT8(adb_autopoll_cmd, MOS6522Q800VIA1State), + /* Timers */ + VMSTATE_TIMER_PTR(one_second_timer, MOS6522Q800VIA1State), + VMSTATE_INT64(next_second, MOS6522Q800VIA1State), + VMSTATE_TIMER_PTR(sixty_hz_timer, MOS6522Q800VIA1State), + VMSTATE_INT64(next_sixty_hz, MOS6522Q800VIA1State), VMSTATE_END_OF_LIST() } }; -static Property mac_via_properties[] = { - DEFINE_PROP_DRIVE("drive", MacVIAState, blk), +static Property mos6522_q800_via1_properties[] = { + DEFINE_PROP_DRIVE("drive", MOS6522Q800VIA1State, blk), DEFINE_PROP_END_OF_LIST(), }; -static void mac_via_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - - dc->realize = mac_via_realize; - dc->reset = mac_via_reset; - dc->vmsd = &vmstate_mac_via; - device_class_set_props(dc, mac_via_properties); -} - -static TypeInfo mac_via_info = { - .name = TYPE_MAC_VIA, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MacVIAState), - .instance_init = mac_via_init, - .class_init = mac_via_class_init, -}; - -/* VIA 1 */ -static void mos6522_q800_via1_reset(DeviceState *dev) -{ - MOS6522State *ms = MOS6522(dev); - MOS6522DeviceClass *mdc = MOS6522_GET_CLASS(ms); - - mdc->parent_reset(dev); - - ms->timers[0].frequency = VIA_TIMER_FREQ; - ms->timers[1].frequency = VIA_TIMER_FREQ; - - ms->b = VIA1B_vADB_StateMask | VIA1B_vADBInt | VIA1B_vRTCEnb; -} - -static void mos6522_q800_via1_init(Object *obj) -{ - qdev_init_gpio_in_named(DEVICE(obj), via1_irq_request, "via1-irq", - VIA1_IRQ_NB); -} - static void mos6522_q800_via1_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); + dc->realize = mos6522_q800_via1_realize; dc->reset = mos6522_q800_via1_reset; + dc->vmsd = 
&vmstate_q800_via1; + device_class_set_props(dc, mos6522_q800_via1_properties); } static const TypeInfo mos6522_q800_via1_type_info = { @@ -1192,20 +1146,61 @@ static void mos6522_q800_via2_reset(DeviceState *dev) ms->dirb = 0; ms->b = 0; + ms->dira = 0; + ms->a = 0x7f; +} + +static void via2_nubus_irq_request(void *opaque, int irq, int level) +{ + MOS6522Q800VIA2State *v2s = opaque; + MOS6522State *s = MOS6522(v2s); + MOS6522DeviceClass *mdc = MOS6522_GET_CLASS(s); + + if (level) { + /* Port A nubus IRQ inputs are active LOW */ + s->a &= ~(1 << irq); + s->ifr |= 1 << VIA2_IRQ_NUBUS_BIT; + } else { + s->a |= (1 << irq); + s->ifr &= ~(1 << VIA2_IRQ_NUBUS_BIT); + } + + mdc->update_irq(s); } static void mos6522_q800_via2_init(Object *obj) { - qdev_init_gpio_in_named(DEVICE(obj), via2_irq_request, "via2-irq", - VIA2_IRQ_NB); + MOS6522Q800VIA2State *v2s = MOS6522_Q800_VIA2(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(v2s); + + memory_region_init_io(&v2s->via_mem, obj, &mos6522_q800_via2_ops, v2s, + "via2", VIA_SIZE); + sysbus_init_mmio(sbd, &v2s->via_mem); + + qdev_init_gpio_in(DEVICE(obj), via2_irq_request, VIA2_IRQ_NB); + + qdev_init_gpio_in_named(DEVICE(obj), via2_nubus_irq_request, "nubus-irq", + VIA2_NUBUS_IRQ_NB); } +static const VMStateDescription vmstate_q800_via2 = { + .name = "q800-via2", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(parent_obj, MOS6522Q800VIA2State, 0, vmstate_mos6522, + MOS6522State), + VMSTATE_END_OF_LIST() + } +}; + static void mos6522_q800_via2_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); MOS6522DeviceClass *mdc = MOS6522_CLASS(oc); dc->reset = mos6522_q800_via2_reset; + dc->vmsd = &vmstate_q800_via2; mdc->portB_write = mos6522_q800_via2_portB_write; } @@ -1221,7 +1216,6 @@ static void mac_via_register_types(void) { type_register_static(&mos6522_q800_via1_type_info); type_register_static(&mos6522_q800_via2_type_info); - type_register_static(&mac_via_info); } type_init(mac_via_register_types); diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c index edbd4186b2a..e917a6a095b 100644 --- a/hw/misc/macio/cuda.c +++ b/hw/misc/macio/cuda.c @@ -553,8 +553,8 @@ static void cuda_init(Object *obj) memory_region_init_io(&s->mem, obj, &mos6522_cuda_ops, s, "cuda", 0x2000); sysbus_init_mmio(sbd, &s->mem); - qbus_create_inplace(&s->adb_bus, sizeof(s->adb_bus), TYPE_ADB_BUS, - DEVICE(obj), "adb.0"); + qbus_init(&s->adb_bus, sizeof(s->adb_bus), TYPE_ADB_BUS, + DEVICE(obj), "adb.0"); } static Property cuda_properties[] = { diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c index e6eeb575d53..c1fad43f6c6 100644 --- a/hw/misc/macio/macio.c +++ b/hw/misc/macio/macio.c @@ -35,7 +35,6 @@ #include "hw/char/escc.h" #include "hw/misc/macio/macio.h" #include "hw/intc/heathrow_pic.h" -#include "sysemu/sysemu.h" #include "trace.h" /* Note: this code is strongly inspirated from the corresponding code @@ -388,8 +387,8 @@ static void macio_instance_init(Object *obj) memory_region_init(&s->bar, obj, "macio", 0x80000); - qbus_create_inplace(&s->macio_bus, sizeof(s->macio_bus), TYPE_MACIO_BUS, - DEVICE(obj), "macio.0"); + qbus_init(&s->macio_bus, sizeof(s->macio_bus), TYPE_MACIO_BUS, + DEVICE(obj), "macio.0"); object_initialize_child(OBJECT(s), "dbdma", &s->dbdma, TYPE_MAC_DBDMA); diff --git a/hw/misc/macio/pmu.c b/hw/misc/macio/pmu.c index 71924d4768c..eb39c64694a 100644 --- a/hw/misc/macio/pmu.c +++ b/hw/misc/macio/pmu.c @@ -718,6 +718,7 @@ static const VMStateDescription vmstate_pmu = { }, .subsections 
= (const VMStateDescription * []) { &vmstate_pmu_adb, + NULL } }; @@ -754,8 +755,8 @@ static void pmu_realize(DeviceState *dev, Error **errp) timer_mod(s->one_sec_timer, s->one_sec_target); if (s->has_adb) { - qbus_create_inplace(&s->adb_bus, sizeof(s->adb_bus), TYPE_ADB_BUS, - dev, "adb.0"); + qbus_init(&s->adb_bus, sizeof(s->adb_bus), TYPE_ADB_BUS, + dev, "adb.0"); adb_register_autopoll_callback(adb_bus, pmu_adb_poll, s); } } diff --git a/hw/misc/macio/trace-events b/hw/misc/macio/trace-events index e4a1cc0d246..ad4b9d1c08e 100644 --- a/hw/misc/macio/trace-events +++ b/hw/misc/macio/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # cuda.c cuda_delay_set_sr_int(void) "" diff --git a/hw/misc/mchp_pfsoc_dmc.c b/hw/misc/mchp_pfsoc_dmc.c index 15cf3d77252..43d8e970abc 100644 --- a/hw/misc/mchp_pfsoc_dmc.c +++ b/hw/misc/mchp_pfsoc_dmc.c @@ -24,7 +24,6 @@ #include "qemu/bitops.h" #include "qemu/log.h" #include "qapi/error.h" -#include "hw/hw.h" #include "hw/sysbus.h" #include "hw/misc/mchp_pfsoc_dmc.h" diff --git a/hw/misc/mchp_pfsoc_ioscb.c b/hw/misc/mchp_pfsoc_ioscb.c index 8b0d1cacd7e..f4fd55a0e5c 100644 --- a/hw/misc/mchp_pfsoc_ioscb.c +++ b/hw/misc/mchp_pfsoc_ioscb.c @@ -24,7 +24,6 @@ #include "qemu/bitops.h" #include "qemu/log.h" #include "qapi/error.h" -#include "hw/hw.h" #include "hw/sysbus.h" #include "hw/misc/mchp_pfsoc_ioscb.h" diff --git a/hw/misc/mchp_pfsoc_sysreg.c b/hw/misc/mchp_pfsoc_sysreg.c index 248a3133455..89571eded53 100644 --- a/hw/misc/mchp_pfsoc_sysreg.c +++ b/hw/misc/mchp_pfsoc_sysreg.c @@ -24,7 +24,6 @@ #include "qemu/bitops.h" #include "qemu/log.h" #include "qapi/error.h" -#include "hw/hw.h" #include "hw/sysbus.h" #include "hw/misc/mchp_pfsoc_sysreg.h" diff --git a/hw/misc/meson.build b/hw/misc/meson.build index 21034dc60a8..3f41a3a5b27 100644 --- a/hw/misc/meson.build +++ b/hw/misc/meson.build @@ -3,13 +3,9 @@ softmmu_ss.add(when: 'CONFIG_EDU', if_true: files('edu.c')) softmmu_ss.add(when: 'CONFIG_FW_CFG_DMA', if_true: files('vmcoreinfo.c')) softmmu_ss.add(when: 'CONFIG_ISA_DEBUG', if_true: files('debugexit.c')) softmmu_ss.add(when: 'CONFIG_ISA_TESTDEV', if_true: files('pc-testdev.c')) -softmmu_ss.add(when: 'CONFIG_MAX111X', if_true: files('max111x.c')) softmmu_ss.add(when: 'CONFIG_PCA9552', if_true: files('pca9552.c')) softmmu_ss.add(when: 'CONFIG_PCI_TESTDEV', if_true: files('pci-testdev.c')) softmmu_ss.add(when: 'CONFIG_SGA', if_true: files('sga.c')) -softmmu_ss.add(when: 'CONFIG_TMP105', if_true: files('tmp105.c')) -softmmu_ss.add(when: 'CONFIG_TMP421', if_true: files('tmp421.c')) -softmmu_ss.add(when: 'CONFIG_EMC141X', if_true: files('emc141x.c')) softmmu_ss.add(when: 'CONFIG_UNIMP', if_true: files('unimp.c')) softmmu_ss.add(when: 'CONFIG_EMPTY_SLOT', if_true: files('empty_slot.c')) softmmu_ss.add(when: 'CONFIG_LED', if_true: files('led.c')) @@ -21,6 +17,8 @@ softmmu_ss.add(when: 'CONFIG_INTEGRATOR_DEBUG', if_true: files('arm_integrator_d softmmu_ss.add(when: 'CONFIG_A9SCU', if_true: files('a9scu.c')) softmmu_ss.add(when: 'CONFIG_ARM11SCU', if_true: files('arm11scu.c')) +softmmu_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m_ras.c')) + # Mac devices softmmu_ss.add(when: 'CONFIG_MOS6522', if_true: files('mos6522.c')) @@ -36,9 +34,6 @@ softmmu_ss.add(when: 'CONFIG_SIFIVE_E_PRCI', if_true: files('sifive_e_prci.c')) softmmu_ss.add(when: 'CONFIG_SIFIVE_U_OTP', if_true: files('sifive_u_otp.c')) softmmu_ss.add(when: 'CONFIG_SIFIVE_U_PRCI', if_true: 
files('sifive_u_prci.c')) -# PKUnity SoC devices -softmmu_ss.add(when: 'CONFIG_PUV3', if_true: files('puv3_pm.c')) - subdir('macio') softmmu_ss.add(when: 'CONFIG_IVSHMEM_DEVICE', if_true: files('ivshmem.c')) @@ -63,7 +58,6 @@ softmmu_ss.add(when: 'CONFIG_IMX', if_true: files( 'imx_ccm.c', 'imx_rngc.c', )) -softmmu_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-hpdmc.c', 'milkymist-pfpu.c')) softmmu_ss.add(when: 'CONFIG_MAINSTONE', if_true: files('mst_fpga.c')) softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files( 'npcm7xx_clk.c', @@ -86,9 +80,10 @@ softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files( 'bcm2835_rng.c', 'bcm2835_thermal.c', 'bcm2835_cprman.c', + 'bcm2835_powermgt.c', )) softmmu_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_misc.c')) -softmmu_ss.add(when: 'CONFIG_ZYNQ', if_true: files('zynq_slcr.c', 'zynq-xadc.c')) +softmmu_ss.add(when: 'CONFIG_ZYNQ', if_true: files('zynq_slcr.c')) softmmu_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files('xlnx-versal-xramc.c')) softmmu_ss.add(when: 'CONFIG_STM32F2XX_SYSCFG', if_true: files('stm32f2xx_syscfg.c')) softmmu_ss.add(when: 'CONFIG_STM32F4XX_SYSCFG', if_true: files('stm32f4xx_syscfg.c')) @@ -109,6 +104,7 @@ softmmu_ss.add(when: 'CONFIG_PVPANIC_ISA', if_true: files('pvpanic-isa.c')) softmmu_ss.add(when: 'CONFIG_PVPANIC_PCI', if_true: files('pvpanic-pci.c')) softmmu_ss.add(when: 'CONFIG_AUX', if_true: files('auxbus.c')) softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files( + 'aspeed_hace.c', 'aspeed_lpc.c', 'aspeed_scu.c', 'aspeed_sdmc.c', diff --git a/hw/misc/milkymist-hpdmc.c b/hw/misc/milkymist-hpdmc.c deleted file mode 100644 index 09a3875f02c..00000000000 --- a/hw/misc/milkymist-hpdmc.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * QEMU model of the Milkymist High Performance Dynamic Memory Controller. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/hpdmc.pdf - */ - -#include "qemu/osdep.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -enum { - R_SYSTEM = 0, - R_BYPASS, - R_TIMING, - R_IODELAY, - R_MAX -}; - -enum { - IODELAY_DQSDELAY_RDY = (1<<5), - IODELAY_PLL1_LOCKED = (1<<6), - IODELAY_PLL2_LOCKED = (1<<7), -}; - -#define TYPE_MILKYMIST_HPDMC "milkymist-hpdmc" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistHpdmcState, MILKYMIST_HPDMC) - -struct MilkymistHpdmcState { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - - uint32_t regs[R_MAX]; -}; - -static uint64_t hpdmc_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistHpdmcState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_SYSTEM: - case R_BYPASS: - case R_TIMING: - case R_IODELAY: - r = s->regs[addr]; - break; - - default: - error_report("milkymist_hpdmc: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_hpdmc_memory_read(addr << 2, r); - - return r; -} - -static void hpdmc_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistHpdmcState *s = opaque; - - trace_milkymist_hpdmc_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_SYSTEM: - case R_BYPASS: - case R_TIMING: - s->regs[addr] = value; - break; - case R_IODELAY: - /* ignore writes */ - break; - - default: - error_report("milkymist_hpdmc: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } -} - -static const MemoryRegionOps hpdmc_mmio_ops = { - .read = hpdmc_read, - .write = hpdmc_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void milkymist_hpdmc_reset(DeviceState *d) -{ - MilkymistHpdmcState *s = MILKYMIST_HPDMC(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - - /* defaults */ - s->regs[R_IODELAY] = IODELAY_DQSDELAY_RDY | IODELAY_PLL1_LOCKED - | IODELAY_PLL2_LOCKED; -} - -static void milkymist_hpdmc_realize(DeviceState *dev, Error **errp) -{ - MilkymistHpdmcState *s = MILKYMIST_HPDMC(dev); - - memory_region_init_io(&s->regs_region, OBJECT(dev), &hpdmc_mmio_ops, s, - "milkymist-hpdmc", R_MAX * 4); - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->regs_region); -} - -static const VMStateDescription vmstate_milkymist_hpdmc = { - .name = "milkymist-hpdmc", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistHpdmcState, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static void milkymist_hpdmc_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_hpdmc_realize; - dc->reset = milkymist_hpdmc_reset; - dc->vmsd = &vmstate_milkymist_hpdmc; -} - -static const TypeInfo milkymist_hpdmc_info = { - .name = TYPE_MILKYMIST_HPDMC, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistHpdmcState), - .class_init = milkymist_hpdmc_class_init, -}; - -static void milkymist_hpdmc_register_types(void) -{ - type_register_static(&milkymist_hpdmc_info); -} - -type_init(milkymist_hpdmc_register_types) diff --git a/hw/misc/milkymist-pfpu.c b/hw/misc/milkymist-pfpu.c deleted file mode 100644 index e4ee209c101..00000000000 --- a/hw/misc/milkymist-pfpu.c +++ /dev/null @@ -1,548 +0,0 @@ -/* - * QEMU model of the Milkymist programmable FPU. 
- * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/pfpu.pdf - * - */ - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "qemu/log.h" -#include "qemu/module.h" -#include "qemu/error-report.h" -#include -#include "qom/object.h" - -/* #define TRACE_EXEC */ - -#ifdef TRACE_EXEC -# define D_EXEC(x) x -#else -# define D_EXEC(x) -#endif - -enum { - R_CTL = 0, - R_MESHBASE, - R_HMESHLAST, - R_VMESHLAST, - R_CODEPAGE, - R_VERTICES, - R_COLLISIONS, - R_STRAYWRITES, - R_LASTDMA, - R_PC, - R_DREGBASE, - R_CODEBASE, - R_MAX -}; - -enum { - CTL_START_BUSY = (1<<0), -}; - -enum { - OP_NOP = 0, - OP_FADD, - OP_FSUB, - OP_FMUL, - OP_FABS, - OP_F2I, - OP_I2F, - OP_VECTOUT, - OP_SIN, - OP_COS, - OP_ABOVE, - OP_EQUAL, - OP_COPY, - OP_IF, - OP_TSIGN, - OP_QUAKE, -}; - -enum { - GPR_X = 0, - GPR_Y = 1, - GPR_FLAGS = 2, -}; - -enum { - LATENCY_FADD = 5, - LATENCY_FSUB = 5, - LATENCY_FMUL = 7, - LATENCY_FABS = 2, - LATENCY_F2I = 2, - LATENCY_I2F = 3, - LATENCY_VECTOUT = 0, - LATENCY_SIN = 4, - LATENCY_COS = 4, - LATENCY_ABOVE = 2, - LATENCY_EQUAL = 2, - LATENCY_COPY = 2, - LATENCY_IF = 2, - LATENCY_TSIGN = 2, - LATENCY_QUAKE = 2, - MAX_LATENCY = 7 -}; - -#define GPR_BEGIN 0x100 -#define GPR_END 0x17f -#define MICROCODE_BEGIN 0x200 -#define MICROCODE_END 0x3ff -#define MICROCODE_WORDS 2048 - -#define REINTERPRET_CAST(type, val) (*((type *)&(val))) - -#ifdef TRACE_EXEC -static const char *opcode_to_str[] = { - "NOP", "FADD", "FSUB", "FMUL", "FABS", "F2I", "I2F", "VECTOUT", - "SIN", "COS", "ABOVE", "EQUAL", "COPY", "IF", "TSIGN", "QUAKE", -}; -#endif - -#define TYPE_MILKYMIST_PFPU "milkymist-pfpu" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistPFPUState, MILKYMIST_PFPU) - -struct MilkymistPFPUState { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - Chardev *chr; - qemu_irq irq; - - uint32_t regs[R_MAX]; - uint32_t gp_regs[128]; - uint32_t microcode[MICROCODE_WORDS]; - - int output_queue_pos; - uint32_t output_queue[MAX_LATENCY]; -}; - -static inline uint32_t -get_dma_address(uint32_t base, uint32_t x, uint32_t y) -{ - return base + 8 * (128 * y + x); -} - -static inline void -output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos) -{ - s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val; -} - -static inline uint32_t -output_queue_remove(MilkymistPFPUState *s) -{ - return s->output_queue[s->output_queue_pos]; -} - -static inline void -output_queue_advance(MilkymistPFPUState *s) -{ - s->output_queue[s->output_queue_pos] = 0; - s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY; -} - -static int pfpu_decode_insn(MilkymistPFPUState *s) -{ - uint32_t pc = s->regs[R_PC]; - uint32_t insn = s->microcode[pc]; - uint32_t reg_a = (insn >> 18) & 0x7f; - uint32_t reg_b = (insn >> 11) & 0x7f; - uint32_t op = 
(insn >> 7) & 0xf; - uint32_t reg_d = insn & 0x7f; - uint32_t r = 0; - int latency = 0; - - switch (op) { - case OP_NOP: - break; - case OP_FADD: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); - float t = a + b; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_FADD; - D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); - } break; - case OP_FSUB: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); - float t = a - b; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_FSUB; - D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); - } break; - case OP_FMUL: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); - float t = a * b; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_FMUL; - D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); - } break; - case OP_FABS: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float t = fabsf(a); - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_FABS; - D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r)); - } break; - case OP_F2I: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - int32_t t = a; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_F2I; - D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r)); - } break; - case OP_I2F: - { - int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]); - float t = a; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_I2F; - D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r)); - } break; - case OP_VECTOUT: - { - uint32_t a = cpu_to_be32(s->gp_regs[reg_a]); - uint32_t b = cpu_to_be32(s->gp_regs[reg_b]); - hwaddr dma_ptr = - get_dma_address(s->regs[R_MESHBASE], - s->gp_regs[GPR_X], s->gp_regs[GPR_Y]); - cpu_physical_memory_write(dma_ptr, &a, 4); - cpu_physical_memory_write(dma_ptr + 4, &b, 4); - s->regs[R_LASTDMA] = dma_ptr + 4; - D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr)); - trace_milkymist_pfpu_vectout(a, b, dma_ptr); - } break; - case OP_SIN: - { - int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]); - float t = sinf(a * (1.0f / (M_PI * 4096.0f))); - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_SIN; - D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r)); - } break; - case OP_COS: - { - int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]); - float t = cosf(a * (1.0f / (M_PI * 4096.0f))); - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_COS; - D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r)); - } break; - case OP_ABOVE: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); - float t = (a > b) ? 1.0f : 0.0f; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_ABOVE; - D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); - } break; - case OP_EQUAL: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); - float t = (a == b) ? 
1.0f : 0.0f; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_EQUAL; - D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); - } break; - case OP_COPY: - { - r = s->gp_regs[reg_a]; - latency = LATENCY_COPY; - D_EXEC(qemu_log("COPY")); - } break; - case OP_IF: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); - uint32_t f = s->gp_regs[GPR_FLAGS]; - float t = (f != 0) ? a : b; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_IF; - D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r)); - } break; - case OP_TSIGN: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); - float t = (b < 0) ? -a : a; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_TSIGN; - D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); - } break; - case OP_QUAKE: - { - uint32_t a = s->gp_regs[reg_a]; - r = 0x5f3759df - (a >> 1); - latency = LATENCY_QUAKE; - D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r)); - } break; - - default: - error_report("milkymist_pfpu: unknown opcode %d", op); - break; - } - - if (!reg_d) { - D_EXEC(qemu_log("%04d %8s R%03d, R%03d \n", - s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency, - s->regs[R_PC] + latency)); - } else { - D_EXEC(qemu_log("%04d %8s R%03d, R%03d -> R%03d\n", - s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency, - s->regs[R_PC] + latency, reg_d)); - } - - if (op == OP_VECTOUT) { - return 0; - } - - /* store output for this cycle */ - if (reg_d) { - uint32_t val = output_queue_remove(s); - D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val)); - s->gp_regs[reg_d] = val; - } - - output_queue_advance(s); - - /* store op output */ - if (op != OP_NOP) { - output_queue_insert(s, r, latency-1); - } - - /* advance PC */ - s->regs[R_PC]++; - - return 1; -}; - -static void pfpu_start(MilkymistPFPUState *s) -{ - int x, y; - int i; - - for (y = 0; y <= s->regs[R_VMESHLAST]; y++) { - for (x = 0; x <= s->regs[R_HMESHLAST]; x++) { - D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y)); - - /* set current position */ - s->gp_regs[GPR_X] = x; - s->gp_regs[GPR_Y] = y; - - /* run microcode on this position */ - i = 0; - while (pfpu_decode_insn(s)) { - /* decode at most MICROCODE_WORDS instructions */ - if (++i >= MICROCODE_WORDS) { - error_report("milkymist_pfpu: too many instructions " - "executed in microcode. No VECTOUT?"); - break; - } - } - - /* reset pc for next run */ - s->regs[R_PC] = 0; - } - } - - s->regs[R_VERTICES] = x * y; - - trace_milkymist_pfpu_pulse_irq(); - qemu_irq_pulse(s->irq); -} - -static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr) -{ - return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN; -} - -static uint64_t pfpu_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistPFPUState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_CTL: - case R_MESHBASE: - case R_HMESHLAST: - case R_VMESHLAST: - case R_CODEPAGE: - case R_VERTICES: - case R_COLLISIONS: - case R_STRAYWRITES: - case R_LASTDMA: - case R_PC: - case R_DREGBASE: - case R_CODEBASE: - r = s->regs[addr]; - break; - case GPR_BEGIN ... GPR_END: - r = s->gp_regs[addr - GPR_BEGIN]; - break; - case MICROCODE_BEGIN ... 
MICROCODE_END: - r = s->microcode[get_microcode_address(s, addr)]; - break; - - default: - error_report("milkymist_pfpu: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_pfpu_memory_read(addr << 2, r); - - return r; -} - -static void pfpu_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistPFPUState *s = opaque; - - trace_milkymist_pfpu_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_CTL: - if (value & CTL_START_BUSY) { - pfpu_start(s); - } - break; - case R_MESHBASE: - case R_HMESHLAST: - case R_VMESHLAST: - case R_CODEPAGE: - case R_VERTICES: - case R_COLLISIONS: - case R_STRAYWRITES: - case R_LASTDMA: - case R_PC: - case R_DREGBASE: - case R_CODEBASE: - s->regs[addr] = value; - break; - case GPR_BEGIN ... GPR_END: - s->gp_regs[addr - GPR_BEGIN] = value; - break; - case MICROCODE_BEGIN ... MICROCODE_END: - s->microcode[get_microcode_address(s, addr)] = value; - break; - - default: - error_report("milkymist_pfpu: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } -} - -static const MemoryRegionOps pfpu_mmio_ops = { - .read = pfpu_read, - .write = pfpu_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void milkymist_pfpu_reset(DeviceState *d) -{ - MilkymistPFPUState *s = MILKYMIST_PFPU(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - for (i = 0; i < 128; i++) { - s->gp_regs[i] = 0; - } - for (i = 0; i < MICROCODE_WORDS; i++) { - s->microcode[i] = 0; - } - s->output_queue_pos = 0; - for (i = 0; i < MAX_LATENCY; i++) { - s->output_queue[i] = 0; - } -} - -static void milkymist_pfpu_realize(DeviceState *dev, Error **errp) -{ - MilkymistPFPUState *s = MILKYMIST_PFPU(dev); - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - - sysbus_init_irq(sbd, &s->irq); - - memory_region_init_io(&s->regs_region, OBJECT(dev), &pfpu_mmio_ops, s, - "milkymist-pfpu", MICROCODE_END * 4); - sysbus_init_mmio(sbd, &s->regs_region); -} - -static const VMStateDescription vmstate_milkymist_pfpu = { - .name = "milkymist-pfpu", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX), - VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128), - VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS), - VMSTATE_INT32(output_queue_pos, MilkymistPFPUState), - VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY), - VMSTATE_END_OF_LIST() - } -}; - -static void milkymist_pfpu_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_pfpu_realize; - dc->reset = milkymist_pfpu_reset; - dc->vmsd = &vmstate_milkymist_pfpu; -} - -static const TypeInfo milkymist_pfpu_info = { - .name = TYPE_MILKYMIST_PFPU, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistPFPUState), - .class_init = milkymist_pfpu_class_init, -}; - -static void milkymist_pfpu_register_types(void) -{ - type_register_static(&milkymist_pfpu_info); -} - -type_init(milkymist_pfpu_register_types) diff --git a/hw/misc/mips_itu.c b/hw/misc/mips_itu.c index 133399598fc..80683fed318 100644 --- a/hw/misc/mips_itu.c +++ b/hw/misc/mips_itu.c @@ -22,7 +22,6 @@ #include "qemu/log.h" #include "qemu/module.h" #include "qapi/error.h" -#include "cpu.h" #include "exec/exec-all.h" #include "hw/misc/mips_itu.h" #include "hw/qdev-properties.h" diff --git a/hw/misc/mps2-scc.c b/hw/misc/mps2-scc.c 
index c56aca86ad5..b3b42a792cd 100644 --- a/hw/misc/mps2-scc.c +++ b/hw/misc/mps2-scc.c @@ -23,6 +23,7 @@ #include "qemu/bitops.h" #include "trace.h" #include "hw/sysbus.h" +#include "hw/irq.h" #include "migration/vmstate.h" #include "hw/registerfields.h" #include "hw/misc/mps2-scc.h" @@ -186,10 +187,13 @@ static void mps2_scc_write(void *opaque, hwaddr offset, uint64_t value, switch (offset) { case A_CFG0: /* - * TODO on some boards bit 0 controls RAM remapping; - * on others bit 1 is CPU_WAIT. + * On some boards bit 0 controls board-specific remapping; + * we always reflect bit 0 in the 'remap' GPIO output line, + * and let the board wire it up or not as it chooses. + * TODO on some boards bit 1 is CPU_WAIT. */ s->cfg0 = value; + qemu_set_irq(s->remap, s->cfg0 & 1); break; case A_CFG1: s->cfg1 = value; @@ -283,7 +287,7 @@ static void mps2_scc_reset(DeviceState *dev) int i; trace_mps2_scc_reset(); - s->cfg0 = 0; + s->cfg0 = s->cfg0_reset; s->cfg1 = 0; s->cfg2 = 0; s->cfg5 = 0; @@ -308,6 +312,7 @@ static void mps2_scc_init(Object *obj) memory_region_init_io(&s->iomem, obj, &mps2_scc_ops, s, "mps2-scc", 0x1000); sysbus_init_mmio(sbd, &s->iomem); + qdev_init_gpio_out_named(DEVICE(obj), &s->remap, "remap", 1); } static void mps2_scc_realize(DeviceState *dev, Error **errp) @@ -353,6 +358,8 @@ static Property mps2_scc_properties[] = { DEFINE_PROP_UINT32("scc-cfg4", MPS2SCC, cfg4, 0), DEFINE_PROP_UINT32("scc-aid", MPS2SCC, aid, 0), DEFINE_PROP_UINT32("scc-id", MPS2SCC, id, 0), + /* Reset value for CFG0 register */ + DEFINE_PROP_UINT32("scc-cfg0", MPS2SCC, cfg0_reset, 0), /* * These are the initial settings for the source clocks on the board. * In hardware they can be configured via a config file read by the diff --git a/hw/misc/mst_fpga.c b/hw/misc/mst_fpga.c index edfc35d5f0f..2aaadfa9668 100644 --- a/hw/misc/mst_fpga.c +++ b/hw/misc/mst_fpga.c @@ -222,7 +222,7 @@ static void mst_fpga_init(Object *obj) sysbus_init_mmio(sbd, &s->iomem); } -static VMStateDescription vmstate_mst_fpga_regs = { +static const VMStateDescription vmstate_mst_fpga_regs = { .name = "mainstone_fpga", .version_id = 0, .minimum_version_id = 0, diff --git a/hw/misc/npcm7xx_clk.c b/hw/misc/npcm7xx_clk.c index a1ee67dc9a1..0b61070c52f 100644 --- a/hw/misc/npcm7xx_clk.c +++ b/hw/misc/npcm7xx_clk.c @@ -35,7 +35,7 @@ #define NPCM7XX_CLOCK_REF_HZ (25000000) /* Register Field Definitions */ -#define NPCM7XX_CLK_WDRCR_CA9C BIT(0) /* Cortex A9 Cores */ +#define NPCM7XX_CLK_WDRCR_CA9C BIT(0) /* Cortex-A9 Cores */ #define PLLCON_LOKI BIT(31) #define PLLCON_LOKS BIT(30) diff --git a/hw/misc/pca9552.c b/hw/misc/pca9552.c index b7686e27d7f..fff19e369a3 100644 --- a/hw/misc/pca9552.c +++ b/hw/misc/pca9552.c @@ -272,7 +272,7 @@ static void pca955x_get_led(Object *obj, Visitor *v, const char *name, * reading the INPUTx reg */ reg = PCA9552_LS0 + led / 4; - state = (pca955x_read(s, reg) >> (led % 8)) & 0x3; + state = (pca955x_read(s, reg) >> ((led % 4) * 2)) & 0x3; visit_type_str(v, name, (char **)&led_state[state], errp); } diff --git a/hw/misc/puv3_pm.c b/hw/misc/puv3_pm.c deleted file mode 100644 index 676c23f7dbc..00000000000 --- a/hw/misc/puv3_pm.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Power Management device simulation in PKUnity SoC - * - * Copyright (C) 2010-2012 Guan Xuetao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation, or any later version. 
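For context on the pca9552 change earlier in this patch: each LSn register packs four LED states at two bits apiece, so the state of a given LED is selected as in this stand-alone sketch (helper name is hypothetical, not QEMU code):

    #include <stdint.h>

    /* Extract the 2-bit state of LED 'led' from its LSn register value. */
    static unsigned pca955x_led_state_of(uint8_t ls_reg_val, unsigned led)
    {
        return (ls_reg_val >> ((led % 4) * 2)) & 0x3;
    }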
- * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "hw/sysbus.h" -#include "qom/object.h" - -#undef DEBUG_PUV3 -#include "hw/unicore32/puv3.h" -#include "qemu/module.h" -#include "qemu/log.h" - -#define TYPE_PUV3_PM "puv3_pm" -OBJECT_DECLARE_SIMPLE_TYPE(PUV3PMState, PUV3_PM) - -struct PUV3PMState { - SysBusDevice parent_obj; - - MemoryRegion iomem; - - uint32_t reg_PMCR; - uint32_t reg_PCGR; - uint32_t reg_PLL_SYS_CFG; - uint32_t reg_PLL_DDR_CFG; - uint32_t reg_PLL_VGA_CFG; - uint32_t reg_DIVCFG; -}; - -static uint64_t puv3_pm_read(void *opaque, hwaddr offset, - unsigned size) -{ - PUV3PMState *s = opaque; - uint32_t ret = 0; - - switch (offset) { - case 0x14: - ret = s->reg_PCGR; - break; - case 0x18: - ret = s->reg_PLL_SYS_CFG; - break; - case 0x1c: - ret = s->reg_PLL_DDR_CFG; - break; - case 0x20: - ret = s->reg_PLL_VGA_CFG; - break; - case 0x24: - ret = s->reg_DIVCFG; - break; - case 0x28: /* PLL SYS STATUS */ - ret = 0x00002401; - break; - case 0x2c: /* PLL DDR STATUS */ - ret = 0x00100c00; - break; - case 0x30: /* PLL VGA STATUS */ - ret = 0x00003801; - break; - case 0x34: /* DIV STATUS */ - ret = 0x22f52015; - break; - case 0x38: /* SW RESET */ - ret = 0x0; - break; - case 0x44: /* PLL DFC DONE */ - ret = 0x7; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad read offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } - DPRINTF("offset 0x%x, value 0x%x\n", offset, ret); - - return ret; -} - -static void puv3_pm_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) -{ - PUV3PMState *s = opaque; - - switch (offset) { - case 0x0: - s->reg_PMCR = value; - break; - case 0x14: - s->reg_PCGR = value; - break; - case 0x18: - s->reg_PLL_SYS_CFG = value; - break; - case 0x1c: - s->reg_PLL_DDR_CFG = value; - break; - case 0x20: - s->reg_PLL_VGA_CFG = value; - break; - case 0x24: - case 0x38: - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad write offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } - DPRINTF("offset 0x%x, value 0x%x\n", offset, value); -} - -static const MemoryRegionOps puv3_pm_ops = { - .read = puv3_pm_read, - .write = puv3_pm_write, - .impl = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void puv3_pm_realize(DeviceState *dev, Error **errp) -{ - PUV3PMState *s = PUV3_PM(dev); - - s->reg_PCGR = 0x0; - - memory_region_init_io(&s->iomem, OBJECT(s), &puv3_pm_ops, s, "puv3_pm", - PUV3_REGS_OFFSET); - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem); -} - -static void puv3_pm_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = puv3_pm_realize; -} - -static const TypeInfo puv3_pm_info = { - .name = TYPE_PUV3_PM, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(PUV3PMState), - .class_init = puv3_pm_class_init, -}; - -static void puv3_pm_register_type(void) -{ - type_register_static(&puv3_pm_info); -} - -type_init(puv3_pm_register_type) diff --git a/hw/misc/pvpanic-isa.c b/hw/misc/pvpanic-isa.c index 27113abd6cf..7b66d58acc8 100644 --- a/hw/misc/pvpanic-isa.c +++ b/hw/misc/pvpanic-isa.c @@ -13,7 +13,6 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/module.h" #include "sysemu/runstate.h" diff --git a/hw/misc/pvpanic-pci.c b/hw/misc/pvpanic-pci.c index d629639d8f4..af8cbe28305 100644 --- a/hw/misc/pvpanic-pci.c +++ b/hw/misc/pvpanic-pci.c @@ -12,7 +12,6 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/module.h" #include "sysemu/runstate.h" diff 
--git a/hw/misc/sga.c b/hw/misc/sga.c index 4dbe6d78f9e..1d04672b013 100644 --- a/hw/misc/sga.c +++ b/hw/misc/sga.c @@ -30,6 +30,7 @@ #include "hw/loader.h" #include "qemu/module.h" #include "qom/object.h" +#include "qemu/error-report.h" #define SGABIOS_FILENAME "sgabios.bin" @@ -42,6 +43,7 @@ struct ISASGAState { static void sga_realizefn(DeviceState *dev, Error **errp) { + warn_report("-device sga is deprecated, use -machine graphics=off"); rom_add_vga(SGABIOS_FILENAME); } diff --git a/hw/misc/sifive_e_prci.c b/hw/misc/sifive_e_prci.c index 8ec4ee4b41f..a8702c6a5d4 100644 --- a/hw/misc/sifive_e_prci.c +++ b/hw/misc/sifive_e_prci.c @@ -23,7 +23,6 @@ #include "qapi/error.h" #include "qemu/log.h" #include "qemu/module.h" -#include "hw/hw.h" #include "hw/misc/sifive_e_prci.h" static uint64_t sifive_e_prci_read(void *opaque, hwaddr addr, unsigned int size) diff --git a/hw/misc/sifive_test.c b/hw/misc/sifive_test.c index 2deb2072cc8..56df45bfe59 100644 --- a/hw/misc/sifive_test.c +++ b/hw/misc/sifive_test.c @@ -24,7 +24,6 @@ #include "qemu/log.h" #include "qemu/module.h" #include "sysemu/runstate.h" -#include "hw/hw.h" #include "hw/misc/sifive_test.h" static uint64_t sifive_test_read(void *opaque, hwaddr addr, unsigned int size) diff --git a/hw/misc/sifive_u_otp.c b/hw/misc/sifive_u_otp.c index 18aa0bd55d8..52fdb750c0e 100644 --- a/hw/misc/sifive_u_otp.c +++ b/hw/misc/sifive_u_otp.c @@ -209,7 +209,14 @@ static void sifive_u_otp_realize(DeviceState *dev, Error **errp) TYPE_SIFIVE_U_OTP, SIFIVE_U_OTP_REG_SIZE); sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio); - dinfo = drive_get_next(IF_NONE); + dinfo = drive_get_next(IF_PFLASH); + if (!dinfo) { + dinfo = drive_get_next(IF_NONE); + if (dinfo) { + warn_report("using \"-drive if=none\" for the OTP is deprecated, " + "use \"-drive if=pflash\" instead."); + } + } if (dinfo) { int ret; uint64_t perm; @@ -235,14 +242,10 @@ static void sifive_u_otp_realize(DeviceState *dev, Error **errp) if (blk_pread(s->blk, 0, s->fuse, filesize) != filesize) { error_setg(errp, "failed to read the initial flash content"); + return; } } } -} - -static void sifive_u_otp_reset(DeviceState *dev) -{ - SiFiveUOTPState *s = SIFIVE_U_OTP(dev); /* Initialize all fuses' initial value to 0xFFs */ memset(s->fuse, 0xff, sizeof(s->fuse)); @@ -259,13 +262,15 @@ static void sifive_u_otp_reset(DeviceState *dev) serial_data = s->serial; if (blk_pwrite(s->blk, index * SIFIVE_U_OTP_FUSE_WORD, &serial_data, SIFIVE_U_OTP_FUSE_WORD, 0) < 0) { - error_report("write error index<%d>", index); + error_setg(errp, "failed to write index<%d>", index); + return; } serial_data = ~(s->serial); if (blk_pwrite(s->blk, (index + 1) * SIFIVE_U_OTP_FUSE_WORD, &serial_data, SIFIVE_U_OTP_FUSE_WORD, 0) < 0) { - error_report("write error index<%d>", index + 1); + error_setg(errp, "failed to write index<%d>", index + 1); + return; } } @@ -279,7 +284,6 @@ static void sifive_u_otp_class_init(ObjectClass *klass, void *data) device_class_set_props(dc, sifive_u_otp_properties); dc->realize = sifive_u_otp_realize; - dc->reset = sifive_u_otp_reset; } static const TypeInfo sifive_u_otp_info = { diff --git a/hw/misc/trace-events b/hw/misc/trace-events index d0a89eb0596..2da96d167a7 100644 --- a/hw/misc/trace-events +++ b/hw/misc/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
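# Illustration only, not part of this patch: entries in this file pair a
# trace point name and C argument list with a printf-style format string,
# for example the via1_auxmode entry added further below:
#
#   via1_auxmode(int mode) "setting auxmode to %d"
#
# The build turns each entry into a C helper, here trace_via1_auxmode(int),
# which is the call used in the mac_via.c hunk above.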
# allwinner-cpucfg.c allwinner_cpucfg_cpu_reset(uint8_t cpu_id, uint32_t reset_addr) "id %u, reset_addr 0x%" PRIu32 @@ -67,16 +67,6 @@ slavio_sysctrl_mem_readl(uint32_t ret) "Read system control 0x%08x" slavio_led_mem_writew(uint32_t val) "Write diagnostic LED 0x%04x" slavio_led_mem_readw(uint32_t ret) "Read diagnostic LED 0x%04x" -# milkymist-hpdmc.c -milkymist_hpdmc_memory_read(uint32_t addr, uint32_t value) "addr=0x%08x value=0x%08x" -milkymist_hpdmc_memory_write(uint32_t addr, uint32_t value) "addr=0x%08x value=0x%08x" - -# milkymist-pfpu.c -milkymist_pfpu_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_pfpu_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_pfpu_vectout(uint32_t a, uint32_t b, uint32_t dma_ptr) "a 0x%08x b 0x%08x dma_ptr 0x%08x" -milkymist_pfpu_pulse_irq(void) "Pulse IRQ" - # aspeed_scu.c aspeed_scu_write(uint64_t offset, unsigned size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32 @@ -238,6 +228,7 @@ via1_rtc_cmd_pram_sect_write(int sector, int offset, int addr, int value) "secto via1_adb_send(const char *state, uint8_t data, const char *vadbint) "state %s data=0x%02x vADBInt=%s" via1_adb_receive(const char *state, uint8_t data, const char *vadbint, int status, int index, int size) "state %s data=0x%02x vADBInt=%s status=0x%x index=%d size=%d" via1_adb_poll(uint8_t data, const char *vadbint, int status, int index, int size) "data=0x%02x vADBInt=%s status=0x%x index=%d size=%d" +via1_auxmode(int mode) "setting auxmode to %d" # grlib_ahb_apb_pnp.c grlib_ahb_pnp_read(uint64_t addr, uint32_t value) "AHB PnP read addr:0x%03"PRIx64" data:0x%08x" diff --git a/hw/misc/virt_ctrl.c b/hw/misc/virt_ctrl.c index 2ea01bd7a1f..e75d1e7e17b 100644 --- a/hw/misc/virt_ctrl.c +++ b/hw/misc/virt_ctrl.c @@ -1,11 +1,10 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * Virt system Controller */ #include "qemu/osdep.h" -#include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/sysbus.h" #include "migration/vmstate.h" diff --git a/hw/misc/zynq_slcr.c b/hw/misc/zynq_slcr.c index 5086e6b7ed2..8b702859618 100644 --- a/hw/misc/zynq_slcr.c +++ b/hw/misc/zynq_slcr.c @@ -269,6 +269,21 @@ static uint64_t zynq_slcr_compute_clock(const uint64_t periods[], zynq_slcr_compute_clock((plls), (state)->regs[reg], \ reg ## _ ## enable_field ## _SHIFT) +static void zynq_slcr_compute_clocks_internal(ZynqSLCRState *s, uint64_t ps_clk) +{ + uint64_t io_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_IO_PLL_CTRL]); + uint64_t arm_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_ARM_PLL_CTRL]); + uint64_t ddr_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_DDR_PLL_CTRL]); + + uint64_t uart_mux[4] = {io_pll, io_pll, arm_pll, ddr_pll}; + + /* compute uartX reference clocks */ + clock_set(s->uart0_ref_clk, + ZYNQ_COMPUTE_CLK(s, uart_mux, R_UART_CLK_CTRL, CLKACT0)); + clock_set(s->uart1_ref_clk, + ZYNQ_COMPUTE_CLK(s, uart_mux, R_UART_CLK_CTRL, CLKACT1)); +} + /** * Compute and set the ouputs clocks periods. * But do not propagate them further. 
Connected clocks @@ -283,17 +298,7 @@ static void zynq_slcr_compute_clocks(ZynqSLCRState *s) ps_clk = 0; } - uint64_t io_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_IO_PLL_CTRL]); - uint64_t arm_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_ARM_PLL_CTRL]); - uint64_t ddr_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_DDR_PLL_CTRL]); - - uint64_t uart_mux[4] = {io_pll, io_pll, arm_pll, ddr_pll}; - - /* compute uartX reference clocks */ - clock_set(s->uart0_ref_clk, - ZYNQ_COMPUTE_CLK(s, uart_mux, R_UART_CLK_CTRL, CLKACT0)); - clock_set(s->uart1_ref_clk, - ZYNQ_COMPUTE_CLK(s, uart_mux, R_UART_CLK_CTRL, CLKACT1)); + zynq_slcr_compute_clocks_internal(s, ps_clk); } /** @@ -416,7 +421,7 @@ static void zynq_slcr_reset_hold(Object *obj) ZynqSLCRState *s = ZYNQ_SLCR(obj); /* will disable all output clocks */ - zynq_slcr_compute_clocks(s); + zynq_slcr_compute_clocks_internal(s, 0); zynq_slcr_propagate_clocks(s); } @@ -425,7 +430,7 @@ static void zynq_slcr_reset_exit(Object *obj) ZynqSLCRState *s = ZYNQ_SLCR(obj); /* will compute output clocks according to ps_clk and registers */ - zynq_slcr_compute_clocks(s); + zynq_slcr_compute_clocks_internal(s, clock_get(s->ps_clk)); zynq_slcr_propagate_clocks(s); } diff --git a/hw/moxie/Kconfig b/hw/moxie/Kconfig deleted file mode 100644 index 3793ef03724..00000000000 --- a/hw/moxie/Kconfig +++ /dev/null @@ -1,3 +0,0 @@ -config MOXIESIM - bool - select SERIAL diff --git a/hw/moxie/meson.build b/hw/moxie/meson.build deleted file mode 100644 index 05a7c2e00fc..00000000000 --- a/hw/moxie/meson.build +++ /dev/null @@ -1,4 +0,0 @@ -moxie_ss = ss.source_set() -moxie_ss.add(when: 'CONFIG_MOXIESIM', if_true: files('moxiesim.c')) - -hw_arch += {'moxie': moxie_ss} diff --git a/hw/moxie/moxiesim.c b/hw/moxie/moxiesim.c deleted file mode 100644 index f7b57fcae19..00000000000 --- a/hw/moxie/moxiesim.c +++ /dev/null @@ -1,157 +0,0 @@ -/* - * QEMU/moxiesim emulation - * - * Emulates a very simple machine model similar to the one used by the - * GDB moxie simulator. - * - * Copyright (c) 2008, 2009, 2010, 2013 Anthony Green - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#include "qemu/osdep.h" -#include "qemu/error-report.h" -#include "qapi/error.h" -#include "cpu.h" -#include "hw/sysbus.h" -#include "net/net.h" -#include "sysemu/reset.h" -#include "sysemu/sysemu.h" -#include "hw/boards.h" -#include "hw/loader.h" -#include "hw/char/serial.h" -#include "exec/address-spaces.h" -#include "elf.h" - -#define PHYS_MEM_BASE 0x80000000 -#define FIRMWARE_BASE 0x1000 -#define FIRMWARE_SIZE (128 * 0x1000) - -typedef struct { - uint64_t ram_size; - const char *kernel_filename; - const char *kernel_cmdline; - const char *initrd_filename; -} LoaderParams; - -static void load_kernel(MoxieCPU *cpu, LoaderParams *loader_params) -{ - uint64_t entry, kernel_high; - int64_t initrd_size; - long kernel_size; - ram_addr_t initrd_offset; - - kernel_size = load_elf(loader_params->kernel_filename, NULL, NULL, NULL, - &entry, NULL, &kernel_high, NULL, 1, EM_MOXIE, - 0, 0); - - if (kernel_size <= 0) { - error_report("could not load kernel '%s'", - loader_params->kernel_filename); - exit(1); - } - - /* load initrd */ - initrd_size = 0; - initrd_offset = 0; - if (loader_params->initrd_filename) { - initrd_size = get_image_size(loader_params->initrd_filename); - if (initrd_size > 0) { - initrd_offset = (kernel_high + ~TARGET_PAGE_MASK) - & TARGET_PAGE_MASK; - if (initrd_offset + initrd_size > loader_params->ram_size) { - error_report("memory too small for initial ram disk '%s'", - loader_params->initrd_filename); - exit(1); - } - initrd_size = load_image_targphys(loader_params->initrd_filename, - initrd_offset, - loader_params->ram_size); - } - if (initrd_size == (target_ulong)-1) { - error_report("could not load initial ram disk '%s'", - loader_params->initrd_filename); - exit(1); - } - } -} - -static void main_cpu_reset(void *opaque) -{ - MoxieCPU *cpu = opaque; - - cpu_reset(CPU(cpu)); -} - -static void moxiesim_init(MachineState *machine) -{ - MoxieCPU *cpu = NULL; - ram_addr_t ram_size = machine->ram_size; - const char *kernel_filename = machine->kernel_filename; - const char *kernel_cmdline = machine->kernel_cmdline; - const char *initrd_filename = machine->initrd_filename; - CPUMoxieState *env; - MemoryRegion *address_space_mem = get_system_memory(); - MemoryRegion *ram = g_new(MemoryRegion, 1); - MemoryRegion *rom = g_new(MemoryRegion, 1); - hwaddr ram_base = 0x200000; - LoaderParams loader_params; - - /* Init CPUs. */ - cpu = MOXIE_CPU(cpu_create(machine->cpu_type)); - env = &cpu->env; - - qemu_register_reset(main_cpu_reset, cpu); - - /* Allocate RAM. */ - memory_region_init_ram(ram, NULL, "moxiesim.ram", ram_size, &error_fatal); - memory_region_add_subregion(address_space_mem, ram_base, ram); - - memory_region_init_ram(rom, NULL, "moxie.rom", FIRMWARE_SIZE, &error_fatal); - memory_region_add_subregion(get_system_memory(), FIRMWARE_BASE, rom); - - if (kernel_filename) { - loader_params.ram_size = ram_size; - loader_params.kernel_filename = kernel_filename; - loader_params.kernel_cmdline = kernel_cmdline; - loader_params.initrd_filename = initrd_filename; - load_kernel(cpu, &loader_params); - } - if (machine->firmware) { - if (load_image_targphys(machine->firmware, FIRMWARE_BASE, FIRMWARE_SIZE) < 0) { - error_report("Failed to load firmware '%s'", machine->firmware); - } - } - - /* A single 16450 sits at offset 0x3f8. 
*/
- if (serial_hd(0)) {
- serial_mm_init(address_space_mem, 0x3f8, 0, env->irq[4],
- 8000000/16, serial_hd(0), DEVICE_LITTLE_ENDIAN);
- }
-}
-
-static void moxiesim_machine_init(MachineClass *mc)
-{
- mc->desc = "Moxie simulator platform";
- mc->init = moxiesim_init;
- mc->is_default = true;
- mc->default_cpu_type = MOXIE_CPU_TYPE_NAME("MoxieLite");
-}
-
-DEFINE_MACHINE("moxiesim", moxiesim_machine_init)
diff --git a/hw/net/can/can_sja1000.c b/hw/net/can/can_sja1000.c
index 42d2f99dfb1..34eea684ced 100644
--- a/hw/net/can/can_sja1000.c
+++ b/hw/net/can/can_sja1000.c
@@ -275,6 +275,10 @@ static void buff2frame_pel(const uint8_t *buff, qemu_can_frame *frame)
}
frame->can_dlc = buff[0] & 0x0f;
+ if (frame->can_dlc > 8) {
+ frame->can_dlc = 8;
+ }
+
if (buff[0] & 0x80) { /* Extended */
frame->can_id |= QEMU_CAN_EFF_FLAG;
frame->can_id |= buff[1] << 21; /* ID.28~ID.21 */
@@ -311,6 +315,10 @@ static void buff2frame_bas(const uint8_t *buff, qemu_can_frame *frame)
}
frame->can_dlc = buff[1] & 0x0f;
+ if (frame->can_dlc > 8) {
+ frame->can_dlc = 8;
+ }
+
for (i = 0; i < frame->can_dlc; i++) {
frame->data[i] = buff[2 + i];
}
diff --git a/hw/net/can/xlnx-zynqmp-can.c b/hw/net/can/xlnx-zynqmp-can.c
index affa21a5ed3..22bb8910fa8 100644
--- a/hw/net/can/xlnx-zynqmp-can.c
+++ b/hw/net/can/xlnx-zynqmp-can.c
@@ -37,7 +37,6 @@
#include "qemu/bitops.h"
#include "qemu/log.h"
#include "qemu/cutils.h"
-#include "sysemu/sysemu.h"
#include "migration/vmstate.h"
#include "hw/qdev-properties.h"
#include "net/can_emu.h"
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
index 533a8304d0b..45b954e46c2 100644
--- a/hw/net/dp8393x.c
+++ b/hw/net/dp8393x.c
@@ -28,15 +28,9 @@
#include "qemu/timer.h"
#include <zlib.h>
#include "qom/object.h"
+#include "trace.h"
-//#define DEBUG_SONIC
-
-#define SONIC_PROM_SIZE 0x1000
-
-#ifdef DEBUG_SONIC
-#define DPRINTF(fmt, ...) \
-do { printf("sonic: " fmt , ## __VA_ARGS__); } while (0)
-static const char* reg_names[] = {
+static const char *reg_names[] = {
"CR", "DCR", "RCR", "TCR", "IMR", "ISR", "UTDA", "CTDA",
"TPS", "TFC", "TSA0", "TSA1", "TFS", "URDA", "CRDA",
"CRBA0", "CRBA1", "RBWC0", "RBWC1", "EOBC", "URRA", "RSA", "REA", "RRP",
@@ -45,12 +39,6 @@ static const char* reg_names[] = {
"SR", "WT0", "WT1", "RSC", "CRCT", "FAET", "MPT", "MDT",
"0x30", "0x31", "0x32", "0x33", "0x34", "0x35", "0x36", "0x37",
"0x38", "0x39", "0x3a", "0x3b", "0x3c", "0x3d", "0x3e", "DCR2" };
-#else
-#define DPRINTF(fmt, ...) do {} while (0)
-#endif
-
-#define SONIC_ERROR(fmt, ...) 
\ -do { printf("sonic ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0) #define SONIC_CR 0x00 #define SONIC_DCR 0x01 @@ -97,6 +85,7 @@ do { printf("sonic ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0) #define SONIC_MPT 0x2e #define SONIC_MDT 0x2f #define SONIC_DCR2 0x3f +#define SONIC_REG_COUNT 0x40 #define SONIC_CR_HTX 0x0001 #define SONIC_CR_TXP 0x0002 @@ -161,23 +150,19 @@ struct dp8393xState { bool big_endian; bool last_rba_is_full; qemu_irq irq; -#ifdef DEBUG_SONIC int irq_level; -#endif QEMUTimer *watchdog; int64_t wt_last_update; NICConf conf; NICState *nic; MemoryRegion mmio; - MemoryRegion prom; /* Registers */ - uint8_t cam[16][6]; - uint16_t regs[0x40]; + uint16_t cam[16][3]; + uint16_t regs[SONIC_REG_COUNT]; /* Temporaries */ uint8_t tx_buffer[0x10000]; - uint16_t data[12]; int loopback_packet; /* Memory access */ @@ -185,7 +170,8 @@ struct dp8393xState { AddressSpace as; }; -/* Accessor functions for values which are formed by +/* + * Accessor functions for values which are formed by * concatenating two 16 bit device registers. By putting these * in their own functions with a uint32_t return type we avoid the * pitfall of implicit sign extension where ((x << 16) | y) is a @@ -233,34 +219,48 @@ static uint32_t dp8393x_wt(dp8393xState *s) return s->regs[SONIC_WT1] << 16 | s->regs[SONIC_WT0]; } -static uint16_t dp8393x_get(dp8393xState *s, int width, int offset) +static uint16_t dp8393x_get(dp8393xState *s, hwaddr addr, int offset) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; uint16_t val; - if (s->big_endian) { - val = be16_to_cpu(s->data[offset * width + width - 1]); + if (s->regs[SONIC_DCR] & SONIC_DCR_DW) { + addr += offset << 2; + if (s->big_endian) { + val = address_space_ldl_be(&s->as, addr, attrs, NULL); + } else { + val = address_space_ldl_le(&s->as, addr, attrs, NULL); + } } else { - val = le16_to_cpu(s->data[offset * width]); + addr += offset << 1; + if (s->big_endian) { + val = address_space_lduw_be(&s->as, addr, attrs, NULL); + } else { + val = address_space_lduw_le(&s->as, addr, attrs, NULL); + } } + return val; } -static void dp8393x_put(dp8393xState *s, int width, int offset, - uint16_t val) +static void dp8393x_put(dp8393xState *s, + hwaddr addr, int offset, uint16_t val) { - if (s->big_endian) { - if (width == 2) { - s->data[offset * 2] = 0; - s->data[offset * 2 + 1] = cpu_to_be16(val); + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; + + if (s->regs[SONIC_DCR] & SONIC_DCR_DW) { + addr += offset << 2; + if (s->big_endian) { + address_space_stl_be(&s->as, addr, val, attrs, NULL); } else { - s->data[offset] = cpu_to_be16(val); + address_space_stl_le(&s->as, addr, val, attrs, NULL); } } else { - if (width == 2) { - s->data[offset * 2] = cpu_to_le16(val); - s->data[offset * 2 + 1] = 0; + addr += offset << 1; + if (s->big_endian) { + address_space_stw_be(&s->as, addr, val, attrs, NULL); } else { - s->data[offset] = cpu_to_le16(val); + address_space_stw_le(&s->as, addr, val, attrs, NULL); } } } @@ -269,16 +269,14 @@ static void dp8393x_update_irq(dp8393xState *s) { int level = (s->regs[SONIC_IMR] & s->regs[SONIC_ISR]) ? 
1 : 0; -#ifdef DEBUG_SONIC if (level != s->irq_level) { s->irq_level = level; if (level) { - DPRINTF("raise irq, isr is 0x%04x\n", s->regs[SONIC_ISR]); + trace_dp8393x_raise_irq(s->regs[SONIC_ISR]); } else { - DPRINTF("lower irq\n"); + trace_dp8393x_lower_irq(); } } -#endif qemu_set_irq(s->irq, level); } @@ -286,35 +284,29 @@ static void dp8393x_update_irq(dp8393xState *s) static void dp8393x_do_load_cam(dp8393xState *s) { int width, size; - uint16_t index = 0; + uint16_t index; width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 2 : 1; size = sizeof(uint16_t) * 4 * width; while (s->regs[SONIC_CDC] & 0x1f) { /* Fill current entry */ - address_space_read(&s->as, dp8393x_cdp(s), - MEMTXATTRS_UNSPECIFIED, s->data, size); - s->cam[index][0] = dp8393x_get(s, width, 1) & 0xff; - s->cam[index][1] = dp8393x_get(s, width, 1) >> 8; - s->cam[index][2] = dp8393x_get(s, width, 2) & 0xff; - s->cam[index][3] = dp8393x_get(s, width, 2) >> 8; - s->cam[index][4] = dp8393x_get(s, width, 3) & 0xff; - s->cam[index][5] = dp8393x_get(s, width, 3) >> 8; - DPRINTF("load cam[%d] with %02x%02x%02x%02x%02x%02x\n", index, - s->cam[index][0], s->cam[index][1], s->cam[index][2], - s->cam[index][3], s->cam[index][4], s->cam[index][5]); + index = dp8393x_get(s, dp8393x_cdp(s), 0) & 0xf; + s->cam[index][0] = dp8393x_get(s, dp8393x_cdp(s), 1); + s->cam[index][1] = dp8393x_get(s, dp8393x_cdp(s), 2); + s->cam[index][2] = dp8393x_get(s, dp8393x_cdp(s), 3); + trace_dp8393x_load_cam(index, + s->cam[index][0] >> 8, s->cam[index][0] & 0xff, + s->cam[index][1] >> 8, s->cam[index][1] & 0xff, + s->cam[index][2] >> 8, s->cam[index][2] & 0xff); /* Move to next entry */ s->regs[SONIC_CDC]--; s->regs[SONIC_CDP] += size; - index++; } /* Read CAM enable */ - address_space_read(&s->as, dp8393x_cdp(s), - MEMTXATTRS_UNSPECIFIED, s->data, size); - s->regs[SONIC_CE] = dp8393x_get(s, width, 0); - DPRINTF("load cam done. cam enable mask 0x%04x\n", s->regs[SONIC_CE]); + s->regs[SONIC_CE] = dp8393x_get(s, dp8393x_cdp(s), 0); + trace_dp8393x_load_cam_done(s->regs[SONIC_CE]); /* Done */ s->regs[SONIC_CR] &= ~SONIC_CR_LCAM; @@ -329,17 +321,14 @@ static void dp8393x_do_read_rra(dp8393xState *s) /* Read memory */ width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 
2 : 1; size = sizeof(uint16_t) * 4 * width; - address_space_read(&s->as, dp8393x_rrp(s), - MEMTXATTRS_UNSPECIFIED, s->data, size); /* Update SONIC registers */ - s->regs[SONIC_CRBA0] = dp8393x_get(s, width, 0); - s->regs[SONIC_CRBA1] = dp8393x_get(s, width, 1); - s->regs[SONIC_RBWC0] = dp8393x_get(s, width, 2); - s->regs[SONIC_RBWC1] = dp8393x_get(s, width, 3); - DPRINTF("CRBA0/1: 0x%04x/0x%04x, RBWC0/1: 0x%04x/0x%04x\n", - s->regs[SONIC_CRBA0], s->regs[SONIC_CRBA1], - s->regs[SONIC_RBWC0], s->regs[SONIC_RBWC1]); + s->regs[SONIC_CRBA0] = dp8393x_get(s, dp8393x_rrp(s), 0); + s->regs[SONIC_CRBA1] = dp8393x_get(s, dp8393x_rrp(s), 1); + s->regs[SONIC_RBWC0] = dp8393x_get(s, dp8393x_rrp(s), 2); + s->regs[SONIC_RBWC1] = dp8393x_get(s, dp8393x_rrp(s), 3); + trace_dp8393x_read_rra_regs(s->regs[SONIC_CRBA0], s->regs[SONIC_CRBA1], + s->regs[SONIC_RBWC0], s->regs[SONIC_RBWC1]); /* Go to next entry */ s->regs[SONIC_RRP] += size; @@ -350,8 +339,7 @@ static void dp8393x_do_read_rra(dp8393xState *s) } /* Warn the host if CRBA now has the last available resource */ - if (s->regs[SONIC_RRP] == s->regs[SONIC_RWP]) - { + if (s->regs[SONIC_RRP] == s->regs[SONIC_RWP]) { s->regs[SONIC_ISR] |= SONIC_ISR_RBE; dp8393x_update_irq(s); } @@ -364,7 +352,8 @@ static void dp8393x_do_software_reset(dp8393xState *s) { timer_del(s->watchdog); - s->regs[SONIC_CR] &= ~(SONIC_CR_LCAM | SONIC_CR_RRRA | SONIC_CR_TXP | SONIC_CR_HTX); + s->regs[SONIC_CR] &= ~(SONIC_CR_LCAM | SONIC_CR_RRRA | SONIC_CR_TXP | + SONIC_CR_HTX); s->regs[SONIC_CR] |= SONIC_CR_RST | SONIC_CR_RXDIS; } @@ -433,28 +422,22 @@ static void dp8393x_do_receiver_disable(dp8393xState *s) static void dp8393x_do_transmit_packets(dp8393xState *s) { NetClientState *nc = qemu_get_queue(s->nic); - int width, size; int tx_len, len; uint16_t i; - width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 
2 : 1; - while (1) { /* Read memory */ - size = sizeof(uint16_t) * 6 * width; s->regs[SONIC_TTDA] = s->regs[SONIC_CTDA]; - DPRINTF("Transmit packet at %08x\n", dp8393x_ttda(s)); - address_space_read(&s->as, dp8393x_ttda(s) + sizeof(uint16_t) * width, - MEMTXATTRS_UNSPECIFIED, s->data, size); + trace_dp8393x_transmit_packet(dp8393x_ttda(s)); tx_len = 0; /* Update registers */ - s->regs[SONIC_TCR] = dp8393x_get(s, width, 0) & 0xf000; - s->regs[SONIC_TPS] = dp8393x_get(s, width, 1); - s->regs[SONIC_TFC] = dp8393x_get(s, width, 2); - s->regs[SONIC_TSA0] = dp8393x_get(s, width, 3); - s->regs[SONIC_TSA1] = dp8393x_get(s, width, 4); - s->regs[SONIC_TFS] = dp8393x_get(s, width, 5); + s->regs[SONIC_TCR] = dp8393x_get(s, dp8393x_ttda(s), 1) & 0xf000; + s->regs[SONIC_TPS] = dp8393x_get(s, dp8393x_ttda(s), 2); + s->regs[SONIC_TFC] = dp8393x_get(s, dp8393x_ttda(s), 3); + s->regs[SONIC_TSA0] = dp8393x_get(s, dp8393x_ttda(s), 4); + s->regs[SONIC_TSA1] = dp8393x_get(s, dp8393x_ttda(s), 5); + s->regs[SONIC_TFS] = dp8393x_get(s, dp8393x_ttda(s), 6); /* Handle programmable interrupt */ if (s->regs[SONIC_TCR] & SONIC_TCR_PINT) { @@ -476,27 +459,26 @@ static void dp8393x_do_transmit_packets(dp8393xState *s) i++; if (i != s->regs[SONIC_TFC]) { /* Read next fragment details */ - size = sizeof(uint16_t) * 3 * width; - address_space_read(&s->as, - dp8393x_ttda(s) - + sizeof(uint16_t) * width * (4 + 3 * i), - MEMTXATTRS_UNSPECIFIED, s->data, - size); - s->regs[SONIC_TSA0] = dp8393x_get(s, width, 0); - s->regs[SONIC_TSA1] = dp8393x_get(s, width, 1); - s->regs[SONIC_TFS] = dp8393x_get(s, width, 2); + s->regs[SONIC_TSA0] = dp8393x_get(s, dp8393x_ttda(s), + 4 + 3 * i); + s->regs[SONIC_TSA1] = dp8393x_get(s, dp8393x_ttda(s), + 5 + 3 * i); + s->regs[SONIC_TFS] = dp8393x_get(s, dp8393x_ttda(s), + 6 + 3 * i); } } /* Handle Ethernet checksum */ if (!(s->regs[SONIC_TCR] & SONIC_TCR_CRCI)) { - /* Don't append FCS there, to look like slirp packets - * which don't have one */ + /* + * Don't append FCS there, to look like slirp packets + * which don't have one + */ } else { /* Remove existing FCS */ tx_len -= 4; if (tx_len < 0) { - SONIC_ERROR("tx_len is %d\n", tx_len); + trace_dp8393x_transmit_txlen_error(tx_len); break; } } @@ -515,22 +497,12 @@ static void dp8393x_do_transmit_packets(dp8393xState *s) s->regs[SONIC_TCR] |= SONIC_TCR_PTX; /* Write status */ - dp8393x_put(s, width, 0, - s->regs[SONIC_TCR] & 0x0fff); /* status */ - size = sizeof(uint16_t) * width; - address_space_write(&s->as, dp8393x_ttda(s), - MEMTXATTRS_UNSPECIFIED, s->data, size); + dp8393x_put(s, dp8393x_ttda(s), 0, s->regs[SONIC_TCR] & 0x0fff); if (!(s->regs[SONIC_CR] & SONIC_CR_HTX)) { /* Read footer of packet */ - size = sizeof(uint16_t) * width; - address_space_read(&s->as, - dp8393x_ttda(s) - + sizeof(uint16_t) * width - * (4 + 3 * s->regs[SONIC_TFC]), - MEMTXATTRS_UNSPECIFIED, s->data, - size); - s->regs[SONIC_CTDA] = dp8393x_get(s, width, 0); + s->regs[SONIC_CTDA] = dp8393x_get(s, dp8393x_ttda(s), + 4 + 3 * s->regs[SONIC_TFC]); if (s->regs[SONIC_CTDA] & SONIC_DESC_EOL) { /* EOL detected */ break; @@ -558,26 +530,34 @@ static void dp8393x_do_command(dp8393xState *s, uint16_t command) s->regs[SONIC_CR] |= (command & SONIC_CR_MASK); - if (command & SONIC_CR_HTX) + if (command & SONIC_CR_HTX) { dp8393x_do_halt_transmission(s); - if (command & SONIC_CR_TXP) + } + if (command & SONIC_CR_TXP) { dp8393x_do_transmit_packets(s); - if (command & SONIC_CR_RXDIS) + } + if (command & SONIC_CR_RXDIS) { dp8393x_do_receiver_disable(s); - if (command & SONIC_CR_RXEN) + 
} + if (command & SONIC_CR_RXEN) { dp8393x_do_receiver_enable(s); - if (command & SONIC_CR_STP) + } + if (command & SONIC_CR_STP) { dp8393x_do_stop_timer(s); - if (command & SONIC_CR_ST) + } + if (command & SONIC_CR_ST) { dp8393x_do_start_timer(s); - if (command & SONIC_CR_RST) + } + if (command & SONIC_CR_RST) { dp8393x_do_software_reset(s); + } if (command & SONIC_CR_RRRA) { dp8393x_do_read_rra(s); s->regs[SONIC_CR] &= ~SONIC_CR_RRRA; } - if (command & SONIC_CR_LCAM) + if (command & SONIC_CR_LCAM) { dp8393x_do_load_cam(s); + } } static uint64_t dp8393x_read(void *opaque, hwaddr addr, unsigned int size) @@ -587,110 +567,108 @@ static uint64_t dp8393x_read(void *opaque, hwaddr addr, unsigned int size) uint16_t val = 0; switch (reg) { - /* Update data before reading it */ - case SONIC_WT0: - case SONIC_WT1: - dp8393x_update_wt_regs(s); - val = s->regs[reg]; - break; - /* Accept read to some registers only when in reset mode */ - case SONIC_CAP2: - case SONIC_CAP1: - case SONIC_CAP0: - if (s->regs[SONIC_CR] & SONIC_CR_RST) { - val = s->cam[s->regs[SONIC_CEP] & 0xf][2* (SONIC_CAP0 - reg) + 1] << 8; - val |= s->cam[s->regs[SONIC_CEP] & 0xf][2* (SONIC_CAP0 - reg)]; - } - break; - /* All other registers have no special contrainst */ - default: - val = s->regs[reg]; + /* Update data before reading it */ + case SONIC_WT0: + case SONIC_WT1: + dp8393x_update_wt_regs(s); + val = s->regs[reg]; + break; + /* Accept read to some registers only when in reset mode */ + case SONIC_CAP2: + case SONIC_CAP1: + case SONIC_CAP0: + if (s->regs[SONIC_CR] & SONIC_CR_RST) { + val = s->cam[s->regs[SONIC_CEP] & 0xf][SONIC_CAP0 - reg]; + } + break; + /* All other registers have no special contraints */ + default: + val = s->regs[reg]; } - DPRINTF("read 0x%04x from reg %s\n", val, reg_names[reg]); + trace_dp8393x_read(reg, reg_names[reg], val, size); - return s->big_endian ? val << 16 : val; + return val; } -static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data, +static void dp8393x_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) { dp8393xState *s = opaque; int reg = addr >> s->it_shift; - uint32_t val = s->big_endian ? 
data >> 16 : data; - DPRINTF("write 0x%04x to reg %s\n", (uint16_t)val, reg_names[reg]); + trace_dp8393x_write(reg, reg_names[reg], val, size); switch (reg) { - /* Command register */ - case SONIC_CR: - dp8393x_do_command(s, val); - break; - /* Prevent write to read-only registers */ - case SONIC_CAP2: - case SONIC_CAP1: - case SONIC_CAP0: - case SONIC_SR: - case SONIC_MDT: - DPRINTF("writing to reg %d invalid\n", reg); - break; - /* Accept write to some registers only when in reset mode */ - case SONIC_DCR: - if (s->regs[SONIC_CR] & SONIC_CR_RST) { - s->regs[reg] = val & 0xbfff; - } else { - DPRINTF("writing to DCR invalid\n"); - } - break; - case SONIC_DCR2: - if (s->regs[SONIC_CR] & SONIC_CR_RST) { - s->regs[reg] = val & 0xf017; - } else { - DPRINTF("writing to DCR2 invalid\n"); - } - break; - /* 12 lower bytes are Read Only */ - case SONIC_TCR: - s->regs[reg] = val & 0xf000; - break; - /* 9 lower bytes are Read Only */ - case SONIC_RCR: - s->regs[reg] = val & 0xffe0; - break; - /* Ignore most significant bit */ - case SONIC_IMR: - s->regs[reg] = val & 0x7fff; - dp8393x_update_irq(s); - break; - /* Clear bits by writing 1 to them */ - case SONIC_ISR: - val &= s->regs[reg]; - s->regs[reg] &= ~val; - if (val & SONIC_ISR_RBE) { - dp8393x_do_read_rra(s); - } - dp8393x_update_irq(s); - break; - /* The guest is required to store aligned pointers here */ - case SONIC_RSA: - case SONIC_REA: - case SONIC_RRP: - case SONIC_RWP: - if (s->regs[SONIC_DCR] & SONIC_DCR_DW) { - s->regs[reg] = val & 0xfffc; - } else { - s->regs[reg] = val & 0xfffe; - } - break; - /* Invert written value for some registers */ - case SONIC_CRCT: - case SONIC_FAET: - case SONIC_MPT: - s->regs[reg] = val ^ 0xffff; - break; - /* All other registers have no special contrainst */ - default: - s->regs[reg] = val; + /* Command register */ + case SONIC_CR: + dp8393x_do_command(s, val); + break; + /* Prevent write to read-only registers */ + case SONIC_CAP2: + case SONIC_CAP1: + case SONIC_CAP0: + case SONIC_SR: + case SONIC_MDT: + trace_dp8393x_write_invalid(reg); + break; + /* Accept write to some registers only when in reset mode */ + case SONIC_DCR: + if (s->regs[SONIC_CR] & SONIC_CR_RST) { + s->regs[reg] = val & 0xbfff; + } else { + trace_dp8393x_write_invalid_dcr("DCR"); + } + break; + case SONIC_DCR2: + if (s->regs[SONIC_CR] & SONIC_CR_RST) { + s->regs[reg] = val & 0xf017; + } else { + trace_dp8393x_write_invalid_dcr("DCR2"); + } + break; + /* 12 lower bytes are Read Only */ + case SONIC_TCR: + s->regs[reg] = val & 0xf000; + break; + /* 9 lower bytes are Read Only */ + case SONIC_RCR: + s->regs[reg] = val & 0xffe0; + break; + /* Ignore most significant bit */ + case SONIC_IMR: + s->regs[reg] = val & 0x7fff; + dp8393x_update_irq(s); + break; + /* Clear bits by writing 1 to them */ + case SONIC_ISR: + val &= s->regs[reg]; + s->regs[reg] &= ~val; + if (val & SONIC_ISR_RBE) { + dp8393x_do_read_rra(s); + } + dp8393x_update_irq(s); + break; + /* The guest is required to store aligned pointers here */ + case SONIC_RSA: + case SONIC_REA: + case SONIC_RRP: + case SONIC_RWP: + if (s->regs[SONIC_DCR] & SONIC_DCR_DW) { + s->regs[reg] = val & 0xfffc; + } else { + s->regs[reg] = val & 0xfffe; + } + break; + /* Invert written value for some registers */ + case SONIC_CRCT: + case SONIC_FAET: + case SONIC_MPT: + s->regs[reg] = val ^ 0xffff; + break; + /* All other registers have no special contrainst */ + default: + s->regs[reg] = val; } if (reg == SONIC_WT0 || reg == SONIC_WT1) { @@ -698,11 +676,16 @@ static void dp8393x_write(void 
*opaque, hwaddr addr, uint64_t data, } } +/* + * Since .impl.max_access_size is effectively controlled by the it_shift + * property, leave it unspecified for now to allow the memory API to + * correctly zero extend the 16-bit register values to the access size up to and + * including it_shift. + */ static const MemoryRegionOps dp8393x_ops = { .read = dp8393x_read, .write = dp8393x_write, - .impl.min_access_size = 4, - .impl.max_access_size = 4, + .impl.min_access_size = 2, .endianness = DEVICE_NATIVE_ENDIAN, }; @@ -747,17 +730,18 @@ static int dp8393x_receive_filter(dp8393xState *s, const uint8_t * buf, } /* Check broadcast */ - if ((s->regs[SONIC_RCR] & SONIC_RCR_BRD) && !memcmp(buf, bcast, sizeof(bcast))) { + if ((s->regs[SONIC_RCR] & SONIC_RCR_BRD) && + !memcmp(buf, bcast, sizeof(bcast))) { return SONIC_RCR_BC; } /* Check CAM */ for (i = 0; i < 16; i++) { if (s->regs[SONIC_CE] & (1 << i)) { - /* Entry enabled */ - if (!memcmp(buf, s->cam[i], sizeof(s->cam[i]))) { - return 0; - } + /* Entry enabled */ + if (!memcmp(buf, s->cam[i], sizeof(s->cam[i]))) { + return 0; + } } } @@ -770,7 +754,7 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf, dp8393xState *s = qemu_get_nic_opaque(nc); int packet_type; uint32_t available, address; - int width, rx_len, padded_len; + int rx_len, padded_len; uint32_t checksum; int size; @@ -783,15 +767,13 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf, rx_len = pkt_size + sizeof(checksum); if (s->regs[SONIC_DCR] & SONIC_DCR_DW) { - width = 2; padded_len = ((rx_len - 1) | 3) + 1; } else { - width = 1; padded_len = ((rx_len - 1) | 1) + 1; } if (padded_len > dp8393x_rbwc(s) * 2) { - DPRINTF("oversize packet, pkt_size is %d\n", pkt_size); + trace_dp8393x_receive_oversize(pkt_size); s->regs[SONIC_ISR] |= SONIC_ISR_RBAE; dp8393x_update_irq(s); s->regs[SONIC_RCR] |= SONIC_RCR_LPKT; @@ -800,18 +782,14 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf, packet_type = dp8393x_receive_filter(s, buf, pkt_size); if (packet_type < 0) { - DPRINTF("packet not for netcard\n"); + trace_dp8393x_receive_not_netcard(); return -1; } /* Check for EOL */ if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) { /* Are we still in resource exhaustion? 
*/ - size = sizeof(uint16_t) * 1 * width; - address = dp8393x_crda(s) + sizeof(uint16_t) * 5 * width; - address_space_read(&s->as, address, MEMTXATTRS_UNSPECIFIED, - s->data, size); - s->regs[SONIC_LLFA] = dp8393x_get(s, width, 0); + s->regs[SONIC_LLFA] = dp8393x_get(s, dp8393x_crda(s), 5); if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) { /* Still EOL ; stop reception */ return -1; @@ -819,11 +797,7 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf, /* Link has been updated by host */ /* Clear in_use */ - size = sizeof(uint16_t) * width; - address = dp8393x_crda(s) + sizeof(uint16_t) * 6 * width; - dp8393x_put(s, width, 0, 0); - address_space_rw(&s->as, address, MEMTXATTRS_UNSPECIFIED, - (uint8_t *)s->data, size, 1); + dp8393x_put(s, dp8393x_crda(s), 6, 0x0000); /* Move to next descriptor */ s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA]; @@ -838,7 +812,7 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf, checksum = cpu_to_le32(crc32(0, buf, pkt_size)); /* Put packet into RBA */ - DPRINTF("Receive packet at %08x\n", dp8393x_crba(s)); + trace_dp8393x_receive_packet(dp8393x_crba(s)); address = dp8393x_crba(s); address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED, buf, pkt_size); @@ -852,8 +826,8 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf, /* Pad short packets to keep pointers aligned */ if (rx_len < padded_len) { size = padded_len - rx_len; - address_space_rw(&s->as, address, MEMTXATTRS_UNSPECIFIED, - (uint8_t *)"\xFF\xFF\xFF", size, 1); + address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED, + "\xFF\xFF\xFF", size); address += size; } @@ -876,33 +850,21 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf, } /* Write status to memory */ - DPRINTF("Write status at %08x\n", dp8393x_crda(s)); - dp8393x_put(s, width, 0, s->regs[SONIC_RCR]); /* status */ - dp8393x_put(s, width, 1, rx_len); /* byte count */ - dp8393x_put(s, width, 2, s->regs[SONIC_TRBA0]); /* pkt_ptr0 */ - dp8393x_put(s, width, 3, s->regs[SONIC_TRBA1]); /* pkt_ptr1 */ - dp8393x_put(s, width, 4, s->regs[SONIC_RSC]); /* seq_no */ - size = sizeof(uint16_t) * 5 * width; - address_space_write(&s->as, dp8393x_crda(s), - MEMTXATTRS_UNSPECIFIED, - s->data, size); + trace_dp8393x_receive_write_status(dp8393x_crda(s)); + dp8393x_put(s, dp8393x_crda(s), 0, s->regs[SONIC_RCR]); /* status */ + dp8393x_put(s, dp8393x_crda(s), 1, rx_len); /* byte count */ + dp8393x_put(s, dp8393x_crda(s), 2, s->regs[SONIC_TRBA0]); /* pkt_ptr0 */ + dp8393x_put(s, dp8393x_crda(s), 3, s->regs[SONIC_TRBA1]); /* pkt_ptr1 */ + dp8393x_put(s, dp8393x_crda(s), 4, s->regs[SONIC_RSC]); /* seq_no */ /* Check link field */ - size = sizeof(uint16_t) * width; - address_space_read(&s->as, - dp8393x_crda(s) + sizeof(uint16_t) * 5 * width, - MEMTXATTRS_UNSPECIFIED, s->data, size); - s->regs[SONIC_LLFA] = dp8393x_get(s, width, 0); + s->regs[SONIC_LLFA] = dp8393x_get(s, dp8393x_crda(s), 5); if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) { /* EOL detected */ s->regs[SONIC_ISR] |= SONIC_ISR_RDE; } else { /* Clear in_use */ - size = sizeof(uint16_t) * width; - address = dp8393x_crda(s) + sizeof(uint16_t) * 6 * width; - dp8393x_put(s, width, 0, 0); - address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED, - s->data, size); + dp8393x_put(s, dp8393x_crda(s), 6, 0x0000); /* Move to next descriptor */ s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA]; @@ -938,7 +900,8 @@ static void dp8393x_reset(DeviceState *dev) s->regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux/mips */ 
s->regs[SONIC_CR] = SONIC_CR_RST | SONIC_CR_STP | SONIC_CR_RXDIS; s->regs[SONIC_DCR] &= ~(SONIC_DCR_EXBUS | SONIC_DCR_LBR); - s->regs[SONIC_RCR] &= ~(SONIC_RCR_LB0 | SONIC_RCR_LB1 | SONIC_RCR_BRD | SONIC_RCR_RNT); + s->regs[SONIC_RCR] &= ~(SONIC_RCR_LB0 | SONIC_RCR_LB1 | SONIC_RCR_BRD | + SONIC_RCR_RNT); s->regs[SONIC_TCR] |= SONIC_TCR_NCRS | SONIC_TCR_PTX; s->regs[SONIC_TCR] &= ~SONIC_TCR_BCM; s->regs[SONIC_IMR] = 0; @@ -968,52 +931,31 @@ static void dp8393x_instance_init(Object *obj) dp8393xState *s = DP8393X(obj); sysbus_init_mmio(sbd, &s->mmio); - sysbus_init_mmio(sbd, &s->prom); sysbus_init_irq(sbd, &s->irq); } static void dp8393x_realize(DeviceState *dev, Error **errp) { dp8393xState *s = DP8393X(dev); - int i, checksum; - uint8_t *prom; - Error *local_err = NULL; address_space_init(&s->as, s->dma_mr, "dp8393x"); memory_region_init_io(&s->mmio, OBJECT(dev), &dp8393x_ops, s, - "dp8393x-regs", 0x40 << s->it_shift); + "dp8393x-regs", SONIC_REG_COUNT << s->it_shift); s->nic = qemu_new_nic(&net_dp83932_info, &s->conf, object_get_typename(OBJECT(dev)), dev->id, s); qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); s->watchdog = timer_new_ns(QEMU_CLOCK_VIRTUAL, dp8393x_watchdog, s); - - memory_region_init_rom(&s->prom, OBJECT(dev), "dp8393x-prom", - SONIC_PROM_SIZE, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - prom = memory_region_get_ram_ptr(&s->prom); - checksum = 0; - for (i = 0; i < 6; i++) { - prom[i] = s->conf.macaddr.a[i]; - checksum += prom[i]; - if (checksum > 0xff) { - checksum = (checksum + 1) & 0xff; - } - } - prom[7] = 0xff - checksum; } static const VMStateDescription vmstate_dp8393x = { .name = "dp8393x", - .version_id = 0, - .minimum_version_id = 0, + .version_id = 1, + .minimum_version_id = 1, .fields = (VMStateField []) { - VMSTATE_BUFFER_UNSAFE(cam, dp8393xState, 0, 16 * 6), - VMSTATE_UINT16_ARRAY(regs, dp8393xState, 0x40), + VMSTATE_UINT16_2DARRAY(cam, dp8393xState, 16, 3), + VMSTATE_UINT16_ARRAY(regs, dp8393xState, SONIC_REG_COUNT), VMSTATE_END_OF_LIST() } }; diff --git a/hw/net/e1000.c b/hw/net/e1000.c index 4f75b44cfcb..f5bc81296d1 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -29,6 +29,7 @@ #include "hw/pci/pci.h" #include "hw/qdev-properties.h" #include "migration/vmstate.h" +#include "net/eth.h" #include "net/net.h" #include "net/checksum.h" #include "sysemu/sysemu.h" @@ -106,6 +107,7 @@ struct E1000State_st { e1000x_txd_props props; e1000x_txd_props tso_props; uint16_t tso_frames; + bool busy; } tx; struct { @@ -130,10 +132,13 @@ struct E1000State_st { #define E1000_FLAG_MIT_BIT 1 #define E1000_FLAG_MAC_BIT 2 #define E1000_FLAG_TSO_BIT 3 +#define E1000_FLAG_VET_BIT 4 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT) #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT) #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT) #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT) +#define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT) + uint32_t compat_flags; bool received_tx_tso; bool use_tso_for_migration; @@ -361,6 +366,13 @@ e1000_autoneg_timer(void *opaque) } } +static bool e1000_vet_init_need(void *opaque) +{ + E1000State *s = opaque; + + return chkflag(VET); +} + static void e1000_reset(void *opaque) { E1000State *d = opaque; @@ -386,6 +398,10 @@ static void e1000_reset(void *opaque) } e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr); + + if (e1000_vet_init_need(d)) { + d->mac_reg[VET] = ETH_P_VLAN; + } } static void @@ -748,6 +764,11 @@ start_xmit(E1000State *s) return; } + if (s->tx.busy) { + return; + } + 
s->tx.busy = true; + while (s->mac_reg[TDH] != s->mac_reg[TDT]) { base = tx_desc_base(s) + sizeof(struct e1000_tx_desc) * s->mac_reg[TDH]; @@ -774,6 +795,7 @@ start_xmit(E1000State *s) break; } } + s->tx.busy = false; set_ics(s, 0, cause); } @@ -1737,6 +1759,8 @@ static Property e1000_properties[] = { compat_flags, E1000_FLAG_MAC_BIT, true), DEFINE_PROP_BIT("migrate_tso_props", E1000State, compat_flags, E1000_FLAG_TSO_BIT, true), + DEFINE_PROP_BIT("init-vet", E1000State, + compat_flags, E1000_FLAG_VET_BIT, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c index a8a77eca956..ac96f7665af 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -35,6 +35,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" +#include "net/eth.h" #include "net/net.h" #include "net/tap.h" #include "qemu/module.h" @@ -79,7 +80,7 @@ struct E1000EState { bool disable_vnet; E1000ECore core; - + bool init_vet; }; #define E1000E_MMIO_IDX 0 @@ -527,6 +528,10 @@ static void e1000e_qdev_reset(DeviceState *dev) trace_e1000e_cb_qdev_reset(); e1000e_core_reset(&s->core); + + if (s->init_vet) { + s->core.mac[VET] = ETH_P_VLAN; + } } static int e1000e_pre_save(void *opaque) @@ -666,6 +671,7 @@ static Property e1000e_properties[] = { e1000e_prop_subsys_ven, uint16_t), DEFINE_PROP_SIGNED("subsys", E1000EState, subsys, 0, e1000e_prop_subsys, uint16_t), + DEFINE_PROP_BOOL("init-vet", E1000EState, init_vet, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index b75f2ab8fc1..8ae6fb7e145 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -731,7 +731,7 @@ e1000e_process_tx_desc(E1000ECore *core, if (e1000x_vlan_enabled(core->mac) && e1000x_is_vlan_txd(txd_lower)) { net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, - le16_to_cpu(dp->upper.fields.special), core->vet); + le16_to_cpu(dp->upper.fields.special), core->mac[VET]); } if (e1000e_tx_pkt_send(core, tx, queue_index)) { e1000e_on_tx_done_update_stats(core, tx->tx_pkt); @@ -1012,7 +1012,7 @@ e1000e_receive_filter(E1000ECore *core, const uint8_t *buf, int size) { uint32_t rctl = core->mac[RCTL]; - if (e1000x_is_vlan_packet(buf, core->vet) && + if (e1000x_is_vlan_packet(buf, core->mac[VET]) && e1000x_vlan_rx_filter_enabled(core->mac)) { uint16_t vid = lduw_be_p(buf + 14); uint32_t vfta = ldl_le_p((uint32_t *)(core->mac + VFTA) + @@ -1285,7 +1285,6 @@ e1000e_write_lgcy_rx_descr(E1000ECore *core, uint8_t *desc, &d->special); d->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); d->status = (uint8_t) le32_to_cpu(status_flags); - d->special = 0; } static inline void @@ -1686,7 +1685,7 @@ e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt) } net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs, - e1000x_vlan_enabled(core->mac), core->vet); + e1000x_vlan_enabled(core->mac), core->mac[VET]); e1000e_rss_parse_packet(core, core->rx_pkt, &rss_info); e1000e_rx_ring_init(core, &rxr, rss_info.queue); @@ -2397,8 +2396,7 @@ static void e1000e_set_vet(E1000ECore *core, int index, uint32_t val) { core->mac[VET] = val & 0xffff; - core->vet = le16_to_cpu(core->mac[VET]); - trace_e1000e_vlan_vet(core->vet); + trace_e1000e_vlan_vet(core->mac[VET]); } static void diff --git a/hw/net/i82596.c b/hw/net/i82596.c index 055c3a1470c..ec21e2699a1 100644 --- a/hw/net/i82596.c +++ b/hw/net/i82596.c @@ -12,7 +12,6 @@ #include "qemu/timer.h" #include "net/net.h" #include "net/eth.h" -#include "sysemu/sysemu.h" #include "hw/irq.h" #include "hw/qdev-properties.h" #include "migration/vmstate.h" diff 
--git a/hw/net/imx_fec.c b/hw/net/imx_fec.c index f03450c0280..9c7035bc948 100644 --- a/hw/net/imx_fec.c +++ b/hw/net/imx_fec.c @@ -283,9 +283,8 @@ static uint32_t imx_phy_read(IMXFECState *s, int reg) uint32_t phy = reg / 32; if (phy != s->phy_num) { - qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad phy num %u\n", - TYPE_IMX_FEC, __func__, phy); - return 0; + trace_imx_phy_read_num(phy, s->phy_num); + return 0xffff; } reg %= 32; @@ -345,8 +344,7 @@ static void imx_phy_write(IMXFECState *s, int reg, uint32_t val) uint32_t phy = reg / 32; if (phy != s->phy_num) { - qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad phy num %u\n", - TYPE_IMX_FEC, __func__, phy); + trace_imx_phy_write_num(phy, s->phy_num); return; } diff --git a/hw/net/lasi_i82596.c b/hw/net/lasi_i82596.c index 820b63f3509..e37f7fabe95 100644 --- a/hw/net/lasi_i82596.c +++ b/hw/net/lasi_i82596.c @@ -18,7 +18,6 @@ #include "hw/net/lasi_82596.h" #include "hw/net/i82596.h" #include "trace.h" -#include "sysemu/sysemu.h" #include "hw/qdev-properties.h" #include "migration/vmstate.h" diff --git a/hw/net/meson.build b/hw/net/meson.build index af0749c42bb..bdf71f1f405 100644 --- a/hw/net/meson.build +++ b/hw/net/meson.build @@ -39,7 +39,6 @@ softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_emc.c')) softmmu_ss.add(when: 'CONFIG_ETRAXFS', if_true: files('etraxfs_eth.c')) softmmu_ss.add(when: 'CONFIG_COLDFIRE', if_true: files('mcf_fec.c')) -specific_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-minimac2.c')) specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr_llan.c')) specific_ss.add(when: 'CONFIG_XILINX_ETHLITE', if_true: files('xilinx_ethlite.c')) diff --git a/hw/net/milkymist-minimac2.c b/hw/net/milkymist-minimac2.c deleted file mode 100644 index 5826944fd57..00000000000 --- a/hw/net/milkymist-minimac2.c +++ /dev/null @@ -1,547 +0,0 @@ -/* - * QEMU model of the Milkymist minimac2 block. - * - * Copyright (c) 2011 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * not available yet - * - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "qom/object.h" -#include "cpu.h" /* FIXME: why does this use TARGET_PAGE_ALIGN? 
*/ -#include "hw/irq.h" -#include "hw/qdev-properties.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "net/net.h" -#include "qemu/log.h" -#include "qemu/module.h" -#include "qemu/error-report.h" - -#include - -enum { - R_SETUP = 0, - R_MDIO, - R_STATE0, - R_COUNT0, - R_STATE1, - R_COUNT1, - R_TXCOUNT, - R_MAX -}; - -enum { - SETUP_PHY_RST = (1<<0), -}; - -enum { - MDIO_DO = (1<<0), - MDIO_DI = (1<<1), - MDIO_OE = (1<<2), - MDIO_CLK = (1<<3), -}; - -enum { - STATE_EMPTY = 0, - STATE_LOADED = 1, - STATE_PENDING = 2, -}; - -enum { - MDIO_OP_WRITE = 1, - MDIO_OP_READ = 2, -}; - -enum mdio_state { - MDIO_STATE_IDLE, - MDIO_STATE_READING, - MDIO_STATE_WRITING, -}; - -enum { - R_PHY_ID1 = 2, - R_PHY_ID2 = 3, - R_PHY_MAX = 32 -}; - -#define MINIMAC2_MTU 1530 -#define MINIMAC2_BUFFER_SIZE 2048 - -struct MilkymistMinimac2MdioState { - int last_clk; - int count; - uint32_t data; - uint16_t data_out; - int state; - - uint8_t phy_addr; - uint8_t reg_addr; -}; -typedef struct MilkymistMinimac2MdioState MilkymistMinimac2MdioState; - -#define TYPE_MILKYMIST_MINIMAC2 "milkymist-minimac2" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistMinimac2State, MILKYMIST_MINIMAC2) - -struct MilkymistMinimac2State { - SysBusDevice parent_obj; - - NICState *nic; - NICConf conf; - char *phy_model; - MemoryRegion buffers; - MemoryRegion regs_region; - - qemu_irq rx_irq; - qemu_irq tx_irq; - - uint32_t regs[R_MAX]; - - MilkymistMinimac2MdioState mdio; - - uint16_t phy_regs[R_PHY_MAX]; - - uint8_t *rx0_buf; - uint8_t *rx1_buf; - uint8_t *tx_buf; -}; - -static const uint8_t preamble_sfd[] = { - 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0xd5 -}; - -static void minimac2_mdio_write_reg(MilkymistMinimac2State *s, - uint8_t phy_addr, uint8_t reg_addr, uint16_t value) -{ - trace_milkymist_minimac2_mdio_write(phy_addr, reg_addr, value); - - /* nop */ -} - -static uint16_t minimac2_mdio_read_reg(MilkymistMinimac2State *s, - uint8_t phy_addr, uint8_t reg_addr) -{ - uint16_t r = s->phy_regs[reg_addr]; - - trace_milkymist_minimac2_mdio_read(phy_addr, reg_addr, r); - - return r; -} - -static void minimac2_update_mdio(MilkymistMinimac2State *s) -{ - MilkymistMinimac2MdioState *m = &s->mdio; - - /* detect rising clk edge */ - if (m->last_clk == 0 && (s->regs[R_MDIO] & MDIO_CLK)) { - /* shift data in */ - int bit = ((s->regs[R_MDIO] & MDIO_DO) - && (s->regs[R_MDIO] & MDIO_OE)) ? 1 : 0; - m->data = (m->data << 1) | bit; - - /* check for sync */ - if (m->data == 0xffffffff) { - m->count = 32; - } - - if (m->count == 16) { - uint8_t start = (m->data >> 14) & 0x3; - uint8_t op = (m->data >> 12) & 0x3; - uint8_t ta = (m->data) & 0x3; - - if (start == 1 && op == MDIO_OP_WRITE && ta == 2) { - m->state = MDIO_STATE_WRITING; - } else if (start == 1 && op == MDIO_OP_READ && (ta & 1) == 0) { - m->state = MDIO_STATE_READING; - } else { - m->state = MDIO_STATE_IDLE; - } - - if (m->state != MDIO_STATE_IDLE) { - m->phy_addr = (m->data >> 7) & 0x1f; - m->reg_addr = (m->data >> 2) & 0x1f; - } - - if (m->state == MDIO_STATE_READING) { - m->data_out = minimac2_mdio_read_reg(s, m->phy_addr, - m->reg_addr); - } - } - - if (m->count < 16 && m->state == MDIO_STATE_READING) { - int bit = (m->data_out & 0x8000) ? 
1 : 0; - m->data_out <<= 1; - - if (bit) { - s->regs[R_MDIO] |= MDIO_DI; - } else { - s->regs[R_MDIO] &= ~MDIO_DI; - } - } - - if (m->count == 0 && m->state) { - if (m->state == MDIO_STATE_WRITING) { - uint16_t data = m->data & 0xffff; - minimac2_mdio_write_reg(s, m->phy_addr, m->reg_addr, data); - } - m->state = MDIO_STATE_IDLE; - } - m->count--; - } - - m->last_clk = (s->regs[R_MDIO] & MDIO_CLK) ? 1 : 0; -} - -static size_t assemble_frame(uint8_t *buf, size_t size, - const uint8_t *payload, size_t payload_size) -{ - uint32_t crc; - - if (size < payload_size + 12) { - qemu_log_mask(LOG_GUEST_ERROR, "milkymist_minimac2: frame too big " - "(%zd bytes)\n", payload_size); - return 0; - } - - /* prepend preamble and sfd */ - memcpy(buf, preamble_sfd, 8); - - /* now copy the payload */ - memcpy(buf + 8, payload, payload_size); - - /* pad frame if needed */ - if (payload_size < 60) { - memset(buf + payload_size + 8, 0, 60 - payload_size); - payload_size = 60; - } - - /* append fcs */ - crc = cpu_to_le32(crc32(0, buf + 8, payload_size)); - memcpy(buf + payload_size + 8, &crc, 4); - - return payload_size + 12; -} - -static void minimac2_tx(MilkymistMinimac2State *s) -{ - uint32_t txcount = s->regs[R_TXCOUNT]; - uint8_t *buf = s->tx_buf; - - if (txcount < 64) { - error_report("milkymist_minimac2: ethernet frame too small (%u < %u)", - txcount, 64); - goto err; - } - - if (txcount > MINIMAC2_MTU) { - error_report("milkymist_minimac2: MTU exceeded (%u > %u)", - txcount, MINIMAC2_MTU); - goto err; - } - - if (memcmp(buf, preamble_sfd, 8) != 0) { - error_report("milkymist_minimac2: frame doesn't contain the preamble " - "and/or the SFD (%02x %02x %02x %02x %02x %02x %02x %02x)", - buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); - goto err; - } - - trace_milkymist_minimac2_tx_frame(txcount - 12); - - /* send packet, skipping preamble and sfd */ - qemu_send_packet_raw(qemu_get_queue(s->nic), buf + 8, txcount - 12); - - s->regs[R_TXCOUNT] = 0; - -err: - trace_milkymist_minimac2_pulse_irq_tx(); - qemu_irq_pulse(s->tx_irq); -} - -static void update_rx_interrupt(MilkymistMinimac2State *s) -{ - if (s->regs[R_STATE0] == STATE_PENDING - || s->regs[R_STATE1] == STATE_PENDING) { - trace_milkymist_minimac2_raise_irq_rx(); - qemu_irq_raise(s->rx_irq); - } else { - trace_milkymist_minimac2_lower_irq_rx(); - qemu_irq_lower(s->rx_irq); - } -} - -static ssize_t minimac2_rx(NetClientState *nc, const uint8_t *buf, size_t size) -{ - MilkymistMinimac2State *s = qemu_get_nic_opaque(nc); - - uint32_t r_count; - uint32_t r_state; - uint8_t *rx_buf; - - size_t frame_size; - - trace_milkymist_minimac2_rx_frame(buf, size); - - /* choose appropriate slot */ - if (s->regs[R_STATE0] == STATE_LOADED) { - r_count = R_COUNT0; - r_state = R_STATE0; - rx_buf = s->rx0_buf; - } else if (s->regs[R_STATE1] == STATE_LOADED) { - r_count = R_COUNT1; - r_state = R_STATE1; - rx_buf = s->rx1_buf; - } else { - return 0; - } - - /* assemble frame */ - frame_size = assemble_frame(rx_buf, MINIMAC2_BUFFER_SIZE, buf, size); - - if (frame_size == 0) { - return size; - } - - trace_milkymist_minimac2_rx_transfer(rx_buf, frame_size); - - /* update slot */ - s->regs[r_count] = frame_size; - s->regs[r_state] = STATE_PENDING; - - update_rx_interrupt(s); - - return size; -} - -static uint64_t -minimac2_read(void *opaque, hwaddr addr, unsigned size) -{ - MilkymistMinimac2State *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_SETUP: - case R_MDIO: - case R_STATE0: - case R_COUNT0: - case R_STATE1: - case R_COUNT1: - case 
R_TXCOUNT: - r = s->regs[addr]; - break; - - default: - qemu_log_mask(LOG_GUEST_ERROR, - "milkymist_minimac2_rd%d: 0x%" HWADDR_PRIx "\n", - size, addr << 2); - break; - } - - trace_milkymist_minimac2_memory_read(addr << 2, r); - - return r; -} - -static int minimac2_can_rx(MilkymistMinimac2State *s) -{ - if (s->regs[R_STATE0] == STATE_LOADED) { - return 1; - } - if (s->regs[R_STATE1] == STATE_LOADED) { - return 1; - } - - return 0; -} - -static void -minimac2_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistMinimac2State *s = opaque; - - trace_milkymist_minimac2_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_MDIO: - { - /* MDIO_DI is read only */ - int mdio_di = (s->regs[R_MDIO] & MDIO_DI); - s->regs[R_MDIO] = value; - if (mdio_di) { - s->regs[R_MDIO] |= mdio_di; - } else { - s->regs[R_MDIO] &= ~mdio_di; - } - - minimac2_update_mdio(s); - } break; - case R_TXCOUNT: - s->regs[addr] = value; - if (value > 0) { - minimac2_tx(s); - } - break; - case R_STATE0: - case R_STATE1: - s->regs[addr] = value; - update_rx_interrupt(s); - if (minimac2_can_rx(s)) { - qemu_flush_queued_packets(qemu_get_queue(s->nic)); - } - break; - case R_SETUP: - case R_COUNT0: - case R_COUNT1: - s->regs[addr] = value; - break; - - default: - qemu_log_mask(LOG_GUEST_ERROR, - "milkymist_minimac2_wr%d: 0x%" HWADDR_PRIx - " = 0x%" PRIx64 "\n", - size, addr << 2, value); - break; - } -} - -static const MemoryRegionOps minimac2_ops = { - .read = minimac2_read, - .write = minimac2_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void milkymist_minimac2_reset(DeviceState *d) -{ - MilkymistMinimac2State *s = MILKYMIST_MINIMAC2(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - for (i = 0; i < R_PHY_MAX; i++) { - s->phy_regs[i] = 0; - } - - /* defaults */ - s->phy_regs[R_PHY_ID1] = 0x0022; /* Micrel KSZ8001L */ - s->phy_regs[R_PHY_ID2] = 0x161a; -} - -static NetClientInfo net_milkymist_minimac2_info = { - .type = NET_CLIENT_DRIVER_NIC, - .size = sizeof(NICState), - .receive = minimac2_rx, -}; - -static void milkymist_minimac2_realize(DeviceState *dev, Error **errp) -{ - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - MilkymistMinimac2State *s = MILKYMIST_MINIMAC2(dev); - size_t buffers_size = TARGET_PAGE_ALIGN(3 * MINIMAC2_BUFFER_SIZE); - - sysbus_init_irq(sbd, &s->rx_irq); - sysbus_init_irq(sbd, &s->tx_irq); - - memory_region_init_io(&s->regs_region, OBJECT(dev), &minimac2_ops, s, - "milkymist-minimac2", R_MAX * 4); - sysbus_init_mmio(sbd, &s->regs_region); - - /* register buffers memory */ - memory_region_init_ram_nomigrate(&s->buffers, OBJECT(dev), "milkymist-minimac2.buffers", - buffers_size, &error_fatal); - vmstate_register_ram_global(&s->buffers); - s->rx0_buf = memory_region_get_ram_ptr(&s->buffers); - s->rx1_buf = s->rx0_buf + MINIMAC2_BUFFER_SIZE; - s->tx_buf = s->rx1_buf + MINIMAC2_BUFFER_SIZE; - - sysbus_init_mmio(sbd, &s->buffers); - - qemu_macaddr_default_if_unset(&s->conf.macaddr); - s->nic = qemu_new_nic(&net_milkymist_minimac2_info, &s->conf, - object_get_typename(OBJECT(dev)), dev->id, s); - qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); -} - -static const VMStateDescription vmstate_milkymist_minimac2_mdio = { - .name = "milkymist-minimac2-mdio", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_INT32(last_clk, MilkymistMinimac2MdioState), - VMSTATE_INT32(count, MilkymistMinimac2MdioState), - 
VMSTATE_UINT32(data, MilkymistMinimac2MdioState), - VMSTATE_UINT16(data_out, MilkymistMinimac2MdioState), - VMSTATE_INT32(state, MilkymistMinimac2MdioState), - VMSTATE_UINT8(phy_addr, MilkymistMinimac2MdioState), - VMSTATE_UINT8(reg_addr, MilkymistMinimac2MdioState), - VMSTATE_END_OF_LIST() - } -}; - -static const VMStateDescription vmstate_milkymist_minimac2 = { - .name = "milkymist-minimac2", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistMinimac2State, R_MAX), - VMSTATE_UINT16_ARRAY(phy_regs, MilkymistMinimac2State, R_PHY_MAX), - VMSTATE_STRUCT(mdio, MilkymistMinimac2State, 0, - vmstate_milkymist_minimac2_mdio, MilkymistMinimac2MdioState), - VMSTATE_END_OF_LIST() - } -}; - -static Property milkymist_minimac2_properties[] = { - DEFINE_NIC_PROPERTIES(MilkymistMinimac2State, conf), - DEFINE_PROP_STRING("phy_model", MilkymistMinimac2State, phy_model), - DEFINE_PROP_END_OF_LIST(), -}; - -static void milkymist_minimac2_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_minimac2_realize; - dc->reset = milkymist_minimac2_reset; - dc->vmsd = &vmstate_milkymist_minimac2; - device_class_set_props(dc, milkymist_minimac2_properties); -} - -static const TypeInfo milkymist_minimac2_info = { - .name = TYPE_MILKYMIST_MINIMAC2, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistMinimac2State), - .class_init = milkymist_minimac2_class_init, -}; - -static void milkymist_minimac2_register_types(void) -{ - type_register_static(&milkymist_minimac2_info); -} - -type_init(milkymist_minimac2_register_types) diff --git a/hw/net/msf2-emac.c b/hw/net/msf2-emac.c index 3e6206044f8..9278fdce0b3 100644 --- a/hw/net/msf2-emac.c +++ b/hw/net/msf2-emac.c @@ -32,7 +32,6 @@ #include "qemu-common.h" #include "qemu/log.h" #include "qapi/error.h" -#include "exec/address-spaces.h" #include "hw/registerfields.h" #include "hw/net/msf2-emac.h" #include "hw/net/mii.h" diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c index 1f9aa59eca2..1cb1125d9fe 100644 --- a/hw/net/net_tx_pkt.c +++ b/hw/net/net_tx_pkt.c @@ -450,11 +450,13 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt) pkt->payload_len = 0; pkt->payload_frags = 0; - assert(pkt->raw); - for (i = 0; i < pkt->raw_frags; i++) { - assert(pkt->raw[i].iov_base); - pci_dma_unmap(pkt->pci_dev, pkt->raw[i].iov_base, pkt->raw[i].iov_len, - DMA_DIRECTION_TO_DEVICE, 0); + if (pkt->max_raw_frags > 0) { + assert(pkt->raw); + for (i = 0; i < pkt->raw_frags; i++) { + assert(pkt->raw[i].iov_base); + pci_dma_unmap(pkt->pci_dev, pkt->raw[i].iov_base, + pkt->raw[i].iov_len, DMA_DIRECTION_TO_DEVICE, 0); + } } pkt->raw_frags = 0; diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h index 941c9322658..412fa44d017 100644 --- a/hw/net/rocker/rocker.h +++ b/hw/net/rocker/rocker.h @@ -25,14 +25,9 @@ #if defined(DEBUG_ROCKER) # define DPRINTF(fmt, ...) \ do { \ - struct timeval tv; \ - char timestr[64]; \ - time_t now; \ - gettimeofday(&tv, NULL); \ - now = tv.tv_sec; \ - strftime(timestr, sizeof(timestr), "%T", localtime(&now)); \ - fprintf(stderr, "%s.%06ld ", timestr, tv.tv_usec); \ - fprintf(stderr, "ROCKER: " fmt, ## __VA_ARGS__); \ + g_autoptr(GDateTime) now = g_date_time_new_now_local(); \ + g_autofree char *nowstr = g_date_time_format(now, "%T.%f");\ + fprintf(stderr, "%s ROCKER: " fmt, nowstr, ## __VA_ARGS__);\ } while (0) #else static inline GCC_FMT_ATTR(1, 2) int DPRINTF(const char *fmt, ...) 
diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index 10e85a45560..a6876a936db 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -26,7 +26,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "qemu/log.h" #include "qemu/module.h" #include "net/net.h" diff --git a/hw/net/trace-events b/hw/net/trace-events index baf25ffa7e7..643338f6109 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # allwinner-sun8i-emac.c allwinner_sun8i_emac_mii_write_reg(uint32_t reg, uint32_t value) "MII write: reg=0x%" PRIx32 " value=0x%" PRIx32 @@ -19,18 +19,6 @@ mdio_bitbang(bool mdc, bool mdio, int state, uint16_t cnt, unsigned int drive) " lance_mem_readw(uint64_t addr, uint32_t ret) "addr=0x%"PRIx64"val=0x%04x" lance_mem_writew(uint64_t addr, uint32_t val) "addr=0x%"PRIx64"val=0x%04x" -# milkymist-minimac2.c -milkymist_minimac2_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_minimac2_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_minimac2_mdio_write(uint8_t phy_addr, uint8_t addr, uint16_t value) "phy_addr 0x%02x addr 0x%02x value 0x%04x" -milkymist_minimac2_mdio_read(uint8_t phy_addr, uint8_t addr, uint16_t value) "phy_addr 0x%02x addr 0x%02x value 0x%04x" -milkymist_minimac2_tx_frame(uint32_t length) "length %u" -milkymist_minimac2_rx_frame(const void *buf, uint32_t length) "buf %p length %u" -milkymist_minimac2_rx_transfer(const void *buf, uint32_t length) "buf %p length %d" -milkymist_minimac2_raise_irq_rx(void) "Raise IRQ RX" -milkymist_minimac2_lower_irq_rx(void) "Lower IRQ RX" -milkymist_minimac2_pulse_irq_tx(void) "Pulse IRQ TX" - # mipsnet.c mipsnet_send(uint32_t size) "sending len=%u" mipsnet_receive(uint32_t size) "receiving len=%u" @@ -414,7 +402,9 @@ i82596_channel_attention(void *s) "%p: Received CHANNEL ATTENTION" # imx_fec.c imx_phy_read(uint32_t val, int phy, int reg) "0x%04"PRIx32" <= phy[%d].reg[%d]" +imx_phy_read_num(int phy, int configured) "read request from unconfigured phy %d (configured %d)" imx_phy_write(uint32_t val, int phy, int reg) "0x%04"PRIx32" => phy[%d].reg[%d]" +imx_phy_write_num(int phy, int configured) "write request to unconfigured phy %d (configured %d)" imx_phy_update_link(const char *s) "%s" imx_phy_reset(void) "" imx_fec_read_bd(uint64_t addr, int flags, int len, int data) "tx_bd 0x%"PRIx64" flags 0x%04x len %d data 0x%08x" @@ -446,3 +436,20 @@ npcm7xx_emc_received_packet(uint32_t len) "Received %u byte packet" npcm7xx_emc_rx_done(uint32_t crxdsa) "RX done, CRXDSA=0x%x" npcm7xx_emc_reg_read(int emc_num, uint32_t result, const char *name, int regno) "emc%d: 0x%x = reg[%s/%d]" npcm7xx_emc_reg_write(int emc_num, const char *name, int regno, uint32_t value) "emc%d: reg[%s/%d] = 0x%x" + +# dp8398x.c +dp8393x_raise_irq(int isr) "raise irq, isr is 0x%04x" +dp8393x_lower_irq(void) "lower irq" +dp8393x_load_cam(int idx, int cam0, int cam1, int cam2, int cam3, int cam4, int cam5) "load cam[%d] with 0x%02x0x%02x0x%02x0x%02x0x%02x0x%02x" +dp8393x_load_cam_done(int cen) "load cam done. 
cam enable mask 0x%04x" +dp8393x_read_rra_regs(int crba0, int crba1, int rbwc0, int rbwc1) "CRBA0/1: 0x%04x/0x%04x, RBWC0/1: 0x%04x/0x%04x" +dp8393x_transmit_packet(int ttda) "Transmit packet at 0x%"PRIx32 +dp8393x_transmit_txlen_error(int len) "tx_len is %d" +dp8393x_read(int reg, const char *name, int val, int size) "reg=0x%x [%s] val=0x%04x size=%d" +dp8393x_write(int reg, const char *name, int val, int size) "reg=0x%x [%s] val=0x%04x size=%d" +dp8393x_write_invalid(int reg) "writing to reg %d invalid" +dp8393x_write_invalid_dcr(const char *name) "writing to %s invalid" +dp8393x_receive_oversize(int size) "oversize packet, pkt_size is %d" +dp8393x_receive_not_netcard(void) "packet not for netcard" +dp8393x_receive_packet(int crba) "Receive packet at 0x%"PRIx32 +dp8393x_receive_write_status(int crba) "Write status at 0x%"PRIx32 diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c index a7f42526304..89d71cfb8e1 100644 --- a/hw/net/vhost_net-stub.c +++ b/hw/net/vhost_net-stub.c @@ -33,13 +33,13 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, - int total_queues) + int data_queue_pairs, int cvq) { return -ENOSYS; } void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, - int total_queues) + int data_queue_pairs, int cvq) { } diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 24d555e764a..30379d2ca41 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -22,6 +22,7 @@ #include "standard-headers/linux/vhost_types.h" #include "hw/virtio/virtio-net.h" #include "net/vhost_net.h" +#include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" @@ -45,6 +46,7 @@ static const int kernel_feature_bits[] = { VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_PACKED, + VIRTIO_NET_F_HASH_REPORT, VHOST_INVALID_FEATURE_BIT }; @@ -71,6 +73,8 @@ static const int user_feature_bits[] = { VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_PACKED, + VIRTIO_NET_F_RSS, + VIRTIO_NET_F_HASH_REPORT, /* This bit implies RARP isn't sent by QEMU out of band */ VIRTIO_NET_F_GUEST_ANNOUNCE, @@ -113,7 +117,7 @@ uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) int vhost_net_get_config(struct vhost_net *net, uint8_t *config, uint32_t config_len) { - return vhost_dev_get_config(&net->dev, config, config_len); + return vhost_dev_get_config(&net->dev, config, config_len, NULL); } int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, uint32_t offset, uint32_t size, uint32_t flags) @@ -154,15 +158,16 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; struct vhost_net *net = g_new0(struct vhost_net, 1); uint64_t features = 0; + Error *local_err = NULL; if (!options->net_backend) { fprintf(stderr, "vhost-net requires net backend to be setup\n"); goto fail; } net->nc = options->net_backend; + net->dev.nvqs = options->nvqs; net->dev.max_queues = 1; - net->dev.nvqs = 2; net->dev.vqs = net->vqs; if (backend_kernel) { @@ -184,8 +189,10 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) } r = vhost_dev_init(&net->dev, options->opaque, - options->backend_type, options->busyloop_timeout); + options->backend_type, options->busyloop_timeout, + &local_err); if (r < 0) { + error_report_err(local_err); goto fail; } if (backend_kernel) { @@ -224,9 +231,11 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) return NULL; } -static void vhost_net_set_vq_index(struct 
vhost_net *net, int vq_index) +static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index, + int vq_index_end) { net->dev.vq_index = vq_index; + net->dev.vq_index_end = vq_index_end; } static int vhost_net_start_one(struct vhost_net *net, @@ -235,9 +244,6 @@ static int vhost_net_start_one(struct vhost_net *net, struct vhost_vring_file file = { }; int r; - net->dev.nvqs = 2; - net->dev.vqs = net->vqs; - r = vhost_dev_enable_notifiers(&net->dev, dev); if (r < 0) { goto fail_notifiers; @@ -311,25 +317,37 @@ static void vhost_net_stop_one(struct vhost_net *net, } int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, - int total_queues) + int data_queue_pairs, int cvq) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); VirtioBusState *vbus = VIRTIO_BUS(qbus); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); + int total_notifiers = data_queue_pairs * 2 + cvq; + VirtIONet *n = VIRTIO_NET(dev); + int nvhosts = data_queue_pairs + cvq; struct vhost_net *net; - int r, e, i; + int r, e, i, index_end = data_queue_pairs * 2; NetClientState *peer; + if (cvq) { + index_end += 1; + } + if (!k->set_guest_notifiers) { error_report("binding does not support guest notifiers"); return -ENOSYS; } - for (i = 0; i < total_queues; i++) { + for (i = 0; i < nvhosts; i++) { + + if (i < data_queue_pairs) { + peer = qemu_get_peer(ncs, i); + } else { /* Control Virtqueue */ + peer = qemu_get_peer(ncs, n->max_queue_pairs); + } - peer = qemu_get_peer(ncs, i); net = get_vhost_net(peer); - vhost_net_set_vq_index(net, i * 2); + vhost_net_set_vq_index(net, i * 2, index_end); /* Suppress the masking guest notifiers on vhost user * because vhost user doesn't interrupt masking/unmasking @@ -340,14 +358,18 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, } } - r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true); + r = k->set_guest_notifiers(qbus->parent, total_notifiers, true); if (r < 0) { error_report("Error binding guest notifier: %d", -r); goto err; } - for (i = 0; i < total_queues; i++) { - peer = qemu_get_peer(ncs, i); + for (i = 0; i < nvhosts; i++) { + if (i < data_queue_pairs) { + peer = qemu_get_peer(ncs, i); + } else { + peer = qemu_get_peer(ncs, n->max_queue_pairs); + } r = vhost_net_start_one(get_vhost_net(peer), dev); if (r < 0) { @@ -371,7 +393,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, peer = qemu_get_peer(ncs , i); vhost_net_stop_one(get_vhost_net(peer), dev); } - e = k->set_guest_notifiers(qbus->parent, total_queues * 2, false); + e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); if (e < 0) { fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e); fflush(stderr); @@ -381,18 +403,27 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, } void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, - int total_queues) + int data_queue_pairs, int cvq) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); VirtioBusState *vbus = VIRTIO_BUS(qbus); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); + VirtIONet *n = VIRTIO_NET(dev); + NetClientState *peer; + int total_notifiers = data_queue_pairs * 2 + cvq; + int nvhosts = data_queue_pairs + cvq; int i, r; - for (i = 0; i < total_queues; i++) { - vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev); + for (i = 0; i < nvhosts; i++) { + if (i < data_queue_pairs) { + peer = qemu_get_peer(ncs, i); + } else { + peer = qemu_get_peer(ncs, n->max_queue_pairs); + } + vhost_net_stop_one(get_vhost_net(peer), dev); } - r = k->set_guest_notifiers(qbus->parent, total_queues * 2, 
false); + r = k->set_guest_notifiers(qbus->parent, total_notifiers, false); if (r < 0) { fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r); fflush(stderr); diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 760203309ba..f2014d5ea0b 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -54,7 +54,7 @@ #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 -/* for now, only allow larger queues; with virtio-1, guest can downsize */ +/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */ #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE @@ -89,7 +89,7 @@ VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \ VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) -static VirtIOFeature feature_sizes[] = { +static const VirtIOFeature feature_sizes[] = { {.flags = 1ULL << VIRTIO_NET_F_MAC, .end = endof(struct virtio_net_config, mac)}, {.flags = 1ULL << VIRTIO_NET_F_STATUS, @@ -131,7 +131,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) int ret = 0; memset(&netcfg, 0 , sizeof(struct virtio_net_config)); virtio_stw_p(vdev, &netcfg.status, n->status); - virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues); + virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs); virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); memcpy(netcfg.mac, n->mac, ETH_ALEN); virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); @@ -243,7 +243,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) { VirtIODevice *vdev = VIRTIO_DEVICE(n); NetClientState *nc = qemu_get_queue(n->nic); - int queues = n->multiqueue ? n->max_queues : 1; + int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; + int cvq = n->max_ncs - n->max_queue_pairs; if (!get_vhost_net(nc->peer)) { return; @@ -266,7 +267,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) /* Any packets outstanding? Purge them to avoid touching rings * when vhost is running. */ - for (i = 0; i < queues; i++) { + for (i = 0; i < queue_pairs; i++) { NetClientState *qnc = qemu_get_subqueue(n->nic, i); /* Purge both directions: TX and RX. */ @@ -285,14 +286,14 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) } n->vhost_started = 1; - r = vhost_net_start(vdev, n->nic->ncs, queues); + r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq); if (r < 0) { error_report("unable to start vhost net: %d: " "falling back on userspace virtio", -r); n->vhost_started = 0; } } else { - vhost_net_stop(vdev, n->nic->ncs, queues); + vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq); n->vhost_started = 0; } } @@ -309,11 +310,11 @@ static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev, } static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs, - int queues, bool enable) + int queue_pairs, bool enable) { int i; - for (i = 0; i < queues; i++) { + for (i = 0; i < queue_pairs; i++) { if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 && enable) { while (--i >= 0) { @@ -330,7 +331,7 @@ static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs, static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status) { VirtIODevice *vdev = VIRTIO_DEVICE(n); - int queues = n->multiqueue ? n->max_queues : 1; + int queue_pairs = n->multiqueue ? 
n->max_queue_pairs : 1; if (virtio_net_started(n, status)) { /* Before using the device, we tell the network backend about the @@ -339,14 +340,14 @@ static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status) * virtio-net code. */ n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs, - queues, true); + queue_pairs, true); } else if (virtio_net_started(n, vdev->status)) { /* After using the device, we need to reset the network backend to * the default (guest native endianness), otherwise the guest may * lose network connectivity if it is rebooted into a different * endianness. */ - virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false); + virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false); } } @@ -368,12 +369,12 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) virtio_net_vnet_endian_status(n, status); virtio_net_vhost_status(n, status); - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { NetClientState *ncs = qemu_get_subqueue(n->nic, i); bool queue_started; q = &n->vqs[i]; - if ((!n->multiqueue && i != 0) || i >= n->curr_queues) { + if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) { queue_status = 0; } else { queue_status = status; @@ -540,7 +541,7 @@ static void virtio_net_reset(VirtIODevice *vdev) n->nouni = 0; n->nobcast = 0; /* multiqueue is disabled by default */ - n->curr_queues = 1; + n->curr_queue_pairs = 1; timer_del(n->announce_timer.tm); n->announce_timer.round = 0; n->status &= ~VIRTIO_NET_S_ANNOUNCE; @@ -556,7 +557,7 @@ static void virtio_net_reset(VirtIODevice *vdev) memset(n->vlans, 0, MAX_VLAN >> 3); /* Flush any async TX */ - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { NetClientState *nc = qemu_get_subqueue(n->nic, i); if (nc->peer) { @@ -610,7 +611,7 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, sizeof(struct virtio_net_hdr); } - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { nc = qemu_get_subqueue(n->nic, i); if (peer_has_vnet_hdr(n) && @@ -655,7 +656,7 @@ static int peer_attach(VirtIONet *n, int index) return 0; } - if (n->max_queues == 1) { + if (n->max_queue_pairs == 1) { return 0; } @@ -681,7 +682,7 @@ static int peer_detach(VirtIONet *n, int index) return tap_disable(nc->peer); } -static void virtio_net_set_queues(VirtIONet *n) +static void virtio_net_set_queue_pairs(VirtIONet *n) { int i; int r; @@ -690,8 +691,8 @@ static void virtio_net_set_queues(VirtIONet *n) return; } - for (i = 0; i < n->max_queues; i++) { - if (i < n->curr_queues) { + for (i = 0; i < n->max_queue_pairs; i++) { + if (i < n->curr_queue_pairs) { r = peer_attach(n, i); assert(!r); } else { @@ -737,8 +738,9 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, return features; } - virtio_clear_feature(&features, VIRTIO_NET_F_RSS); - virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); + if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { + virtio_clear_feature(&features, VIRTIO_NET_F_RSS); + } features = vhost_net_get_features(get_vhost_net(nc->peer), features); vdev->backend_features = features; @@ -795,48 +797,34 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) typedef struct { VirtIONet *n; - char *id; -} FailoverId; + DeviceState *dev; +} FailoverDevice; /** - * Set the id of the failover primary device + * Set the failover primary device * * @opaque: FailoverId to setup * @opts: opts for device we are handling * 
@errp: returns an error if this function fails */ -static int failover_set_primary(void *opaque, QemuOpts *opts, Error **errp) +static int failover_set_primary(DeviceState *dev, void *opaque) { - FailoverId *fid = opaque; - const char *standby_id = qemu_opt_get(opts, "failover_pair_id"); + FailoverDevice *fdev = opaque; + PCIDevice *pci_dev = (PCIDevice *) + object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE); + + if (!pci_dev) { + return 0; + } - if (g_strcmp0(standby_id, fid->n->netclient_name) == 0) { - fid->id = g_strdup(opts->id); + if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) { + fdev->dev = dev; return 1; } return 0; } -/** - * Find the primary device id for this failover virtio-net - * - * @n: VirtIONet device - * @errp: returns an error if this function fails - */ -static char *failover_find_primary_device_id(VirtIONet *n) -{ - Error *err = NULL; - FailoverId fid; - - fid.n = n; - if (!qemu_opts_foreach(qemu_find_opts("device"), - failover_set_primary, &fid, &err)) { - return NULL; - } - return fid.id; -} - /** * Find the primary device for this failover virtio-net * @@ -845,39 +833,38 @@ static char *failover_find_primary_device_id(VirtIONet *n) */ static DeviceState *failover_find_primary_device(VirtIONet *n) { - char *id = failover_find_primary_device_id(n); - - if (!id) { - return NULL; - } + FailoverDevice fdev = { + .n = n, + }; - return qdev_find_recursive(sysbus_get_default(), id); + qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL, + NULL, NULL, &fdev); + return fdev.dev; } static void failover_add_primary(VirtIONet *n, Error **errp) { Error *err = NULL; - QemuOpts *opts; - char *id; DeviceState *dev = failover_find_primary_device(n); if (dev) { return; } - id = failover_find_primary_device_id(n); - if (!id) { + if (!n->primary_opts) { error_setg(errp, "Primary device not found"); error_append_hint(errp, "Virtio-net failover will not work. 
Make " "sure primary device has parameter" " failover_pair_id=%s\n", n->netclient_name); return; } - opts = qemu_opts_find(qemu_find_opts("device"), id); - g_assert(opts); /* cannot be NULL because id was found using opts list */ - dev = qdev_device_add(opts, &err); + + dev = qdev_device_add_from_qdict(n->primary_opts, + n->primary_opts_from_json, + &err); if (err) { - qemu_opts_del(opts); + qobject_unref(n->primary_opts); + n->primary_opts = NULL; } else { object_unref(OBJECT(dev)); } @@ -919,7 +906,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) virtio_net_apply_guest_offloads(n); } - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { NetClientState *nc = qemu_get_subqueue(n->nic, i); if (!get_vhost_net(nc->peer)) { @@ -1163,12 +1150,79 @@ static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, } } +static void virtio_net_detach_epbf_rss(VirtIONet *n); + static void virtio_net_disable_rss(VirtIONet *n) { if (n->rss_data.enabled) { trace_virtio_net_rss_disable(); } n->rss_data.enabled = false; + + virtio_net_detach_epbf_rss(n); +} + +static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd) +{ + NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0); + if (nc == NULL || nc->info->set_steering_ebpf == NULL) { + return false; + } + + return nc->info->set_steering_ebpf(nc, prog_fd); +} + +static void rss_data_to_rss_config(struct VirtioNetRssData *data, + struct EBPFRSSConfig *config) +{ + config->redirect = data->redirect; + config->populate_hash = data->populate_hash; + config->hash_types = data->hash_types; + config->indirections_len = data->indirections_len; + config->default_queue = data->default_queue; +} + +static bool virtio_net_attach_epbf_rss(VirtIONet *n) +{ + struct EBPFRSSConfig config = {}; + + if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { + return false; + } + + rss_data_to_rss_config(&n->rss_data, &config); + + if (!ebpf_rss_set_all(&n->ebpf_rss, &config, + n->rss_data.indirections_table, n->rss_data.key)) { + return false; + } + + if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) { + return false; + } + + return true; +} + +static void virtio_net_detach_epbf_rss(VirtIONet *n) +{ + virtio_net_attach_ebpf_to_backend(n->nic, -1); +} + +static bool virtio_net_load_ebpf(VirtIONet *n) +{ + if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) { + /* backend does't support steering ebpf */ + return false; + } + + return ebpf_rss_load(&n->ebpf_rss); +} + +static void virtio_net_unload_ebpf(VirtIONet *n) +{ + virtio_net_attach_ebpf_to_backend(n->nic, -1); + ebpf_rss_unload(&n->ebpf_rss); } static uint16_t virtio_net_handle_rss(VirtIONet *n, @@ -1179,7 +1233,7 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, VirtIODevice *vdev = VIRTIO_DEVICE(n); struct virtio_net_rss_config cfg; size_t s, offset = 0, size_get; - uint16_t queues, i; + uint16_t queue_pairs, i; struct { uint16_t us; uint8_t b; @@ -1221,7 +1275,7 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, } n->rss_data.default_queue = do_rss ? 
virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0; - if (n->rss_data.default_queue >= n->max_queues) { + if (n->rss_data.default_queue >= n->max_queue_pairs) { err_msg = "Invalid default queue"; err_value = n->rss_data.default_queue; goto error; @@ -1250,14 +1304,14 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, size_get = sizeof(temp); s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get); if (s != size_get) { - err_msg = "Can't get queues"; + err_msg = "Can't get queue_pairs"; err_value = (uint32_t)s; goto error; } - queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues; - if (queues == 0 || queues > n->max_queues) { - err_msg = "Invalid number of queues"; - err_value = queues; + queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs; + if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) { + err_msg = "Invalid number of queue_pairs"; + err_value = queue_pairs; goto error; } if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) { @@ -1272,7 +1326,7 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, } if (!temp.b && !n->rss_data.hash_types) { virtio_net_disable_rss(n); - return queues; + return queue_pairs; } offset += size_get; size_get = temp.b; @@ -1283,10 +1337,29 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, goto error; } n->rss_data.enabled = true; + + if (!n->rss_data.populate_hash) { + if (!virtio_net_attach_epbf_rss(n)) { + /* EBPF must be loaded for vhost */ + if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { + warn_report("Can't load eBPF RSS for vhost"); + goto error; + } + /* fallback to software RSS */ + warn_report("Can't load eBPF RSS - fallback to software RSS"); + n->rss_data.enabled_software_rss = true; + } + } else { + /* use software RSS for hash populating */ + /* and detach eBPF if was loaded before */ + virtio_net_detach_epbf_rss(n); + n->rss_data.enabled_software_rss = true; + } + trace_virtio_net_rss_enable(n->rss_data.hash_types, n->rss_data.indirections_len, temp.b); - return queues; + return queue_pairs; error: trace_virtio_net_rss_error(err_msg, err_value); virtio_net_disable_rss(n); @@ -1297,15 +1370,15 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, struct iovec *iov, unsigned int iov_cnt) { VirtIODevice *vdev = VIRTIO_DEVICE(n); - uint16_t queues; + uint16_t queue_pairs; virtio_net_disable_rss(n); if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { - queues = virtio_net_handle_rss(n, iov, iov_cnt, false); - return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR; + queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false); + return queue_pairs ? 
VIRTIO_NET_OK : VIRTIO_NET_ERR; } if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { - queues = virtio_net_handle_rss(n, iov, iov_cnt, true); + queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true); } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { struct virtio_net_ctrl_mq mq; size_t s; @@ -1316,24 +1389,24 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, if (s != sizeof(mq)) { return VIRTIO_NET_ERR; } - queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs); + queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs); } else { return VIRTIO_NET_ERR; } - if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || - queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || - queues > n->max_queues || + if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || + queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || + queue_pairs > n->max_queue_pairs || !n->multiqueue) { return VIRTIO_NET_ERR; } - n->curr_queues = queues; - /* stop the backend before changing the number of queues to avoid handling a + n->curr_queue_pairs = queue_pairs; + /* stop the backend before changing the number of queue_pairs to avoid handling a * disabled queue */ virtio_net_set_status(vdev, vdev->status); - virtio_net_set_queues(n); + virtio_net_set_queue_pairs(n); return VIRTIO_NET_OK; } @@ -1411,7 +1484,7 @@ static bool virtio_net_can_receive(NetClientState *nc) return false; } - if (nc->queue_index >= n->curr_queues) { + if (nc->queue_index >= n->curr_queue_pairs) { return false; } @@ -1671,7 +1744,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, return -1; } - if (!no_rss && n->rss_data.enabled) { + if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) { int index = virtio_net_process_rss(nc, buf, size); if (index >= 0) { NetClientState *nc2 = qemu_get_subqueue(n->nic, index); @@ -2691,11 +2764,11 @@ static void virtio_net_del_queue(VirtIONet *n, int index) virtio_del_queue(vdev, index * 2 + 1); } -static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues) +static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) { VirtIODevice *vdev = VIRTIO_DEVICE(n); int old_num_queues = virtio_get_num_queues(vdev); - int new_num_queues = new_max_queues * 2 + 1; + int new_num_queues = new_max_queue_pairs * 2 + 1; int i; assert(old_num_queues >= 3); @@ -2728,12 +2801,12 @@ static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues) static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) { - int max = multiqueue ? n->max_queues : 1; + int max = multiqueue ? 
n->max_queue_pairs : 1; n->multiqueue = multiqueue; - virtio_net_change_num_queues(n, max); + virtio_net_change_num_queue_pairs(n, max); - virtio_net_set_queues(n); + virtio_net_set_queue_pairs(n); } static int virtio_net_post_load_device(void *opaque, int version_id) @@ -2766,7 +2839,7 @@ static int virtio_net_post_load_device(void *opaque, int version_id) */ n->saved_guest_offloads = n->curr_guest_offloads; - virtio_net_set_queues(n); + virtio_net_set_queue_pairs(n); /* Find the first multicast entry in the saved MAC filter */ for (i = 0; i < n->mac_table.in_use; i++) { @@ -2779,7 +2852,7 @@ static int virtio_net_post_load_device(void *opaque, int version_id) /* nc.link_down can't be migrated, so infer link_down according * to link status bit in n->status */ link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { qemu_get_subqueue(n->nic, i)->link_down = link_down; } @@ -2797,6 +2870,19 @@ static int virtio_net_post_load_device(void *opaque, int version_id) } if (n->rss_data.enabled) { + n->rss_data.enabled_software_rss = n->rss_data.populate_hash; + if (!n->rss_data.populate_hash) { + if (!virtio_net_attach_epbf_rss(n)) { + if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { + warn_report("Can't post-load eBPF RSS for vhost"); + } else { + warn_report("Can't post-load eBPF RSS - " + "fallback to software RSS"); + n->rss_data.enabled_software_rss = true; + } + } + } + trace_virtio_net_rss_enable(n->rss_data.hash_types, n->rss_data.indirections_len, sizeof(n->rss_data.key)); @@ -2831,9 +2917,9 @@ static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { }, }; -static bool max_queues_gt_1(void *opaque, int version_id) +static bool max_queue_pairs_gt_1(void *opaque, int version_id) { - return VIRTIO_NET(opaque)->max_queues > 1; + return VIRTIO_NET(opaque)->max_queue_pairs > 1; } static bool has_ctrl_guest_offloads(void *opaque, int version_id) @@ -2858,13 +2944,13 @@ static bool mac_table_doesnt_fit(void *opaque, int version_id) struct VirtIONetMigTmp { VirtIONet *parent; VirtIONetQueue *vqs_1; - uint16_t curr_queues_1; + uint16_t curr_queue_pairs_1; uint8_t has_ufo; uint32_t has_vnet_hdr; }; /* The 2nd and subsequent tx_waiting flags are loaded later than - * the 1st entry in the queues and only if there's more than one + * the 1st entry in the queue_pairs and only if there's more than one * entry. We use the tmp mechanism to calculate a temporary * pointer and count and also validate the count. 
*/ @@ -2874,9 +2960,9 @@ static int virtio_net_tx_waiting_pre_save(void *opaque) struct VirtIONetMigTmp *tmp = opaque; tmp->vqs_1 = tmp->parent->vqs + 1; - tmp->curr_queues_1 = tmp->parent->curr_queues - 1; - if (tmp->parent->curr_queues == 0) { - tmp->curr_queues_1 = 0; + tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; + if (tmp->parent->curr_queue_pairs == 0) { + tmp->curr_queue_pairs_1 = 0; } return 0; @@ -2889,9 +2975,9 @@ static int virtio_net_tx_waiting_pre_load(void *opaque) /* Reuse the pointer setup from save */ virtio_net_tx_waiting_pre_save(opaque); - if (tmp->parent->curr_queues > tmp->parent->max_queues) { - error_report("virtio-net: curr_queues %x > max_queues %x", - tmp->parent->curr_queues, tmp->parent->max_queues); + if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { + error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", + tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); return -EINVAL; } @@ -2905,7 +2991,7 @@ static const VMStateDescription vmstate_virtio_net_tx_waiting = { .pre_save = virtio_net_tx_waiting_pre_save, .fields = (VMStateField[]) { VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, - curr_queues_1, + curr_queue_pairs_1, vmstate_virtio_net_queue_tx_waiting, struct VirtIONetQueue), VMSTATE_END_OF_LIST() @@ -3047,9 +3133,9 @@ static const VMStateDescription vmstate_virtio_net_device = { VMSTATE_UINT8(nobcast, VirtIONet), VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, vmstate_virtio_net_has_ufo), - VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0, + VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, vmstate_info_uint16_equal, uint16_t), - VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1), + VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, vmstate_virtio_net_tx_waiting), VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, @@ -3159,6 +3245,7 @@ static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, } hotplug_handler_plug(hotplug_ctrl, dev, &err); } + pdev->partially_hotplugged = false; out: error_propagate(errp, err); @@ -3203,7 +3290,9 @@ static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) } static bool failover_hide_primary_device(DeviceListener *listener, - QemuOpts *device_opts) + const QDict *device_opts, + bool from_json, + Error **errp) { VirtIONet *n = container_of(listener, VirtIONet, primary_listener); const char *standby_id; @@ -3211,11 +3300,42 @@ static bool failover_hide_primary_device(DeviceListener *listener, if (!device_opts) { return false; } - standby_id = qemu_opt_get(device_opts, "failover_pair_id"); + + if (!qdict_haskey(device_opts, "failover_pair_id")) { + return false; + } + + if (!qdict_haskey(device_opts, "id")) { + error_setg(errp, "Device with failover_pair_id needs to have id"); + return false; + } + + standby_id = qdict_get_str(device_opts, "failover_pair_id"); if (g_strcmp0(standby_id, n->netclient_name) != 0) { return false; } + /* + * The hide helper can be called several times for a given device. + * Check there is only one primary for a virtio-net device but + * don't duplicate the qdict several times if it's called for the same + * device. 
+ */ + if (n->primary_opts) { + const char *old, *new; + /* devices with failover_pair_id always have an id */ + old = qdict_get_str(n->primary_opts, "id"); + new = qdict_get_str(device_opts, "id"); + if (strcmp(old, new) != 0) { + error_setg(errp, "Cannot attach more than one primary device to " + "'%s': '%s' and '%s'", n->netclient_name, old, new); + return false; + } + } else { + n->primary_opts = qdict_clone_shallow(device_opts); + n->primary_opts_from_json = from_json; + } + /* failover_primary_hidden is set during feature negotiation */ return qatomic_read(&n->failover_primary_hidden); } @@ -3292,16 +3412,30 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) return; } - n->max_queues = MAX(n->nic_conf.peers.queues, 1); - if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) { - error_setg(errp, "Invalid number of queues (= %" PRIu32 "), " + n->max_ncs = MAX(n->nic_conf.peers.queues, 1); + + /* + * Figure out the datapath queue pairs since the backend could + * provide control queue via peers as well. + */ + if (n->nic_conf.peers.queues) { + for (i = 0; i < n->max_ncs; i++) { + if (n->nic_conf.peers.ncs[i]->is_datapath) { + ++n->max_queue_pairs; + } + } + } + n->max_queue_pairs = MAX(n->max_queue_pairs, 1); + + if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) { + error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), " "must be a positive integer less than %d.", - n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2); + n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2); virtio_cleanup(vdev); return; } - n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues); - n->curr_queues = 1; + n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queue_pairs); + n->curr_queue_pairs = 1; n->tx_timeout = n->net_conf.txtimer; if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") @@ -3315,7 +3449,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), n->net_conf.tx_queue_size); - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { virtio_net_add_queue(n, i); } @@ -3339,13 +3473,13 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) object_get_typename(OBJECT(dev)), dev->id, n); } - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { n->nic->ncs[i].do_not_pad = true; } peer_test_vnet_hdr(n); if (peer_has_vnet_hdr(n)) { - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true); } n->host_hdr_len = sizeof(struct virtio_net_hdr); @@ -3377,13 +3511,21 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) n->qdev = dev; net_rx_pkt_init(&n->rx_pkt, false); + + if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { + virtio_net_load_ebpf(n); + } } static void virtio_net_device_unrealize(DeviceState *dev) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIONet *n = VIRTIO_NET(dev); - int i, max_queues; + int i, max_queue_pairs; + + if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { + virtio_net_unload_ebpf(n); + } /* This will stop vhost backend if appropriate. 
*/ virtio_net_set_status(vdev, 0); @@ -3397,15 +3539,19 @@ static void virtio_net_device_unrealize(DeviceState *dev) g_free(n->vlans); if (n->failover) { + qobject_unref(n->primary_opts); device_listener_unregister(&n->primary_listener); + remove_migration_state_change_notifier(&n->migration_state); + } else { + assert(n->primary_opts == NULL); } - max_queues = n->multiqueue ? n->max_queues : 1; - for (i = 0; i < max_queues; i++) { + max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; + for (i = 0; i < max_queue_pairs; i++) { virtio_net_del_queue(n, i); } /* delete also control vq */ - virtio_del_queue(vdev, max_queues * 2); + virtio_del_queue(vdev, max_queue_pairs * 2); qemu_announce_timer_del(&n->announce_timer, false); g_free(n->vqs); qemu_del_nic(n->nic); @@ -3427,6 +3573,8 @@ static void virtio_net_instance_init(Object *obj) device_add_bootindex_property(obj, &n->nic_conf.bootindex, "bootindex", "/ethernet-phy@0", DEVICE(n)); + + ebpf_rss_init(&n->ebpf_rss); } static int virtio_net_pre_save(void *opaque) diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index eff299f6290..f65af4e9ef2 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -23,6 +23,7 @@ #include "net/checksum.h" #include "sysemu/sysemu.h" #include "qemu/bswap.h" +#include "qemu/log.h" #include "qemu/module.h" #include "hw/pci/msix.h" #include "hw/pci/msi.h" @@ -1093,8 +1094,12 @@ vmxnet3_io_bar0_write(void *opaque, hwaddr addr, int tx_queue_idx = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD, VMXNET3_REG_ALIGN); - assert(tx_queue_idx <= s->txq_num); - vmxnet3_process_tx_queue(s, tx_queue_idx); + if (tx_queue_idx <= s->txq_num) { + vmxnet3_process_tx_queue(s, tx_queue_idx); + } else { + qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Illegal TX queue %d/%d\n", + tx_queue_idx, s->txq_num); + } return; } @@ -1376,7 +1381,7 @@ static void vmxnet3_validate_interrupts(VMXNET3State *s) } } -static void vmxnet3_validate_queues(VMXNET3State *s) +static bool vmxnet3_validate_queues(VMXNET3State *s) { /* * txq_num and rxq_num are total number of queues @@ -1385,12 +1390,18 @@ static void vmxnet3_validate_queues(VMXNET3State *s) */ if (s->txq_num > VMXNET3_DEVICE_MAX_TX_QUEUES) { - hw_error("Bad TX queues number: %d\n", s->txq_num); + qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Bad TX queues number: %d\n", + s->txq_num); + return false; } if (s->rxq_num > VMXNET3_DEVICE_MAX_RX_QUEUES) { - hw_error("Bad RX queues number: %d\n", s->rxq_num); + qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Bad RX queues number: %d\n", + s->rxq_num); + return false; } + + return true; } static void vmxnet3_activate_device(VMXNET3State *s) @@ -1414,12 +1425,23 @@ static void vmxnet3_activate_device(VMXNET3State *s) return; } + s->txq_num = + VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numTxQueues); + s->rxq_num = + VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numRxQueues); + + VMW_CFPRN("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num); + if (!vmxnet3_validate_queues(s)) { + return; + } + vmxnet3_adjust_by_guest_type(s); vmxnet3_update_features(s); vmxnet3_update_pm_state(s); vmxnet3_setup_rx_filtering(s); /* Cache fields from shared memory */ s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu); + assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu < VMXNET3_MAX_MTU); VMW_CFPRN("MTU is %u", s->mtu); s->max_rx_frags = @@ -1440,14 +1462,6 @@ static void vmxnet3_activate_device(VMXNET3State *s) VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.autoMask); VMW_CFPRN("Automatic interrupt masking is %d", 
(int)s->auto_int_masking); - s->txq_num = - VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numTxQueues); - s->rxq_num = - VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numRxQueues); - - VMW_CFPRN("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num); - vmxnet3_validate_queues(s); - qdescr_table_pa = VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.misc.queueDescPA); VMW_CFPRN("TX queues descriptors table is at 0x%" PRIx64, qdescr_table_pa); @@ -1473,6 +1487,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* Read rings memory locations for TX queues */ pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA); size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize); + if (size > VMXNET3_TX_RING_MAX_SIZE) { + size = VMXNET3_TX_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size, sizeof(struct Vmxnet3_TxDesc), false); @@ -1483,6 +1500,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* TXC ring */ pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA); size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize); + if (size > VMXNET3_TC_RING_MAX_SIZE) { + size = VMXNET3_TC_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size, sizeof(struct Vmxnet3_TxCompDesc), true); VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring); @@ -1524,6 +1544,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* RX rings */ pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]); size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]); + if (size > VMXNET3_RX_RING_MAX_SIZE) { + size = VMXNET3_RX_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size, sizeof(struct Vmxnet3_RxDesc), false); VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d", @@ -1533,6 +1556,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* RXC ring */ pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA); size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize); + if (size > VMXNET3_RC_RING_MAX_SIZE) { + size = VMXNET3_RC_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size, sizeof(struct Vmxnet3_RxCompDesc), true); VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size); @@ -2399,7 +2425,9 @@ static int vmxnet3_post_load(void *opaque, int version_id) } } - vmxnet3_validate_queues(s); + if (!vmxnet3_validate_queues(s)) { + return -1; + } vmxnet3_validate_interrupts(s); return 0; diff --git a/hw/net/xgmac.c b/hw/net/xgmac.c index 00859a7d508..0ab6ae91aa1 100644 --- a/hw/net/xgmac.c +++ b/hw/net/xgmac.c @@ -29,7 +29,6 @@ #include "hw/qdev-properties.h" #include "hw/sysbus.h" #include "migration/vmstate.h" -#include "qemu/log.h" #include "qemu/module.h" #include "net/net.h" #include "qom/object.h" diff --git a/hw/nios2/10m50_devboard.c b/hw/nios2/10m50_devboard.c index a14fc31e86b..3d1205b8bd7 100644 --- a/hw/nios2/10m50_devboard.c +++ b/hw/nios2/10m50_devboard.c @@ -24,7 +24,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/sysbus.h" #include "hw/char/serial.h" diff --git a/hw/nios2/boot.c b/hw/nios2/boot.c index d9969ac1482..5b3e4efed5b 100644 --- a/hw/nios2/boot.c +++ b/hw/nios2/boot.c @@ -32,13 +32,11 @@ #include "qemu/units.h" #include "qemu-common.h" #include "qemu/datadir.h" -#include "cpu.h" #include "qemu/option.h" #include "qemu/config-file.h" #include "qemu/error-report.h" #include "sysemu/device_tree.h" #include "sysemu/reset.h" -#include 
"sysemu/sysemu.h" #include "hw/boards.h" #include "hw/loader.h" #include "elf.h" diff --git a/hw/nios2/generic_nommu.c b/hw/nios2/generic_nommu.c index 19899e2c1ef..fbc18dbd04c 100644 --- a/hw/nios2/generic_nommu.c +++ b/hw/nios2/generic_nommu.c @@ -29,9 +29,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" -#include "cpu.h" -#include "hw/sysbus.h" #include "hw/char/serial.h" #include "hw/boards.h" #include "exec/memory.h" diff --git a/hw/nubus/mac-nubus-bridge.c b/hw/nubus/mac-nubus-bridge.c index 7c329300b82..a0da5a8b2fa 100644 --- a/hw/nubus/mac-nubus-bridge.c +++ b/hw/nubus/mac-nubus-bridge.c @@ -1,5 +1,7 @@ /* - * Copyright (c) 2013-2018 Laurent Vivier + * QEMU Macintosh Nubus + * + * Copyright (c) 2013-2018 Laurent Vivier * * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. @@ -13,13 +15,29 @@ static void mac_nubus_bridge_init(Object *obj) { - MacNubusState *s = MAC_NUBUS_BRIDGE(obj); + MacNubusBridge *s = MAC_NUBUS_BRIDGE(obj); + NubusBridge *nb = NUBUS_BRIDGE(obj); SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - - s->bus = NUBUS_BUS(qbus_create(TYPE_NUBUS_BUS, DEVICE(s), NULL)); - - sysbus_init_mmio(sbd, &s->bus->super_slot_io); - sysbus_init_mmio(sbd, &s->bus->slot_io); + NubusBus *bus = &nb->bus; + + /* Macintosh only has slots 0x9 to 0xe available */ + bus->slot_available_mask = MAKE_64BIT_MASK(MAC_NUBUS_FIRST_SLOT, + MAC_NUBUS_SLOT_NB); + + /* Aliases for slots 0x9 to 0xe */ + memory_region_init_alias(&s->super_slot_alias, obj, "super-slot-alias", + &bus->nubus_mr, + MAC_NUBUS_FIRST_SLOT * NUBUS_SUPER_SLOT_SIZE, + MAC_NUBUS_SLOT_NB * NUBUS_SUPER_SLOT_SIZE); + + memory_region_init_alias(&s->slot_alias, obj, "slot-alias", + &bus->nubus_mr, + NUBUS_SLOT_BASE + + MAC_NUBUS_FIRST_SLOT * NUBUS_SLOT_SIZE, + MAC_NUBUS_SLOT_NB * NUBUS_SLOT_SIZE); + + sysbus_init_mmio(sbd, &s->super_slot_alias); + sysbus_init_mmio(sbd, &s->slot_alias); } static void mac_nubus_bridge_class_init(ObjectClass *klass, void *data) @@ -33,7 +51,7 @@ static const TypeInfo mac_nubus_bridge_info = { .name = TYPE_MAC_NUBUS_BRIDGE, .parent = TYPE_NUBUS_BRIDGE, .instance_init = mac_nubus_bridge_init, - .instance_size = sizeof(MacNubusState), + .instance_size = sizeof(MacNubusBridge), .class_init = mac_nubus_bridge_class_init, }; diff --git a/hw/nubus/nubus-bridge.c b/hw/nubus/nubus-bridge.c index cd8c6a91eb5..a42c86080f2 100644 --- a/hw/nubus/nubus-bridge.c +++ b/hw/nubus/nubus-bridge.c @@ -1,5 +1,5 @@ /* - * QEMU Macintosh Nubus + * QEMU Nubus * * Copyright (c) 2013-2018 Laurent Vivier * @@ -12,17 +12,36 @@ #include "hw/sysbus.h" #include "hw/nubus/nubus.h" + +static void nubus_bridge_init(Object *obj) +{ + NubusBridge *s = NUBUS_BRIDGE(obj); + NubusBus *bus = &s->bus; + + qbus_init(bus, sizeof(s->bus), TYPE_NUBUS_BUS, DEVICE(s), NULL); + + qdev_init_gpio_out(DEVICE(s), bus->irqs, NUBUS_IRQS); +} + +static Property nubus_bridge_properties[] = { + DEFINE_PROP_UINT16("slot-available-mask", NubusBridge, + bus.slot_available_mask, 0xffff), + DEFINE_PROP_END_OF_LIST() +}; + static void nubus_bridge_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->fw_name = "nubus"; + device_class_set_props(dc, nubus_bridge_properties); } static const TypeInfo nubus_bridge_info = { .name = TYPE_NUBUS_BRIDGE, .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(SysBusDevice), + .instance_init = nubus_bridge_init, + .instance_size = sizeof(NubusBridge), .class_init = nubus_bridge_class_init, }; diff 
--git a/hw/nubus/nubus-bus.c b/hw/nubus/nubus-bus.c index 942a6d5342d..07c279bde5c 100644 --- a/hw/nubus/nubus-bus.c +++ b/hw/nubus/nubus-bus.c @@ -8,10 +8,18 @@ * */ +/* + * References: + * Nubus Specification (TI) + * http://www.bitsavers.org/pdf/ti/nubus/2242825-0001_NuBus_Spec1983.pdf + * + * Designing Cards and Drivers for the Macintosh Family (Apple) + */ + #include "qemu/osdep.h" #include "hw/nubus/nubus.h" -#include "hw/sysbus.h" #include "qapi/error.h" +#include "trace.h" static NubusBus *nubus_find(void) @@ -20,72 +28,138 @@ static NubusBus *nubus_find(void) return NUBUS_BUS(object_resolve_path_type("", TYPE_NUBUS_BUS, NULL)); } -static void nubus_slot_write(void *opaque, hwaddr addr, uint64_t val, - unsigned int size) +static MemTxResult nubus_slot_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size, MemTxAttrs attrs) { - /* read only */ + trace_nubus_slot_write(addr, val, size); + return MEMTX_DECODE_ERROR; } - -static uint64_t nubus_slot_read(void *opaque, hwaddr addr, - unsigned int size) +static MemTxResult nubus_slot_read(void *opaque, hwaddr addr, uint64_t *data, + unsigned size, MemTxAttrs attrs) { - return 0; + trace_nubus_slot_read(addr, size); + return MEMTX_DECODE_ERROR; } static const MemoryRegionOps nubus_slot_ops = { - .read = nubus_slot_read, - .write = nubus_slot_write, + .read_with_attrs = nubus_slot_read, + .write_with_attrs = nubus_slot_write, .endianness = DEVICE_BIG_ENDIAN, .valid = { .min_access_size = 1, - .max_access_size = 1, + .max_access_size = 4, }, }; -static void nubus_super_slot_write(void *opaque, hwaddr addr, uint64_t val, - unsigned int size) +static MemTxResult nubus_super_slot_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size, + MemTxAttrs attrs) { - /* read only */ + trace_nubus_super_slot_write(addr, val, size); + return MEMTX_DECODE_ERROR; } -static uint64_t nubus_super_slot_read(void *opaque, hwaddr addr, - unsigned int size) +static MemTxResult nubus_super_slot_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) { - return 0; + trace_nubus_super_slot_read(addr, size); + return MEMTX_DECODE_ERROR; } static const MemoryRegionOps nubus_super_slot_ops = { - .read = nubus_super_slot_read, - .write = nubus_super_slot_write, + .read_with_attrs = nubus_super_slot_read, + .write_with_attrs = nubus_super_slot_write, .endianness = DEVICE_BIG_ENDIAN, .valid = { .min_access_size = 1, - .max_access_size = 1, + .max_access_size = 4, }, }; +static void nubus_unrealize(BusState *bus) +{ + NubusBus *nubus = NUBUS_BUS(bus); + + address_space_destroy(&nubus->nubus_as); +} + static void nubus_realize(BusState *bus, Error **errp) { + NubusBus *nubus = NUBUS_BUS(bus); + if (!nubus_find()) { error_setg(errp, "at most one %s device is permitted", TYPE_NUBUS_BUS); return; } + + address_space_init(&nubus->nubus_as, &nubus->nubus_mr, "nubus"); } static void nubus_init(Object *obj) { NubusBus *nubus = NUBUS_BUS(obj); + memory_region_init(&nubus->nubus_mr, obj, "nubus", 0x100000000); + memory_region_init_io(&nubus->super_slot_io, obj, &nubus_super_slot_ops, nubus, "nubus-super-slots", - NUBUS_SUPER_SLOT_NB * NUBUS_SUPER_SLOT_SIZE); + (NUBUS_SUPER_SLOT_NB + 1) * NUBUS_SUPER_SLOT_SIZE); + memory_region_add_subregion(&nubus->nubus_mr, 0x0, &nubus->super_slot_io); memory_region_init_io(&nubus->slot_io, obj, &nubus_slot_ops, nubus, "nubus-slots", NUBUS_SLOT_NB * NUBUS_SLOT_SIZE); + memory_region_add_subregion(&nubus->nubus_mr, + (NUBUS_SUPER_SLOT_NB + 1) * + NUBUS_SUPER_SLOT_SIZE, &nubus->slot_io); + + 
nubus->slot_available_mask = MAKE_64BIT_MASK(NUBUS_FIRST_SLOT, + NUBUS_SLOT_NB); +} + +static char *nubus_get_dev_path(DeviceState *dev) +{ + NubusDevice *nd = NUBUS_DEVICE(dev); + BusState *bus = qdev_get_parent_bus(dev); + char *p = qdev_get_dev_path(bus->parent); + + if (p) { + char *ret = g_strdup_printf("%s/%s/%02x", p, bus->name, nd->slot); + g_free(p); + return ret; + } else { + return g_strdup_printf("%s/%02x", bus->name, nd->slot); + } +} + +static bool nubus_check_address(BusState *bus, DeviceState *dev, Error **errp) +{ + NubusDevice *nd = NUBUS_DEVICE(dev); + NubusBus *nubus = NUBUS_BUS(bus); + + if (nd->slot == -1) { + /* No slot specified, find first available free slot */ + int s = ctz32(nubus->slot_available_mask); + if (s != 32) { + nd->slot = s; + } else { + error_setg(errp, "Cannot register nubus card, no free slot " + "available"); + return false; + } + } else { + /* Slot specified, make sure the slot is available */ + if (!(nubus->slot_available_mask & BIT(nd->slot))) { + error_setg(errp, "Cannot register nubus card, slot %d is " + "unavailable or already occupied", nd->slot); + return false; + } + } - nubus->current_slot = NUBUS_FIRST_SLOT; + nubus->slot_available_mask &= ~BIT(nd->slot); + return true; } static void nubus_class_init(ObjectClass *oc, void *data) @@ -93,6 +167,9 @@ static void nubus_class_init(ObjectClass *oc, void *data) BusClass *bc = BUS_CLASS(oc); bc->realize = nubus_realize; + bc->unrealize = nubus_unrealize; + bc->check_address = nubus_check_address; + bc->get_dev_path = nubus_get_dev_path; } static const TypeInfo nubus_bus_info = { diff --git a/hw/nubus/nubus-device.c b/hw/nubus/nubus-device.c index ffe78a88231..0f1852f671e 100644 --- a/hw/nubus/nubus-device.c +++ b/hw/nubus/nubus-device.c @@ -9,194 +9,99 @@ */ #include "qemu/osdep.h" +#include "qemu/datadir.h" +#include "hw/irq.h" +#include "hw/loader.h" #include "hw/nubus/nubus.h" #include "qapi/error.h" +#include "qemu/error-report.h" -/* The Format Block Structure */ - -#define FBLOCK_DIRECTORY_OFFSET 0 -#define FBLOCK_LENGTH 4 -#define FBLOCK_CRC 8 -#define FBLOCK_REVISION_LEVEL 12 -#define FBLOCK_FORMAT 13 -#define FBLOCK_TEST_PATTERN 14 -#define FBLOCK_RESERVED 18 -#define FBLOCK_BYTE_LANES 19 - -#define FBLOCK_SIZE 20 -#define FBLOCK_PATTERN_VAL 0x5a932bc7 - -static uint64_t nubus_fblock_read(void *opaque, hwaddr addr, unsigned int size) -{ - NubusDevice *dev = opaque; - uint64_t val; - -#define BYTE(v, b) (((v) >> (24 - 8 * (b))) & 0xff) - switch (addr) { - case FBLOCK_BYTE_LANES: - val = dev->byte_lanes; - val |= (val ^ 0xf) << 4; - break; - case FBLOCK_RESERVED: - val = 0x00; - break; - case FBLOCK_TEST_PATTERN...FBLOCK_TEST_PATTERN + 3: - val = BYTE(FBLOCK_PATTERN_VAL, addr - FBLOCK_TEST_PATTERN); - break; - case FBLOCK_FORMAT: - val = dev->rom_format; - break; - case FBLOCK_REVISION_LEVEL: - val = dev->rom_rev; - break; - case FBLOCK_CRC...FBLOCK_CRC + 3: - val = BYTE(dev->rom_crc, addr - FBLOCK_CRC); - break; - case FBLOCK_LENGTH...FBLOCK_LENGTH + 3: - val = BYTE(dev->rom_length, addr - FBLOCK_LENGTH); - break; - case FBLOCK_DIRECTORY_OFFSET...FBLOCK_DIRECTORY_OFFSET + 3: - val = BYTE(dev->directory_offset, addr - FBLOCK_DIRECTORY_OFFSET); - break; - default: - val = 0; - break; - } - return val; -} - -static void nubus_fblock_write(void *opaque, hwaddr addr, uint64_t val, - unsigned int size) -{ - /* read only */ -} - -static const MemoryRegionOps nubus_format_block_ops = { - .read = nubus_fblock_read, - .write = nubus_fblock_write, - .endianness = DEVICE_BIG_ENDIAN, - .valid = { - 
.min_access_size = 1, - .max_access_size = 1, - } -}; - -static void nubus_register_format_block(NubusDevice *dev) -{ - char *fblock_name; - - fblock_name = g_strdup_printf("nubus-slot-%d-format-block", - dev->slot_nb); - - hwaddr fblock_offset = memory_region_size(&dev->slot_mem) - FBLOCK_SIZE; - memory_region_init_io(&dev->fblock_io, NULL, &nubus_format_block_ops, - dev, fblock_name, FBLOCK_SIZE); - memory_region_add_subregion(&dev->slot_mem, fblock_offset, - &dev->fblock_io); - - g_free(fblock_name); -} - -static void mac_nubus_rom_write(void *opaque, hwaddr addr, uint64_t val, - unsigned int size) -{ - /* read only */ -} - -static uint64_t mac_nubus_rom_read(void *opaque, hwaddr addr, - unsigned int size) +void nubus_set_irq(NubusDevice *nd, int level) { - NubusDevice *dev = opaque; + NubusBus *nubus = NUBUS_BUS(qdev_get_parent_bus(DEVICE(nd))); - return dev->rom[addr]; -} - -static const MemoryRegionOps mac_nubus_rom_ops = { - .read = mac_nubus_rom_read, - .write = mac_nubus_rom_write, - .endianness = DEVICE_BIG_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 1, - }, -}; - - -void nubus_register_rom(NubusDevice *dev, const uint8_t *rom, uint32_t size, - int revision, int format, uint8_t byte_lanes) -{ - hwaddr rom_offset; - char *rom_name; - - /* FIXME : really compute CRC */ - dev->rom_length = 0; - dev->rom_crc = 0; - - dev->rom_rev = revision; - dev->rom_format = format; - - dev->byte_lanes = byte_lanes; - dev->directory_offset = -size; - - /* ROM */ - - dev->rom = rom; - rom_name = g_strdup_printf("nubus-slot-%d-rom", dev->slot_nb); - memory_region_init_io(&dev->rom_io, NULL, &mac_nubus_rom_ops, - dev, rom_name, size); - memory_region_set_readonly(&dev->rom_io, true); - - rom_offset = memory_region_size(&dev->slot_mem) - FBLOCK_SIZE + - dev->directory_offset; - memory_region_add_subregion(&dev->slot_mem, rom_offset, &dev->rom_io); - - g_free(rom_name); + qemu_set_irq(nubus->irqs[nd->slot], level); } static void nubus_device_realize(DeviceState *dev, Error **errp) { NubusBus *nubus = NUBUS_BUS(qdev_get_parent_bus(dev)); NubusDevice *nd = NUBUS_DEVICE(dev); - char *name; + char *name, *path; hwaddr slot_offset; + int64_t size; + int ret; - if (nubus->current_slot < NUBUS_FIRST_SLOT || - nubus->current_slot > NUBUS_LAST_SLOT) { - error_setg(errp, "Cannot register nubus card, not enough slots"); - return; - } + /* Super */ + slot_offset = nd->slot * NUBUS_SUPER_SLOT_SIZE; - nd->slot_nb = nubus->current_slot++; - name = g_strdup_printf("nubus-slot-%d", nd->slot_nb); + name = g_strdup_printf("nubus-super-slot-%x", nd->slot); + memory_region_init(&nd->super_slot_mem, OBJECT(dev), name, + NUBUS_SUPER_SLOT_SIZE); + memory_region_add_subregion(&nubus->super_slot_io, slot_offset, + &nd->super_slot_mem); + g_free(name); - if (nd->slot_nb < NUBUS_FIRST_SLOT) { - /* Super */ - slot_offset = (nd->slot_nb - 6) * NUBUS_SUPER_SLOT_SIZE; + /* Normal */ + slot_offset = nd->slot * NUBUS_SLOT_SIZE; - memory_region_init(&nd->slot_mem, OBJECT(dev), name, - NUBUS_SUPER_SLOT_SIZE); - memory_region_add_subregion(&nubus->super_slot_io, slot_offset, - &nd->slot_mem); - } else { - /* Normal */ - slot_offset = nd->slot_nb * NUBUS_SLOT_SIZE; + name = g_strdup_printf("nubus-slot-%x", nd->slot); + memory_region_init(&nd->slot_mem, OBJECT(dev), name, NUBUS_SLOT_SIZE); + memory_region_add_subregion(&nubus->slot_io, slot_offset, + &nd->slot_mem); + g_free(name); - memory_region_init(&nd->slot_mem, OBJECT(dev), name, NUBUS_SLOT_SIZE); - memory_region_add_subregion(&nubus->slot_io, slot_offset, - 
&nd->slot_mem); + /* Declaration ROM */ + if (nd->romfile != NULL) { + path = qemu_find_file(QEMU_FILE_TYPE_BIOS, nd->romfile); + if (path == NULL) { + path = g_strdup(nd->romfile); + } + + size = get_image_size(path); + if (size < 0) { + error_setg(errp, "failed to find romfile \"%s\"", nd->romfile); + g_free(path); + return; + } else if (size == 0) { + error_setg(errp, "romfile \"%s\" is empty", nd->romfile); + g_free(path); + return; + } else if (size > NUBUS_DECL_ROM_MAX_SIZE) { + error_setg(errp, "romfile \"%s\" too large (maximum size 128K)", + nd->romfile); + g_free(path); + return; + } + + name = g_strdup_printf("nubus-slot-%x-declaration-rom", nd->slot); + memory_region_init_rom(&nd->decl_rom, OBJECT(dev), name, size, + &error_abort); + ret = load_image_mr(path, &nd->decl_rom); + g_free(path); + if (ret < 0) { + error_setg(errp, "could not load romfile \"%s\"", nd->romfile); + return; + } + memory_region_add_subregion(&nd->slot_mem, NUBUS_SLOT_SIZE - size, + &nd->decl_rom); } - - g_free(name); - nubus_register_format_block(nd); } +static Property nubus_device_properties[] = { + DEFINE_PROP_INT32("slot", NubusDevice, slot, -1), + DEFINE_PROP_STRING("romfile", NubusDevice, romfile), + DEFINE_PROP_END_OF_LIST() +}; + static void nubus_device_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); dc->realize = nubus_device_realize; dc->bus_type = TYPE_NUBUS_BUS; + device_class_set_props(dc, nubus_device_properties); } static const TypeInfo nubus_device_type_info = { diff --git a/hw/nubus/trace-events b/hw/nubus/trace-events new file mode 100644 index 00000000000..e31833d694a --- /dev/null +++ b/hw/nubus/trace-events @@ -0,0 +1,7 @@ +# See docs/devel/tracing.txt for syntax documentation. + +# nubus-bus.c +nubus_slot_read(uint64_t addr, int size) "reading unassigned addr 0x%"PRIx64 " size %d" +nubus_slot_write(uint64_t addr, uint64_t val, int size) "writing unassigned addr 0x%"PRIx64 " value 0x%"PRIx64 " size %d" +nubus_super_slot_read(uint64_t addr, int size) "reading unassigned addr 0x%"PRIx64 " size %d" +nubus_super_slot_write(uint64_t addr, uint64_t val, int size) "writing unassigned addr 0x%"PRIx64 " value 0x%"PRIx64 " size %d" diff --git a/hw/nubus/trace.h b/hw/nubus/trace.h new file mode 100644 index 00000000000..3749420da17 --- /dev/null +++ b/hw/nubus/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_nubus.h" diff --git a/hw/nvme/Kconfig b/hw/nvme/Kconfig new file mode 100644 index 00000000000..8ac90942e55 --- /dev/null +++ b/hw/nvme/Kconfig @@ -0,0 +1,4 @@ +config NVME_PCI + bool + default y if PCI_DEVICES + depends on PCI diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c new file mode 100644 index 00000000000..5f573c417b3 --- /dev/null +++ b/hw/nvme/ctrl.c @@ -0,0 +1,6731 @@ +/* + * QEMU NVM Express Controller + * + * Copyright (c) 2012, Intel Corporation + * + * Written by Keith Busch + * + * This code is licensed under the GNU GPL v2 or later. + */ + +/** + * Reference Specs: http://www.nvmexpress.org, 1.4, 1.3, 1.2, 1.1, 1.0e + * + * https://nvmexpress.org/developers/nvme-specification/ + * + * + * Notes on coding style + * --------------------- + * While QEMU coding style prefers lowercase hexadecimals in constants, the + * NVMe subsystem use thes format from the NVMe specifications in the comments + * (i.e. 'h' suffix instead of '0x' prefix). + * + * Usage + * ----- + * See docs/system/nvme.rst for extensive documentation. 
+ *
+ * Add options:
+ *      -drive file=,if=none,id=
+ *      -device nvme-subsys,id=,nqn=
+ *      -device nvme,serial=,id=, \
+ *              cmb_size_mb=, \
+ *              [pmrdev=,] \
+ *              max_ioqpairs=, \
+ *              aerl=,aer_max_queued=, \
+ *              mdts=,vsl=, \
+ *              zoned.zasl=, \
+ *              zoned.auto_transition=, \
+ *              subsys=
+ *      -device nvme-ns,drive=,bus=,nsid=,\
+ *              zoned=, \
+ *              subsys=,detached=
+ *
+ * Note that cmb_size_mb denotes the size of the CMB in MB. The CMB is assumed
+ * to be at offset 0 in BAR2 and supports only WDS, RDS and SQS for now. By
+ * default, the device will use the "v1.4 CMB scheme" - use the `legacy-cmb`
+ * parameter to always enable the CMBLOC and CMBSZ registers (v1.3 behavior).
+ *
+ * Enabling pmr emulation can be achieved by pointing to a memory-backend-file.
+ * For example:
+ *      -object memory-backend-file,id=,share=on,mem-path=, \
+ *              size= .... -device nvme,...,pmrdev=
+ *
+ * The PMR will use BAR 4/5 exclusively.
+ *
+ * To place controller(s) and namespace(s) into a subsystem, provide the
+ * nvme-subsys device as above.
+ *
+ * nvme subsystem device parameters
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * - `nqn`
+ *   This parameter provides the `` part of the string
+ *   `nqn.2019-08.org.qemu:` which will be reported in the SUBNQN field
+ *   of subsystem controllers. Note that `` should be unique per
+ *   subsystem, but this is not enforced by QEMU. If not specified, it will
+ *   default to the value of the `id` parameter (``).
+ *
+ * nvme device parameters
+ * ~~~~~~~~~~~~~~~~~~~~~~
+ * - `subsys`
+ *   Specifying this parameter attaches the controller to the subsystem and
+ *   the SUBNQN field in the controller will report the NQN of the subsystem
+ *   device. This also enables multi controller capability represented in
+ *   Identify Controller data structure in CMIC (Controller Multi-path I/O and
+ *   Namespace Sharing Capabilities).
+ *
+ * - `aerl`
+ *   The Asynchronous Event Request Limit (AERL). Indicates the maximum number
+ *   of concurrently outstanding Asynchronous Event Request commands supported
+ *   by the controller. This is a 0's based value.
+ *
+ * - `aer_max_queued`
+ *   This is the maximum number of events that the device will enqueue for
+ *   completion when there are no outstanding AERs. When the maximum number of
+ *   enqueued events is reached, subsequent events will be dropped.
+ *
+ * - `mdts`
+ *   Indicates the maximum data transfer size for a command that transfers data
+ *   between host-accessible memory and the controller. The value is specified
+ *   as a power of two (2^n) and is in units of the minimum memory page size
+ *   (CAP.MPSMIN). The default value is 7 (i.e. 512 KiB).
+ *
+ * - `vsl`
+ *   Indicates the maximum data size limit for the Verify command. Like `mdts`,
+ *   this value is specified as a power of two (2^n) and is in units of the
+ *   minimum memory page size (CAP.MPSMIN). The default value is 7 (i.e. 512
+ *   KiB).
+ *
+ * - `zoned.zasl`
+ *   Indicates the maximum data transfer size for the Zone Append command. Like
+ *   `mdts`, the value is specified as a power of two (2^n) and is in units of
+ *   the minimum memory page size (CAP.MPSMIN). The default value is 0 (i.e.
+ *   defaulting to the value of `mdts`).
+ *
+ * - `zoned.auto_transition`
+ *   Indicates if zones in zone state implicitly opened can be automatically
+ *   transitioned to zone state closed for resource management purposes.
+ *   Defaults to 'on'.
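The power-of-two encoding used by `mdts`, `vsl` and `zoned.zasl` can be made concrete with a minimal sketch. It assumes a 4 KiB minimum memory page size (CAP.MPSMIN = 0, an assumption, not something fixed by the patch) and mirrors the `len > n->page_size << mdts` comparison that nvme_check_mdts() performs further down in this file:

    #include <stddef.h>
    #include <stdio.h>

    int main(void)
    {
        size_t page_size = 4096;             /* assumed CAP.MPSMIN of 0 (4 KiB pages) */
        unsigned int mdts = 7;               /* the documented default */
        size_t max_xfer = page_size << mdts; /* 2^7 pages = 512 KiB */

        printf("maximum transfer size: %zu KiB\n", max_xfer / 1024);
        return 0;
    }

A transfer larger than this limit is rejected, which is what nvme_check_mdts() expresses by returning NVME_INVALID_FIELD | NVME_DNR.
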
+ * + * nvme namespace device parameters + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * - `shared` + * When the parent nvme device (as defined explicitly by the 'bus' parameter + * or implicitly by the most recently defined NvmeBus) is linked to an + * nvme-subsys device, the namespace will be attached to all controllers in + * the subsystem. If set to 'off' (the default), the namespace will remain a + * private namespace and may only be attached to a single controller at a + * time. + * + * - `detached` + * This parameter is only valid together with the `subsys` parameter. If left + * at the default value (`false/off`), the namespace will be attached to all + * controllers in the NVMe subsystem at boot-up. If set to `true/on`, the + * namespace will be available in the subsystem but not attached to any + * controllers. + * + * Setting `zoned` to true selects Zoned Command Set at the namespace. + * In this case, the following namespace properties are available to configure + * zoned operation: + * zoned.zone_size= + * The number may be followed by K, M, G as in kilo-, mega- or giga-. + * + * zoned.zone_capacity= + * The value 0 (default) forces zone capacity to be the same as zone + * size. The value of this property may not exceed zone size. + * + * zoned.descr_ext_size= + * This value needs to be specified in 64B units. If it is zero, + * namespace(s) will not support zone descriptor extensions. + * + * zoned.max_active= + * The default value means there is no limit to the number of + * concurrently active zones. + * + * zoned.max_open= + * The default value means there is no limit to the number of + * concurrently open zones. + * + * zoned.cross_read= + * Setting this property to true enables Read Across Zone Boundaries. + */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "sysemu/sysemu.h" +#include "sysemu/block-backend.h" +#include "sysemu/hostmem.h" +#include "hw/pci/msix.h" +#include "migration/vmstate.h" + +#include "nvme.h" +#include "trace.h" + +#define NVME_MAX_IOQPAIRS 0xffff +#define NVME_DB_SIZE 4 +#define NVME_SPEC_VER 0x00010400 +#define NVME_CMB_BIR 2 +#define NVME_PMR_BIR 4 +#define NVME_TEMPERATURE 0x143 +#define NVME_TEMPERATURE_WARNING 0x157 +#define NVME_TEMPERATURE_CRITICAL 0x175 +#define NVME_NUM_FW_SLOTS 1 +#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB) + +#define NVME_GUEST_ERR(trace, fmt, ...) 
\ + do { \ + (trace_##trace)(__VA_ARGS__); \ + qemu_log_mask(LOG_GUEST_ERROR, #trace \ + " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \ + } while (0) + +static const bool nvme_feature_support[NVME_FID_MAX] = { + [NVME_ARBITRATION] = true, + [NVME_POWER_MANAGEMENT] = true, + [NVME_TEMPERATURE_THRESHOLD] = true, + [NVME_ERROR_RECOVERY] = true, + [NVME_VOLATILE_WRITE_CACHE] = true, + [NVME_NUMBER_OF_QUEUES] = true, + [NVME_INTERRUPT_COALESCING] = true, + [NVME_INTERRUPT_VECTOR_CONF] = true, + [NVME_WRITE_ATOMICITY] = true, + [NVME_ASYNCHRONOUS_EVENT_CONF] = true, + [NVME_TIMESTAMP] = true, + [NVME_COMMAND_SET_PROFILE] = true, +}; + +static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { + [NVME_TEMPERATURE_THRESHOLD] = NVME_FEAT_CAP_CHANGE, + [NVME_ERROR_RECOVERY] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS, + [NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE, + [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE, + [NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE, + [NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE, + [NVME_COMMAND_SET_PROFILE] = NVME_FEAT_CAP_CHANGE, +}; + +static const uint32_t nvme_cse_acs[256] = { + [NVME_ADM_CMD_DELETE_SQ] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_CREATE_SQ] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_DELETE_CQ] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_CREATE_CQ] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_IDENTIFY] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_ABORT] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, + [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, +}; + +static const uint32_t nvme_cse_iocs_none[256]; + +static const uint32_t nvme_cse_iocs_nvm[256] = { + [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, +}; + +static const uint32_t nvme_cse_iocs_zoned[256] = { + [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP, +}; + +static void nvme_process_sq(void *opaque); + +static uint16_t nvme_sqid(NvmeRequest *req) +{ + return le16_to_cpu(req->sq->sqid); +} + +static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone, + NvmeZoneState state) +{ + if (QTAILQ_IN_USE(zone, entry)) { + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry); + break; + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); + break; + case NVME_ZONE_STATE_CLOSED: + 
QTAILQ_REMOVE(&ns->closed_zones, zone, entry); + break; + case NVME_ZONE_STATE_FULL: + QTAILQ_REMOVE(&ns->full_zones, zone, entry); + default: + ; + } + } + + nvme_set_zone_state(zone, state); + + switch (state) { + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + QTAILQ_INSERT_TAIL(&ns->exp_open_zones, zone, entry); + break; + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + QTAILQ_INSERT_TAIL(&ns->imp_open_zones, zone, entry); + break; + case NVME_ZONE_STATE_CLOSED: + QTAILQ_INSERT_TAIL(&ns->closed_zones, zone, entry); + break; + case NVME_ZONE_STATE_FULL: + QTAILQ_INSERT_TAIL(&ns->full_zones, zone, entry); + case NVME_ZONE_STATE_READ_ONLY: + break; + default: + zone->d.za = 0; + } +} + +/* + * Check if we can open a zone without exceeding open/active limits. + * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5). + */ +static int nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn) +{ + if (ns->params.max_active_zones != 0 && + ns->nr_active_zones + act > ns->params.max_active_zones) { + trace_pci_nvme_err_insuff_active_res(ns->params.max_active_zones); + return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR; + } + if (ns->params.max_open_zones != 0 && + ns->nr_open_zones + opn > ns->params.max_open_zones) { + trace_pci_nvme_err_insuff_open_res(ns->params.max_open_zones); + return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr) +{ + hwaddr hi, lo; + + if (!n->cmb.cmse) { + return false; + } + + lo = n->params.legacy_cmb ? n->cmb.mem.addr : n->cmb.cba; + hi = lo + int128_get64(n->cmb.mem.size); + + return addr >= lo && addr < hi; +} + +static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr) +{ + hwaddr base = n->params.legacy_cmb ? n->cmb.mem.addr : n->cmb.cba; + return &n->cmb.buf[addr - base]; +} + +static bool nvme_addr_is_pmr(NvmeCtrl *n, hwaddr addr) +{ + hwaddr hi; + + if (!n->pmr.cmse) { + return false; + } + + hi = n->pmr.cba + int128_get64(n->pmr.dev->mr.size); + + return addr >= n->pmr.cba && addr < hi; +} + +static inline void *nvme_addr_to_pmr(NvmeCtrl *n, hwaddr addr) +{ + return memory_region_get_ram_ptr(&n->pmr.dev->mr) + (addr - n->pmr.cba); +} + +static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) +{ + hwaddr hi = addr + size - 1; + if (hi < addr) { + return 1; + } + + if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) { + memcpy(buf, nvme_addr_to_cmb(n, addr), size); + return 0; + } + + if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) { + memcpy(buf, nvme_addr_to_pmr(n, addr), size); + return 0; + } + + return pci_dma_read(&n->parent_obj, addr, buf, size); +} + +static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size) +{ + hwaddr hi = addr + size - 1; + if (hi < addr) { + return 1; + } + + if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) { + memcpy(nvme_addr_to_cmb(n, addr), buf, size); + return 0; + } + + if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) { + memcpy(nvme_addr_to_pmr(n, addr), buf, size); + return 0; + } + + return pci_dma_write(&n->parent_obj, addr, buf, size); +} + +static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid) +{ + return nsid && + (nsid == NVME_NSID_BROADCAST || nsid <= NVME_MAX_NAMESPACES); +} + +static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) +{ + return sqid < n->params.max_ioqpairs + 1 && n->sq[sqid] != NULL ? 
0 : -1; +} + +static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid) +{ + return cqid < n->params.max_ioqpairs + 1 && n->cq[cqid] != NULL ? 0 : -1; +} + +static void nvme_inc_cq_tail(NvmeCQueue *cq) +{ + cq->tail++; + if (cq->tail >= cq->size) { + cq->tail = 0; + cq->phase = !cq->phase; + } +} + +static void nvme_inc_sq_head(NvmeSQueue *sq) +{ + sq->head = (sq->head + 1) % sq->size; +} + +static uint8_t nvme_cq_full(NvmeCQueue *cq) +{ + return (cq->tail + 1) % cq->size == cq->head; +} + +static uint8_t nvme_sq_empty(NvmeSQueue *sq) +{ + return sq->head == sq->tail; +} + +static void nvme_irq_check(NvmeCtrl *n) +{ + uint32_t intms = ldl_le_p(&n->bar.intms); + + if (msix_enabled(&(n->parent_obj))) { + return; + } + if (~intms & n->irq_status) { + pci_irq_assert(&n->parent_obj); + } else { + pci_irq_deassert(&n->parent_obj); + } +} + +static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq) +{ + if (cq->irq_enabled) { + if (msix_enabled(&(n->parent_obj))) { + trace_pci_nvme_irq_msix(cq->vector); + msix_notify(&(n->parent_obj), cq->vector); + } else { + trace_pci_nvme_irq_pin(); + assert(cq->vector < 32); + n->irq_status |= 1 << cq->vector; + nvme_irq_check(n); + } + } else { + trace_pci_nvme_irq_masked(); + } +} + +static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) +{ + if (cq->irq_enabled) { + if (msix_enabled(&(n->parent_obj))) { + return; + } else { + assert(cq->vector < 32); + if (!n->cq_pending) { + n->irq_status &= ~(1 << cq->vector); + } + nvme_irq_check(n); + } + } +} + +static void nvme_req_clear(NvmeRequest *req) +{ + req->ns = NULL; + req->opaque = NULL; + req->aiocb = NULL; + memset(&req->cqe, 0x0, sizeof(req->cqe)); + req->status = NVME_SUCCESS; +} + +static inline void nvme_sg_init(NvmeCtrl *n, NvmeSg *sg, bool dma) +{ + if (dma) { + pci_dma_sglist_init(&sg->qsg, &n->parent_obj, 0); + sg->flags = NVME_SG_DMA; + } else { + qemu_iovec_init(&sg->iov, 0); + } + + sg->flags |= NVME_SG_ALLOC; +} + +static inline void nvme_sg_unmap(NvmeSg *sg) +{ + if (!(sg->flags & NVME_SG_ALLOC)) { + return; + } + + if (sg->flags & NVME_SG_DMA) { + qemu_sglist_destroy(&sg->qsg); + } else { + qemu_iovec_destroy(&sg->iov); + } + + memset(sg, 0x0, sizeof(*sg)); +} + +/* + * When metadata is transfered as extended LBAs, the DPTR mapped into `sg` + * holds both data and metadata. This function splits the data and metadata + * into two separate QSG/IOVs. + */ +static void nvme_sg_split(NvmeSg *sg, NvmeNamespace *ns, NvmeSg *data, + NvmeSg *mdata) +{ + NvmeSg *dst = data; + uint32_t trans_len, count = ns->lbasz; + uint64_t offset = 0; + bool dma = sg->flags & NVME_SG_DMA; + size_t sge_len; + size_t sg_len = dma ? sg->qsg.size : sg->iov.size; + int sg_idx = 0; + + assert(sg->flags & NVME_SG_ALLOC); + + while (sg_len) { + sge_len = dma ? sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len; + + trans_len = MIN(sg_len, count); + trans_len = MIN(trans_len, sge_len - offset); + + if (dst) { + if (dma) { + qemu_sglist_add(&dst->qsg, sg->qsg.sg[sg_idx].base + offset, + trans_len); + } else { + qemu_iovec_add(&dst->iov, + sg->iov.iov[sg_idx].iov_base + offset, + trans_len); + } + } + + sg_len -= trans_len; + count -= trans_len; + offset += trans_len; + + if (count == 0) { + dst = (dst == data) ? mdata : data; + count = (dst == data) ? 
ns->lbasz : ns->lbaf.ms; + } + + if (sge_len == offset) { + offset = 0; + sg_idx++; + } + } +} + +static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr, + size_t len) +{ + if (!len) { + return NVME_SUCCESS; + } + + trace_pci_nvme_map_addr_cmb(addr, len); + + if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len - 1)) { + return NVME_DATA_TRAS_ERROR; + } + + qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len); + + return NVME_SUCCESS; +} + +static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr, + size_t len) +{ + if (!len) { + return NVME_SUCCESS; + } + + if (!nvme_addr_is_pmr(n, addr) || !nvme_addr_is_pmr(n, addr + len - 1)) { + return NVME_DATA_TRAS_ERROR; + } + + qemu_iovec_add(iov, nvme_addr_to_pmr(n, addr), len); + + return NVME_SUCCESS; +} + +static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len) +{ + bool cmb = false, pmr = false; + + if (!len) { + return NVME_SUCCESS; + } + + trace_pci_nvme_map_addr(addr, len); + + if (nvme_addr_is_cmb(n, addr)) { + cmb = true; + } else if (nvme_addr_is_pmr(n, addr)) { + pmr = true; + } + + if (cmb || pmr) { + if (sg->flags & NVME_SG_DMA) { + return NVME_INVALID_USE_OF_CMB | NVME_DNR; + } + + if (sg->iov.niov + 1 > IOV_MAX) { + goto max_mappings_exceeded; + } + + if (cmb) { + return nvme_map_addr_cmb(n, &sg->iov, addr, len); + } else { + return nvme_map_addr_pmr(n, &sg->iov, addr, len); + } + } + + if (!(sg->flags & NVME_SG_DMA)) { + return NVME_INVALID_USE_OF_CMB | NVME_DNR; + } + + if (sg->qsg.nsg + 1 > IOV_MAX) { + goto max_mappings_exceeded; + } + + qemu_sglist_add(&sg->qsg, addr, len); + + return NVME_SUCCESS; + +max_mappings_exceeded: + NVME_GUEST_ERR(pci_nvme_ub_too_many_mappings, + "number of mappings exceed 1024"); + return NVME_INTERNAL_DEV_ERROR | NVME_DNR; +} + +static inline bool nvme_addr_is_dma(NvmeCtrl *n, hwaddr addr) +{ + return !(nvme_addr_is_cmb(n, addr) || nvme_addr_is_pmr(n, addr)); +} + +static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1, + uint64_t prp2, uint32_t len) +{ + hwaddr trans_len = n->page_size - (prp1 % n->page_size); + trans_len = MIN(len, trans_len); + int num_prps = (len >> n->page_bits) + 1; + uint16_t status; + int ret; + + trace_pci_nvme_map_prp(trans_len, len, prp1, prp2, num_prps); + + nvme_sg_init(n, sg, nvme_addr_is_dma(n, prp1)); + + status = nvme_map_addr(n, sg, prp1, trans_len); + if (status) { + goto unmap; + } + + len -= trans_len; + if (len) { + if (len > n->page_size) { + uint64_t prp_list[n->max_prp_ents]; + uint32_t nents, prp_trans; + int i = 0; + + /* + * The first PRP list entry, pointed to by PRP2 may contain offset. + * Hence, we need to calculate the number of entries in based on + * that offset. 
+ */ + nents = (n->page_size - (prp2 & (n->page_size - 1))) >> 3; + prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); + ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans); + if (ret) { + trace_pci_nvme_err_addr_read(prp2); + status = NVME_DATA_TRAS_ERROR; + goto unmap; + } + while (len != 0) { + uint64_t prp_ent = le64_to_cpu(prp_list[i]); + + if (i == nents - 1 && len > n->page_size) { + if (unlikely(prp_ent & (n->page_size - 1))) { + trace_pci_nvme_err_invalid_prplist_ent(prp_ent); + status = NVME_INVALID_PRP_OFFSET | NVME_DNR; + goto unmap; + } + + i = 0; + nents = (len + n->page_size - 1) >> n->page_bits; + nents = MIN(nents, n->max_prp_ents); + prp_trans = nents * sizeof(uint64_t); + ret = nvme_addr_read(n, prp_ent, (void *)prp_list, + prp_trans); + if (ret) { + trace_pci_nvme_err_addr_read(prp_ent); + status = NVME_DATA_TRAS_ERROR; + goto unmap; + } + prp_ent = le64_to_cpu(prp_list[i]); + } + + if (unlikely(prp_ent & (n->page_size - 1))) { + trace_pci_nvme_err_invalid_prplist_ent(prp_ent); + status = NVME_INVALID_PRP_OFFSET | NVME_DNR; + goto unmap; + } + + trans_len = MIN(len, n->page_size); + status = nvme_map_addr(n, sg, prp_ent, trans_len); + if (status) { + goto unmap; + } + + len -= trans_len; + i++; + } + } else { + if (unlikely(prp2 & (n->page_size - 1))) { + trace_pci_nvme_err_invalid_prp2_align(prp2); + status = NVME_INVALID_PRP_OFFSET | NVME_DNR; + goto unmap; + } + status = nvme_map_addr(n, sg, prp2, len); + if (status) { + goto unmap; + } + } + } + + return NVME_SUCCESS; + +unmap: + nvme_sg_unmap(sg); + return status; +} + +/* + * Map 'nsgld' data descriptors from 'segment'. The function will subtract the + * number of bytes mapped in len. + */ +static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg, + NvmeSglDescriptor *segment, uint64_t nsgld, + size_t *len, NvmeCmd *cmd) +{ + dma_addr_t addr, trans_len; + uint32_t dlen; + uint16_t status; + + for (int i = 0; i < nsgld; i++) { + uint8_t type = NVME_SGL_TYPE(segment[i].type); + + switch (type) { + case NVME_SGL_DESCR_TYPE_BIT_BUCKET: + if (cmd->opcode == NVME_CMD_WRITE) { + continue; + } + case NVME_SGL_DESCR_TYPE_DATA_BLOCK: + break; + case NVME_SGL_DESCR_TYPE_SEGMENT: + case NVME_SGL_DESCR_TYPE_LAST_SEGMENT: + return NVME_INVALID_NUM_SGL_DESCRS | NVME_DNR; + default: + return NVME_SGL_DESCR_TYPE_INVALID | NVME_DNR; + } + + dlen = le32_to_cpu(segment[i].len); + + if (!dlen) { + continue; + } + + if (*len == 0) { + /* + * All data has been mapped, but the SGL contains additional + * segments and/or descriptors. The controller might accept + * ignoring the rest of the SGL. + */ + uint32_t sgls = le32_to_cpu(n->id_ctrl.sgls); + if (sgls & NVME_CTRL_SGLS_EXCESS_LENGTH) { + break; + } + + trace_pci_nvme_err_invalid_sgl_excess_length(dlen); + return NVME_DATA_SGL_LEN_INVALID | NVME_DNR; + } + + trans_len = MIN(*len, dlen); + + if (type == NVME_SGL_DESCR_TYPE_BIT_BUCKET) { + goto next; + } + + addr = le64_to_cpu(segment[i].addr); + + if (UINT64_MAX - addr < dlen) { + return NVME_DATA_SGL_LEN_INVALID | NVME_DNR; + } + + status = nvme_map_addr(n, sg, addr, trans_len); + if (status) { + return status; + } + +next: + *len -= trans_len; + } + + return NVME_SUCCESS; +} + +static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl, + size_t len, NvmeCmd *cmd) +{ + /* + * Read the segment in chunks of 256 descriptors (one 4k page) to avoid + * dynamically allocating a potentially huge SGL. 
The spec allows the SGL + * to be larger (as in number of bytes required to describe the SGL + * descriptors and segment chain) than the command transfer size, so it is + * not bounded by MDTS. + */ + const int SEG_CHUNK_SIZE = 256; + + NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld; + uint64_t nsgld; + uint32_t seg_len; + uint16_t status; + hwaddr addr; + int ret; + + sgld = &sgl; + addr = le64_to_cpu(sgl.addr); + + trace_pci_nvme_map_sgl(NVME_SGL_TYPE(sgl.type), len); + + nvme_sg_init(n, sg, nvme_addr_is_dma(n, addr)); + + /* + * If the entire transfer can be described with a single data block it can + * be mapped directly. + */ + if (NVME_SGL_TYPE(sgl.type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) { + status = nvme_map_sgl_data(n, sg, sgld, 1, &len, cmd); + if (status) { + goto unmap; + } + + goto out; + } + + for (;;) { + switch (NVME_SGL_TYPE(sgld->type)) { + case NVME_SGL_DESCR_TYPE_SEGMENT: + case NVME_SGL_DESCR_TYPE_LAST_SEGMENT: + break; + default: + return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR; + } + + seg_len = le32_to_cpu(sgld->len); + + /* check the length of the (Last) Segment descriptor */ + if ((!seg_len || seg_len & 0xf) && + (NVME_SGL_TYPE(sgld->type) != NVME_SGL_DESCR_TYPE_BIT_BUCKET)) { + return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR; + } + + if (UINT64_MAX - addr < seg_len) { + return NVME_DATA_SGL_LEN_INVALID | NVME_DNR; + } + + nsgld = seg_len / sizeof(NvmeSglDescriptor); + + while (nsgld > SEG_CHUNK_SIZE) { + if (nvme_addr_read(n, addr, segment, sizeof(segment))) { + trace_pci_nvme_err_addr_read(addr); + status = NVME_DATA_TRAS_ERROR; + goto unmap; + } + + status = nvme_map_sgl_data(n, sg, segment, SEG_CHUNK_SIZE, + &len, cmd); + if (status) { + goto unmap; + } + + nsgld -= SEG_CHUNK_SIZE; + addr += SEG_CHUNK_SIZE * sizeof(NvmeSglDescriptor); + } + + ret = nvme_addr_read(n, addr, segment, nsgld * + sizeof(NvmeSglDescriptor)); + if (ret) { + trace_pci_nvme_err_addr_read(addr); + status = NVME_DATA_TRAS_ERROR; + goto unmap; + } + + last_sgld = &segment[nsgld - 1]; + + /* + * If the segment ends with a Data Block or Bit Bucket Descriptor Type, + * then we are done. + */ + switch (NVME_SGL_TYPE(last_sgld->type)) { + case NVME_SGL_DESCR_TYPE_DATA_BLOCK: + case NVME_SGL_DESCR_TYPE_BIT_BUCKET: + status = nvme_map_sgl_data(n, sg, segment, nsgld, &len, cmd); + if (status) { + goto unmap; + } + + goto out; + + default: + break; + } + + /* + * If the last descriptor was not a Data Block or Bit Bucket, then the + * current segment must not be a Last Segment. + */ + if (NVME_SGL_TYPE(sgld->type) == NVME_SGL_DESCR_TYPE_LAST_SEGMENT) { + status = NVME_INVALID_SGL_SEG_DESCR | NVME_DNR; + goto unmap; + } + + sgld = last_sgld; + addr = le64_to_cpu(sgld->addr); + + /* + * Do not map the last descriptor; it will be a Segment or Last Segment + * descriptor and is handled by the next iteration. 
+ */ + status = nvme_map_sgl_data(n, sg, segment, nsgld - 1, &len, cmd); + if (status) { + goto unmap; + } + } + +out: + /* if there is any residual left in len, the SGL was too short */ + if (len) { + status = NVME_DATA_SGL_LEN_INVALID | NVME_DNR; + goto unmap; + } + + return NVME_SUCCESS; + +unmap: + nvme_sg_unmap(sg); + return status; +} + +uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, + NvmeCmd *cmd) +{ + uint64_t prp1, prp2; + + switch (NVME_CMD_FLAGS_PSDT(cmd->flags)) { + case NVME_PSDT_PRP: + prp1 = le64_to_cpu(cmd->dptr.prp1); + prp2 = le64_to_cpu(cmd->dptr.prp2); + + return nvme_map_prp(n, sg, prp1, prp2, len); + case NVME_PSDT_SGL_MPTR_CONTIGUOUS: + case NVME_PSDT_SGL_MPTR_SGL: + return nvme_map_sgl(n, sg, cmd->dptr.sgl, len, cmd); + default: + return NVME_INVALID_FIELD; + } +} + +static uint16_t nvme_map_mptr(NvmeCtrl *n, NvmeSg *sg, size_t len, + NvmeCmd *cmd) +{ + int psdt = NVME_CMD_FLAGS_PSDT(cmd->flags); + hwaddr mptr = le64_to_cpu(cmd->mptr); + uint16_t status; + + if (psdt == NVME_PSDT_SGL_MPTR_SGL) { + NvmeSglDescriptor sgl; + + if (nvme_addr_read(n, mptr, &sgl, sizeof(sgl))) { + return NVME_DATA_TRAS_ERROR; + } + + status = nvme_map_sgl(n, sg, sgl, len, cmd); + if (status && (status & 0x7ff) == NVME_DATA_SGL_LEN_INVALID) { + status = NVME_MD_SGL_LEN_INVALID | NVME_DNR; + } + + return status; + } + + nvme_sg_init(n, sg, nvme_addr_is_dma(n, mptr)); + status = nvme_map_addr(n, sg, mptr, len); + if (status) { + nvme_sg_unmap(sg); + } + + return status; +} + +static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps); + bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT); + size_t len = nvme_l2b(ns, nlb); + uint16_t status; + + if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) { + NvmeSg sg; + + len += nvme_m2b(ns, nlb); + + status = nvme_map_dptr(n, &sg, len, &req->cmd); + if (status) { + return status; + } + + nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA); + nvme_sg_split(&sg, ns, &req->sg, NULL); + nvme_sg_unmap(&sg); + + return NVME_SUCCESS; + } + + return nvme_map_dptr(n, &req->sg, len, &req->cmd); +} + +static uint16_t nvme_map_mdata(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + size_t len = nvme_m2b(ns, nlb); + uint16_t status; + + if (nvme_ns_ext(ns)) { + NvmeSg sg; + + len += nvme_l2b(ns, nlb); + + status = nvme_map_dptr(n, &sg, len, &req->cmd); + if (status) { + return status; + } + + nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA); + nvme_sg_split(&sg, ns, NULL, &req->sg); + nvme_sg_unmap(&sg); + + return NVME_SUCCESS; + } + + return nvme_map_mptr(n, &req->sg, len, &req->cmd); +} + +static uint16_t nvme_tx_interleaved(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, + uint32_t len, uint32_t bytes, + int32_t skip_bytes, int64_t offset, + NvmeTxDirection dir) +{ + hwaddr addr; + uint32_t trans_len, count = bytes; + bool dma = sg->flags & NVME_SG_DMA; + int64_t sge_len; + int sg_idx = 0; + int ret; + + assert(sg->flags & NVME_SG_ALLOC); + + while (len) { + sge_len = dma ? 
sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len; + + if (sge_len - offset < 0) { + offset -= sge_len; + sg_idx++; + continue; + } + + if (sge_len == offset) { + offset = 0; + sg_idx++; + continue; + } + + trans_len = MIN(len, count); + trans_len = MIN(trans_len, sge_len - offset); + + if (dma) { + addr = sg->qsg.sg[sg_idx].base + offset; + } else { + addr = (hwaddr)(uintptr_t)sg->iov.iov[sg_idx].iov_base + offset; + } + + if (dir == NVME_TX_DIRECTION_TO_DEVICE) { + ret = nvme_addr_read(n, addr, ptr, trans_len); + } else { + ret = nvme_addr_write(n, addr, ptr, trans_len); + } + + if (ret) { + return NVME_DATA_TRAS_ERROR; + } + + ptr += trans_len; + len -= trans_len; + count -= trans_len; + offset += trans_len; + + if (count == 0) { + count = bytes; + offset += skip_bytes; + } + } + + return NVME_SUCCESS; +} + +static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir) +{ + assert(sg->flags & NVME_SG_ALLOC); + + if (sg->flags & NVME_SG_DMA) { + uint64_t residual; + + if (dir == NVME_TX_DIRECTION_TO_DEVICE) { + residual = dma_buf_write(ptr, len, &sg->qsg); + } else { + residual = dma_buf_read(ptr, len, &sg->qsg); + } + + if (unlikely(residual)) { + trace_pci_nvme_err_invalid_dma(); + return NVME_INVALID_FIELD | NVME_DNR; + } + } else { + size_t bytes; + + if (dir == NVME_TX_DIRECTION_TO_DEVICE) { + bytes = qemu_iovec_to_buf(&sg->iov, 0, ptr, len); + } else { + bytes = qemu_iovec_from_buf(&sg->iov, 0, ptr, len); + } + + if (unlikely(bytes != len)) { + trace_pci_nvme_err_invalid_dma(); + return NVME_INVALID_FIELD | NVME_DNR; + } + } + + return NVME_SUCCESS; +} + +static inline uint16_t nvme_c2h(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeRequest *req) +{ + uint16_t status; + + status = nvme_map_dptr(n, &req->sg, len, &req->cmd); + if (status) { + return status; + } + + return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_FROM_DEVICE); +} + +static inline uint16_t nvme_h2c(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeRequest *req) +{ + uint16_t status; + + status = nvme_map_dptr(n, &req->sg, len, &req->cmd); + if (status) { + return status; + } + + return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_TO_DEVICE); +} + +uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps); + bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT); + + if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) { + return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbasz, + ns->lbaf.ms, 0, dir); + } + + return nvme_tx(n, &req->sg, ptr, len, dir); +} + +uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + uint16_t status; + + if (nvme_ns_ext(ns)) { + return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbaf.ms, + ns->lbasz, ns->lbasz, dir); + } + + nvme_sg_unmap(&req->sg); + + status = nvme_map_mptr(n, &req->sg, len, &req->cmd); + if (status) { + return status; + } + + return nvme_tx(n, &req->sg, ptr, len, dir); +} + +static inline void nvme_blk_read(BlockBackend *blk, int64_t offset, + BlockCompletionFunc *cb, NvmeRequest *req) +{ + assert(req->sg.flags & NVME_SG_ALLOC); + + if (req->sg.flags & NVME_SG_DMA) { + req->aiocb = dma_blk_read(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE, + cb, req); + } else { + req->aiocb = blk_aio_preadv(blk, offset, &req->sg.iov, 0, cb, req); + } +} + 
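
The interleaved transfer helpers above (nvme_sg_split() and nvme_tx_interleaved()) both operate on the "extended LBA" layout, in which each logical block carries its metadata directly after the data in the same buffer. A minimal sketch of that layout arithmetic follows; the 512-byte block with 8 bytes of metadata is only an illustrative choice, the real values come from ns->lbasz and ns->lbaf.ms:

    #include <stddef.h>
    #include <stdio.h>

    int main(void)
    {
        size_t lbasz = 512; /* data bytes per logical block (illustrative ns->lbasz) */
        size_t ms = 8;      /* metadata bytes per block (illustrative ns->lbaf.ms) */

        for (size_t blk = 0; blk < 3; blk++) {
            size_t data_off = blk * (lbasz + ms); /* start of this block's data */
            size_t mdata_off = data_off + lbasz;  /* its metadata follows directly */
            printf("block %zu: data @ %zu, metadata @ %zu\n",
                   blk, data_off, mdata_off);
        }
        return 0;
    }

Splitting such a buffer into separate data and metadata scatter/gather lists is what nvme_sg_split() does, while nvme_tx_interleaved() walks the same stride when copying between the guest buffer and a flat bounce buffer.
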
+static inline void nvme_blk_write(BlockBackend *blk, int64_t offset, + BlockCompletionFunc *cb, NvmeRequest *req) +{ + assert(req->sg.flags & NVME_SG_ALLOC); + + if (req->sg.flags & NVME_SG_DMA) { + req->aiocb = dma_blk_write(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE, + cb, req); + } else { + req->aiocb = blk_aio_pwritev(blk, offset, &req->sg.iov, 0, cb, req); + } +} + +static void nvme_post_cqes(void *opaque) +{ + NvmeCQueue *cq = opaque; + NvmeCtrl *n = cq->ctrl; + NvmeRequest *req, *next; + bool pending = cq->head != cq->tail; + int ret; + + QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) { + NvmeSQueue *sq; + hwaddr addr; + + if (nvme_cq_full(cq)) { + break; + } + + sq = req->sq; + req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase); + req->cqe.sq_id = cpu_to_le16(sq->sqid); + req->cqe.sq_head = cpu_to_le16(sq->head); + addr = cq->dma_addr + cq->tail * n->cqe_size; + ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe, + sizeof(req->cqe)); + if (ret) { + trace_pci_nvme_err_addr_write(addr); + trace_pci_nvme_err_cfs(); + stl_le_p(&n->bar.csts, NVME_CSTS_FAILED); + break; + } + QTAILQ_REMOVE(&cq->req_list, req, entry); + nvme_inc_cq_tail(cq); + nvme_sg_unmap(&req->sg); + QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); + } + if (cq->tail != cq->head) { + if (cq->irq_enabled && !pending) { + n->cq_pending++; + } + + nvme_irq_assert(n, cq); + } +} + +static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) +{ + assert(cq->cqid == req->sq->cqid); + trace_pci_nvme_enqueue_req_completion(nvme_cid(req), cq->cqid, + le32_to_cpu(req->cqe.result), + le32_to_cpu(req->cqe.dw1), + req->status); + + if (req->status) { + trace_pci_nvme_err_req_status(nvme_cid(req), nvme_nsid(req->ns), + req->status, req->cmd.opcode); + } + + QTAILQ_REMOVE(&req->sq->out_req_list, req, entry); + QTAILQ_INSERT_TAIL(&cq->req_list, req, entry); + timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); +} + +static void nvme_process_aers(void *opaque) +{ + NvmeCtrl *n = opaque; + NvmeAsyncEvent *event, *next; + + trace_pci_nvme_process_aers(n->aer_queued); + + QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) { + NvmeRequest *req; + NvmeAerResult *result; + + /* can't post cqe if there is nothing to complete */ + if (!n->outstanding_aers) { + trace_pci_nvme_no_outstanding_aers(); + break; + } + + /* ignore if masked (cqe posted, but event not cleared) */ + if (n->aer_mask & (1 << event->result.event_type)) { + trace_pci_nvme_aer_masked(event->result.event_type, n->aer_mask); + continue; + } + + QTAILQ_REMOVE(&n->aer_queue, event, entry); + n->aer_queued--; + + n->aer_mask |= 1 << event->result.event_type; + n->outstanding_aers--; + + req = n->aer_reqs[n->outstanding_aers]; + + result = (NvmeAerResult *) &req->cqe.result; + result->event_type = event->result.event_type; + result->event_info = event->result.event_info; + result->log_page = event->result.log_page; + g_free(event); + + trace_pci_nvme_aer_post_cqe(result->event_type, result->event_info, + result->log_page); + + nvme_enqueue_req_completion(&n->admin_cq, req); + } +} + +static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type, + uint8_t event_info, uint8_t log_page) +{ + NvmeAsyncEvent *event; + + trace_pci_nvme_enqueue_event(event_type, event_info, log_page); + + if (n->aer_queued == n->params.aer_max_queued) { + trace_pci_nvme_enqueue_event_noqueue(n->aer_queued); + return; + } + + event = g_new(NvmeAsyncEvent, 1); + event->result = (NvmeAerResult) { + .event_type = event_type, + .event_info = 
event_info, + .log_page = log_page, + }; + + QTAILQ_INSERT_TAIL(&n->aer_queue, event, entry); + n->aer_queued++; + + nvme_process_aers(n); +} + +static void nvme_smart_event(NvmeCtrl *n, uint8_t event) +{ + uint8_t aer_info; + + /* Ref SPEC */ + if (!(NVME_AEC_SMART(n->features.async_config) & event)) { + return; + } + + switch (event) { + case NVME_SMART_SPARE: + aer_info = NVME_AER_INFO_SMART_SPARE_THRESH; + break; + case NVME_SMART_TEMPERATURE: + aer_info = NVME_AER_INFO_SMART_TEMP_THRESH; + break; + case NVME_SMART_RELIABILITY: + case NVME_SMART_MEDIA_READ_ONLY: + case NVME_SMART_FAILED_VOLATILE_MEDIA: + case NVME_SMART_PMR_UNRELIABLE: + aer_info = NVME_AER_INFO_SMART_RELIABILITY; + break; + default: + return; + } + + nvme_enqueue_event(n, NVME_AER_TYPE_SMART, aer_info, NVME_LOG_SMART_INFO); +} + +static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type) +{ + n->aer_mask &= ~(1 << event_type); + if (!QTAILQ_EMPTY(&n->aer_queue)) { + nvme_process_aers(n); + } +} + +static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len) +{ + uint8_t mdts = n->params.mdts; + + if (mdts && len > n->page_size << mdts) { + trace_pci_nvme_err_mdts(len); + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static inline uint16_t nvme_check_bounds(NvmeNamespace *ns, uint64_t slba, + uint32_t nlb) +{ + uint64_t nsze = le64_to_cpu(ns->id_ns.nsze); + + if (unlikely(UINT64_MAX - slba < nlb || slba + nlb > nsze)) { + trace_pci_nvme_err_invalid_lba_range(slba, nlb, nsze); + return NVME_LBA_RANGE | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static int nvme_block_status_all(NvmeNamespace *ns, uint64_t slba, + uint32_t nlb, int flags) +{ + BlockDriverState *bs = blk_bs(ns->blkconf.blk); + + int64_t pnum = 0, bytes = nvme_l2b(ns, nlb); + int64_t offset = nvme_l2b(ns, slba); + int ret; + + /* + * `pnum` holds the number of bytes after offset that shares the same + * allocation status as the byte at offset. If `pnum` is different from + * `bytes`, we should check the allocation status of the next range and + * continue this until all bytes have been checked. + */ + do { + bytes -= pnum; + + ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL); + if (ret < 0) { + return ret; + } + + + trace_pci_nvme_block_status(offset, bytes, pnum, ret, + !!(ret & BDRV_BLOCK_ZERO)); + + if (!(ret & flags)) { + return 1; + } + + offset += pnum; + } while (pnum != bytes); + + return 0; +} + +static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba, + uint32_t nlb) +{ + int ret; + Error *err = NULL; + + ret = nvme_block_status_all(ns, slba, nlb, BDRV_BLOCK_DATA); + if (ret) { + if (ret < 0) { + error_setg_errno(&err, -ret, "unable to get block status"); + error_report_err(err); + + return NVME_INTERNAL_DEV_ERROR; + } + + return NVME_DULB; + } + + return NVME_SUCCESS; +} + +static void nvme_aio_err(NvmeRequest *req, int ret) +{ + uint16_t status = NVME_SUCCESS; + Error *local_err = NULL; + + switch (req->cmd.opcode) { + case NVME_CMD_READ: + status = NVME_UNRECOVERED_READ; + break; + case NVME_CMD_FLUSH: + case NVME_CMD_WRITE: + case NVME_CMD_WRITE_ZEROES: + case NVME_CMD_ZONE_APPEND: + status = NVME_WRITE_FAULT; + break; + default: + status = NVME_INTERNAL_DEV_ERROR; + break; + } + + trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), status); + + error_setg_errno(&local_err, -ret, "aio failed"); + error_report_err(local_err); + + /* + * Set the command status code to the first encountered error but allow a + * subsequent Internal Device Error to trump it. 
+ */ + if (req->status && status != NVME_INTERNAL_DEV_ERROR) { + return; + } + + req->status = status; +} + +static inline uint32_t nvme_zone_idx(NvmeNamespace *ns, uint64_t slba) +{ + return ns->zone_size_log2 > 0 ? slba >> ns->zone_size_log2 : + slba / ns->zone_size; +} + +static inline NvmeZone *nvme_get_zone_by_slba(NvmeNamespace *ns, uint64_t slba) +{ + uint32_t zone_idx = nvme_zone_idx(ns, slba); + + if (zone_idx >= ns->num_zones) { + return NULL; + } + + return &ns->zone_array[zone_idx]; +} + +static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone) +{ + uint64_t zslba = zone->d.zslba; + + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EMPTY: + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + case NVME_ZONE_STATE_CLOSED: + return NVME_SUCCESS; + case NVME_ZONE_STATE_FULL: + trace_pci_nvme_err_zone_is_full(zslba); + return NVME_ZONE_FULL; + case NVME_ZONE_STATE_OFFLINE: + trace_pci_nvme_err_zone_is_offline(zslba); + return NVME_ZONE_OFFLINE; + case NVME_ZONE_STATE_READ_ONLY: + trace_pci_nvme_err_zone_is_read_only(zslba); + return NVME_ZONE_READ_ONLY; + default: + assert(false); + } + + return NVME_INTERNAL_DEV_ERROR; +} + +static uint16_t nvme_check_zone_write(NvmeNamespace *ns, NvmeZone *zone, + uint64_t slba, uint32_t nlb) +{ + uint64_t zcap = nvme_zone_wr_boundary(zone); + uint16_t status; + + status = nvme_check_zone_state_for_write(zone); + if (status) { + return status; + } + + if (unlikely(slba != zone->w_ptr)) { + trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba, zone->w_ptr); + return NVME_ZONE_INVALID_WRITE; + } + + if (unlikely((slba + nlb) > zcap)) { + trace_pci_nvme_err_zone_boundary(slba, nlb, zcap); + return NVME_ZONE_BOUNDARY_ERROR; + } + + return NVME_SUCCESS; +} + +static uint16_t nvme_check_zone_state_for_read(NvmeZone *zone) +{ + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EMPTY: + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + case NVME_ZONE_STATE_FULL: + case NVME_ZONE_STATE_CLOSED: + case NVME_ZONE_STATE_READ_ONLY: + return NVME_SUCCESS; + case NVME_ZONE_STATE_OFFLINE: + trace_pci_nvme_err_zone_is_offline(zone->d.zslba); + return NVME_ZONE_OFFLINE; + default: + assert(false); + } + + return NVME_INTERNAL_DEV_ERROR; +} + +static uint16_t nvme_check_zone_read(NvmeNamespace *ns, uint64_t slba, + uint32_t nlb) +{ + NvmeZone *zone; + uint64_t bndry, end; + uint16_t status; + + zone = nvme_get_zone_by_slba(ns, slba); + assert(zone); + + bndry = nvme_zone_rd_boundary(ns, zone); + end = slba + nlb; + + status = nvme_check_zone_state_for_read(zone); + if (status) { + ; + } else if (unlikely(end > bndry)) { + if (!ns->params.cross_zone_read) { + status = NVME_ZONE_BOUNDARY_ERROR; + } else { + /* + * Read across zone boundary - check that all subsequent + * zones that are being read have an appropriate state. 
+ */ + do { + zone++; + status = nvme_check_zone_state_for_read(zone); + if (status) { + break; + } + } while (end > nvme_zone_rd_boundary(ns, zone)); + } + } + + return status; +} + +static uint16_t nvme_zrm_finish(NvmeNamespace *ns, NvmeZone *zone) +{ + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_FULL: + return NVME_SUCCESS; + + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + nvme_aor_dec_open(ns); + /* fallthrough */ + case NVME_ZONE_STATE_CLOSED: + nvme_aor_dec_active(ns); + /* fallthrough */ + case NVME_ZONE_STATE_EMPTY: + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL); + return NVME_SUCCESS; + + default: + return NVME_ZONE_INVAL_TRANSITION; + } +} + +static uint16_t nvme_zrm_close(NvmeNamespace *ns, NvmeZone *zone) +{ + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + nvme_aor_dec_open(ns); + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED); + /* fall through */ + case NVME_ZONE_STATE_CLOSED: + return NVME_SUCCESS; + + default: + return NVME_ZONE_INVAL_TRANSITION; + } +} + +static uint16_t nvme_zrm_reset(NvmeNamespace *ns, NvmeZone *zone) +{ + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + nvme_aor_dec_open(ns); + /* fallthrough */ + case NVME_ZONE_STATE_CLOSED: + nvme_aor_dec_active(ns); + /* fallthrough */ + case NVME_ZONE_STATE_FULL: + zone->w_ptr = zone->d.zslba; + zone->d.wp = zone->w_ptr; + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY); + /* fallthrough */ + case NVME_ZONE_STATE_EMPTY: + return NVME_SUCCESS; + + default: + return NVME_ZONE_INVAL_TRANSITION; + } +} + +static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns) +{ + NvmeZone *zone; + + if (ns->params.max_open_zones && + ns->nr_open_zones == ns->params.max_open_zones) { + zone = QTAILQ_FIRST(&ns->imp_open_zones); + if (zone) { + /* + * Automatically close this implicitly open zone. 
+ */ + QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); + nvme_zrm_close(ns, zone); + } + } +} + +enum { + NVME_ZRM_AUTO = 1 << 0, +}; + +static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns, + NvmeZone *zone, int flags) +{ + int act = 0; + uint16_t status; + + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EMPTY: + act = 1; + + /* fallthrough */ + + case NVME_ZONE_STATE_CLOSED: + if (n->params.auto_transition_zones) { + nvme_zrm_auto_transition_zone(ns); + } + status = nvme_aor_check(ns, act, 1); + if (status) { + return status; + } + + if (act) { + nvme_aor_inc_active(ns); + } + + nvme_aor_inc_open(ns); + + if (flags & NVME_ZRM_AUTO) { + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN); + return NVME_SUCCESS; + } + + /* fallthrough */ + + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + if (flags & NVME_ZRM_AUTO) { + return NVME_SUCCESS; + } + + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN); + + /* fallthrough */ + + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + return NVME_SUCCESS; + + default: + return NVME_ZONE_INVAL_TRANSITION; + } +} + +static inline uint16_t nvme_zrm_auto(NvmeCtrl *n, NvmeNamespace *ns, + NvmeZone *zone) +{ + return nvme_zrm_open_flags(n, ns, zone, NVME_ZRM_AUTO); +} + +static inline uint16_t nvme_zrm_open(NvmeCtrl *n, NvmeNamespace *ns, + NvmeZone *zone) +{ + return nvme_zrm_open_flags(n, ns, zone, 0); +} + +static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone, + uint32_t nlb) +{ + zone->d.wp += nlb; + + if (zone->d.wp == nvme_zone_wr_boundary(zone)) { + nvme_zrm_finish(ns, zone); + } +} + +static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + NvmeZone *zone; + uint64_t slba; + uint32_t nlb; + + slba = le64_to_cpu(rw->slba); + nlb = le16_to_cpu(rw->nlb) + 1; + zone = nvme_get_zone_by_slba(ns, slba); + assert(zone); + + nvme_advance_zone_wp(ns, zone, nlb); +} + +static inline bool nvme_is_write(NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + + return rw->opcode == NVME_CMD_WRITE || + rw->opcode == NVME_CMD_ZONE_APPEND || + rw->opcode == NVME_CMD_WRITE_ZEROES; +} + +static AioContext *nvme_get_aio_context(BlockAIOCB *acb) +{ + return qemu_get_aio_context(); +} + +static void nvme_misc_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + + trace_pci_nvme_misc_cb(nvme_cid(req)); + + if (ret) { + nvme_aio_err(req, ret); + } + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + +void nvme_rw_complete_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + BlockAcctCookie *acct = &req->acct; + BlockAcctStats *stats = blk_get_stats(blk); + + trace_pci_nvme_rw_complete_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + block_acct_failed(stats, acct); + nvme_aio_err(req, ret); + } else { + block_acct_done(stats, acct); + } + + if (ns->params.zoned && nvme_is_write(req)) { + nvme_finalize_zoned_write(ns, req); + } + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + +static void nvme_rw_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + NvmeNamespace *ns = req->ns; + + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + goto out; + } + + if (ns->lbaf.ms) { + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; + uint64_t offset = nvme_moff(ns, slba); + + if (req->cmd.opcode == 
NVME_CMD_WRITE_ZEROES) { + size_t mlen = nvme_m2b(ns, nlb); + + req->aiocb = blk_aio_pwrite_zeroes(blk, offset, mlen, + BDRV_REQ_MAY_UNMAP, + nvme_rw_complete_cb, req); + return; + } + + if (nvme_ns_ext(ns) || req->cmd.mptr) { + uint16_t status; + + nvme_sg_unmap(&req->sg); + status = nvme_map_mdata(nvme_ctrl(req), nlb, req); + if (status) { + ret = -EFAULT; + goto out; + } + + if (req->cmd.opcode == NVME_CMD_READ) { + return nvme_blk_read(blk, offset, nvme_rw_complete_cb, req); + } + + return nvme_blk_write(blk, offset, nvme_rw_complete_cb, req); + } + } + +out: + nvme_rw_complete_cb(req, ret); +} + +static void nvme_verify_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + BlockAcctCookie *acct = &req->acct; + BlockAcctStats *stats = blk_get_stats(blk); + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); + uint16_t status; + + trace_pci_nvme_verify_cb(nvme_cid(req), prinfo, apptag, appmask, reftag); + + if (ret) { + block_acct_failed(stats, acct); + nvme_aio_err(req, ret); + goto out; + } + + block_acct_done(stats, acct); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, + ctx->mdata.iov.size, slba); + if (status) { + req->status = status; + goto out; + } + + req->status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, + prinfo, slba, apptag, appmask, &reftag); + } + +out: + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); + + qemu_iovec_destroy(&ctx->mdata.iov); + g_free(ctx->mdata.bounce); + + g_free(ctx); + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + + +static void nvme_verify_mdata_in_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + size_t mlen = nvme_m2b(ns, nlb); + uint64_t offset = nvme_moff(ns, slba); + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_verify_mdata_in_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + goto out; + } + + ctx->mdata.bounce = g_malloc(mlen); + + qemu_iovec_reset(&ctx->mdata.iov); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, + nvme_verify_cb, ctx); + return; + +out: + nvme_verify_cb(ctx, ret); +} + +struct nvme_compare_ctx { + struct { + QEMUIOVector iov; + uint8_t *bounce; + } data; + + struct { + QEMUIOVector iov; + uint8_t *bounce; + } mdata; +}; + +static void nvme_compare_mdata_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + NvmeNamespace *ns = req->ns; + NvmeCtrl *n = nvme_ctrl(req); + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); + struct nvme_compare_ctx *ctx = req->opaque; + g_autofree uint8_t *buf = NULL; + BlockBackend *blk = ns->blkconf.blk; + BlockAcctCookie *acct = &req->acct; + BlockAcctStats *stats = blk_get_stats(blk); + uint16_t status = NVME_SUCCESS; + + 
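    /*
     * This callback completes the metadata phase of a Compare command: the
     * metadata just read from the backing device is in ctx->mdata, the host's
     * copy is bounced into a scratch buffer via nvme_bounce_mdata(), and the
     * two copies are compared. Namespaces formatted with protection
     * information are additionally verified with nvme_dif_check(); a
     * comparison mismatch completes the command with NVME_CMP_FAILURE.
     */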
trace_pci_nvme_compare_mdata_cb(nvme_cid(req)); + + if (ret) { + block_acct_failed(stats, acct); + nvme_aio_err(req, ret); + goto out; + } + + buf = g_malloc(ctx->mdata.iov.size); + + status = nvme_bounce_mdata(n, buf, ctx->mdata.iov.size, + NVME_TX_DIRECTION_TO_DEVICE, req); + if (status) { + req->status = status; + goto out; + } + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + uint64_t slba = le64_to_cpu(rw->slba); + uint8_t *bufp; + uint8_t *mbufp = ctx->mdata.bounce; + uint8_t *end = mbufp + ctx->mdata.iov.size; + int16_t pil = 0; + + status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, prinfo, + slba, apptag, appmask, &reftag); + if (status) { + req->status = status; + goto out; + } + + /* + * When formatted with protection information, do not compare the DIF + * tuple. + */ + if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + } + + for (bufp = buf; mbufp < end; bufp += ns->lbaf.ms, mbufp += ns->lbaf.ms) { + if (memcmp(bufp + pil, mbufp + pil, ns->lbaf.ms - pil)) { + req->status = NVME_CMP_FAILURE; + goto out; + } + } + + goto out; + } + + if (memcmp(buf, ctx->mdata.bounce, ctx->mdata.iov.size)) { + req->status = NVME_CMP_FAILURE; + goto out; + } + + block_acct_done(stats, acct); + +out: + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); + + qemu_iovec_destroy(&ctx->mdata.iov); + g_free(ctx->mdata.bounce); + + g_free(ctx); + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + +static void nvme_compare_data_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + NvmeCtrl *n = nvme_ctrl(req); + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + BlockAcctCookie *acct = &req->acct; + BlockAcctStats *stats = blk_get_stats(blk); + + struct nvme_compare_ctx *ctx = req->opaque; + g_autofree uint8_t *buf = NULL; + uint16_t status; + + trace_pci_nvme_compare_data_cb(nvme_cid(req)); + + if (ret) { + block_acct_failed(stats, acct); + nvme_aio_err(req, ret); + goto out; + } + + buf = g_malloc(ctx->data.iov.size); + + status = nvme_bounce_data(n, buf, ctx->data.iov.size, + NVME_TX_DIRECTION_TO_DEVICE, req); + if (status) { + req->status = status; + goto out; + } + + if (memcmp(buf, ctx->data.bounce, ctx->data.iov.size)) { + req->status = NVME_CMP_FAILURE; + goto out; + } + + if (ns->lbaf.ms) { + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + size_t mlen = nvme_m2b(ns, nlb); + uint64_t offset = nvme_moff(ns, slba); + + ctx->mdata.bounce = g_malloc(mlen); + + qemu_iovec_init(&ctx->mdata.iov, 1); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, + nvme_compare_mdata_cb, req); + return; + } + + block_acct_done(stats, acct); + +out: + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); + g_free(ctx); + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + +typedef struct NvmeDSMAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + NvmeRequest *req; + QEMUBH *bh; + int ret; + + NvmeDsmRange *range; + unsigned int nr; + unsigned int idx; +} NvmeDSMAIOCB; + +static void nvme_dsm_cancel(BlockAIOCB *aiocb) +{ + NvmeDSMAIOCB *iocb = container_of(aiocb, NvmeDSMAIOCB, common); + + /* break nvme_dsm_cb loop */ + iocb->idx = iocb->nr; + iocb->ret = -ECANCELED; + + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); + iocb->aiocb = NULL; + } else { + /* + * We only reach this if nvme_dsm_cancel() has already 
been called or + * the command ran to completion and nvme_dsm_bh is scheduled to run. + */ + assert(iocb->idx == iocb->nr); + } +} + +static const AIOCBInfo nvme_dsm_aiocb_info = { + .aiocb_size = sizeof(NvmeDSMAIOCB), + .cancel_async = nvme_dsm_cancel, +}; + +static void nvme_dsm_bh(void *opaque) +{ + NvmeDSMAIOCB *iocb = opaque; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); +} + +static void nvme_dsm_cb(void *opaque, int ret); + +static void nvme_dsm_md_cb(void *opaque, int ret) +{ + NvmeDSMAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeDsmRange *range; + uint64_t slba; + uint32_t nlb; + + if (ret < 0) { + iocb->ret = ret; + goto done; + } + + if (!ns->lbaf.ms) { + nvme_dsm_cb(iocb, 0); + return; + } + + range = &iocb->range[iocb->idx - 1]; + slba = le64_to_cpu(range->slba); + nlb = le32_to_cpu(range->nlb); + + /* + * Check that all block were discarded (zeroed); otherwise we do not zero + * the metadata. + */ + + ret = nvme_block_status_all(ns, slba, nlb, BDRV_BLOCK_ZERO); + if (ret) { + if (ret < 0) { + iocb->ret = ret; + goto done; + } + + nvme_dsm_cb(iocb, 0); + } + + iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_moff(ns, slba), + nvme_m2b(ns, nlb), BDRV_REQ_MAY_UNMAP, + nvme_dsm_cb, iocb); + return; + +done: + iocb->aiocb = NULL; + qemu_bh_schedule(iocb->bh); +} + +static void nvme_dsm_cb(void *opaque, int ret) +{ + NvmeDSMAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeCtrl *n = nvme_ctrl(req); + NvmeNamespace *ns = req->ns; + NvmeDsmRange *range; + uint64_t slba; + uint32_t nlb; + + if (ret < 0) { + iocb->ret = ret; + goto done; + } + +next: + if (iocb->idx == iocb->nr) { + goto done; + } + + range = &iocb->range[iocb->idx++]; + slba = le64_to_cpu(range->slba); + nlb = le32_to_cpu(range->nlb); + + trace_pci_nvme_dsm_deallocate(slba, nlb); + + if (nlb > n->dmrsl) { + trace_pci_nvme_dsm_single_range_limit_exceeded(nlb, n->dmrsl); + goto next; + } + + if (nvme_check_bounds(ns, slba, nlb)) { + trace_pci_nvme_err_invalid_lba_range(slba, nlb, + ns->id_ns.nsze); + goto next; + } + + iocb->aiocb = blk_aio_pdiscard(ns->blkconf.blk, nvme_l2b(ns, slba), + nvme_l2b(ns, nlb), + nvme_dsm_md_cb, iocb); + return; + +done: + iocb->aiocb = NULL; + qemu_bh_schedule(iocb->bh); +} + +static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + NvmeDsmCmd *dsm = (NvmeDsmCmd *) &req->cmd; + uint32_t attr = le32_to_cpu(dsm->attributes); + uint32_t nr = (le32_to_cpu(dsm->nr) & 0xff) + 1; + uint16_t status = NVME_SUCCESS; + + trace_pci_nvme_dsm(nr, attr); + + if (attr & NVME_DSMGMT_AD) { + NvmeDSMAIOCB *iocb = blk_aio_get(&nvme_dsm_aiocb_info, ns->blkconf.blk, + nvme_misc_cb, req); + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_dsm_bh, iocb); + iocb->ret = 0; + iocb->range = g_new(NvmeDsmRange, nr); + iocb->nr = nr; + iocb->idx = 0; + + status = nvme_h2c(n, (uint8_t *)iocb->range, sizeof(NvmeDsmRange) * nr, + req); + if (status) { + return status; + } + + req->aiocb = &iocb->common; + nvme_dsm_cb(iocb, 0); + + return NVME_NO_COMPLETE; + } + + return status; +} + +static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + size_t len = nvme_l2b(ns, nlb); + int64_t offset = nvme_l2b(ns, slba); + uint8_t prinfo = 
NVME_RW_PRINFO(le16_to_cpu(rw->control)); + uint32_t reftag = le32_to_cpu(rw->reftag); + NvmeBounceContext *ctx = NULL; + uint16_t status; + + trace_pci_nvme_verify(nvme_cid(req), nvme_nsid(ns), slba, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + status = nvme_check_prinfo(ns, prinfo, slba, reftag); + if (status) { + return status; + } + + if (prinfo & NVME_PRINFO_PRACT) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + } + + if (len > n->page_size << n->params.vsl) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + status = nvme_check_bounds(ns, slba, nlb); + if (status) { + return status; + } + + if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { + status = nvme_check_dulbe(ns, slba, nlb); + if (status) { + return status; + } + } + + ctx = g_new0(NvmeBounceContext, 1); + ctx->req = req; + + ctx->data.bounce = g_malloc(len); + + qemu_iovec_init(&ctx->data.iov, 1); + qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len); + + block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, + BLOCK_ACCT_READ); + + req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0, + nvme_verify_mdata_in_cb, ctx); + return NVME_NO_COMPLETE; +} + +typedef struct NvmeCopyAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + NvmeRequest *req; + QEMUBH *bh; + int ret; + + NvmeCopySourceRange *ranges; + int nr; + int idx; + + uint8_t *bounce; + QEMUIOVector iov; + struct { + BlockAcctCookie read; + BlockAcctCookie write; + } acct; + + uint32_t reftag; + uint64_t slba; + + NvmeZone *zone; +} NvmeCopyAIOCB; + +static void nvme_copy_cancel(BlockAIOCB *aiocb) +{ + NvmeCopyAIOCB *iocb = container_of(aiocb, NvmeCopyAIOCB, common); + + iocb->ret = -ECANCELED; + + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); + iocb->aiocb = NULL; + } +} + +static const AIOCBInfo nvme_copy_aiocb_info = { + .aiocb_size = sizeof(NvmeCopyAIOCB), + .cancel_async = nvme_copy_cancel, +}; + +static void nvme_copy_bh(void *opaque) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + BlockAcctStats *stats = blk_get_stats(ns->blkconf.blk); + + if (iocb->idx != iocb->nr) { + req->cqe.result = cpu_to_le32(iocb->idx); + } + + qemu_iovec_destroy(&iocb->iov); + g_free(iocb->bounce); + + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + + if (iocb->ret < 0) { + block_acct_failed(stats, &iocb->acct.read); + block_acct_failed(stats, &iocb->acct.write); + } else { + block_acct_done(stats, &iocb->acct.read); + block_acct_done(stats, &iocb->acct.write); + } + + iocb->common.cb(iocb->common.opaque, iocb->ret); + qemu_aio_unref(iocb); +} + +static void nvme_copy_cb(void *opaque, int ret); + +static void nvme_copy_out_completed_cb(void *opaque, int ret) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeCopySourceRange *range = &iocb->ranges[iocb->idx]; + uint32_t nlb = le32_to_cpu(range->nlb) + 1; + + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { + goto out; + } + + if (ns->params.zoned) { + nvme_advance_zone_wp(ns, iocb->zone, nlb); + } + + iocb->idx++; + iocb->slba += nlb; +out: + nvme_copy_cb(iocb, iocb->ret); +} + +static void nvme_copy_out_cb(void *opaque, int ret) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeCopySourceRange *range; + uint32_t nlb; + size_t mlen; + uint8_t *mbounce; + + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { + goto out; + } + + if (!ns->lbaf.ms) { + nvme_copy_out_completed_cb(iocb, 
0); + return; + } + + range = &iocb->ranges[iocb->idx]; + nlb = le32_to_cpu(range->nlb) + 1; + + mlen = nvme_m2b(ns, nlb); + mbounce = iocb->bounce + nvme_l2b(ns, nlb); + + qemu_iovec_reset(&iocb->iov); + qemu_iovec_add(&iocb->iov, mbounce, mlen); + + iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_moff(ns, iocb->slba), + &iocb->iov, 0, nvme_copy_out_completed_cb, + iocb); + + return; + +out: + nvme_copy_cb(iocb, ret); +} + +static void nvme_copy_in_completed_cb(void *opaque, int ret) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeCopySourceRange *range; + uint32_t nlb; + size_t len; + uint16_t status; + + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { + goto out; + } + + range = &iocb->ranges[iocb->idx]; + nlb = le32_to_cpu(range->nlb) + 1; + len = nvme_l2b(ns, nlb); + + trace_pci_nvme_copy_out(iocb->slba, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; + + uint16_t prinfor = ((copy->control[0] >> 4) & 0xf); + uint16_t prinfow = ((copy->control[2] >> 2) & 0xf); + + uint16_t apptag = le16_to_cpu(range->apptag); + uint16_t appmask = le16_to_cpu(range->appmask); + uint32_t reftag = le32_to_cpu(range->reftag); + + uint64_t slba = le64_to_cpu(range->slba); + size_t mlen = nvme_m2b(ns, nlb); + uint8_t *mbounce = iocb->bounce + nvme_l2b(ns, nlb); + + status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen, prinfor, + slba, apptag, appmask, &reftag); + if (status) { + goto invalid; + } + + apptag = le16_to_cpu(copy->apptag); + appmask = le16_to_cpu(copy->appmask); + + if (prinfow & NVME_PRINFO_PRACT) { + status = nvme_check_prinfo(ns, prinfow, iocb->slba, iocb->reftag); + if (status) { + goto invalid; + } + + nvme_dif_pract_generate_dif(ns, iocb->bounce, len, mbounce, mlen, + apptag, &iocb->reftag); + } else { + status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen, + prinfow, iocb->slba, apptag, appmask, + &iocb->reftag); + if (status) { + goto invalid; + } + } + } + + status = nvme_check_bounds(ns, iocb->slba, nlb); + if (status) { + goto invalid; + } + + if (ns->params.zoned) { + status = nvme_check_zone_write(ns, iocb->zone, iocb->slba, nlb); + if (status) { + goto invalid; + } + + iocb->zone->w_ptr += nlb; + } + + qemu_iovec_reset(&iocb->iov); + qemu_iovec_add(&iocb->iov, iocb->bounce, len); + + iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, iocb->slba), + &iocb->iov, 0, nvme_copy_out_cb, iocb); + + return; + +invalid: + req->status = status; + iocb->aiocb = NULL; + if (iocb->bh) { + qemu_bh_schedule(iocb->bh); + } + + return; + +out: + nvme_copy_cb(iocb, ret); +} + +static void nvme_copy_in_cb(void *opaque, int ret) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeCopySourceRange *range; + uint64_t slba; + uint32_t nlb; + + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { + goto out; + } + + if (!ns->lbaf.ms) { + nvme_copy_in_completed_cb(iocb, 0); + return; + } + + range = &iocb->ranges[iocb->idx]; + slba = le64_to_cpu(range->slba); + nlb = le32_to_cpu(range->nlb) + 1; + + qemu_iovec_reset(&iocb->iov); + qemu_iovec_add(&iocb->iov, iocb->bounce + nvme_l2b(ns, nlb), + nvme_m2b(ns, nlb)); + + iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_moff(ns, slba), + &iocb->iov, 0, nvme_copy_in_completed_cb, + iocb); + return; + +out: + nvme_copy_cb(iocb, iocb->ret); +} + +static void nvme_copy_cb(void *opaque, int ret) +{ + NvmeCopyAIOCB *iocb = opaque; 
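+    /*
+     * Per-range copy loop: read the current source range into the bounce
+     * buffer (nvme_copy_in_cb), verify and write it out
+     * (nvme_copy_in_completed_cb/nvme_copy_out_cb) and re-enter here for the
+     * next range; once iocb->idx reaches iocb->nr the bottom half completes
+     * the command.
+     */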
+ NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeCopySourceRange *range; + uint64_t slba; + uint32_t nlb; + size_t len; + uint16_t status; + + if (ret < 0) { + iocb->ret = ret; + goto done; + } else if (iocb->ret < 0) { + goto done; + } + + if (iocb->idx == iocb->nr) { + goto done; + } + + range = &iocb->ranges[iocb->idx]; + slba = le64_to_cpu(range->slba); + nlb = le32_to_cpu(range->nlb) + 1; + len = nvme_l2b(ns, nlb); + + trace_pci_nvme_copy_source_range(slba, nlb); + + if (nlb > le16_to_cpu(ns->id_ns.mssrl)) { + status = NVME_CMD_SIZE_LIMIT | NVME_DNR; + goto invalid; + } + + status = nvme_check_bounds(ns, slba, nlb); + if (status) { + goto invalid; + } + + if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { + status = nvme_check_dulbe(ns, slba, nlb); + if (status) { + goto invalid; + } + } + + if (ns->params.zoned) { + status = nvme_check_zone_read(ns, slba, nlb); + if (status) { + goto invalid; + } + } + + qemu_iovec_reset(&iocb->iov); + qemu_iovec_add(&iocb->iov, iocb->bounce, len); + + iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_l2b(ns, slba), + &iocb->iov, 0, nvme_copy_in_cb, iocb); + return; + +invalid: + req->status = status; +done: + iocb->aiocb = NULL; + if (iocb->bh) { + qemu_bh_schedule(iocb->bh); + } +} + + +static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; + NvmeCopyAIOCB *iocb = blk_aio_get(&nvme_copy_aiocb_info, ns->blkconf.blk, + nvme_misc_cb, req); + uint16_t nr = copy->nr + 1; + uint8_t format = copy->control[0] & 0xf; + uint16_t prinfor = ((copy->control[0] >> 4) & 0xf); + uint16_t prinfow = ((copy->control[2] >> 2) & 0xf); + + uint16_t status; + + trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format); + + iocb->ranges = NULL; + iocb->zone = NULL; + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && + ((prinfor & NVME_PRINFO_PRACT) != (prinfow & NVME_PRINFO_PRACT))) { + status = NVME_INVALID_FIELD | NVME_DNR; + goto invalid; + } + + if (!(n->id_ctrl.ocfs & (1 << format))) { + trace_pci_nvme_err_copy_invalid_format(format); + status = NVME_INVALID_FIELD | NVME_DNR; + goto invalid; + } + + if (nr > ns->id_ns.msrc + 1) { + status = NVME_CMD_SIZE_LIMIT | NVME_DNR; + goto invalid; + } + + iocb->ranges = g_new(NvmeCopySourceRange, nr); + + status = nvme_h2c(n, (uint8_t *)iocb->ranges, + sizeof(NvmeCopySourceRange) * nr, req); + if (status) { + goto invalid; + } + + iocb->slba = le64_to_cpu(copy->sdlba); + + if (ns->params.zoned) { + iocb->zone = nvme_get_zone_by_slba(ns, iocb->slba); + if (!iocb->zone) { + status = NVME_LBA_RANGE | NVME_DNR; + goto invalid; + } + + status = nvme_zrm_auto(n, ns, iocb->zone); + if (status) { + goto invalid; + } + } + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_copy_bh, iocb); + iocb->ret = 0; + iocb->nr = nr; + iocb->idx = 0; + iocb->reftag = le32_to_cpu(copy->reftag); + iocb->bounce = g_malloc_n(le16_to_cpu(ns->id_ns.mssrl), + ns->lbasz + ns->lbaf.ms); + + qemu_iovec_init(&iocb->iov, 1); + + block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.read, 0, + BLOCK_ACCT_READ); + block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.write, 0, + BLOCK_ACCT_WRITE); + + req->aiocb = &iocb->common; + nvme_copy_cb(iocb, 0); + + return NVME_NO_COMPLETE; + +invalid: + g_free(iocb->ranges); + qemu_aio_unref(iocb); + return status; +} + +static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + uint64_t slba 
= le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); + size_t data_len = nvme_l2b(ns, nlb); + size_t len = data_len; + int64_t offset = nvme_l2b(ns, slba); + struct nvme_compare_ctx *ctx = NULL; + uint16_t status; + + trace_pci_nvme_compare(nvme_cid(req), nvme_nsid(ns), slba, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (prinfo & NVME_PRINFO_PRACT)) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + + if (nvme_ns_ext(ns)) { + len += nvme_m2b(ns, nlb); + } + + status = nvme_check_mdts(n, len); + if (status) { + return status; + } + + status = nvme_check_bounds(ns, slba, nlb); + if (status) { + return status; + } + + if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { + status = nvme_check_dulbe(ns, slba, nlb); + if (status) { + return status; + } + } + + status = nvme_map_dptr(n, &req->sg, len, &req->cmd); + if (status) { + return status; + } + + ctx = g_new(struct nvme_compare_ctx, 1); + ctx->data.bounce = g_malloc(data_len); + + req->opaque = ctx; + + qemu_iovec_init(&ctx->data.iov, 1); + qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, data_len); + + block_acct_start(blk_get_stats(blk), &req->acct, data_len, + BLOCK_ACCT_READ); + req->aiocb = blk_aio_preadv(blk, offset, &ctx->data.iov, 0, + nvme_compare_data_cb, req); + + return NVME_NO_COMPLETE; +} + +typedef struct NvmeFlushAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + NvmeRequest *req; + QEMUBH *bh; + int ret; + + NvmeNamespace *ns; + uint32_t nsid; + bool broadcast; +} NvmeFlushAIOCB; + +static void nvme_flush_cancel(BlockAIOCB *acb) +{ + NvmeFlushAIOCB *iocb = container_of(acb, NvmeFlushAIOCB, common); + + iocb->ret = -ECANCELED; + + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); + } +} + +static const AIOCBInfo nvme_flush_aiocb_info = { + .aiocb_size = sizeof(NvmeFlushAIOCB), + .cancel_async = nvme_flush_cancel, + .get_aio_context = nvme_get_aio_context, +}; + +static void nvme_flush_ns_cb(void *opaque, int ret) +{ + NvmeFlushAIOCB *iocb = opaque; + NvmeNamespace *ns = iocb->ns; + + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { + goto out; + } + + if (ns) { + trace_pci_nvme_flush_ns(iocb->nsid); + + iocb->ns = NULL; + iocb->aiocb = blk_aio_flush(ns->blkconf.blk, nvme_flush_ns_cb, iocb); + return; + } + +out: + iocb->aiocb = NULL; + qemu_bh_schedule(iocb->bh); +} + +static void nvme_flush_bh(void *opaque) +{ + NvmeFlushAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeCtrl *n = nvme_ctrl(req); + int i; + + if (iocb->ret < 0) { + goto done; + } + + if (iocb->broadcast) { + for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) { + iocb->ns = nvme_ns(n, i); + if (iocb->ns) { + iocb->nsid = i; + break; + } + } + } + + if (!iocb->ns) { + goto done; + } + + nvme_flush_ns_cb(iocb, 0); + return; + +done: + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + qemu_aio_unref(iocb); + + return; +} + +static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeFlushAIOCB *iocb; + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint16_t status; + + iocb = qemu_aio_get(&nvme_flush_aiocb_info, NULL, nvme_misc_cb, req); + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_flush_bh, iocb); + iocb->ret = 0; + iocb->ns = NULL; + iocb->nsid = 0; + iocb->broadcast = (nsid == NVME_NSID_BROADCAST); + + if (!iocb->broadcast) { + if (!nvme_nsid_valid(n, nsid)) { + status = NVME_INVALID_NSID | NVME_DNR; + goto out; + } + + iocb->ns = nvme_ns(n, nsid); + if (!iocb->ns) { + 
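+            /* the NSID is valid, but no namespace is attached to this controller */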
status = NVME_INVALID_FIELD | NVME_DNR; + goto out; + } + + iocb->nsid = nsid; + } + + req->aiocb = &iocb->common; + qemu_bh_schedule(iocb->bh); + + return NVME_NO_COMPLETE; + +out: + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); + + return status; +} + +static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); + uint64_t data_size = nvme_l2b(ns, nlb); + uint64_t mapped_size = data_size; + uint64_t data_offset; + BlockBackend *blk = ns->blkconf.blk; + uint16_t status; + + if (nvme_ns_ext(ns)) { + mapped_size += nvme_m2b(ns, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + bool pract = prinfo & NVME_PRINFO_PRACT; + + if (pract && ns->lbaf.ms == 8) { + mapped_size = data_size; + } + } + } + + trace_pci_nvme_read(nvme_cid(req), nvme_nsid(ns), nlb, mapped_size, slba); + + status = nvme_check_mdts(n, mapped_size); + if (status) { + goto invalid; + } + + status = nvme_check_bounds(ns, slba, nlb); + if (status) { + goto invalid; + } + + if (ns->params.zoned) { + status = nvme_check_zone_read(ns, slba, nlb); + if (status) { + trace_pci_nvme_err_zone_read_not_ok(slba, nlb, status); + goto invalid; + } + } + + if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { + status = nvme_check_dulbe(ns, slba, nlb); + if (status) { + goto invalid; + } + } + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + return nvme_dif_rw(n, req); + } + + status = nvme_map_data(n, nlb, req); + if (status) { + goto invalid; + } + + data_offset = nvme_l2b(ns, slba); + + block_acct_start(blk_get_stats(blk), &req->acct, data_size, + BLOCK_ACCT_READ); + nvme_blk_read(blk, data_offset, nvme_rw_cb, req); + return NVME_NO_COMPLETE; + +invalid: + block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_READ); + return status | NVME_DNR; +} + +static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, + bool wrz) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; + uint16_t ctrl = le16_to_cpu(rw->control); + uint8_t prinfo = NVME_RW_PRINFO(ctrl); + uint64_t data_size = nvme_l2b(ns, nlb); + uint64_t mapped_size = data_size; + uint64_t data_offset; + NvmeZone *zone; + NvmeZonedResult *res = (NvmeZonedResult *)&req->cqe; + BlockBackend *blk = ns->blkconf.blk; + uint16_t status; + + if (nvme_ns_ext(ns)) { + mapped_size += nvme_m2b(ns, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + bool pract = prinfo & NVME_PRINFO_PRACT; + + if (pract && ns->lbaf.ms == 8) { + mapped_size -= nvme_m2b(ns, nlb); + } + } + } + + trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode), + nvme_nsid(ns), nlb, mapped_size, slba); + + if (!wrz) { + status = nvme_check_mdts(n, mapped_size); + if (status) { + goto invalid; + } + } + + status = nvme_check_bounds(ns, slba, nlb); + if (status) { + goto invalid; + } + + if (ns->params.zoned) { + zone = nvme_get_zone_by_slba(ns, slba); + assert(zone); + + if (append) { + bool piremap = !!(ctrl & NVME_RW_PIREMAP); + + if (unlikely(slba != zone->d.zslba)) { + trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba); + status = NVME_INVALID_FIELD; + goto invalid; + } + + if (n->params.zasl && + data_size > (uint64_t)n->page_size << n->params.zasl) { + trace_pci_nvme_err_zasl(data_size); + return NVME_INVALID_FIELD | NVME_DNR; + } + + 
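+            /*
+             * Zone Append: substitute the zone start LBA given by the host
+             * with the current write pointer and return the LBA actually
+             * written in the completion queue entry; for protection
+             * information types 1 and 2 the reference tag is remapped by the
+             * same offset when PIREMAP is set.
+             */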
slba = zone->w_ptr; + rw->slba = cpu_to_le64(slba); + res->slba = cpu_to_le64(slba); + + switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + case NVME_ID_NS_DPS_TYPE_1: + if (!piremap) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + + /* fallthrough */ + + case NVME_ID_NS_DPS_TYPE_2: + if (piremap) { + uint32_t reftag = le32_to_cpu(rw->reftag); + rw->reftag = cpu_to_le32(reftag + (slba - zone->d.zslba)); + } + + break; + + case NVME_ID_NS_DPS_TYPE_3: + if (piremap) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + + break; + } + } + + status = nvme_check_zone_write(ns, zone, slba, nlb); + if (status) { + goto invalid; + } + + status = nvme_zrm_auto(n, ns, zone); + if (status) { + goto invalid; + } + + zone->w_ptr += nlb; + } + + data_offset = nvme_l2b(ns, slba); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + return nvme_dif_rw(n, req); + } + + if (!wrz) { + status = nvme_map_data(n, nlb, req); + if (status) { + goto invalid; + } + + block_acct_start(blk_get_stats(blk), &req->acct, data_size, + BLOCK_ACCT_WRITE); + nvme_blk_write(blk, data_offset, nvme_rw_cb, req); + } else { + req->aiocb = blk_aio_pwrite_zeroes(blk, data_offset, data_size, + BDRV_REQ_MAY_UNMAP, nvme_rw_cb, + req); + } + + return NVME_NO_COMPLETE; + +invalid: + block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_WRITE); + return status | NVME_DNR; +} + +static inline uint16_t nvme_write(NvmeCtrl *n, NvmeRequest *req) +{ + return nvme_do_write(n, req, false, false); +} + +static inline uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req) +{ + return nvme_do_write(n, req, false, true); +} + +static inline uint16_t nvme_zone_append(NvmeCtrl *n, NvmeRequest *req) +{ + return nvme_do_write(n, req, true, false); +} + +static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeNamespace *ns, NvmeCmd *c, + uint64_t *slba, uint32_t *zone_idx) +{ + uint32_t dw10 = le32_to_cpu(c->cdw10); + uint32_t dw11 = le32_to_cpu(c->cdw11); + + if (!ns->params.zoned) { + trace_pci_nvme_err_invalid_opc(c->opcode); + return NVME_INVALID_OPCODE | NVME_DNR; + } + + *slba = ((uint64_t)dw11) << 32 | dw10; + if (unlikely(*slba >= ns->id_ns.nsze)) { + trace_pci_nvme_err_invalid_lba_range(*slba, 0, ns->id_ns.nsze); + *slba = 0; + return NVME_LBA_RANGE | NVME_DNR; + } + + *zone_idx = nvme_zone_idx(ns, *slba); + assert(*zone_idx < ns->num_zones); + + return NVME_SUCCESS; +} + +typedef uint16_t (*op_handler_t)(NvmeNamespace *, NvmeZone *, NvmeZoneState, + NvmeRequest *); + +enum NvmeZoneProcessingMask { + NVME_PROC_CURRENT_ZONE = 0, + NVME_PROC_OPENED_ZONES = 1 << 0, + NVME_PROC_CLOSED_ZONES = 1 << 1, + NVME_PROC_READ_ONLY_ZONES = 1 << 2, + NVME_PROC_FULL_ZONES = 1 << 3, +}; + +static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone, + NvmeZoneState state, NvmeRequest *req) +{ + return nvme_zrm_open(nvme_ctrl(req), ns, zone); +} + +static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone, + NvmeZoneState state, NvmeRequest *req) +{ + return nvme_zrm_close(ns, zone); +} + +static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone, + NvmeZoneState state, NvmeRequest *req) +{ + return nvme_zrm_finish(ns, zone); +} + +static uint16_t nvme_offline_zone(NvmeNamespace *ns, NvmeZone *zone, + NvmeZoneState state, NvmeRequest *req) +{ + switch (state) { + case NVME_ZONE_STATE_READ_ONLY: + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_OFFLINE); + /* fall through */ + case NVME_ZONE_STATE_OFFLINE: + return NVME_SUCCESS; + default: + return NVME_ZONE_INVAL_TRANSITION; + } +} + +static uint16_t nvme_set_zd_ext(NvmeNamespace *ns, NvmeZone *zone) +{ 
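+    /*
+     * Setting a zone descriptor extension on an Empty zone transitions it to
+     * Closed; this consumes an active resource, hence the nvme_aor_check()
+     * before the state change.
+     */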
+ uint16_t status; + uint8_t state = nvme_get_zone_state(zone); + + if (state == NVME_ZONE_STATE_EMPTY) { + status = nvme_aor_check(ns, 1, 0); + if (status) { + return status; + } + nvme_aor_inc_active(ns); + zone->d.za |= NVME_ZA_ZD_EXT_VALID; + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED); + return NVME_SUCCESS; + } + + return NVME_ZONE_INVAL_TRANSITION; +} + +static uint16_t nvme_bulk_proc_zone(NvmeNamespace *ns, NvmeZone *zone, + enum NvmeZoneProcessingMask proc_mask, + op_handler_t op_hndlr, NvmeRequest *req) +{ + uint16_t status = NVME_SUCCESS; + NvmeZoneState zs = nvme_get_zone_state(zone); + bool proc_zone; + + switch (zs) { + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + proc_zone = proc_mask & NVME_PROC_OPENED_ZONES; + break; + case NVME_ZONE_STATE_CLOSED: + proc_zone = proc_mask & NVME_PROC_CLOSED_ZONES; + break; + case NVME_ZONE_STATE_READ_ONLY: + proc_zone = proc_mask & NVME_PROC_READ_ONLY_ZONES; + break; + case NVME_ZONE_STATE_FULL: + proc_zone = proc_mask & NVME_PROC_FULL_ZONES; + break; + default: + proc_zone = false; + } + + if (proc_zone) { + status = op_hndlr(ns, zone, zs, req); + } + + return status; +} + +static uint16_t nvme_do_zone_op(NvmeNamespace *ns, NvmeZone *zone, + enum NvmeZoneProcessingMask proc_mask, + op_handler_t op_hndlr, NvmeRequest *req) +{ + NvmeZone *next; + uint16_t status = NVME_SUCCESS; + int i; + + if (!proc_mask) { + status = op_hndlr(ns, zone, nvme_get_zone_state(zone), req); + } else { + if (proc_mask & NVME_PROC_CLOSED_ZONES) { + QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) { + status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, + req); + if (status && status != NVME_NO_COMPLETE) { + goto out; + } + } + } + if (proc_mask & NVME_PROC_OPENED_ZONES) { + QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) { + status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, + req); + if (status && status != NVME_NO_COMPLETE) { + goto out; + } + } + + QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) { + status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, + req); + if (status && status != NVME_NO_COMPLETE) { + goto out; + } + } + } + if (proc_mask & NVME_PROC_FULL_ZONES) { + QTAILQ_FOREACH_SAFE(zone, &ns->full_zones, entry, next) { + status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, + req); + if (status && status != NVME_NO_COMPLETE) { + goto out; + } + } + } + + if (proc_mask & NVME_PROC_READ_ONLY_ZONES) { + for (i = 0; i < ns->num_zones; i++, zone++) { + status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, + req); + if (status && status != NVME_NO_COMPLETE) { + goto out; + } + } + } + } + +out: + return status; +} + +typedef struct NvmeZoneResetAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + NvmeRequest *req; + QEMUBH *bh; + int ret; + + bool all; + int idx; + NvmeZone *zone; +} NvmeZoneResetAIOCB; + +static void nvme_zone_reset_cancel(BlockAIOCB *aiocb) +{ + NvmeZoneResetAIOCB *iocb = container_of(aiocb, NvmeZoneResetAIOCB, common); + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + + iocb->idx = ns->num_zones; + + iocb->ret = -ECANCELED; + + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); + iocb->aiocb = NULL; + } +} + +static const AIOCBInfo nvme_zone_reset_aiocb_info = { + .aiocb_size = sizeof(NvmeZoneResetAIOCB), + .cancel_async = nvme_zone_reset_cancel, +}; + +static void nvme_zone_reset_bh(void *opaque) +{ + NvmeZoneResetAIOCB *iocb = opaque; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + 
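+    /* the reset chain has finished; release the bottom half and drop the AIOCB reference */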
qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); +} + +static void nvme_zone_reset_cb(void *opaque, int ret); + +static void nvme_zone_reset_epilogue_cb(void *opaque, int ret) +{ + NvmeZoneResetAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + int64_t moff; + int count; + + if (ret < 0) { + nvme_zone_reset_cb(iocb, ret); + return; + } + + if (!ns->lbaf.ms) { + nvme_zone_reset_cb(iocb, 0); + return; + } + + moff = nvme_moff(ns, iocb->zone->d.zslba); + count = nvme_m2b(ns, ns->zone_size); + + iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, moff, count, + BDRV_REQ_MAY_UNMAP, + nvme_zone_reset_cb, iocb); + return; +} + +static void nvme_zone_reset_cb(void *opaque, int ret) +{ + NvmeZoneResetAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + + if (ret < 0) { + iocb->ret = ret; + goto done; + } + + if (iocb->zone) { + nvme_zrm_reset(ns, iocb->zone); + + if (!iocb->all) { + goto done; + } + } + + while (iocb->idx < ns->num_zones) { + NvmeZone *zone = &ns->zone_array[iocb->idx++]; + + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EMPTY: + if (!iocb->all) { + goto done; + } + + continue; + + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + case NVME_ZONE_STATE_CLOSED: + case NVME_ZONE_STATE_FULL: + iocb->zone = zone; + break; + + default: + continue; + } + + trace_pci_nvme_zns_zone_reset(zone->d.zslba); + + iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, + nvme_l2b(ns, zone->d.zslba), + nvme_l2b(ns, ns->zone_size), + BDRV_REQ_MAY_UNMAP, + nvme_zone_reset_epilogue_cb, + iocb); + return; + } + +done: + iocb->aiocb = NULL; + if (iocb->bh) { + qemu_bh_schedule(iocb->bh); + } +} + +static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCmd *cmd = (NvmeCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + NvmeZone *zone; + NvmeZoneResetAIOCB *iocb; + uint8_t *zd_ext; + uint32_t dw13 = le32_to_cpu(cmd->cdw13); + uint64_t slba = 0; + uint32_t zone_idx = 0; + uint16_t status; + uint8_t action; + bool all; + enum NvmeZoneProcessingMask proc_mask = NVME_PROC_CURRENT_ZONE; + + action = dw13 & 0xff; + all = !!(dw13 & 0x100); + + req->status = NVME_SUCCESS; + + if (!all) { + status = nvme_get_mgmt_zone_slba_idx(ns, cmd, &slba, &zone_idx); + if (status) { + return status; + } + } + + zone = &ns->zone_array[zone_idx]; + if (slba != zone->d.zslba) { + trace_pci_nvme_err_unaligned_zone_cmd(action, slba, zone->d.zslba); + return NVME_INVALID_FIELD | NVME_DNR; + } + + switch (action) { + + case NVME_ZONE_ACTION_OPEN: + if (all) { + proc_mask = NVME_PROC_CLOSED_ZONES; + } + trace_pci_nvme_open_zone(slba, zone_idx, all); + status = nvme_do_zone_op(ns, zone, proc_mask, nvme_open_zone, req); + break; + + case NVME_ZONE_ACTION_CLOSE: + if (all) { + proc_mask = NVME_PROC_OPENED_ZONES; + } + trace_pci_nvme_close_zone(slba, zone_idx, all); + status = nvme_do_zone_op(ns, zone, proc_mask, nvme_close_zone, req); + break; + + case NVME_ZONE_ACTION_FINISH: + if (all) { + proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES; + } + trace_pci_nvme_finish_zone(slba, zone_idx, all); + status = nvme_do_zone_op(ns, zone, proc_mask, nvme_finish_zone, req); + break; + + case NVME_ZONE_ACTION_RESET: + trace_pci_nvme_reset_zone(slba, zone_idx, all); + + iocb = blk_aio_get(&nvme_zone_reset_aiocb_info, ns->blkconf.blk, + nvme_misc_cb, req); + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_zone_reset_bh, iocb); + iocb->ret = 0; + iocb->all = all; + iocb->idx = zone_idx; + 
iocb->zone = NULL; + + req->aiocb = &iocb->common; + nvme_zone_reset_cb(iocb, 0); + + return NVME_NO_COMPLETE; + + case NVME_ZONE_ACTION_OFFLINE: + if (all) { + proc_mask = NVME_PROC_READ_ONLY_ZONES; + } + trace_pci_nvme_offline_zone(slba, zone_idx, all); + status = nvme_do_zone_op(ns, zone, proc_mask, nvme_offline_zone, req); + break; + + case NVME_ZONE_ACTION_SET_ZD_EXT: + trace_pci_nvme_set_descriptor_extension(slba, zone_idx); + if (all || !ns->params.zd_extension_size) { + return NVME_INVALID_FIELD | NVME_DNR; + } + zd_ext = nvme_get_zd_extension(ns, zone_idx); + status = nvme_h2c(n, zd_ext, ns->params.zd_extension_size, req); + if (status) { + trace_pci_nvme_err_zd_extension_map_error(zone_idx); + return status; + } + + status = nvme_set_zd_ext(ns, zone); + if (status == NVME_SUCCESS) { + trace_pci_nvme_zd_extension_set(zone_idx); + return status; + } + break; + + default: + trace_pci_nvme_err_invalid_mgmt_action(action); + status = NVME_INVALID_FIELD; + } + + if (status == NVME_ZONE_INVAL_TRANSITION) { + trace_pci_nvme_err_invalid_zone_state_transition(action, slba, + zone->d.za); + } + if (status) { + status |= NVME_DNR; + } + + return status; +} + +static bool nvme_zone_matches_filter(uint32_t zafs, NvmeZone *zl) +{ + NvmeZoneState zs = nvme_get_zone_state(zl); + + switch (zafs) { + case NVME_ZONE_REPORT_ALL: + return true; + case NVME_ZONE_REPORT_EMPTY: + return zs == NVME_ZONE_STATE_EMPTY; + case NVME_ZONE_REPORT_IMPLICITLY_OPEN: + return zs == NVME_ZONE_STATE_IMPLICITLY_OPEN; + case NVME_ZONE_REPORT_EXPLICITLY_OPEN: + return zs == NVME_ZONE_STATE_EXPLICITLY_OPEN; + case NVME_ZONE_REPORT_CLOSED: + return zs == NVME_ZONE_STATE_CLOSED; + case NVME_ZONE_REPORT_FULL: + return zs == NVME_ZONE_STATE_FULL; + case NVME_ZONE_REPORT_READ_ONLY: + return zs == NVME_ZONE_STATE_READ_ONLY; + case NVME_ZONE_REPORT_OFFLINE: + return zs == NVME_ZONE_STATE_OFFLINE; + default: + return false; + } +} + +static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCmd *cmd = (NvmeCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + /* cdw12 is zero-based number of dwords to return. 
Convert to bytes */ + uint32_t data_size = (le32_to_cpu(cmd->cdw12) + 1) << 2; + uint32_t dw13 = le32_to_cpu(cmd->cdw13); + uint32_t zone_idx, zra, zrasf, partial; + uint64_t max_zones, nr_zones = 0; + uint16_t status; + uint64_t slba; + NvmeZoneDescr *z; + NvmeZone *zone; + NvmeZoneReportHeader *header; + void *buf, *buf_p; + size_t zone_entry_sz; + int i; + + req->status = NVME_SUCCESS; + + status = nvme_get_mgmt_zone_slba_idx(ns, cmd, &slba, &zone_idx); + if (status) { + return status; + } + + zra = dw13 & 0xff; + if (zra != NVME_ZONE_REPORT && zra != NVME_ZONE_REPORT_EXTENDED) { + return NVME_INVALID_FIELD | NVME_DNR; + } + if (zra == NVME_ZONE_REPORT_EXTENDED && !ns->params.zd_extension_size) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + zrasf = (dw13 >> 8) & 0xff; + if (zrasf > NVME_ZONE_REPORT_OFFLINE) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (data_size < sizeof(NvmeZoneReportHeader)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + status = nvme_check_mdts(n, data_size); + if (status) { + return status; + } + + partial = (dw13 >> 16) & 0x01; + + zone_entry_sz = sizeof(NvmeZoneDescr); + if (zra == NVME_ZONE_REPORT_EXTENDED) { + zone_entry_sz += ns->params.zd_extension_size; + } + + max_zones = (data_size - sizeof(NvmeZoneReportHeader)) / zone_entry_sz; + buf = g_malloc0(data_size); + + zone = &ns->zone_array[zone_idx]; + for (i = zone_idx; i < ns->num_zones; i++) { + if (partial && nr_zones >= max_zones) { + break; + } + if (nvme_zone_matches_filter(zrasf, zone++)) { + nr_zones++; + } + } + header = (NvmeZoneReportHeader *)buf; + header->nr_zones = cpu_to_le64(nr_zones); + + buf_p = buf + sizeof(NvmeZoneReportHeader); + for (; zone_idx < ns->num_zones && max_zones > 0; zone_idx++) { + zone = &ns->zone_array[zone_idx]; + if (nvme_zone_matches_filter(zrasf, zone)) { + z = (NvmeZoneDescr *)buf_p; + buf_p += sizeof(NvmeZoneDescr); + + z->zt = zone->d.zt; + z->zs = zone->d.zs; + z->zcap = cpu_to_le64(zone->d.zcap); + z->zslba = cpu_to_le64(zone->d.zslba); + z->za = zone->d.za; + + if (nvme_wp_is_valid(zone)) { + z->wp = cpu_to_le64(zone->d.wp); + } else { + z->wp = cpu_to_le64(~0ULL); + } + + if (zra == NVME_ZONE_REPORT_EXTENDED) { + if (zone->d.za & NVME_ZA_ZD_EXT_VALID) { + memcpy(buf_p, nvme_get_zd_extension(ns, zone_idx), + ns->params.zd_extension_size); + } + buf_p += ns->params.zd_extension_size; + } + + max_zones--; + } + } + + status = nvme_c2h(n, (uint8_t *)buf, data_size, req); + + g_free(buf); + + return status; +} + +static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns; + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + + trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), + req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode)); + + if (!nvme_nsid_valid(n, nsid)) { + return NVME_INVALID_NSID | NVME_DNR; + } + + /* + * In the base NVM command set, Flush may apply to all namespaces + * (indicated by NSID being set to FFFFFFFFh). But if that feature is used + * along with TP 4056 (Namespace Types), it may be pretty screwed up. + * + * If NSID is indeed set to FFFFFFFFh, we simply cannot associate the + * opcode with a specific command since we cannot determine a unique I/O + * command set. Opcode 0h could have any other meaning than something + * equivalent to flushing and say it DOES have completely different + * semantics in some other command set - does an NSID of FFFFFFFFh then + * mean "for all namespaces, apply whatever command set specific command + * that uses the 0h opcode?" 
Or does it mean "for all namespaces, apply + * whatever command that uses the 0h opcode if, and only if, it allows NSID + * to be FFFFFFFFh"? + * + * Anyway (and luckily), for now, we do not care about this since the + * device only supports namespace types that includes the NVM Flush command + * (NVM and Zoned), so always do an NVM Flush. + */ + if (req->cmd.opcode == NVME_CMD_FLUSH) { + return nvme_flush(n, req); + } + + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (!(ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { + trace_pci_nvme_err_invalid_opc(req->cmd.opcode); + return NVME_INVALID_OPCODE | NVME_DNR; + } + + if (ns->status) { + return ns->status; + } + + if (NVME_CMD_FLAGS_FUSE(req->cmd.flags)) { + return NVME_INVALID_FIELD; + } + + req->ns = ns; + + switch (req->cmd.opcode) { + case NVME_CMD_WRITE_ZEROES: + return nvme_write_zeroes(n, req); + case NVME_CMD_ZONE_APPEND: + return nvme_zone_append(n, req); + case NVME_CMD_WRITE: + return nvme_write(n, req); + case NVME_CMD_READ: + return nvme_read(n, req); + case NVME_CMD_COMPARE: + return nvme_compare(n, req); + case NVME_CMD_DSM: + return nvme_dsm(n, req); + case NVME_CMD_VERIFY: + return nvme_verify(n, req); + case NVME_CMD_COPY: + return nvme_copy(n, req); + case NVME_CMD_ZONE_MGMT_SEND: + return nvme_zone_mgmt_send(n, req); + case NVME_CMD_ZONE_MGMT_RECV: + return nvme_zone_mgmt_recv(n, req); + default: + assert(false); + } + + return NVME_INVALID_OPCODE | NVME_DNR; +} + +static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n) +{ + n->sq[sq->sqid] = NULL; + timer_free(sq->timer); + g_free(sq->io_req); + if (sq->sqid) { + g_free(sq); + } +} + +static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd; + NvmeRequest *r, *next; + NvmeSQueue *sq; + NvmeCQueue *cq; + uint16_t qid = le16_to_cpu(c->qid); + + if (unlikely(!qid || nvme_check_sqid(n, qid))) { + trace_pci_nvme_err_invalid_del_sq(qid); + return NVME_INVALID_QID | NVME_DNR; + } + + trace_pci_nvme_del_sq(qid); + + sq = n->sq[qid]; + while (!QTAILQ_EMPTY(&sq->out_req_list)) { + r = QTAILQ_FIRST(&sq->out_req_list); + assert(r->aiocb); + blk_aio_cancel(r->aiocb); + } + + assert(QTAILQ_EMPTY(&sq->out_req_list)); + + if (!nvme_check_cqid(n, sq->cqid)) { + cq = n->cq[sq->cqid]; + QTAILQ_REMOVE(&cq->sq_list, sq, entry); + + nvme_post_cqes(cq); + QTAILQ_FOREACH_SAFE(r, &cq->req_list, entry, next) { + if (r->sq == sq) { + QTAILQ_REMOVE(&cq->req_list, r, entry); + QTAILQ_INSERT_TAIL(&sq->req_list, r, entry); + } + } + } + + nvme_free_sq(sq, n); + return NVME_SUCCESS; +} + +static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, + uint16_t sqid, uint16_t cqid, uint16_t size) +{ + int i; + NvmeCQueue *cq; + + sq->ctrl = n; + sq->dma_addr = dma_addr; + sq->sqid = sqid; + sq->size = size; + sq->cqid = cqid; + sq->head = sq->tail = 0; + sq->io_req = g_new0(NvmeRequest, sq->size); + + QTAILQ_INIT(&sq->req_list); + QTAILQ_INIT(&sq->out_req_list); + for (i = 0; i < sq->size; i++) { + sq->io_req[i].sq = sq; + QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); + } + sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq); + + assert(n->cq[cqid]); + cq = n->cq[cqid]; + QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry); + n->sq[sqid] = sq; +} + +static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeSQueue *sq; + NvmeCreateSq *c = (NvmeCreateSq *)&req->cmd; + + uint16_t cqid = le16_to_cpu(c->cqid); + uint16_t sqid = le16_to_cpu(c->sqid); + uint16_t 
qsize = le16_to_cpu(c->qsize); + uint16_t qflags = le16_to_cpu(c->sq_flags); + uint64_t prp1 = le64_to_cpu(c->prp1); + + trace_pci_nvme_create_sq(prp1, sqid, cqid, qsize, qflags); + + if (unlikely(!cqid || nvme_check_cqid(n, cqid))) { + trace_pci_nvme_err_invalid_create_sq_cqid(cqid); + return NVME_INVALID_CQID | NVME_DNR; + } + if (unlikely(!sqid || sqid > n->params.max_ioqpairs || + n->sq[sqid] != NULL)) { + trace_pci_nvme_err_invalid_create_sq_sqid(sqid); + return NVME_INVALID_QID | NVME_DNR; + } + if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) { + trace_pci_nvme_err_invalid_create_sq_size(qsize); + return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; + } + if (unlikely(prp1 & (n->page_size - 1))) { + trace_pci_nvme_err_invalid_create_sq_addr(prp1); + return NVME_INVALID_PRP_OFFSET | NVME_DNR; + } + if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) { + trace_pci_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags)); + return NVME_INVALID_FIELD | NVME_DNR; + } + sq = g_malloc0(sizeof(*sq)); + nvme_init_sq(sq, n, prp1, sqid, cqid, qsize + 1); + return NVME_SUCCESS; +} + +struct nvme_stats { + uint64_t units_read; + uint64_t units_written; + uint64_t read_commands; + uint64_t write_commands; +}; + +static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats) +{ + BlockAcctStats *s = blk_get_stats(ns->blkconf.blk); + + stats->units_read += s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS; + stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS; + stats->read_commands += s->nr_ops[BLOCK_ACCT_READ]; + stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE]; +} + +static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + struct nvme_stats stats = { 0 }; + NvmeSmartLog smart = { 0 }; + uint32_t trans_len; + NvmeNamespace *ns; + time_t current_ms; + + if (off >= sizeof(smart)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (nsid != 0xffffffff) { + ns = nvme_ns(n, nsid); + if (!ns) { + return NVME_INVALID_NSID | NVME_DNR; + } + nvme_set_blk_stats(ns, &stats); + } else { + int i; + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + nvme_set_blk_stats(ns, &stats); + } + } + + trans_len = MIN(sizeof(smart) - off, buf_len); + smart.critical_warning = n->smart_critical_warning; + + smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read, + 1000)); + smart.data_units_written[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_written, + 1000)); + smart.host_read_commands[0] = cpu_to_le64(stats.read_commands); + smart.host_write_commands[0] = cpu_to_le64(stats.write_commands); + + smart.temperature = cpu_to_le16(n->temperature); + + if ((n->temperature >= n->features.temp_thresh_hi) || + (n->temperature <= n->features.temp_thresh_low)) { + smart.critical_warning |= NVME_SMART_TEMPERATURE; + } + + current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + smart.power_on_hours[0] = + cpu_to_le64((((current_ms - n->starttime_ms) / 1000) / 60) / 60); + + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_SMART); + } + + return nvme_c2h(n, (uint8_t *) &smart + off, trans_len, req); +} + +static uint16_t nvme_fw_log_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off, + NvmeRequest *req) +{ + uint32_t trans_len; + NvmeFwSlotInfoLog fw_log = { + .afi = 0x1, + }; + + if (off >= sizeof(fw_log)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' '); + trans_len 
= MIN(sizeof(fw_log) - off, buf_len); + + return nvme_c2h(n, (uint8_t *) &fw_log + off, trans_len, req); +} + +static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint32_t trans_len; + NvmeErrorLog errlog; + + if (off >= sizeof(errlog)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_ERROR); + } + + memset(&errlog, 0x0, sizeof(errlog)); + trans_len = MIN(sizeof(errlog) - off, buf_len); + + return nvme_c2h(n, (uint8_t *)&errlog, trans_len, req); +} + +static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint32_t nslist[1024]; + uint32_t trans_len; + int i = 0; + uint32_t nsid; + + if (off >= sizeof(nslist)) { + trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(nslist)); + return NVME_INVALID_FIELD | NVME_DNR; + } + + memset(nslist, 0x0, sizeof(nslist)); + trans_len = MIN(sizeof(nslist) - off, buf_len); + + while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) != + NVME_CHANGED_NSID_SIZE) { + /* + * If more than 1024 namespaces, the first entry in the log page should + * be set to FFFFFFFFh and the others to 0 as spec. + */ + if (i == ARRAY_SIZE(nslist)) { + memset(nslist, 0x0, sizeof(nslist)); + nslist[0] = 0xffffffff; + break; + } + + nslist[i++] = nsid; + clear_bit(nsid, n->changed_nsids); + } + + /* + * Remove all the remaining list entries in case returns directly due to + * more than 1024 namespaces. + */ + if (nslist[0] == 0xffffffff) { + bitmap_zero(n->changed_nsids, NVME_CHANGED_NSID_SIZE); + } + + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_NOTICE); + } + + return nvme_c2h(n, ((uint8_t *)nslist) + off, trans_len, req); +} + +static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + NvmeEffectsLog log = {}; + const uint32_t *src_iocs = NULL; + uint32_t trans_len; + + if (off >= sizeof(log)) { + trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(log)); + return NVME_INVALID_FIELD | NVME_DNR; + } + + switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) { + case NVME_CC_CSS_NVM: + src_iocs = nvme_cse_iocs_nvm; + /* fall through */ + case NVME_CC_CSS_ADMIN_ONLY: + break; + case NVME_CC_CSS_CSI: + switch (csi) { + case NVME_CSI_NVM: + src_iocs = nvme_cse_iocs_nvm; + break; + case NVME_CSI_ZONED: + src_iocs = nvme_cse_iocs_zoned; + break; + } + } + + memcpy(log.acs, nvme_cse_acs, sizeof(nvme_cse_acs)); + + if (src_iocs) { + memcpy(log.iocs, src_iocs, sizeof(log.iocs)); + } + + trans_len = MIN(sizeof(log) - off, buf_len); + + return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req); +} + +static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCmd *cmd = &req->cmd; + + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); + uint32_t dw12 = le32_to_cpu(cmd->cdw12); + uint32_t dw13 = le32_to_cpu(cmd->cdw13); + uint8_t lid = dw10 & 0xff; + uint8_t lsp = (dw10 >> 8) & 0xf; + uint8_t rae = (dw10 >> 15) & 0x1; + uint8_t csi = le32_to_cpu(cmd->cdw14) >> 24; + uint32_t numdl, numdu; + uint64_t off, lpol, lpou; + size_t len; + uint16_t status; + + numdl = (dw10 >> 16); + numdu = (dw11 & 0xffff); + lpol = dw12; + lpou = dw13; + + len = (((numdu << 16) | numdl) + 1) << 2; + off = (lpou << 32ULL) | lpol; + + if (off & 0x3) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trace_pci_nvme_get_log(nvme_cid(req), lid, lsp, rae, len, off); + + status = nvme_check_mdts(n, len); + if (status) { + return 
status; + } + + switch (lid) { + case NVME_LOG_ERROR_INFO: + return nvme_error_info(n, rae, len, off, req); + case NVME_LOG_SMART_INFO: + return nvme_smart_info(n, rae, len, off, req); + case NVME_LOG_FW_SLOT_INFO: + return nvme_fw_log_info(n, len, off, req); + case NVME_LOG_CHANGED_NSLIST: + return nvme_changed_nslist(n, rae, len, off, req); + case NVME_LOG_CMD_EFFECTS: + return nvme_cmd_effects(n, csi, len, off, req); + default: + trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); + return NVME_INVALID_FIELD | NVME_DNR; + } +} + +static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) +{ + n->cq[cq->cqid] = NULL; + timer_free(cq->timer); + if (msix_enabled(&n->parent_obj)) { + msix_vector_unuse(&n->parent_obj, cq->vector); + } + if (cq->cqid) { + g_free(cq); + } +} + +static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd; + NvmeCQueue *cq; + uint16_t qid = le16_to_cpu(c->qid); + + if (unlikely(!qid || nvme_check_cqid(n, qid))) { + trace_pci_nvme_err_invalid_del_cq_cqid(qid); + return NVME_INVALID_CQID | NVME_DNR; + } + + cq = n->cq[qid]; + if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) { + trace_pci_nvme_err_invalid_del_cq_notempty(qid); + return NVME_INVALID_QUEUE_DEL; + } + + if (cq->irq_enabled && cq->tail != cq->head) { + n->cq_pending--; + } + + nvme_irq_deassert(n, cq); + trace_pci_nvme_del_cq(qid); + nvme_free_cq(cq, n); + return NVME_SUCCESS; +} + +static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, + uint16_t cqid, uint16_t vector, uint16_t size, + uint16_t irq_enabled) +{ + int ret; + + if (msix_enabled(&n->parent_obj)) { + ret = msix_vector_use(&n->parent_obj, vector); + assert(ret == 0); + } + cq->ctrl = n; + cq->cqid = cqid; + cq->size = size; + cq->dma_addr = dma_addr; + cq->phase = 1; + cq->irq_enabled = irq_enabled; + cq->vector = vector; + cq->head = cq->tail = 0; + QTAILQ_INIT(&cq->req_list); + QTAILQ_INIT(&cq->sq_list); + n->cq[cqid] = cq; + cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq); +} + +static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCQueue *cq; + NvmeCreateCq *c = (NvmeCreateCq *)&req->cmd; + uint16_t cqid = le16_to_cpu(c->cqid); + uint16_t vector = le16_to_cpu(c->irq_vector); + uint16_t qsize = le16_to_cpu(c->qsize); + uint16_t qflags = le16_to_cpu(c->cq_flags); + uint64_t prp1 = le64_to_cpu(c->prp1); + + trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags, + NVME_CQ_FLAGS_IEN(qflags) != 0); + + if (unlikely(!cqid || cqid > n->params.max_ioqpairs || + n->cq[cqid] != NULL)) { + trace_pci_nvme_err_invalid_create_cq_cqid(cqid); + return NVME_INVALID_QID | NVME_DNR; + } + if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) { + trace_pci_nvme_err_invalid_create_cq_size(qsize); + return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; + } + if (unlikely(prp1 & (n->page_size - 1))) { + trace_pci_nvme_err_invalid_create_cq_addr(prp1); + return NVME_INVALID_PRP_OFFSET | NVME_DNR; + } + if (unlikely(!msix_enabled(&n->parent_obj) && vector)) { + trace_pci_nvme_err_invalid_create_cq_vector(vector); + return NVME_INVALID_IRQ_VECTOR | NVME_DNR; + } + if (unlikely(vector >= n->params.msix_qsize)) { + trace_pci_nvme_err_invalid_create_cq_vector(vector); + return NVME_INVALID_IRQ_VECTOR | NVME_DNR; + } + if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) { + trace_pci_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags)); + return NVME_INVALID_FIELD | NVME_DNR; + } + + cq = g_malloc0(sizeof(*cq)); + nvme_init_cq(cq, n, prp1, cqid, vector, qsize + 1, + 
NVME_CQ_FLAGS_IEN(qflags)); + + /* + * It is only required to set qs_created when creating a completion queue; + * creating a submission queue without a matching completion queue will + * fail. + */ + n->qs_created = true; + return NVME_SUCCESS; +} + +static uint16_t nvme_rpt_empty_id_struct(NvmeCtrl *n, NvmeRequest *req) +{ + uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {}; + + return nvme_c2h(n, id, sizeof(id), req); +} + +static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req) +{ + trace_pci_nvme_identify_ctrl(); + + return nvme_c2h(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), req); +} + +static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {}; + NvmeIdCtrlNvm *id_nvm = (NvmeIdCtrlNvm *)&id; + + trace_pci_nvme_identify_ctrl_csi(c->csi); + + switch (c->csi) { + case NVME_CSI_NVM: + id_nvm->vsl = n->params.vsl; + id_nvm->dmrsl = cpu_to_le32(n->dmrsl); + break; + + case NVME_CSI_ZONED: + ((NvmeIdCtrlZoned *)&id)->zasl = n->params.zasl; + break; + + default: + return NVME_INVALID_FIELD | NVME_DNR; + } + + return nvme_c2h(n, id, sizeof(id), req); +} + +static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) +{ + NvmeNamespace *ns; + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint32_t nsid = le32_to_cpu(c->nsid); + + trace_pci_nvme_identify_ns(nsid); + + if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { + if (!active) { + ns = nvme_subsys_ns(n->subsys, nsid); + if (!ns) { + return nvme_rpt_empty_id_struct(n, req); + } + } else { + return nvme_rpt_empty_id_struct(n, req); + } + } + + if (active || ns->csi == NVME_CSI_NVM) { + return nvme_c2h(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req); + } + + return NVME_INVALID_CMD_SET | NVME_DNR; +} + +static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, + bool attached) +{ + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint32_t nsid = le32_to_cpu(c->nsid); + uint16_t min_id = le16_to_cpu(c->ctrlid); + uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {}; + uint16_t *ids = &list[1]; + NvmeNamespace *ns; + NvmeCtrl *ctrl; + int cntlid, nr_ids = 0; + + trace_pci_nvme_identify_ctrl_list(c->cns, min_id); + + if (!n->subsys) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (attached) { + if (nsid == NVME_NSID_BROADCAST) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ns = nvme_subsys_ns(n->subsys, nsid); + if (!ns) { + return NVME_INVALID_FIELD | NVME_DNR; + } + } + + for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) { + ctrl = nvme_subsys_ctrl(n->subsys, cntlid); + if (!ctrl) { + continue; + } + + if (attached && !nvme_ns(ctrl, nsid)) { + continue; + } + + ids[nr_ids++] = cntlid; + } + + list[0] = nr_ids; + + return nvme_c2h(n, (uint8_t *)list, sizeof(list), req); +} + +static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, + bool active) +{ + NvmeNamespace *ns; + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint32_t nsid = le32_to_cpu(c->nsid); + + trace_pci_nvme_identify_ns_csi(nsid, c->csi); + + if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { + if (!active) { + ns = nvme_subsys_ns(n->subsys, nsid); + if (!ns) { + return nvme_rpt_empty_id_struct(n, req); + } + } else { + return nvme_rpt_empty_id_struct(n, req); + } + } + + if (c->csi == NVME_CSI_NVM) { + 
return nvme_rpt_empty_id_struct(n, req); + } else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) { + return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned), + req); + } + + return NVME_INVALID_FIELD | NVME_DNR; +} + +static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, + bool active) +{ + NvmeNamespace *ns; + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint32_t min_nsid = le32_to_cpu(c->nsid); + uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; + static const int data_len = sizeof(list); + uint32_t *list_ptr = (uint32_t *)list; + int i, j = 0; + + trace_pci_nvme_identify_nslist(min_nsid); + + /* + * Both FFFFFFFFh (NVME_NSID_BROADCAST) and FFFFFFFFEh are invalid values + * since the Active Namespace ID List should return namespaces with ids + * *higher* than the NSID specified in the command. This is also specified + * in the spec (NVM Express v1.3d, Section 5.15.4). + */ + if (min_nsid >= NVME_NSID_BROADCAST - 1) { + return NVME_INVALID_NSID | NVME_DNR; + } + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + if (!active) { + ns = nvme_subsys_ns(n->subsys, i); + if (!ns) { + continue; + } + } else { + continue; + } + } + if (ns->params.nsid <= min_nsid) { + continue; + } + list_ptr[j++] = cpu_to_le32(ns->params.nsid); + if (j == data_len / sizeof(uint32_t)) { + break; + } + } + + return nvme_c2h(n, list, data_len, req); +} + +static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, + bool active) +{ + NvmeNamespace *ns; + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint32_t min_nsid = le32_to_cpu(c->nsid); + uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; + static const int data_len = sizeof(list); + uint32_t *list_ptr = (uint32_t *)list; + int i, j = 0; + + trace_pci_nvme_identify_nslist_csi(min_nsid, c->csi); + + /* + * Same as in nvme_identify_nslist(), FFFFFFFFh/FFFFFFFFEh are invalid. + */ + if (min_nsid >= NVME_NSID_BROADCAST - 1) { + return NVME_INVALID_NSID | NVME_DNR; + } + + if (c->csi != NVME_CSI_NVM && c->csi != NVME_CSI_ZONED) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + if (!active) { + ns = nvme_subsys_ns(n->subsys, i); + if (!ns) { + continue; + } + } else { + continue; + } + } + if (ns->params.nsid <= min_nsid || c->csi != ns->csi) { + continue; + } + list_ptr[j++] = cpu_to_le32(ns->params.nsid); + if (j == data_len / sizeof(uint32_t)) { + break; + } + } + + return nvme_c2h(n, list, data_len, req); +} + +static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns; + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint32_t nsid = le32_to_cpu(c->nsid); + uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; + uint8_t *pos = list; + struct { + NvmeIdNsDescr hdr; + uint8_t v[NVME_NIDL_UUID]; + } QEMU_PACKED uuid = {}; + struct { + NvmeIdNsDescr hdr; + uint64_t v; + } QEMU_PACKED eui64 = {}; + struct { + NvmeIdNsDescr hdr; + uint8_t v; + } QEMU_PACKED csi = {}; + + trace_pci_nvme_identify_ns_descr_list(nsid); + + if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + /* + * If the EUI-64 field is 0 and the NGUID field is 0, the namespace must + * provide a valid Namespace UUID in the Namespace Identification Descriptor + * data structure. QEMU does not yet support setting NGUID. 
+ */ + uuid.hdr.nidt = NVME_NIDT_UUID; + uuid.hdr.nidl = NVME_NIDL_UUID; + memcpy(uuid.v, ns->params.uuid.data, NVME_NIDL_UUID); + memcpy(pos, &uuid, sizeof(uuid)); + pos += sizeof(uuid); + + if (ns->params.eui64) { + eui64.hdr.nidt = NVME_NIDT_EUI64; + eui64.hdr.nidl = NVME_NIDL_EUI64; + eui64.v = cpu_to_be64(ns->params.eui64); + memcpy(pos, &eui64, sizeof(eui64)); + pos += sizeof(eui64); + } + + csi.hdr.nidt = NVME_NIDT_CSI; + csi.hdr.nidl = NVME_NIDL_CSI; + csi.v = ns->csi; + memcpy(pos, &csi, sizeof(csi)); + pos += sizeof(csi); + + return nvme_c2h(n, list, sizeof(list), req); +} + +static uint16_t nvme_identify_cmd_set(NvmeCtrl *n, NvmeRequest *req) +{ + uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; + static const int data_len = sizeof(list); + + trace_pci_nvme_identify_cmd_set(); + + NVME_SET_CSI(*list, NVME_CSI_NVM); + NVME_SET_CSI(*list, NVME_CSI_ZONED); + + return nvme_c2h(n, list, data_len, req); +} + +static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + + trace_pci_nvme_identify(nvme_cid(req), c->cns, le16_to_cpu(c->ctrlid), + c->csi); + + switch (c->cns) { + case NVME_ID_CNS_NS: + return nvme_identify_ns(n, req, true); + case NVME_ID_CNS_NS_PRESENT: + return nvme_identify_ns(n, req, false); + case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST: + return nvme_identify_ctrl_list(n, req, true); + case NVME_ID_CNS_CTRL_LIST: + return nvme_identify_ctrl_list(n, req, false); + case NVME_ID_CNS_CS_NS: + return nvme_identify_ns_csi(n, req, true); + case NVME_ID_CNS_CS_NS_PRESENT: + return nvme_identify_ns_csi(n, req, false); + case NVME_ID_CNS_CTRL: + return nvme_identify_ctrl(n, req); + case NVME_ID_CNS_CS_CTRL: + return nvme_identify_ctrl_csi(n, req); + case NVME_ID_CNS_NS_ACTIVE_LIST: + return nvme_identify_nslist(n, req, true); + case NVME_ID_CNS_NS_PRESENT_LIST: + return nvme_identify_nslist(n, req, false); + case NVME_ID_CNS_CS_NS_ACTIVE_LIST: + return nvme_identify_nslist_csi(n, req, true); + case NVME_ID_CNS_CS_NS_PRESENT_LIST: + return nvme_identify_nslist_csi(n, req, false); + case NVME_ID_CNS_NS_DESCR_LIST: + return nvme_identify_ns_descr_list(n, req); + case NVME_ID_CNS_IO_COMMAND_SET: + return nvme_identify_cmd_set(n, req); + default: + trace_pci_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); + return NVME_INVALID_FIELD | NVME_DNR; + } +} + +static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req) +{ + uint16_t sqid = le32_to_cpu(req->cmd.cdw10) & 0xffff; + + req->cqe.result = 1; + if (nvme_check_sqid(n, sqid)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts) +{ + trace_pci_nvme_setfeat_timestamp(ts); + + n->host_timestamp = le64_to_cpu(ts); + n->timestamp_set_qemu_clock_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); +} + +static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n) +{ + uint64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + uint64_t elapsed_time = current_time - n->timestamp_set_qemu_clock_ms; + + union nvme_timestamp { + struct { + uint64_t timestamp:48; + uint64_t sync:1; + uint64_t origin:3; + uint64_t rsvd1:12; + }; + uint64_t all; + }; + + union nvme_timestamp ts; + ts.all = 0; + ts.timestamp = n->host_timestamp + elapsed_time; + + /* If the host timestamp is non-zero, set the timestamp origin */ + ts.origin = n->host_timestamp ? 
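/*
 * Editor's note (illustrative sketch, not part of the patch): the
 * union nvme_timestamp built above packs the Timestamp feature value into one
 * 64-bit little-endian field: the millisecond timestamp in bits 47:0, a
 * "synch" flag in bit 48 and the origin in bits 51:49 (001b means the value
 * was set by a Set Features command). A host could decode a returned value
 * roughly like this; decode_timestamp() is a hypothetical helper:
 *
 *   #include <stdint.h>
 *
 *   struct decoded_ts {
 *       uint64_t millis;   // milliseconds since the host-provided epoch
 *       unsigned synch;    // 1 if the controller may have lost track of time
 *       unsigned origin;   // 0 = reset value, 1 = set by the host
 *   };
 *
 *   static struct decoded_ts decode_timestamp(uint64_t v) // v in CPU endianness
 *   {
 *       struct decoded_ts ts;
 *
 *       ts.millis = v & 0xffffffffffffULL;  // bits 47:0
 *       ts.synch  = (v >> 48) & 0x1;        // bit 48
 *       ts.origin = (v >> 49) & 0x7;        // bits 51:49
 *       return ts;
 *   }
 */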
0x01 : 0x00; + + trace_pci_nvme_getfeat_timestamp(ts.all); + + return cpu_to_le64(ts.all); +} + +static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) +{ + uint64_t timestamp = nvme_get_timestamp(n); + + return nvme_c2h(n, (uint8_t *)×tamp, sizeof(timestamp), req); +} + +static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCmd *cmd = &req->cmd; + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); + uint32_t nsid = le32_to_cpu(cmd->nsid); + uint32_t result; + uint8_t fid = NVME_GETSETFEAT_FID(dw10); + NvmeGetFeatureSelect sel = NVME_GETFEAT_SELECT(dw10); + uint16_t iv; + NvmeNamespace *ns; + int i; + + static const uint32_t nvme_feature_default[NVME_FID_MAX] = { + [NVME_ARBITRATION] = NVME_ARB_AB_NOLIMIT, + }; + + trace_pci_nvme_getfeat(nvme_cid(req), nsid, fid, sel, dw11); + + if (!nvme_feature_support[fid]) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (nvme_feature_cap[fid] & NVME_FEAT_CAP_NS) { + if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { + /* + * The Reservation Notification Mask and Reservation Persistence + * features require a status code of Invalid Field in Command when + * NSID is FFFFFFFFh. Since the device does not support those + * features we can always return Invalid Namespace or Format as we + * should do for all other features. + */ + return NVME_INVALID_NSID | NVME_DNR; + } + + if (!nvme_ns(n, nsid)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + } + + switch (sel) { + case NVME_GETFEAT_SELECT_CURRENT: + break; + case NVME_GETFEAT_SELECT_SAVED: + /* no features are saveable by the controller; fallthrough */ + case NVME_GETFEAT_SELECT_DEFAULT: + goto defaults; + case NVME_GETFEAT_SELECT_CAP: + result = nvme_feature_cap[fid]; + goto out; + } + + switch (fid) { + case NVME_TEMPERATURE_THRESHOLD: + result = 0; + + /* + * The controller only implements the Composite Temperature sensor, so + * return 0 for all other sensors. + */ + if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) { + goto out; + } + + switch (NVME_TEMP_THSEL(dw11)) { + case NVME_TEMP_THSEL_OVER: + result = n->features.temp_thresh_hi; + goto out; + case NVME_TEMP_THSEL_UNDER: + result = n->features.temp_thresh_low; + goto out; + } + + return NVME_INVALID_FIELD | NVME_DNR; + case NVME_ERROR_RECOVERY: + if (!nvme_nsid_valid(n, nsid)) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + result = ns->features.err_rec; + goto out; + case NVME_VOLATILE_WRITE_CACHE: + result = 0; + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + + result = blk_enable_write_cache(ns->blkconf.blk); + if (result) { + break; + } + } + trace_pci_nvme_getfeat_vwcache(result ? 
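/*
 * Editor's note (illustrative sketch, not part of the patch): in the code
 * above, NVME_GETSETFEAT_FID() and NVME_GETFEAT_SELECT() pull the Feature
 * Identifier and the Select field out of CDW10. Per the NVMe base
 * specification the FID occupies bits 07:00 and SEL bits 10:08 of CDW10, so a
 * host building a Get Features command for "default value of the Volatile
 * Write Cache feature" could do roughly the following (plain constants
 * instead of QEMU's macros; get_features_cdw10() is a hypothetical helper):
 *
 *   #include <stdint.h>
 *
 *   enum { SEL_CURRENT = 0, SEL_DEFAULT = 1, SEL_SAVED = 2, SEL_CAP = 3 };
 *
 *   static uint32_t get_features_cdw10(uint8_t fid, uint8_t sel)
 *   {
 *       return (uint32_t)fid | ((uint32_t)(sel & 0x7) << 8);
 *   }
 *
 *   // e.g. get_features_cdw10(0x06, SEL_DEFAULT) for Volatile Write Cache
 */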
"enabled" : "disabled"); + goto out; + case NVME_ASYNCHRONOUS_EVENT_CONF: + result = n->features.async_config; + goto out; + case NVME_TIMESTAMP: + return nvme_get_feature_timestamp(n, req); + default: + break; + } + +defaults: + switch (fid) { + case NVME_TEMPERATURE_THRESHOLD: + result = 0; + + if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) { + break; + } + + if (NVME_TEMP_THSEL(dw11) == NVME_TEMP_THSEL_OVER) { + result = NVME_TEMPERATURE_WARNING; + } + + break; + case NVME_NUMBER_OF_QUEUES: + result = (n->params.max_ioqpairs - 1) | + ((n->params.max_ioqpairs - 1) << 16); + trace_pci_nvme_getfeat_numq(result); + break; + case NVME_INTERRUPT_VECTOR_CONF: + iv = dw11 & 0xffff; + if (iv >= n->params.max_ioqpairs + 1) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + result = iv; + if (iv == n->admin_cq.vector) { + result |= NVME_INTVC_NOCOALESCING; + } + break; + default: + result = nvme_feature_default[fid]; + break; + } + +out: + req->cqe.result = cpu_to_le32(result); + return NVME_SUCCESS; +} + +static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) +{ + uint16_t ret; + uint64_t timestamp; + + ret = nvme_h2c(n, (uint8_t *)×tamp, sizeof(timestamp), req); + if (ret) { + return ret; + } + + nvme_set_timestamp(n, timestamp); + + return NVME_SUCCESS; +} + +static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns = NULL; + + NvmeCmd *cmd = &req->cmd; + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); + uint32_t nsid = le32_to_cpu(cmd->nsid); + uint8_t fid = NVME_GETSETFEAT_FID(dw10); + uint8_t save = NVME_SETFEAT_SAVE(dw10); + int i; + + trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11); + + if (save && !(nvme_feature_cap[fid] & NVME_FEAT_CAP_SAVE)) { + return NVME_FID_NOT_SAVEABLE | NVME_DNR; + } + + if (!nvme_feature_support[fid]) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (nvme_feature_cap[fid] & NVME_FEAT_CAP_NS) { + if (nsid != NVME_NSID_BROADCAST) { + if (!nvme_nsid_valid(n, nsid)) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + } + } else if (nsid && nsid != NVME_NSID_BROADCAST) { + if (!nvme_nsid_valid(n, nsid)) { + return NVME_INVALID_NSID | NVME_DNR; + } + + return NVME_FEAT_NOT_NS_SPEC | NVME_DNR; + } + + if (!(nvme_feature_cap[fid] & NVME_FEAT_CAP_CHANGE)) { + return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR; + } + + switch (fid) { + case NVME_TEMPERATURE_THRESHOLD: + if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) { + break; + } + + switch (NVME_TEMP_THSEL(dw11)) { + case NVME_TEMP_THSEL_OVER: + n->features.temp_thresh_hi = NVME_TEMP_TMPTH(dw11); + break; + case NVME_TEMP_THSEL_UNDER: + n->features.temp_thresh_low = NVME_TEMP_TMPTH(dw11); + break; + default: + return NVME_INVALID_FIELD | NVME_DNR; + } + + if ((n->temperature >= n->features.temp_thresh_hi) || + (n->temperature <= n->features.temp_thresh_low)) { + nvme_smart_event(n, NVME_AER_INFO_SMART_TEMP_THRESH); + } + + break; + case NVME_ERROR_RECOVERY: + if (nsid == NVME_NSID_BROADCAST) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + + if (!ns) { + continue; + } + + if (NVME_ID_NS_NSFEAT_DULBE(ns->id_ns.nsfeat)) { + ns->features.err_rec = dw11; + } + } + + break; + } + + assert(ns); + if (NVME_ID_NS_NSFEAT_DULBE(ns->id_ns.nsfeat)) { + ns->features.err_rec = dw11; + } + break; + case NVME_VOLATILE_WRITE_CACHE: + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + 
continue; + } + + if (!(dw11 & 0x1) && blk_enable_write_cache(ns->blkconf.blk)) { + blk_flush(ns->blkconf.blk); + } + + blk_set_enable_write_cache(ns->blkconf.blk, dw11 & 1); + } + + break; + + case NVME_NUMBER_OF_QUEUES: + if (n->qs_created) { + return NVME_CMD_SEQ_ERROR | NVME_DNR; + } + + /* + * NVMe v1.3, Section 5.21.1.7: FFFFh is not an allowed value for NCQR + * and NSQR. + */ + if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trace_pci_nvme_setfeat_numq((dw11 & 0xffff) + 1, + ((dw11 >> 16) & 0xffff) + 1, + n->params.max_ioqpairs, + n->params.max_ioqpairs); + req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) | + ((n->params.max_ioqpairs - 1) << 16)); + break; + case NVME_ASYNCHRONOUS_EVENT_CONF: + n->features.async_config = dw11; + break; + case NVME_TIMESTAMP: + return nvme_set_feature_timestamp(n, req); + case NVME_COMMAND_SET_PROFILE: + if (dw11 & 0x1ff) { + trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff); + return NVME_CMD_SET_CMB_REJECTED | NVME_DNR; + } + break; + default: + return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR; + } + return NVME_SUCCESS; +} + +static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req) +{ + trace_pci_nvme_aer(nvme_cid(req)); + + if (n->outstanding_aers > n->params.aerl) { + trace_pci_nvme_aer_aerl_exceeded(); + return NVME_AER_LIMIT_EXCEEDED; + } + + n->aer_reqs[n->outstanding_aers] = req; + n->outstanding_aers++; + + if (!QTAILQ_EMPTY(&n->aer_queue)) { + nvme_process_aers(n); + } + + return NVME_NO_COMPLETE; +} + +static void nvme_update_dmrsl(NvmeCtrl *n) +{ + int nsid; + + for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { + NvmeNamespace *ns = nvme_ns(n, nsid); + if (!ns) { + continue; + } + + n->dmrsl = MIN_NON_ZERO(n->dmrsl, + BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); + } +} + +static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns) +{ + uint32_t cc = ldl_le_p(&n->bar.cc); + + ns->iocs = nvme_cse_iocs_none; + switch (ns->csi) { + case NVME_CSI_NVM: + if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) { + ns->iocs = nvme_cse_iocs_nvm; + } + break; + case NVME_CSI_ZONED: + if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) { + ns->iocs = nvme_cse_iocs_zoned; + } else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) { + ns->iocs = nvme_cse_iocs_nvm; + } + break; + } +} + +static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns; + NvmeCtrl *ctrl; + uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {}; + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); + uint8_t sel = dw10 & 0xf; + uint16_t *nr_ids = &list[0]; + uint16_t *ids = &list[1]; + uint16_t ret; + int i; + + trace_pci_nvme_ns_attachment(nvme_cid(req), dw10 & 0xf); + + if (!nvme_nsid_valid(n, nsid)) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_subsys_ns(n->subsys, nsid); + if (!ns) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ret = nvme_h2c(n, (uint8_t *)list, 4096, req); + if (ret) { + return ret; + } + + if (!*nr_ids) { + return NVME_NS_CTRL_LIST_INVALID | NVME_DNR; + } + + *nr_ids = MIN(*nr_ids, NVME_CONTROLLER_LIST_SIZE - 1); + for (i = 0; i < *nr_ids; i++) { + ctrl = nvme_subsys_ctrl(n->subsys, ids[i]); + if (!ctrl) { + return NVME_NS_CTRL_LIST_INVALID | NVME_DNR; + } + + switch (sel) { + case NVME_NS_ATTACHMENT_ATTACH: + if (nvme_ns(ctrl, nsid)) { + return NVME_NS_ALREADY_ATTACHED | NVME_DNR; + } + + if (ns->attached && !ns->params.shared) { + return NVME_NS_PRIVATE | NVME_DNR; + } + + nvme_attach_ns(ctrl, ns); + nvme_select_iocs_ns(ctrl, ns); 
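/*
 * Editor's note (illustrative sketch, not part of the patch): the Number of
 * Queues feature handled earlier in nvme_set_feature()/nvme_get_feature()
 * uses 0's based counts: the host requests (NSQR, NCQR) in the low and high
 * halves of CDW11, and the completion result reports the allocated
 * (NSQA, NCQA) the same way, which is why the code above returns
 * (max_ioqpairs - 1) in both halves. A host asking for a number of I/O queue
 * pairs and decoding the grant might do roughly this (hypothetical helpers):
 *
 *   #include <stdint.h>
 *
 *   static uint32_t nr_queues_cdw11(uint16_t want)   // want must be >= 1
 *   {
 *       uint16_t zeroes_based = want - 1;
 *       return (uint32_t)zeroes_based | ((uint32_t)zeroes_based << 16);
 *   }
 *
 *   static uint16_t nr_queues_granted(uint32_t cqe_result)
 *   {
 *       uint16_t nsqa = (cqe_result & 0xffff) + 1;   // submission queues
 *       uint16_t ncqa = (cqe_result >> 16) + 1;      // completion queues
 *       return nsqa < ncqa ? nsqa : ncqa;            // usable queue pairs
 *   }
 */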
+ + break; + + case NVME_NS_ATTACHMENT_DETACH: + if (!nvme_ns(ctrl, nsid)) { + return NVME_NS_NOT_ATTACHED | NVME_DNR; + } + + ctrl->namespaces[nsid] = NULL; + ns->attached--; + + nvme_update_dmrsl(ctrl); + + break; + + default: + return NVME_INVALID_FIELD | NVME_DNR; + } + + /* + * Add namespace id to the changed namespace id list for event clearing + * via Get Log Page command. + */ + if (!test_and_set_bit(nsid, ctrl->changed_nsids)) { + nvme_enqueue_event(ctrl, NVME_AER_TYPE_NOTICE, + NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED, + NVME_LOG_CHANGED_NSLIST); + } + } + + return NVME_SUCCESS; +} + +typedef struct NvmeFormatAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + QEMUBH *bh; + NvmeRequest *req; + int ret; + + NvmeNamespace *ns; + uint32_t nsid; + bool broadcast; + int64_t offset; +} NvmeFormatAIOCB; + +static void nvme_format_bh(void *opaque); + +static void nvme_format_cancel(BlockAIOCB *aiocb) +{ + NvmeFormatAIOCB *iocb = container_of(aiocb, NvmeFormatAIOCB, common); + + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); + } +} + +static const AIOCBInfo nvme_format_aiocb_info = { + .aiocb_size = sizeof(NvmeFormatAIOCB), + .cancel_async = nvme_format_cancel, + .get_aio_context = nvme_get_aio_context, +}; + +static void nvme_format_set(NvmeNamespace *ns, NvmeCmd *cmd) +{ + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint8_t lbaf = dw10 & 0xf; + uint8_t pi = (dw10 >> 5) & 0x7; + uint8_t mset = (dw10 >> 4) & 0x1; + uint8_t pil = (dw10 >> 8) & 0x1; + + trace_pci_nvme_format_set(ns->params.nsid, lbaf, mset, pi, pil); + + ns->id_ns.dps = (pil << 3) | pi; + ns->id_ns.flbas = lbaf | (mset << 4); + + nvme_ns_init_format(ns); +} + +static void nvme_format_ns_cb(void *opaque, int ret) +{ + NvmeFormatAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = iocb->ns; + int bytes; + + if (ret < 0) { + iocb->ret = ret; + goto done; + } + + assert(ns); + + if (iocb->offset < ns->size) { + bytes = MIN(BDRV_REQUEST_MAX_BYTES, ns->size - iocb->offset); + + iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, iocb->offset, + bytes, BDRV_REQ_MAY_UNMAP, + nvme_format_ns_cb, iocb); + + iocb->offset += bytes; + return; + } + + nvme_format_set(ns, &req->cmd); + ns->status = 0x0; + iocb->ns = NULL; + iocb->offset = 0; + +done: + iocb->aiocb = NULL; + qemu_bh_schedule(iocb->bh); +} + +static uint16_t nvme_format_check(NvmeNamespace *ns, uint8_t lbaf, uint8_t pi) +{ + if (ns->params.zoned) { + return NVME_INVALID_FORMAT | NVME_DNR; + } + + if (lbaf > ns->id_ns.nlbaf) { + return NVME_INVALID_FORMAT | NVME_DNR; + } + + if (pi && (ns->id_ns.lbaf[lbaf].ms < sizeof(NvmeDifTuple))) { + return NVME_INVALID_FORMAT | NVME_DNR; + } + + if (pi && pi > NVME_ID_NS_DPS_TYPE_3) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static void nvme_format_bh(void *opaque) +{ + NvmeFormatAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeCtrl *n = nvme_ctrl(req); + uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); + uint8_t lbaf = dw10 & 0xf; + uint8_t pi = (dw10 >> 5) & 0x7; + uint16_t status; + int i; + + if (iocb->ret < 0) { + goto done; + } + + if (iocb->broadcast) { + for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) { + iocb->ns = nvme_ns(n, i); + if (iocb->ns) { + iocb->nsid = i; + break; + } + } + } + + if (!iocb->ns) { + goto done; + } + + status = nvme_format_check(iocb->ns, lbaf, pi); + if (status) { + req->status = status; + goto done; + } + + iocb->ns->status = NVME_FORMAT_IN_PROGRESS; + nvme_format_ns_cb(iocb, 0); + return; + +done: + qemu_bh_delete(iocb->bh); 
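/*
 * Editor's note (illustrative sketch, not part of the patch): the callback
 * chain above (nvme_format_bh -> nvme_format_ns_cb -> blk_aio_pwrite_zeroes
 * -> nvme_format_ns_cb again) is an asynchronous loop that zeroes the
 * namespace in chunks of at most BDRV_REQUEST_MAX_BYTES. Stripped of the AIO
 * plumbing, the control flow is equivalent to the synchronous loop sketched
 * below; zero_range() is a hypothetical stand-in for the block-layer call,
 * not a QEMU function.
 *
 *   #include <stdint.h>
 *
 *   int zero_range(int64_t offset, int64_t bytes);   // hypothetical stand-in
 *
 *   static int format_zero_all(int64_t size, int64_t max_chunk)
 *   {
 *       int64_t offset = 0;
 *
 *       while (offset < size) {
 *           int64_t bytes = size - offset;
 *           if (bytes > max_chunk) {
 *               bytes = max_chunk;
 *           }
 *           if (zero_range(offset, bytes) < 0) {     // one chunk per step
 *               return -1;
 *           }
 *           offset += bytes;
 *       }
 *       return 0;
 *   }
 */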
+ iocb->bh = NULL; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + qemu_aio_unref(iocb); +} + +static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeFormatAIOCB *iocb; + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint16_t status; + + iocb = qemu_aio_get(&nvme_format_aiocb_info, NULL, nvme_misc_cb, req); + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_format_bh, iocb); + iocb->ret = 0; + iocb->ns = NULL; + iocb->nsid = 0; + iocb->broadcast = (nsid == NVME_NSID_BROADCAST); + iocb->offset = 0; + + if (!iocb->broadcast) { + if (!nvme_nsid_valid(n, nsid)) { + status = NVME_INVALID_NSID | NVME_DNR; + goto out; + } + + iocb->ns = nvme_ns(n, nsid); + if (!iocb->ns) { + status = NVME_INVALID_FIELD | NVME_DNR; + goto out; + } + } + + req->aiocb = &iocb->common; + qemu_bh_schedule(iocb->bh); + + return NVME_NO_COMPLETE; + +out: + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); + return status; +} + +static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) +{ + trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, + nvme_adm_opc_str(req->cmd.opcode)); + + if (!(nvme_cse_acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { + trace_pci_nvme_err_invalid_admin_opc(req->cmd.opcode); + return NVME_INVALID_OPCODE | NVME_DNR; + } + + /* SGLs shall not be used for Admin commands in NVMe over PCIe */ + if (NVME_CMD_FLAGS_PSDT(req->cmd.flags) != NVME_PSDT_PRP) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (NVME_CMD_FLAGS_FUSE(req->cmd.flags)) { + return NVME_INVALID_FIELD; + } + + switch (req->cmd.opcode) { + case NVME_ADM_CMD_DELETE_SQ: + return nvme_del_sq(n, req); + case NVME_ADM_CMD_CREATE_SQ: + return nvme_create_sq(n, req); + case NVME_ADM_CMD_GET_LOG_PAGE: + return nvme_get_log(n, req); + case NVME_ADM_CMD_DELETE_CQ: + return nvme_del_cq(n, req); + case NVME_ADM_CMD_CREATE_CQ: + return nvme_create_cq(n, req); + case NVME_ADM_CMD_IDENTIFY: + return nvme_identify(n, req); + case NVME_ADM_CMD_ABORT: + return nvme_abort(n, req); + case NVME_ADM_CMD_SET_FEATURES: + return nvme_set_feature(n, req); + case NVME_ADM_CMD_GET_FEATURES: + return nvme_get_feature(n, req); + case NVME_ADM_CMD_ASYNC_EV_REQ: + return nvme_aer(n, req); + case NVME_ADM_CMD_NS_ATTACHMENT: + return nvme_ns_attachment(n, req); + case NVME_ADM_CMD_FORMAT_NVM: + return nvme_format(n, req); + default: + assert(false); + } + + return NVME_INVALID_OPCODE | NVME_DNR; +} + +static void nvme_process_sq(void *opaque) +{ + NvmeSQueue *sq = opaque; + NvmeCtrl *n = sq->ctrl; + NvmeCQueue *cq = n->cq[sq->cqid]; + + uint16_t status; + hwaddr addr; + NvmeCmd cmd; + NvmeRequest *req; + + while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) { + addr = sq->dma_addr + sq->head * n->sqe_size; + if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) { + trace_pci_nvme_err_addr_read(addr); + trace_pci_nvme_err_cfs(); + stl_le_p(&n->bar.csts, NVME_CSTS_FAILED); + break; + } + nvme_inc_sq_head(sq); + + req = QTAILQ_FIRST(&sq->req_list); + QTAILQ_REMOVE(&sq->req_list, req, entry); + QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry); + nvme_req_clear(req); + req->cqe.cid = cmd.cid; + memcpy(&req->cmd, &cmd, sizeof(NvmeCmd)); + + status = sq->sqid ? 
nvme_io_cmd(n, req) : + nvme_admin_cmd(n, req); + if (status != NVME_NO_COMPLETE) { + req->status = status; + nvme_enqueue_req_completion(cq, req); + } + } +} + +static void nvme_ctrl_reset(NvmeCtrl *n) +{ + NvmeNamespace *ns; + int i; + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + + nvme_ns_drain(ns); + } + + for (i = 0; i < n->params.max_ioqpairs + 1; i++) { + if (n->sq[i] != NULL) { + nvme_free_sq(n->sq[i], n); + } + } + for (i = 0; i < n->params.max_ioqpairs + 1; i++) { + if (n->cq[i] != NULL) { + nvme_free_cq(n->cq[i], n); + } + } + + while (!QTAILQ_EMPTY(&n->aer_queue)) { + NvmeAsyncEvent *event = QTAILQ_FIRST(&n->aer_queue); + QTAILQ_REMOVE(&n->aer_queue, event, entry); + g_free(event); + } + + n->aer_queued = 0; + n->outstanding_aers = 0; + n->qs_created = false; +} + +static void nvme_ctrl_shutdown(NvmeCtrl *n) +{ + NvmeNamespace *ns; + int i; + + if (n->pmr.dev) { + memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); + } + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + + nvme_ns_shutdown(ns); + } +} + +static void nvme_select_iocs(NvmeCtrl *n) +{ + NvmeNamespace *ns; + int i; + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + + nvme_select_iocs_ns(n, ns); + } +} + +static int nvme_start_ctrl(NvmeCtrl *n) +{ + uint64_t cap = ldq_le_p(&n->bar.cap); + uint32_t cc = ldl_le_p(&n->bar.cc); + uint32_t aqa = ldl_le_p(&n->bar.aqa); + uint64_t asq = ldq_le_p(&n->bar.asq); + uint64_t acq = ldq_le_p(&n->bar.acq); + uint32_t page_bits = NVME_CC_MPS(cc) + 12; + uint32_t page_size = 1 << page_bits; + + if (unlikely(n->cq[0])) { + trace_pci_nvme_err_startfail_cq(); + return -1; + } + if (unlikely(n->sq[0])) { + trace_pci_nvme_err_startfail_sq(); + return -1; + } + if (unlikely(asq & (page_size - 1))) { + trace_pci_nvme_err_startfail_asq_misaligned(asq); + return -1; + } + if (unlikely(acq & (page_size - 1))) { + trace_pci_nvme_err_startfail_acq_misaligned(acq); + return -1; + } + if (unlikely(!(NVME_CAP_CSS(cap) & (1 << NVME_CC_CSS(cc))))) { + trace_pci_nvme_err_startfail_css(NVME_CC_CSS(cc)); + return -1; + } + if (unlikely(NVME_CC_MPS(cc) < NVME_CAP_MPSMIN(cap))) { + trace_pci_nvme_err_startfail_page_too_small( + NVME_CC_MPS(cc), + NVME_CAP_MPSMIN(cap)); + return -1; + } + if (unlikely(NVME_CC_MPS(cc) > + NVME_CAP_MPSMAX(cap))) { + trace_pci_nvme_err_startfail_page_too_large( + NVME_CC_MPS(cc), + NVME_CAP_MPSMAX(cap)); + return -1; + } + if (unlikely(NVME_CC_IOCQES(cc) < + NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) { + trace_pci_nvme_err_startfail_cqent_too_small( + NVME_CC_IOCQES(cc), + NVME_CTRL_CQES_MIN(cap)); + return -1; + } + if (unlikely(NVME_CC_IOCQES(cc) > + NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) { + trace_pci_nvme_err_startfail_cqent_too_large( + NVME_CC_IOCQES(cc), + NVME_CTRL_CQES_MAX(cap)); + return -1; + } + if (unlikely(NVME_CC_IOSQES(cc) < + NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) { + trace_pci_nvme_err_startfail_sqent_too_small( + NVME_CC_IOSQES(cc), + NVME_CTRL_SQES_MIN(cap)); + return -1; + } + if (unlikely(NVME_CC_IOSQES(cc) > + NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) { + trace_pci_nvme_err_startfail_sqent_too_large( + NVME_CC_IOSQES(cc), + NVME_CTRL_SQES_MAX(cap)); + return -1; + } + if (unlikely(!NVME_AQA_ASQS(aqa))) { + trace_pci_nvme_err_startfail_asqent_sz_zero(); + return -1; + } + if (unlikely(!NVME_AQA_ACQS(aqa))) { + trace_pci_nvme_err_startfail_acqent_sz_zero(); + return -1; + } + + n->page_bits = page_bits; + 
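/*
 * Editor's note (illustrative sketch, not part of the patch): the checks in
 * nvme_start_ctrl() above all derive sizes from fields in CC. MPS encodes the
 * memory page size as a power of two starting at 4 KiB, and IOSQES/IOCQES
 * encode the queue entry sizes as powers of two directly; that is what the
 * page_bits/page_size computation and the later "1 << NVME_CC_IOSQES(cc)"
 * assignments rely on. The same arithmetic in isolation:
 *
 *   #include <stdint.h>
 *
 *   static uint32_t mps_to_page_size(uint32_t mps)   // CC.MPS, 4 bits
 *   {
 *       return 1u << (12 + mps);   // MPS = 0 -> 4096 bytes
 *   }
 *
 *   static uint32_t es_to_bytes(uint32_t es)          // CC.IOSQES / CC.IOCQES
 *   {
 *       return 1u << es;           // 6 -> 64-byte SQE, 4 -> 16-byte CQE
 *   }
 */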
n->page_size = page_size; + n->max_prp_ents = n->page_size / sizeof(uint64_t); + n->cqe_size = 1 << NVME_CC_IOCQES(cc); + n->sqe_size = 1 << NVME_CC_IOSQES(cc); + nvme_init_cq(&n->admin_cq, n, acq, 0, 0, NVME_AQA_ACQS(aqa) + 1, 1); + nvme_init_sq(&n->admin_sq, n, asq, 0, 0, NVME_AQA_ASQS(aqa) + 1); + + nvme_set_timestamp(n, 0ULL); + + QTAILQ_INIT(&n->aer_queue); + + nvme_select_iocs(n); + + return 0; +} + +static void nvme_cmb_enable_regs(NvmeCtrl *n) +{ + uint32_t cmbloc = ldl_le_p(&n->bar.cmbloc); + uint32_t cmbsz = ldl_le_p(&n->bar.cmbsz); + + NVME_CMBLOC_SET_CDPCILS(cmbloc, 1); + NVME_CMBLOC_SET_CDPMLS(cmbloc, 1); + NVME_CMBLOC_SET_BIR(cmbloc, NVME_CMB_BIR); + stl_le_p(&n->bar.cmbloc, cmbloc); + + NVME_CMBSZ_SET_SQS(cmbsz, 1); + NVME_CMBSZ_SET_CQS(cmbsz, 0); + NVME_CMBSZ_SET_LISTS(cmbsz, 1); + NVME_CMBSZ_SET_RDS(cmbsz, 1); + NVME_CMBSZ_SET_WDS(cmbsz, 1); + NVME_CMBSZ_SET_SZU(cmbsz, 2); /* MBs */ + NVME_CMBSZ_SET_SZ(cmbsz, n->params.cmb_size_mb); + stl_le_p(&n->bar.cmbsz, cmbsz); +} + +static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, + unsigned size) +{ + uint64_t cap = ldq_le_p(&n->bar.cap); + uint32_t cc = ldl_le_p(&n->bar.cc); + uint32_t intms = ldl_le_p(&n->bar.intms); + uint32_t csts = ldl_le_p(&n->bar.csts); + uint32_t pmrsts = ldl_le_p(&n->bar.pmrsts); + + if (unlikely(offset & (sizeof(uint32_t) - 1))) { + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_misaligned32, + "MMIO write not 32-bit aligned," + " offset=0x%"PRIx64"", offset); + /* should be ignored, fall through for now */ + } + + if (unlikely(size < sizeof(uint32_t))) { + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_toosmall, + "MMIO write smaller than 32-bits," + " offset=0x%"PRIx64", size=%u", + offset, size); + /* should be ignored, fall through for now */ + } + + switch (offset) { + case NVME_REG_INTMS: + if (unlikely(msix_enabled(&(n->parent_obj)))) { + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, + "undefined access to interrupt mask set" + " when MSI-X is enabled"); + /* should be ignored, fall through for now */ + } + intms |= data; + stl_le_p(&n->bar.intms, intms); + n->bar.intmc = n->bar.intms; + trace_pci_nvme_mmio_intm_set(data & 0xffffffff, intms); + nvme_irq_check(n); + break; + case NVME_REG_INTMC: + if (unlikely(msix_enabled(&(n->parent_obj)))) { + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, + "undefined access to interrupt mask clr" + " when MSI-X is enabled"); + /* should be ignored, fall through for now */ + } + intms &= ~data; + stl_le_p(&n->bar.intms, intms); + n->bar.intmc = n->bar.intms; + trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, intms); + nvme_irq_check(n); + break; + case NVME_REG_CC: + trace_pci_nvme_mmio_cfg(data & 0xffffffff); + + /* Windows first sends data, then sends enable bit */ + if (!NVME_CC_EN(data) && !NVME_CC_EN(cc) && + !NVME_CC_SHN(data) && !NVME_CC_SHN(cc)) + { + cc = data; + } + + if (NVME_CC_EN(data) && !NVME_CC_EN(cc)) { + cc = data; + + /* flush CC since nvme_start_ctrl() needs the value */ + stl_le_p(&n->bar.cc, cc); + if (unlikely(nvme_start_ctrl(n))) { + trace_pci_nvme_err_startfail(); + csts = NVME_CSTS_FAILED; + } else { + trace_pci_nvme_mmio_start_success(); + csts = NVME_CSTS_READY; + } + } else if (!NVME_CC_EN(data) && NVME_CC_EN(cc)) { + trace_pci_nvme_mmio_stopped(); + nvme_ctrl_reset(n); + cc = 0; + csts &= ~NVME_CSTS_READY; + } + + if (NVME_CC_SHN(data) && !(NVME_CC_SHN(cc))) { + trace_pci_nvme_mmio_shutdown_set(); + nvme_ctrl_shutdown(n); + cc = data; + csts |= NVME_CSTS_SHST_COMPLETE; + } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(cc)) { + 
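/*
 * Editor's note (illustrative sketch, not part of the patch): the CC handling
 * above implements the controller side of the standard enable/shutdown
 * handshake: the host programs AQA/ASQ/ACQ, sets CC.EN and polls CSTS.RDY;
 * for shutdown it writes CC.SHN and polls CSTS.SHST for completion. A rough
 * host-side sketch of the enable half; reg_read32()/reg_write32() are
 * hypothetical MMIO accessors, not QEMU code:
 *
 *   #include <stdbool.h>
 *   #include <stdint.h>
 *
 *   uint32_t reg_read32(uint64_t off);              // hypothetical
 *   void reg_write32(uint64_t off, uint32_t v);     // hypothetical
 *
 *   static bool nvme_enable(uint32_t cc_value)
 *   {
 *       reg_write32(0x14, cc_value | 0x1);          // CC, set EN
 *       for (int i = 0; i < 1000; i++) {
 *           if (reg_read32(0x1c) & 0x1) {           // CSTS.RDY
 *               return true;
 *           }
 *           // real code would wait up to CAP.TO * 500 ms here
 *       }
 *       return false;
 *   }
 */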
trace_pci_nvme_mmio_shutdown_cleared(); + csts &= ~NVME_CSTS_SHST_COMPLETE; + cc = data; + } + + stl_le_p(&n->bar.cc, cc); + stl_le_p(&n->bar.csts, csts); + + break; + case NVME_REG_CSTS: + if (data & (1 << 4)) { + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported, + "attempted to W1C CSTS.NSSRO" + " but CAP.NSSRS is zero (not supported)"); + } else if (data != 0) { + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ro_csts, + "attempted to set a read only bit" + " of controller status"); + } + break; + case NVME_REG_NSSR: + if (data == 0x4e564d65) { + trace_pci_nvme_ub_mmiowr_ssreset_unsupported(); + } else { + /* The spec says that writes of other values have no effect */ + return; + } + break; + case NVME_REG_AQA: + stl_le_p(&n->bar.aqa, data); + trace_pci_nvme_mmio_aqattr(data & 0xffffffff); + break; + case NVME_REG_ASQ: + stn_le_p(&n->bar.asq, size, data); + trace_pci_nvme_mmio_asqaddr(data); + break; + case NVME_REG_ASQ + 4: + stl_le_p((uint8_t *)&n->bar.asq + 4, data); + trace_pci_nvme_mmio_asqaddr_hi(data, ldq_le_p(&n->bar.asq)); + break; + case NVME_REG_ACQ: + trace_pci_nvme_mmio_acqaddr(data); + stn_le_p(&n->bar.acq, size, data); + break; + case NVME_REG_ACQ + 4: + stl_le_p((uint8_t *)&n->bar.acq + 4, data); + trace_pci_nvme_mmio_acqaddr_hi(data, ldq_le_p(&n->bar.acq)); + break; + case NVME_REG_CMBLOC: + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbloc_reserved, + "invalid write to reserved CMBLOC" + " when CMBSZ is zero, ignored"); + return; + case NVME_REG_CMBSZ: + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly, + "invalid write to read only CMBSZ, ignored"); + return; + case NVME_REG_CMBMSC: + if (!NVME_CAP_CMBS(cap)) { + return; + } + + stn_le_p(&n->bar.cmbmsc, size, data); + n->cmb.cmse = false; + + if (NVME_CMBMSC_CRE(data)) { + nvme_cmb_enable_regs(n); + + if (NVME_CMBMSC_CMSE(data)) { + uint64_t cmbmsc = ldq_le_p(&n->bar.cmbmsc); + hwaddr cba = NVME_CMBMSC_CBA(cmbmsc) << CMBMSC_CBA_SHIFT; + if (cba + int128_get64(n->cmb.mem.size) < cba) { + uint32_t cmbsts = ldl_le_p(&n->bar.cmbsts); + NVME_CMBSTS_SET_CBAI(cmbsts, 1); + stl_le_p(&n->bar.cmbsts, cmbsts); + return; + } + + n->cmb.cba = cba; + n->cmb.cmse = true; + } + } else { + n->bar.cmbsz = 0; + n->bar.cmbloc = 0; + } + + return; + case NVME_REG_CMBMSC + 4: + stl_le_p((uint8_t *)&n->bar.cmbmsc + 4, data); + return; + + case NVME_REG_PMRCAP: + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly, + "invalid write to PMRCAP register, ignored"); + return; + case NVME_REG_PMRCTL: + if (!NVME_CAP_PMRS(cap)) { + return; + } + + stl_le_p(&n->bar.pmrctl, data); + if (NVME_PMRCTL_EN(data)) { + memory_region_set_enabled(&n->pmr.dev->mr, true); + pmrsts = 0; + } else { + memory_region_set_enabled(&n->pmr.dev->mr, false); + NVME_PMRSTS_SET_NRDY(pmrsts, 1); + n->pmr.cmse = false; + } + stl_le_p(&n->bar.pmrsts, pmrsts); + return; + case NVME_REG_PMRSTS: + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly, + "invalid write to PMRSTS register, ignored"); + return; + case NVME_REG_PMREBS: + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly, + "invalid write to PMREBS register, ignored"); + return; + case NVME_REG_PMRSWTP: + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly, + "invalid write to PMRSWTP register, ignored"); + return; + case NVME_REG_PMRMSCL: + if (!NVME_CAP_PMRS(cap)) { + return; + } + + stl_le_p(&n->bar.pmrmscl, data); + n->pmr.cmse = false; + + if (NVME_PMRMSCL_CMSE(data)) { + uint64_t pmrmscu = ldl_le_p(&n->bar.pmrmscu); + hwaddr cba = pmrmscu << 32 | + (NVME_PMRMSCL_CBA(data) << PMRMSCL_CBA_SHIFT); + if (cba + 
int128_get64(n->pmr.dev->mr.size) < cba) { + NVME_PMRSTS_SET_CBAI(pmrsts, 1); + stl_le_p(&n->bar.pmrsts, pmrsts); + return; + } + + n->pmr.cmse = true; + n->pmr.cba = cba; + } + + return; + case NVME_REG_PMRMSCU: + if (!NVME_CAP_PMRS(cap)) { + return; + } + + stl_le_p(&n->bar.pmrmscu, data); + return; + default: + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid, + "invalid MMIO write," + " offset=0x%"PRIx64", data=%"PRIx64"", + offset, data); + break; + } +} + +static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + NvmeCtrl *n = (NvmeCtrl *)opaque; + uint8_t *ptr = (uint8_t *)&n->bar; + + trace_pci_nvme_mmio_read(addr, size); + + if (unlikely(addr & (sizeof(uint32_t) - 1))) { + NVME_GUEST_ERR(pci_nvme_ub_mmiord_misaligned32, + "MMIO read not 32-bit aligned," + " offset=0x%"PRIx64"", addr); + /* should RAZ, fall through for now */ + } else if (unlikely(size < sizeof(uint32_t))) { + NVME_GUEST_ERR(pci_nvme_ub_mmiord_toosmall, + "MMIO read smaller than 32-bits," + " offset=0x%"PRIx64"", addr); + /* should RAZ, fall through for now */ + } + + if (addr > sizeof(n->bar) - size) { + NVME_GUEST_ERR(pci_nvme_ub_mmiord_invalid_ofs, + "MMIO read beyond last register," + " offset=0x%"PRIx64", returning 0", addr); + + return 0; + } + + /* + * When PMRWBM bit 1 is set then read from + * from PMRSTS should ensure prior writes + * made it to persistent media + */ + if (addr == NVME_REG_PMRSTS && + (NVME_PMRCAP_PMRWBM(ldl_le_p(&n->bar.pmrcap)) & 0x02)) { + memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); + } + + return ldn_le_p(ptr + addr, size); +} + +static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) +{ + uint32_t qid; + + if (unlikely(addr & ((1 << 2) - 1))) { + NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned, + "doorbell write not 32-bit aligned," + " offset=0x%"PRIx64", ignoring", addr); + return; + } + + if (((addr - 0x1000) >> 2) & 1) { + /* Completion queue doorbell write */ + + uint16_t new_head = val & 0xffff; + int start_sqs; + NvmeCQueue *cq; + + qid = (addr - (0x1000 + (1 << 2))) >> 3; + if (unlikely(nvme_check_cqid(n, qid))) { + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cq, + "completion queue doorbell write" + " for nonexistent queue," + " sqid=%"PRIu32", ignoring", qid); + + /* + * NVM Express v1.3d, Section 4.1 state: "If host software writes + * an invalid value to the Submission Queue Tail Doorbell or + * Completion Queue Head Doorbell regiter and an Asynchronous Event + * Request command is outstanding, then an asynchronous event is + * posted to the Admin Completion Queue with a status code of + * Invalid Doorbell Write Value." + * + * Also note that the spec includes the "Invalid Doorbell Register" + * status code, but nowhere does it specify when to use it. + * However, it seems reasonable to use it here in a similar + * fashion. + */ + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_REGISTER, + NVME_LOG_ERROR_INFO); + } + + return; + } + + cq = n->cq[qid]; + if (unlikely(new_head >= cq->size)) { + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cqhead, + "completion queue doorbell write value" + " beyond queue size, sqid=%"PRIu32"," + " new_head=%"PRIu16", ignoring", + qid, new_head); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_VALUE, + NVME_LOG_ERROR_INFO); + } + + return; + } + + trace_pci_nvme_mmio_doorbell_cq(cq->cqid, new_head); + + start_sqs = nvme_cq_full(cq) ? 
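/*
 * Editor's note (illustrative sketch, not part of the patch):
 * nvme_process_db() decodes doorbell writes assuming a doorbell stride of
 * zero (CAP.DSTRD = 0, i.e. 4-byte registers): submission queue y's tail
 * doorbell sits at 0x1000 + (2*y) * 4 and completion queue y's head doorbell
 * at 0x1000 + (2*y + 1) * 4, which is why the code tests bit 0 of
 * ((addr - 0x1000) >> 2) to distinguish SQ from CQ and then shifts by 3 to
 * recover the queue id. The general formula from the spec, for reference:
 *
 *   #include <stdint.h>
 *
 *   static uint64_t sq_tail_db(uint32_t qid, uint32_t dstrd)
 *   {
 *       return 0x1000 + (uint64_t)(2 * qid) * (4u << dstrd);
 *   }
 *
 *   static uint64_t cq_head_db(uint32_t qid, uint32_t dstrd)
 *   {
 *       return 0x1000 + (uint64_t)(2 * qid + 1) * (4u << dstrd);
 *   }
 */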
1 : 0; + cq->head = new_head; + if (start_sqs) { + NvmeSQueue *sq; + QTAILQ_FOREACH(sq, &cq->sq_list, entry) { + timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + } + timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + } + + if (cq->tail == cq->head) { + if (cq->irq_enabled) { + n->cq_pending--; + } + + nvme_irq_deassert(n, cq); + } + } else { + /* Submission queue doorbell write */ + + uint16_t new_tail = val & 0xffff; + NvmeSQueue *sq; + + qid = (addr - 0x1000) >> 3; + if (unlikely(nvme_check_sqid(n, qid))) { + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sq, + "submission queue doorbell write" + " for nonexistent queue," + " sqid=%"PRIu32", ignoring", qid); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_REGISTER, + NVME_LOG_ERROR_INFO); + } + + return; + } + + sq = n->sq[qid]; + if (unlikely(new_tail >= sq->size)) { + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sqtail, + "submission queue doorbell write value" + " beyond queue size, sqid=%"PRIu32"," + " new_tail=%"PRIu16", ignoring", + qid, new_tail); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_VALUE, + NVME_LOG_ERROR_INFO); + } + + return; + } + + trace_pci_nvme_mmio_doorbell_sq(sq->sqid, new_tail); + + sq->tail = new_tail; + timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + } +} + +static void nvme_mmio_write(void *opaque, hwaddr addr, uint64_t data, + unsigned size) +{ + NvmeCtrl *n = (NvmeCtrl *)opaque; + + trace_pci_nvme_mmio_write(addr, data, size); + + if (addr < sizeof(n->bar)) { + nvme_write_bar(n, addr, data, size); + } else { + nvme_process_db(n, addr, data); + } +} + +static const MemoryRegionOps nvme_mmio_ops = { + .read = nvme_mmio_read, + .write = nvme_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 2, + .max_access_size = 8, + }, +}; + +static void nvme_cmb_write(void *opaque, hwaddr addr, uint64_t data, + unsigned size) +{ + NvmeCtrl *n = (NvmeCtrl *)opaque; + stn_le_p(&n->cmb.buf[addr], size, data); +} + +static uint64_t nvme_cmb_read(void *opaque, hwaddr addr, unsigned size) +{ + NvmeCtrl *n = (NvmeCtrl *)opaque; + return ldn_le_p(&n->cmb.buf[addr], size); +} + +static const MemoryRegionOps nvme_cmb_ops = { + .read = nvme_cmb_read, + .write = nvme_cmb_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + }, +}; + +static void nvme_check_constraints(NvmeCtrl *n, Error **errp) +{ + NvmeParams *params = &n->params; + + if (params->num_queues) { + warn_report("num_queues is deprecated; please use max_ioqpairs " + "instead"); + + params->max_ioqpairs = params->num_queues - 1; + } + + if (n->namespace.blkconf.blk && n->subsys) { + error_setg(errp, "subsystem support is unavailable with legacy " + "namespace ('drive' property)"); + return; + } + + if (params->max_ioqpairs < 1 || + params->max_ioqpairs > NVME_MAX_IOQPAIRS) { + error_setg(errp, "max_ioqpairs must be between 1 and %d", + NVME_MAX_IOQPAIRS); + return; + } + + if (params->msix_qsize < 1 || + params->msix_qsize > PCI_MSIX_FLAGS_QSIZE + 1) { + error_setg(errp, "msix_qsize must be between 1 and %d", + PCI_MSIX_FLAGS_QSIZE + 1); + return; + } + + if (!params->serial) { + error_setg(errp, "serial property not set"); + return; + } + + if (n->pmr.dev) { + if (host_memory_backend_is_mapped(n->pmr.dev)) { + error_setg(errp, "can't use already busy memdev: %s", + 
object_get_canonical_path_component(OBJECT(n->pmr.dev))); + return; + } + + if (!is_power_of_2(n->pmr.dev->size)) { + error_setg(errp, "pmr backend size needs to be power of 2 in size"); + return; + } + + host_memory_backend_set_mapped(n->pmr.dev, true); + } + + if (n->params.zasl > n->params.mdts) { + error_setg(errp, "zoned.zasl (Zone Append Size Limit) must be less " + "than or equal to mdts (Maximum Data Transfer Size)"); + return; + } + + if (!n->params.vsl) { + error_setg(errp, "vsl must be non-zero"); + return; + } +} + +static void nvme_init_state(NvmeCtrl *n) +{ + /* add one to max_ioqpairs to account for the admin queue pair */ + n->reg_size = pow2ceil(sizeof(NvmeBar) + + 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE); + n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1); + n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1); + n->temperature = NVME_TEMPERATURE; + n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING; + n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1); +} + +static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) +{ + uint64_t cmb_size = n->params.cmb_size_mb * MiB; + uint64_t cap = ldq_le_p(&n->bar.cap); + + n->cmb.buf = g_malloc0(cmb_size); + memory_region_init_io(&n->cmb.mem, OBJECT(n), &nvme_cmb_ops, n, + "nvme-cmb", cmb_size); + pci_register_bar(pci_dev, NVME_CMB_BIR, + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->cmb.mem); + + NVME_CAP_SET_CMBS(cap, 1); + stq_le_p(&n->bar.cap, cap); + + if (n->params.legacy_cmb) { + nvme_cmb_enable_regs(n); + n->cmb.cmse = true; + } +} + +static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) +{ + uint32_t pmrcap = ldl_le_p(&n->bar.pmrcap); + + NVME_PMRCAP_SET_RDS(pmrcap, 1); + NVME_PMRCAP_SET_WDS(pmrcap, 1); + NVME_PMRCAP_SET_BIR(pmrcap, NVME_PMR_BIR); + /* Turn on bit 1 support */ + NVME_PMRCAP_SET_PMRWBM(pmrcap, 0x02); + NVME_PMRCAP_SET_CMSS(pmrcap, 1); + stl_le_p(&n->bar.pmrcap, pmrcap); + + pci_register_bar(pci_dev, NVME_PMR_BIR, + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmr.dev->mr); + + memory_region_set_enabled(&n->pmr.dev->mr, false); +} + +static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) +{ + uint8_t *pci_conf = pci_dev->config; + uint64_t bar_size, msix_table_size, msix_pba_size; + unsigned msix_table_offset, msix_pba_offset; + int ret; + + Error *err = NULL; + + pci_conf[PCI_INTERRUPT_PIN] = 1; + pci_config_set_prog_interface(pci_conf, 0x2); + + if (n->params.use_intel_id) { + pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL); + pci_config_set_device_id(pci_conf, 0x5845); + } else { + pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT); + pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME); + } + + pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); + pcie_endpoint_cap_init(pci_dev, 0x80); + + bar_size = QEMU_ALIGN_UP(n->reg_size, 4 * KiB); + msix_table_offset = bar_size; + msix_table_size = PCI_MSIX_ENTRY_SIZE * n->params.msix_qsize; + + bar_size += msix_table_size; + bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB); + msix_pba_offset = bar_size; + msix_pba_size = QEMU_ALIGN_UP(n->params.msix_qsize, 64) / 8; + + bar_size += msix_pba_size; + bar_size = pow2ceil(bar_size); + + memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size); + memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", + n->reg_size); + memory_region_add_subregion(&n->bar0, 
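/*
 * Editor's note (illustrative sketch, not part of the patch): the bar_size
 * computation in nvme_init_pci() above lays BAR0 out as the controller
 * registers plus doorbells (n->reg_size), then the MSI-X table, then the
 * MSI-X PBA, each 4 KiB aligned, with the total rounded up to a power of two
 * as PCI BAR sizes must be. The same arithmetic with align_up() and the
 * power-of-two rounding spelled out instead of using QEMU's helpers:
 *
 *   #include <stdint.h>
 *
 *   static uint64_t align_up(uint64_t v, uint64_t a)   // a is a power of two
 *   {
 *       return (v + a - 1) & ~(a - 1);
 *   }
 *
 *   static uint64_t nvme_bar0_size(uint64_t reg_size, unsigned msix_qsize)
 *   {
 *       uint64_t size = align_up(reg_size, 4096);
 *       size += 16ull * msix_qsize;              // MSI-X table, 16 B/entry
 *       size = align_up(size, 4096);
 *       size += align_up(msix_qsize, 64) / 8;    // MSI-X PBA, 1 bit/vector
 *
 *       uint64_t p = 1;                          // round up to power of two
 *       while (p < size) {
 *           p <<= 1;
 *       }
 *       return p;
 *   }
 */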
0, &n->iomem); + + pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0); + ret = msix_init(pci_dev, n->params.msix_qsize, + &n->bar0, 0, msix_table_offset, + &n->bar0, 0, msix_pba_offset, 0, &err); + if (ret < 0) { + if (ret == -ENOTSUP) { + warn_report_err(err); + } else { + error_propagate(errp, err); + return ret; + } + } + + if (n->params.cmb_size_mb) { + nvme_init_cmb(n, pci_dev); + } + + if (n->pmr.dev) { + nvme_init_pmr(n, pci_dev); + } + + return 0; +} + +static void nvme_init_subnqn(NvmeCtrl *n) +{ + NvmeSubsystem *subsys = n->subsys; + NvmeIdCtrl *id = &n->id_ctrl; + + if (!subsys) { + snprintf((char *)id->subnqn, sizeof(id->subnqn), + "nqn.2019-08.org.qemu:%s", n->params.serial); + } else { + pstrcpy((char *)id->subnqn, sizeof(id->subnqn), (char*)subsys->subnqn); + } +} + +static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) +{ + NvmeIdCtrl *id = &n->id_ctrl; + uint8_t *pci_conf = pci_dev->config; + uint64_t cap = ldq_le_p(&n->bar.cap); + + id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); + id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); + strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); + strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' '); + strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' '); + + id->cntlid = cpu_to_le16(n->cntlid); + + id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR); + + id->rab = 6; + + if (n->params.use_intel_id) { + id->ieee[0] = 0xb3; + id->ieee[1] = 0x02; + id->ieee[2] = 0x00; + } else { + id->ieee[0] = 0x00; + id->ieee[1] = 0x54; + id->ieee[2] = 0x52; + } + + id->mdts = n->params.mdts; + id->ver = cpu_to_le32(NVME_SPEC_VER); + id->oacs = cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT); + id->cntrltype = 0x1; + + /* + * Because the controller always completes the Abort command immediately, + * there can never be more than one concurrently executing Abort command, + * so this value is never used for anything. Note that there can easily be + * many Abort commands in the queues, but they are not considered + * "executing" until processed by nvme_abort. + * + * The specification recommends a value of 3 for Abort Command Limit (four + * concurrently outstanding Abort commands), so lets use that though it is + * inconsequential. + */ + id->acl = 3; + id->aerl = n->params.aerl; + id->frmw = (NVME_NUM_FW_SLOTS << 1) | NVME_FRMW_SLOT1_RO; + id->lpa = NVME_LPA_NS_SMART | NVME_LPA_CSE | NVME_LPA_EXTENDED; + + /* recommended default value (~70 C) */ + id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING); + id->cctemp = cpu_to_le16(NVME_TEMPERATURE_CRITICAL); + + id->sqes = (0x6 << 4) | 0x6; + id->cqes = (0x4 << 4) | 0x4; + id->nn = cpu_to_le32(NVME_MAX_NAMESPACES); + id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP | + NVME_ONCS_FEATURES | NVME_ONCS_DSM | + NVME_ONCS_COMPARE | NVME_ONCS_COPY); + + /* + * NOTE: If this device ever supports a command set that does NOT use 0x0 + * as a Flush-equivalent operation, support for the broadcast NSID in Flush + * should probably be removed. + * + * See comment in nvme_io_cmd. 
+ */ + id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT; + + id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0); + id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN | + NVME_CTRL_SGLS_BITBUCKET); + + nvme_init_subnqn(n); + + id->psd[0].mp = cpu_to_le16(0x9c4); + id->psd[0].enlat = cpu_to_le32(0x10); + id->psd[0].exlat = cpu_to_le32(0x4); + + if (n->subsys) { + id->cmic |= NVME_CMIC_MULTI_CTRL; + } + + NVME_CAP_SET_MQES(cap, 0x7ff); + NVME_CAP_SET_CQR(cap, 1); + NVME_CAP_SET_TO(cap, 0xf); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NVM); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_CSI_SUPP); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_ADMIN_ONLY); + NVME_CAP_SET_MPSMAX(cap, 4); + NVME_CAP_SET_CMBS(cap, n->params.cmb_size_mb ? 1 : 0); + NVME_CAP_SET_PMRS(cap, n->pmr.dev ? 1 : 0); + stq_le_p(&n->bar.cap, cap); + + stl_le_p(&n->bar.vs, NVME_SPEC_VER); + n->bar.intmc = n->bar.intms = 0; +} + +static int nvme_init_subsys(NvmeCtrl *n, Error **errp) +{ + int cntlid; + + if (!n->subsys) { + return 0; + } + + cntlid = nvme_subsys_register_ctrl(n, errp); + if (cntlid < 0) { + return -1; + } + + n->cntlid = cntlid; + + return 0; +} + +void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns) +{ + uint32_t nsid = ns->params.nsid; + assert(nsid && nsid <= NVME_MAX_NAMESPACES); + + n->namespaces[nsid] = ns; + ns->attached++; + + n->dmrsl = MIN_NON_ZERO(n->dmrsl, + BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); +} + +static void nvme_realize(PCIDevice *pci_dev, Error **errp) +{ + NvmeCtrl *n = NVME(pci_dev); + NvmeNamespace *ns; + Error *local_err = NULL; + + nvme_check_constraints(n, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + qbus_init(&n->bus, sizeof(NvmeBus), TYPE_NVME_BUS, + &pci_dev->qdev, n->parent_obj.qdev.id); + + nvme_init_state(n); + if (nvme_init_pci(n, pci_dev, errp)) { + return; + } + + if (nvme_init_subsys(n, errp)) { + error_propagate(errp, local_err); + return; + } + nvme_init_ctrl(n, pci_dev); + + /* setup a namespace if the controller drive property was given */ + if (n->namespace.blkconf.blk) { + ns = &n->namespace; + ns->params.nsid = 1; + + if (nvme_ns_setup(ns, errp)) { + return; + } + + nvme_attach_ns(n, ns); + } +} + +static void nvme_exit(PCIDevice *pci_dev) +{ + NvmeCtrl *n = NVME(pci_dev); + NvmeNamespace *ns; + int i; + + nvme_ctrl_reset(n); + + if (n->subsys) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (ns) { + ns->attached--; + } + } + + nvme_subsys_unregister_ctrl(n->subsys, n); + } + + g_free(n->cq); + g_free(n->sq); + g_free(n->aer_reqs); + + if (n->params.cmb_size_mb) { + g_free(n->cmb.buf); + } + + if (n->pmr.dev) { + host_memory_backend_set_mapped(n->pmr.dev, false); + } + msix_uninit(pci_dev, &n->bar0, &n->bar0); + memory_region_del_subregion(&n->bar0, &n->iomem); +} + +static Property nvme_props[] = { + DEFINE_BLOCK_PROPERTIES(NvmeCtrl, namespace.blkconf), + DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmr.dev, TYPE_MEMORY_BACKEND, + HostMemoryBackend *), + DEFINE_PROP_LINK("subsys", NvmeCtrl, subsys, TYPE_NVME_SUBSYS, + NvmeSubsystem *), + DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial), + DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0), + DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0), + DEFINE_PROP_UINT32("max_ioqpairs", NvmeCtrl, params.max_ioqpairs, 64), + DEFINE_PROP_UINT16("msix_qsize", NvmeCtrl, params.msix_qsize, 65), + DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3), + DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64), + 
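/*
 * Editor's note (illustrative sketch, not part of the patch): several CAP
 * fields set in nvme_init_ctrl() above use "minus one" or log2 encodings.
 * MQES is 0's based (0x7ff advertises 2048 entries per I/O queue), TO is in
 * 500 ms units, and MPSMIN/MPSMAX are powers of two relative to 4 KiB
 * (MPSMAX = 4 therefore advertises a 64 KiB maximum page). A host decoding
 * CAP might do roughly the following (hypothetical helper names):
 *
 *   #include <stdint.h>
 *
 *   static uint32_t cap_max_queue_entries(uint64_t cap)
 *   {
 *       return (uint32_t)(cap & 0xffff) + 1;            // MQES, bits 15:0
 *   }
 *
 *   static uint32_t cap_timeout_ms(uint64_t cap)
 *   {
 *       return (uint32_t)((cap >> 24) & 0xff) * 500;    // TO, bits 31:24
 *   }
 *
 *   static uint64_t cap_mpsmax_bytes(uint64_t cap)
 *   {
 *       return 1ull << (12 + ((cap >> 52) & 0xf));      // MPSMAX, bits 55:52
 *   }
 */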
DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7), + DEFINE_PROP_UINT8("vsl", NvmeCtrl, params.vsl, 7), + DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false), + DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false), + DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0), + DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl, + params.auto_transition_zones, true), + DEFINE_PROP_END_OF_LIST(), +}; + +static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NvmeCtrl *n = NVME(obj); + uint8_t value = n->smart_critical_warning; + + visit_type_uint8(v, name, &value, errp); +} + +static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NvmeCtrl *n = NVME(obj); + uint8_t value, old_value, cap = 0, index, event; + + if (!visit_type_uint8(v, name, &value, errp)) { + return; + } + + cap = NVME_SMART_SPARE | NVME_SMART_TEMPERATURE | NVME_SMART_RELIABILITY + | NVME_SMART_MEDIA_READ_ONLY | NVME_SMART_FAILED_VOLATILE_MEDIA; + if (NVME_CAP_PMRS(ldq_le_p(&n->bar.cap))) { + cap |= NVME_SMART_PMR_UNRELIABLE; + } + + if ((value & cap) != value) { + error_setg(errp, "unsupported smart critical warning bits: 0x%x", + value & ~cap); + return; + } + + old_value = n->smart_critical_warning; + n->smart_critical_warning = value; + + /* only inject new bits of smart critical warning */ + for (index = 0; index < NVME_SMART_WARN_MAX; index++) { + event = 1 << index; + if (value & ~old_value & event) + nvme_smart_event(n, event); + } +} + +static const VMStateDescription nvme_vmstate = { + .name = "nvme", + .unmigratable = 1, +}; + +static void nvme_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); + + pc->realize = nvme_realize; + pc->exit = nvme_exit; + pc->class_id = PCI_CLASS_STORAGE_EXPRESS; + pc->revision = 2; + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->desc = "Non-Volatile Memory Express"; + device_class_set_props(dc, nvme_props); + dc->vmsd = &nvme_vmstate; +} + +static void nvme_instance_init(Object *obj) +{ + NvmeCtrl *n = NVME(obj); + + device_add_bootindex_property(obj, &n->namespace.blkconf.bootindex, + "bootindex", "/namespace@1,0", + DEVICE(obj)); + + object_property_add(obj, "smart_critical_warning", "uint8", + nvme_get_smart_warning, + nvme_set_smart_warning, NULL, NULL); +} + +static const TypeInfo nvme_info = { + .name = TYPE_NVME, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(NvmeCtrl), + .instance_init = nvme_instance_init, + .class_init = nvme_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { } + }, +}; + +static const TypeInfo nvme_bus_info = { + .name = TYPE_NVME_BUS, + .parent = TYPE_BUS, + .instance_size = sizeof(NvmeBus), +}; + +static void nvme_register_types(void) +{ + type_register_static(&nvme_info); + type_register_static(&nvme_bus_info); +} + +type_init(nvme_register_types) diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c new file mode 100644 index 00000000000..5dbd18b2a4a --- /dev/null +++ b/hw/nvme/dif.c @@ -0,0 +1,509 @@ +/* + * QEMU NVM Express End-to-End Data Protection support + * + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
+ * + * Authors: + * Klaus Jensen + * Gollu Appalanaidu + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "sysemu/block-backend.h" + +#include "nvme.h" +#include "trace.h" + +uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, + uint32_t reftag) +{ + if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) && + (prinfo & NVME_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + + return NVME_SUCCESS; +} + +/* from Linux kernel (crypto/crct10dif_common.c) */ +static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer, + size_t len) +{ + unsigned int i; + + for (i = 0; i < len; i++) { + crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; + } + + return crc; +} + +void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t apptag, + uint32_t *reftag) +{ + uint8_t *end = buf + len; + int16_t pil = 0; + + if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + } + + trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil, + apptag, *reftag); + + for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { + NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); + uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz); + + if (pil) { + crc = crc_t10dif(crc, mbuf, pil); + } + + dif->guard = cpu_to_be16(crc); + dif->apptag = cpu_to_be16(apptag); + dif->reftag = cpu_to_be32(*reftag); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { + (*reftag)++; + } + } +} + +static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, + uint8_t *buf, uint8_t *mbuf, size_t pil, + uint8_t prinfo, uint16_t apptag, + uint16_t appmask, uint32_t reftag) +{ + switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + case NVME_ID_NS_DPS_TYPE_3: + if (be32_to_cpu(dif->reftag) != 0xffffffff) { + break; + } + + /* fallthrough */ + case NVME_ID_NS_DPS_TYPE_1: + case NVME_ID_NS_DPS_TYPE_2: + if (be16_to_cpu(dif->apptag) != 0xffff) { + break; + } + + trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag), + be32_to_cpu(dif->reftag)); + + return NVME_SUCCESS; + } + + if (prinfo & NVME_PRINFO_PRCHK_GUARD) { + uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz); + + if (pil) { + crc = crc_t10dif(crc, mbuf, pil); + } + + trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc); + + if (be16_to_cpu(dif->guard) != crc) { + return NVME_E2E_GUARD_ERROR; + } + } + + if (prinfo & NVME_PRINFO_PRCHK_APP) { + trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag, + appmask); + + if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) { + return NVME_E2E_APP_ERROR; + } + } + + if (prinfo & NVME_PRINFO_PRCHK_REF) { + trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag); + + if (be32_to_cpu(dif->reftag) != reftag) { + return NVME_E2E_REF_ERROR; + } + } + + return NVME_SUCCESS; +} + +uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint8_t prinfo, + uint64_t slba, uint16_t apptag, + uint16_t appmask, uint32_t *reftag) +{ + uint8_t *end = buf + len; + int16_t pil = 0; + uint16_t status; + + status = nvme_check_prinfo(ns, prinfo, slba, *reftag); + if (status) { + return status; + } + + if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + } + + trace_pci_nvme_dif_check(prinfo, ns->lbasz + pil); + + for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { + NvmeDifTuple 
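/*
 * Editor's note (illustrative sketch, not part of the patch): crc_t10dif()
 * above is the table-driven CRC taken from the Linux kernel; the lookup
 * table (t10_dif_crc_table) is defined elsewhere and is not visible in this
 * hunk. The same checksum can be computed bit by bit with the T10-DIF
 * generator polynomial 0x8BB7, which is slower but easier to follow; a
 * reference version for comparison:
 *
 *   #include <stddef.h>
 *   #include <stdint.h>
 *
 *   static uint16_t crc_t10dif_bitwise(uint16_t crc, const uint8_t *buf,
 *                                      size_t len)
 *   {
 *       for (size_t i = 0; i < len; i++) {
 *           crc ^= (uint16_t)buf[i] << 8;
 *           for (int bit = 0; bit < 8; bit++) {
 *               crc = (crc & 0x8000) ? (uint16_t)((crc << 1) ^ 0x8bb7)
 *                                    : (uint16_t)(crc << 1);
 *           }
 *       }
 *       return crc;
 *   }
 */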
*dif = (NvmeDifTuple *)(mbuf + pil); + + status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, prinfo, apptag, + appmask, *reftag); + if (status) { + return status; + } + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { + (*reftag)++; + } + } + + return NVME_SUCCESS; +} + +uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, + uint64_t slba) +{ + BlockBackend *blk = ns->blkconf.blk; + BlockDriverState *bs = blk_bs(blk); + + int64_t moffset = 0, offset = nvme_l2b(ns, slba); + uint8_t *mbufp, *end; + bool zeroed; + int16_t pil = 0; + int64_t bytes = (mlen / ns->lbaf.ms) << ns->lbaf.ds; + int64_t pnum = 0; + + Error *err = NULL; + + + if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + } + + do { + int ret; + + bytes -= pnum; + + ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL); + if (ret < 0) { + error_setg_errno(&err, -ret, "unable to get block status"); + error_report_err(err); + + return NVME_INTERNAL_DEV_ERROR; + } + + zeroed = !!(ret & BDRV_BLOCK_ZERO); + + trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed); + + if (zeroed) { + mbufp = mbuf + moffset; + mlen = (pnum >> ns->lbaf.ds) * ns->lbaf.ms; + end = mbufp + mlen; + + for (; mbufp < end; mbufp += ns->lbaf.ms) { + memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple)); + } + } + + moffset += (pnum >> ns->lbaf.ds) * ns->lbaf.ms; + offset += pnum; + } while (pnum != bytes); + + return NVME_SUCCESS; +} + +static void nvme_dif_rw_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk)); + + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); + + qemu_iovec_destroy(&ctx->mdata.iov); + g_free(ctx->mdata.bounce); + + g_free(ctx); + + nvme_rw_complete_cb(req, ret); +} + +static void nvme_dif_rw_check_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + NvmeCtrl *n = nvme_ctrl(req); + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); + uint16_t status; + + trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), prinfo, apptag, appmask, + reftag); + + if (ret) { + goto out; + } + + status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size, + slba); + if (status) { + req->status = status; + goto out; + } + + status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, prinfo, + slba, apptag, appmask, &reftag); + if (status) { + req->status = status; + goto out; + } + + status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size, + NVME_TX_DIRECTION_FROM_DEVICE, req); + if (status) { + req->status = status; + goto out; + } + + if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == 8) { + goto out; + } + + status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, + NVME_TX_DIRECTION_FROM_DEVICE, req); + if (status) { + req->status = status; + } + +out: + nvme_dif_rw_cb(ctx, ret); +} + +static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = 
le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + size_t mlen = nvme_m2b(ns, nlb); + uint64_t offset = nvme_moff(ns, slba); + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + goto out; + } + + ctx->mdata.bounce = g_malloc(mlen); + + qemu_iovec_reset(&ctx->mdata.iov); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, + nvme_dif_rw_check_cb, ctx); + return; + +out: + nvme_dif_rw_cb(ctx, ret); +} + +static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint64_t offset = nvme_moff(ns, slba); + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + goto out; + } + + req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0, + nvme_dif_rw_cb, ctx); + return; + +out: + nvme_dif_rw_cb(ctx, ret); +} + +uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES; + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + uint64_t slba = le64_to_cpu(rw->slba); + size_t len = nvme_l2b(ns, nlb); + size_t mlen = nvme_m2b(ns, nlb); + size_t mapped_len = len; + int64_t offset = nvme_l2b(ns, slba); + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); + bool pract = !!(prinfo & NVME_PRINFO_PRACT); + NvmeBounceContext *ctx; + uint16_t status; + + trace_pci_nvme_dif_rw(pract, prinfo); + + ctx = g_new0(NvmeBounceContext, 1); + ctx->req = req; + + if (wrz) { + BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP; + + if (prinfo & NVME_PRINFO_PRCHK_MASK) { + status = NVME_INVALID_PROT_INFO | NVME_DNR; + goto err; + } + + if (pract) { + uint8_t *mbuf, *end; + int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + + status = nvme_check_prinfo(ns, prinfo, slba, reftag); + if (status) { + goto err; + } + + flags = 0; + + ctx->mdata.bounce = g_malloc0(mlen); + + qemu_iovec_init(&ctx->mdata.iov, 1); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + mbuf = ctx->mdata.bounce; + end = mbuf + mlen; + + if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) { + pil = 0; + } + + for (; mbuf < end; mbuf += ns->lbaf.ms) { + NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); + + dif->apptag = cpu_to_be16(apptag); + dif->reftag = cpu_to_be32(reftag); + + switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + case NVME_ID_NS_DPS_TYPE_1: + case NVME_ID_NS_DPS_TYPE_2: + reftag++; + } + } + } + + req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags, + nvme_dif_rw_mdata_out_cb, ctx); + return NVME_NO_COMPLETE; + } + + if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == 8)) { + mapped_len += mlen; + } + + status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd); + if (status) { + goto err; + } + + ctx->data.bounce = g_malloc(len); + + qemu_iovec_init(&ctx->data.iov, 1); + qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len); + + if (req->cmd.opcode == NVME_CMD_READ) { + block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, + BLOCK_ACCT_READ); + + req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0, + 
nvme_dif_rw_mdata_in_cb, ctx); + return NVME_NO_COMPLETE; + } + + status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size, + NVME_TX_DIRECTION_TO_DEVICE, req); + if (status) { + goto err; + } + + ctx->mdata.bounce = g_malloc(mlen); + + qemu_iovec_init(&ctx->mdata.iov, 1); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + if (!(pract && ns->lbaf.ms == 8)) { + status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, + NVME_TX_DIRECTION_TO_DEVICE, req); + if (status) { + goto err; + } + } + + status = nvme_check_prinfo(ns, prinfo, slba, reftag); + if (status) { + goto err; + } + + if (pract) { + /* splice generated protection information into the buffer */ + nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, + apptag, &reftag); + } else { + status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, prinfo, + slba, apptag, appmask, &reftag); + if (status) { + goto err; + } + } + + block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, + BLOCK_ACCT_WRITE); + + req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0, + nvme_dif_rw_mdata_out_cb, ctx); + + return NVME_NO_COMPLETE; + +err: + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); + + qemu_iovec_destroy(&ctx->mdata.iov); + g_free(ctx->mdata.bounce); + + g_free(ctx); + + return status; +} diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build new file mode 100644 index 00000000000..3cf40046eea --- /dev/null +++ b/hw/nvme/meson.build @@ -0,0 +1 @@ +softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c')) diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c new file mode 100644 index 00000000000..8b5f98c7618 --- /dev/null +++ b/hw/nvme/ns.c @@ -0,0 +1,595 @@ +/* + * QEMU NVM Express Virtual Namespace + * + * Copyright (c) 2019 CNEX Labs + * Copyright (c) 2020 Samsung Electronics + * + * Authors: + * Klaus Jensen + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "sysemu/sysemu.h" +#include "sysemu/block-backend.h" + +#include "nvme.h" +#include "trace.h" + +#define MIN_DISCARD_GRANULARITY (4 * KiB) +#define NVME_DEFAULT_ZONE_SIZE (128 * MiB) + +void nvme_ns_init_format(NvmeNamespace *ns) +{ + NvmeIdNs *id_ns = &ns->id_ns; + BlockDriverInfo bdi; + int npdg, nlbas, ret; + + ns->lbaf = id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; + ns->lbasz = 1 << ns->lbaf.ds; + + nlbas = ns->size / (ns->lbasz + ns->lbaf.ms); + + id_ns->nsze = cpu_to_le64(nlbas); + + /* no thin provisioning */ + id_ns->ncap = id_ns->nsze; + id_ns->nuse = id_ns->ncap; + + ns->moff = (int64_t)nlbas << ns->lbaf.ds; + + npdg = ns->blkconf.discard_granularity / ns->lbasz; + + ret = bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi); + if (ret >= 0 && bdi.cluster_size > ns->blkconf.discard_granularity) { + npdg = bdi.cluster_size / ns->lbasz; + } + + id_ns->npda = id_ns->npdg = npdg - 1; +} + +static int nvme_ns_init(NvmeNamespace *ns, Error **errp) +{ + static uint64_t ns_count; + NvmeIdNs *id_ns = &ns->id_ns; + uint8_t ds; + uint16_t ms; + int i; + + ns->csi = NVME_CSI_NVM; + ns->status = 0x0; + + ns->id_ns.dlfeat = 0x1; + + /* support DULBE and I/O optimization fields */ + id_ns->nsfeat |= (0x4 | 0x10); + + if (ns->params.shared) { + id_ns->nmic |= NVME_NMIC_NS_SHARED; + } + + /* Substitute a missing EUI-64 by an autogenerated one */ + ++ns_count; + if (!ns->params.eui64 && ns->params.eui64_default) { + ns->params.eui64 = ns_count + NVME_EUI64_DEFAULT; + } + + /* simple copy */ + id_ns->mssrl = cpu_to_le16(ns->params.mssrl); + id_ns->mcl = cpu_to_le32(ns->params.mcl); + id_ns->msrc = ns->params.msrc; + id_ns->eui64 = cpu_to_be64(ns->params.eui64); + + ds = 31 - clz32(ns->blkconf.logical_block_size); + ms = ns->params.ms; + + id_ns->mc = NVME_ID_NS_MC_EXTENDED | NVME_ID_NS_MC_SEPARATE; + + if (ms && ns->params.mset) { + id_ns->flbas |= NVME_ID_NS_FLBAS_EXTENDED; + } + + id_ns->dpc = 0x1f; + id_ns->dps = ns->params.pi; + if (ns->params.pi && ns->params.pil) { + id_ns->dps |= NVME_ID_NS_DPS_FIRST_EIGHT; + } + + static const NvmeLBAF lbaf[16] = { + [0] = { .ds = 9 }, + [1] = { .ds = 9, .ms = 8 }, + [2] = { .ds = 9, .ms = 16 }, + [3] = { .ds = 9, .ms = 64 }, + [4] = { .ds = 12 }, + [5] = { .ds = 12, .ms = 8 }, + [6] = { .ds = 12, .ms = 16 }, + [7] = { .ds = 12, .ms = 64 }, + }; + + memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); + id_ns->nlbaf = 7; + + for (i = 0; i <= id_ns->nlbaf; i++) { + NvmeLBAF *lbaf = &id_ns->lbaf[i]; + if (lbaf->ds == ds) { + if (lbaf->ms == ms) { + id_ns->flbas |= i; + goto lbaf_found; + } + } + } + + /* add non-standard lba format */ + id_ns->nlbaf++; + id_ns->lbaf[id_ns->nlbaf].ds = ds; + id_ns->lbaf[id_ns->nlbaf].ms = ms; + id_ns->flbas |= id_ns->nlbaf; + +lbaf_found: + nvme_ns_init_format(ns); + + return 0; +} + +static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp) +{ + bool read_only; + + if (!blkconf_blocksizes(&ns->blkconf, errp)) { + return -1; + } + + read_only = !blk_supports_write_perm(ns->blkconf.blk); + if (!blkconf_apply_backend_options(&ns->blkconf, read_only, false, errp)) { + return -1; + } + + if (ns->blkconf.discard_granularity == -1) { + ns->blkconf.discard_granularity = + MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY); + } + + ns->size = blk_getlength(ns->blkconf.blk); + if (ns->size < 0) { + error_setg_errno(errp, -ns->size, "could not get blockdev size"); + return -1; + } + + return 0; +} + +static 
int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) +{ + uint64_t zone_size, zone_cap; + + /* Make sure that the values of ZNS properties are sane */ + if (ns->params.zone_size_bs) { + zone_size = ns->params.zone_size_bs; + } else { + zone_size = NVME_DEFAULT_ZONE_SIZE; + } + if (ns->params.zone_cap_bs) { + zone_cap = ns->params.zone_cap_bs; + } else { + zone_cap = zone_size; + } + if (zone_cap > zone_size) { + error_setg(errp, "zone capacity %"PRIu64"B exceeds " + "zone size %"PRIu64"B", zone_cap, zone_size); + return -1; + } + if (zone_size < ns->lbasz) { + error_setg(errp, "zone size %"PRIu64"B too small, " + "must be at least %zuB", zone_size, ns->lbasz); + return -1; + } + if (zone_cap < ns->lbasz) { + error_setg(errp, "zone capacity %"PRIu64"B too small, " + "must be at least %zuB", zone_cap, ns->lbasz); + return -1; + } + + /* + * Save the main zone geometry values to avoid + * recalculating them later. + */ + ns->zone_size = zone_size / ns->lbasz; + ns->zone_capacity = zone_cap / ns->lbasz; + ns->num_zones = le64_to_cpu(ns->id_ns.nsze) / ns->zone_size; + + /* Do a few more sanity checks of ZNS properties */ + if (!ns->num_zones) { + error_setg(errp, + "insufficient drive capacity, must be at least the size " + "of one zone (%"PRIu64"B)", zone_size); + return -1; + } + + return 0; +} + +static void nvme_ns_zoned_init_state(NvmeNamespace *ns) +{ + uint64_t start = 0, zone_size = ns->zone_size; + uint64_t capacity = ns->num_zones * zone_size; + NvmeZone *zone; + int i; + + ns->zone_array = g_new0(NvmeZone, ns->num_zones); + if (ns->params.zd_extension_size) { + ns->zd_extensions = g_malloc0(ns->params.zd_extension_size * + ns->num_zones); + } + + QTAILQ_INIT(&ns->exp_open_zones); + QTAILQ_INIT(&ns->imp_open_zones); + QTAILQ_INIT(&ns->closed_zones); + QTAILQ_INIT(&ns->full_zones); + + zone = ns->zone_array; + for (i = 0; i < ns->num_zones; i++, zone++) { + if (start + zone_size > capacity) { + zone_size = capacity - start; + } + zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE; + nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); + zone->d.za = 0; + zone->d.zcap = ns->zone_capacity; + zone->d.zslba = start; + zone->d.wp = start; + zone->w_ptr = start; + start += zone_size; + } + + ns->zone_size_log2 = 0; + if (is_power_of_2(ns->zone_size)) { + ns->zone_size_log2 = 63 - clz64(ns->zone_size); + } +} + +static void nvme_ns_init_zoned(NvmeNamespace *ns) +{ + NvmeIdNsZoned *id_ns_z; + int i; + + nvme_ns_zoned_init_state(ns); + + id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned)); + + /* MAR/MOR are zeroes-based, FFFFFFFFh means no limit */ + id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1); + id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1); + id_ns_z->zoc = 0; + id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00; + + for (i = 0; i <= ns->id_ns.nlbaf; i++) { + id_ns_z->lbafe[i].zsze = cpu_to_le64(ns->zone_size); + id_ns_z->lbafe[i].zdes = + ns->params.zd_extension_size >> 6; /* Units of 64B */ + } + + ns->csi = NVME_CSI_ZONED; + ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size); + ns->id_ns.ncap = ns->id_ns.nsze; + ns->id_ns.nuse = ns->id_ns.ncap; + + /* + * The device uses the BDRV_BLOCK_ZERO flag to determine the "deallocated" + * status of logical blocks. Since the spec defines that logical blocks + * SHALL be deallocated when the zone is in the Empty or Offline states, + * we can only support DULBE if the zone size is a multiple of the + * calculated NPDG.
+ */ + if (ns->zone_size % (ns->id_ns.npdg + 1)) { + warn_report("the zone size (%"PRIu64" blocks) is not a multiple of " + "the calculated deallocation granularity (%d blocks); " + "DULBE support disabled", + ns->zone_size, ns->id_ns.npdg + 1); + + ns->id_ns.nsfeat &= ~0x4; + } + + ns->id_ns_zoned = id_ns_z; +} + +static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone) +{ + uint8_t state; + + zone->w_ptr = zone->d.wp; + state = nvme_get_zone_state(zone); + if (zone->d.wp != zone->d.zslba || + (zone->d.za & NVME_ZA_ZD_EXT_VALID)) { + if (state != NVME_ZONE_STATE_CLOSED) { + trace_pci_nvme_clear_ns_close(state, zone->d.zslba); + nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED); + } + nvme_aor_inc_active(ns); + QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry); + } else { + trace_pci_nvme_clear_ns_reset(state, zone->d.zslba); + nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); + } +} + +/* + * Close all the zones that are currently open. + */ +static void nvme_zoned_ns_shutdown(NvmeNamespace *ns) +{ + NvmeZone *zone, *next; + + QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) { + QTAILQ_REMOVE(&ns->closed_zones, zone, entry); + nvme_aor_dec_active(ns); + nvme_clear_zone(ns, zone); + } + QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) { + QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); + nvme_aor_dec_open(ns); + nvme_aor_dec_active(ns); + nvme_clear_zone(ns, zone); + } + QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) { + QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry); + nvme_aor_dec_open(ns); + nvme_aor_dec_active(ns); + nvme_clear_zone(ns, zone); + } + + assert(ns->nr_open_zones == 0); +} + +static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) +{ + if (!ns->blkconf.blk) { + error_setg(errp, "block backend not configured"); + return -1; + } + + if (ns->params.pi && ns->params.ms < 8) { + error_setg(errp, "at least 8 bytes of metadata required to enable " + "protection information"); + return -1; + } + + if (ns->params.nsid > NVME_MAX_NAMESPACES) { + error_setg(errp, "invalid namespace id (must be between 0 and %d)", + NVME_MAX_NAMESPACES); + return -1; + } + + if (ns->params.zoned) { + if (ns->params.max_active_zones) { + if (ns->params.max_open_zones > ns->params.max_active_zones) { + error_setg(errp, "max_open_zones (%u) exceeds " + "max_active_zones (%u)", ns->params.max_open_zones, + ns->params.max_active_zones); + return -1; + } + + if (!ns->params.max_open_zones) { + ns->params.max_open_zones = ns->params.max_active_zones; + } + } + + if (ns->params.zd_extension_size) { + if (ns->params.zd_extension_size & 0x3f) { + error_setg(errp, "zone descriptor extension size must be a " + "multiple of 64B"); + return -1; + } + if ((ns->params.zd_extension_size >> 6) > 0xff) { + error_setg(errp, + "zone descriptor extension size is too large"); + return -1; + } + } + } + + return 0; +} + +int nvme_ns_setup(NvmeNamespace *ns, Error **errp) +{ + if (nvme_ns_check_constraints(ns, errp)) { + return -1; + } + + if (nvme_ns_init_blk(ns, errp)) { + return -1; + } + + if (nvme_ns_init(ns, errp)) { + return -1; + } + if (ns->params.zoned) { + if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) { + return -1; + } + nvme_ns_init_zoned(ns); + } + + return 0; +} + +void nvme_ns_drain(NvmeNamespace *ns) +{ + blk_drain(ns->blkconf.blk); +} + +void nvme_ns_shutdown(NvmeNamespace *ns) +{ + blk_flush(ns->blkconf.blk); + if (ns->params.zoned) { + nvme_zoned_ns_shutdown(ns); + } +} + +void nvme_ns_cleanup(NvmeNamespace *ns) +{ + if (ns->params.zoned) { + 
g_free(ns->id_ns_zoned); + g_free(ns->zone_array); + g_free(ns->zd_extensions); + } +} + +static void nvme_ns_unrealize(DeviceState *dev) +{ + NvmeNamespace *ns = NVME_NS(dev); + + nvme_ns_drain(ns); + nvme_ns_shutdown(ns); + nvme_ns_cleanup(ns); +} + +static void nvme_ns_realize(DeviceState *dev, Error **errp) +{ + NvmeNamespace *ns = NVME_NS(dev); + BusState *s = qdev_get_parent_bus(dev); + NvmeCtrl *n = NVME(s->parent); + NvmeSubsystem *subsys = n->subsys; + uint32_t nsid = ns->params.nsid; + int i; + + if (!n->subsys) { + if (ns->params.detached) { + error_setg(errp, "detached requires that the nvme device is " + "linked to an nvme-subsys device"); + return; + } + } else { + /* + * If this namespace belongs to a subsystem (through a link on the + * controller device), reparent the device. + */ + if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) { + return; + } + } + + if (nvme_ns_setup(ns, errp)) { + return; + } + + if (!nsid) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) { + continue; + } + + nsid = ns->params.nsid = i; + break; + } + + if (!nsid) { + error_setg(errp, "no free namespace id"); + return; + } + } else { + if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) { + error_setg(errp, "namespace id '%d' already allocated", nsid); + return; + } + } + + if (subsys) { + subsys->namespaces[nsid] = ns; + + if (ns->params.detached) { + return; + } + + if (ns->params.shared) { + for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) { + NvmeCtrl *ctrl = subsys->ctrls[i]; + + if (ctrl) { + nvme_attach_ns(ctrl, ns); + } + } + + return; + } + } + + nvme_attach_ns(n, ns); +} + +static Property nvme_ns_props[] = { + DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf), + DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false), + DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, true), + DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0), + DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid), + DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0), + DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0), + DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0), + DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0), + DEFINE_PROP_UINT8("pil", NvmeNamespace, params.pil, 0), + DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128), + DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128), + DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127), + DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false), + DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs, + NVME_DEFAULT_ZONE_SIZE), + DEFINE_PROP_SIZE("zoned.zone_capacity", NvmeNamespace, params.zone_cap_bs, + 0), + DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace, + params.cross_zone_read, false), + DEFINE_PROP_UINT32("zoned.max_active", NvmeNamespace, + params.max_active_zones, 0), + DEFINE_PROP_UINT32("zoned.max_open", NvmeNamespace, + params.max_open_zones, 0), + DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace, + params.zd_extension_size, 0), + DEFINE_PROP_BOOL("eui64-default", NvmeNamespace, params.eui64_default, + true), + DEFINE_PROP_END_OF_LIST(), +}; + +static void nvme_ns_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + + dc->bus_type = TYPE_NVME_BUS; + dc->realize = nvme_ns_realize; + dc->unrealize = nvme_ns_unrealize; + device_class_set_props(dc, nvme_ns_props); + dc->desc = "Virtual NVMe namespace"; +} + 
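+/*
+ * Usage sketch (illustrative only, not part of the device model): with the
+ * properties defined in nvme_ns_props above, a shared namespace with 8 bytes
+ * of metadata and type 1 protection information, attached through a
+ * subsystem, can be configured roughly as:
+ *
+ *   -device nvme-subsys,id=nvme-subsys-0
+ *   -device nvme,serial=deadbeef,subsys=nvme-subsys-0
+ *   -device nvme-ns,drive=nvm0,nsid=1,ms=8,pi=1
+ *
+ * The "subsys" link on the controller lives in ctrl.c (not shown in this
+ * file), and "nvm0" is assumed to name a drive or blockdev defined
+ * elsewhere on the command line.
+ */
+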
+static void nvme_ns_instance_init(Object *obj) +{ + NvmeNamespace *ns = NVME_NS(obj); + char *bootindex = g_strdup_printf("/namespace@%d,0", ns->params.nsid); + + device_add_bootindex_property(obj, &ns->bootindex, "bootindex", + bootindex, DEVICE(obj)); + + g_free(bootindex); +} + +static const TypeInfo nvme_ns_info = { + .name = TYPE_NVME_NS, + .parent = TYPE_DEVICE, + .class_init = nvme_ns_class_init, + .instance_size = sizeof(NvmeNamespace), + .instance_init = nvme_ns_instance_init, +}; + +static void nvme_ns_register_types(void) +{ + type_register_static(&nvme_ns_info); +} + +type_init(nvme_ns_register_types) diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h new file mode 100644 index 00000000000..83ffabade4c --- /dev/null +++ b/hw/nvme/nvme.h @@ -0,0 +1,556 @@ +/* + * QEMU NVM Express + * + * Copyright (c) 2012 Intel Corporation + * Copyright (c) 2021 Minwoo Im + * Copyright (c) 2021 Samsung Electronics Co., Ltd. + * + * Authors: + * Keith Busch + * Klaus Jensen + * Gollu Appalanaidu + * Dmitry Fomichev + * Minwoo Im + * + * This code is licensed under the GNU GPL v2 or later. + */ + +#ifndef HW_NVME_INTERNAL_H +#define HW_NVME_INTERNAL_H + +#include "qemu/uuid.h" +#include "hw/pci/pci.h" +#include "hw/block/block.h" + +#include "block/nvme.h" + +#define NVME_MAX_CONTROLLERS 32 +#define NVME_MAX_NAMESPACES 256 +#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000) + +QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1); + +typedef struct NvmeCtrl NvmeCtrl; +typedef struct NvmeNamespace NvmeNamespace; + +#define TYPE_NVME_BUS "nvme-bus" +OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS) + +typedef struct NvmeBus { + BusState parent_bus; +} NvmeBus; + +#define TYPE_NVME_SUBSYS "nvme-subsys" +#define NVME_SUBSYS(obj) \ + OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) + +typedef struct NvmeSubsystem { + DeviceState parent_obj; + NvmeBus bus; + uint8_t subnqn[256]; + + NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS]; + NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; + + struct { + char *nqn; + } params; +} NvmeSubsystem; + +int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp); +void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n); + +static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, + uint32_t cntlid) +{ + if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) { + return NULL; + } + + return subsys->ctrls[cntlid]; +} + +static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys, + uint32_t nsid) +{ + if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) { + return NULL; + } + + return subsys->namespaces[nsid]; +} + +#define TYPE_NVME_NS "nvme-ns" +#define NVME_NS(obj) \ + OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS) + +typedef struct NvmeZone { + NvmeZoneDescr d; + uint64_t w_ptr; + QTAILQ_ENTRY(NvmeZone) entry; +} NvmeZone; + +typedef struct NvmeNamespaceParams { + bool detached; + bool shared; + uint32_t nsid; + QemuUUID uuid; + uint64_t eui64; + bool eui64_default; + + uint16_t ms; + uint8_t mset; + uint8_t pi; + uint8_t pil; + + uint16_t mssrl; + uint32_t mcl; + uint8_t msrc; + + bool zoned; + bool cross_zone_read; + uint64_t zone_size_bs; + uint64_t zone_cap_bs; + uint32_t max_active_zones; + uint32_t max_open_zones; + uint32_t zd_extension_size; +} NvmeNamespaceParams; + +typedef struct NvmeNamespace { + DeviceState parent_obj; + BlockConf blkconf; + int32_t bootindex; + int64_t size; + int64_t moff; + NvmeIdNs id_ns; + NvmeLBAF lbaf; + size_t lbasz; + const uint32_t *iocs; + uint8_t csi; + uint16_t status; + int attached; + + 
QTAILQ_ENTRY(NvmeNamespace) entry; + + NvmeIdNsZoned *id_ns_zoned; + NvmeZone *zone_array; + QTAILQ_HEAD(, NvmeZone) exp_open_zones; + QTAILQ_HEAD(, NvmeZone) imp_open_zones; + QTAILQ_HEAD(, NvmeZone) closed_zones; + QTAILQ_HEAD(, NvmeZone) full_zones; + uint32_t num_zones; + uint64_t zone_size; + uint64_t zone_capacity; + uint32_t zone_size_log2; + uint8_t *zd_extensions; + int32_t nr_open_zones; + int32_t nr_active_zones; + + NvmeNamespaceParams params; + + struct { + uint32_t err_rec; + } features; +} NvmeNamespace; + +static inline uint32_t nvme_nsid(NvmeNamespace *ns) +{ + if (ns) { + return ns->params.nsid; + } + + return 0; +} + +static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba) +{ + return lba << ns->lbaf.ds; +} + +static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba) +{ + return ns->lbaf.ms * lba; +} + +static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba) +{ + return ns->moff + nvme_m2b(ns, lba); +} + +static inline bool nvme_ns_ext(NvmeNamespace *ns) +{ + return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas); +} + +static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone) +{ + return zone->d.zs >> 4; +} + +static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state) +{ + zone->d.zs = state << 4; +} + +static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone) +{ + return zone->d.zslba + ns->zone_size; +} + +static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone) +{ + return zone->d.zslba + zone->d.zcap; +} + +static inline bool nvme_wp_is_valid(NvmeZone *zone) +{ + uint8_t st = nvme_get_zone_state(zone); + + return st != NVME_ZONE_STATE_FULL && + st != NVME_ZONE_STATE_READ_ONLY && + st != NVME_ZONE_STATE_OFFLINE; +} + +static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns, + uint32_t zone_idx) +{ + return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size]; +} + +static inline void nvme_aor_inc_open(NvmeNamespace *ns) +{ + assert(ns->nr_open_zones >= 0); + if (ns->params.max_open_zones) { + ns->nr_open_zones++; + assert(ns->nr_open_zones <= ns->params.max_open_zones); + } +} + +static inline void nvme_aor_dec_open(NvmeNamespace *ns) +{ + if (ns->params.max_open_zones) { + assert(ns->nr_open_zones > 0); + ns->nr_open_zones--; + } + assert(ns->nr_open_zones >= 0); +} + +static inline void nvme_aor_inc_active(NvmeNamespace *ns) +{ + assert(ns->nr_active_zones >= 0); + if (ns->params.max_active_zones) { + ns->nr_active_zones++; + assert(ns->nr_active_zones <= ns->params.max_active_zones); + } +} + +static inline void nvme_aor_dec_active(NvmeNamespace *ns) +{ + if (ns->params.max_active_zones) { + assert(ns->nr_active_zones > 0); + ns->nr_active_zones--; + assert(ns->nr_active_zones >= ns->nr_open_zones); + } + assert(ns->nr_active_zones >= 0); +} + +void nvme_ns_init_format(NvmeNamespace *ns); +int nvme_ns_setup(NvmeNamespace *ns, Error **errp); +void nvme_ns_drain(NvmeNamespace *ns); +void nvme_ns_shutdown(NvmeNamespace *ns); +void nvme_ns_cleanup(NvmeNamespace *ns); + +typedef struct NvmeAsyncEvent { + QTAILQ_ENTRY(NvmeAsyncEvent) entry; + NvmeAerResult result; +} NvmeAsyncEvent; + +enum { + NVME_SG_ALLOC = 1 << 0, + NVME_SG_DMA = 1 << 1, +}; + +typedef struct NvmeSg { + int flags; + + union { + QEMUSGList qsg; + QEMUIOVector iov; + }; +} NvmeSg; + +typedef enum NvmeTxDirection { + NVME_TX_DIRECTION_TO_DEVICE = 0, + NVME_TX_DIRECTION_FROM_DEVICE = 1, +} NvmeTxDirection; + +typedef struct NvmeRequest { + struct NvmeSQueue *sq; + struct NvmeNamespace *ns; + BlockAIOCB *aiocb; + uint16_t 
status; + void *opaque; + NvmeCqe cqe; + NvmeCmd cmd; + BlockAcctCookie acct; + NvmeSg sg; + QTAILQ_ENTRY(NvmeRequest)entry; +} NvmeRequest; + +typedef struct NvmeBounceContext { + NvmeRequest *req; + + struct { + QEMUIOVector iov; + uint8_t *bounce; + } data, mdata; +} NvmeBounceContext; + +static inline const char *nvme_adm_opc_str(uint8_t opc) +{ + switch (opc) { + case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ"; + case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ"; + case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE"; + case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ"; + case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ"; + case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY"; + case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT"; + case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES"; + case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; + case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; + case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT"; + case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; + default: return "NVME_ADM_CMD_UNKNOWN"; + } +} + +static inline const char *nvme_io_opc_str(uint8_t opc) +{ + switch (opc) { + case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH"; + case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE"; + case NVME_CMD_READ: return "NVME_NVM_CMD_READ"; + case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE"; + case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES"; + case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM"; + case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY"; + case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY"; + case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND"; + case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV"; + case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND"; + default: return "NVME_NVM_CMD_UNKNOWN"; + } +} + +typedef struct NvmeSQueue { + struct NvmeCtrl *ctrl; + uint16_t sqid; + uint16_t cqid; + uint32_t head; + uint32_t tail; + uint32_t size; + uint64_t dma_addr; + QEMUTimer *timer; + NvmeRequest *io_req; + QTAILQ_HEAD(, NvmeRequest) req_list; + QTAILQ_HEAD(, NvmeRequest) out_req_list; + QTAILQ_ENTRY(NvmeSQueue) entry; +} NvmeSQueue; + +typedef struct NvmeCQueue { + struct NvmeCtrl *ctrl; + uint8_t phase; + uint16_t cqid; + uint16_t irq_enabled; + uint32_t head; + uint32_t tail; + uint32_t vector; + uint32_t size; + uint64_t dma_addr; + QEMUTimer *timer; + QTAILQ_HEAD(, NvmeSQueue) sq_list; + QTAILQ_HEAD(, NvmeRequest) req_list; +} NvmeCQueue; + +#define TYPE_NVME "nvme" +#define NVME(obj) \ + OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) + +typedef struct NvmeParams { + char *serial; + uint32_t num_queues; /* deprecated since 5.1 */ + uint32_t max_ioqpairs; + uint16_t msix_qsize; + uint32_t cmb_size_mb; + uint8_t aerl; + uint32_t aer_max_queued; + uint8_t mdts; + uint8_t vsl; + bool use_intel_id; + uint8_t zasl; + bool auto_transition_zones; + bool legacy_cmb; +} NvmeParams; + +typedef struct NvmeCtrl { + PCIDevice parent_obj; + MemoryRegion bar0; + MemoryRegion iomem; + NvmeBar bar; + NvmeParams params; + NvmeBus bus; + + uint16_t cntlid; + bool qs_created; + uint32_t page_size; + uint16_t page_bits; + uint16_t max_prp_ents; + uint16_t cqe_size; + uint16_t sqe_size; + uint32_t reg_size; + uint32_t max_q_ents; + uint8_t outstanding_aers; + uint32_t irq_status; + int cq_pending; + uint64_t host_timestamp; /* Timestamp sent by the host */ + uint64_t 
timestamp_set_qemu_clock_ms; /* QEMU clock time */ + uint64_t starttime_ms; + uint16_t temperature; + uint8_t smart_critical_warning; + + struct { + MemoryRegion mem; + uint8_t *buf; + bool cmse; + hwaddr cba; + } cmb; + + struct { + HostMemoryBackend *dev; + bool cmse; + hwaddr cba; + } pmr; + + uint8_t aer_mask; + NvmeRequest **aer_reqs; + QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue; + int aer_queued; + + uint32_t dmrsl; + + /* Namespace ID is started with 1 so bitmap should be 1-based */ +#define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1) + DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE); + + NvmeSubsystem *subsys; + + NvmeNamespace namespace; + NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; + NvmeSQueue **sq; + NvmeCQueue **cq; + NvmeSQueue admin_sq; + NvmeCQueue admin_cq; + NvmeIdCtrl id_ctrl; + + struct { + struct { + uint16_t temp_thresh_hi; + uint16_t temp_thresh_low; + }; + uint32_t async_config; + } features; +} NvmeCtrl; + +static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid) +{ + if (!nsid || nsid > NVME_MAX_NAMESPACES) { + return NULL; + } + + return n->namespaces[nsid]; +} + +static inline NvmeCQueue *nvme_cq(NvmeRequest *req) +{ + NvmeSQueue *sq = req->sq; + NvmeCtrl *n = sq->ctrl; + + return n->cq[sq->cqid]; +} + +static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req) +{ + NvmeSQueue *sq = req->sq; + return sq->ctrl; +} + +static inline uint16_t nvme_cid(NvmeRequest *req) +{ + if (!req) { + return 0xffff; + } + + return le16_to_cpu(req->cqe.cid); +} + +void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns); +uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req); +uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req); +void nvme_rw_complete_cb(void *opaque, int ret); +uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, + NvmeCmd *cmd); + +/* from Linux kernel (crypto/crct10dif_common.c) */ +static const uint16_t t10_dif_crc_table[256] = { + 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, + 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, + 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, + 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, + 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, + 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, + 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, + 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, + 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, + 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, + 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, + 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, + 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, + 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, + 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, + 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, + 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, + 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, + 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, + 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, + 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, + 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, + 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, + 0x285B, 
0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, + 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, + 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, + 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, + 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, + 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, + 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, + 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, + 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 +}; + +uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, + uint32_t reftag); +uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, + uint64_t slba); +void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t apptag, + uint32_t *reftag); +uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint8_t prinfo, + uint64_t slba, uint16_t apptag, + uint16_t appmask, uint32_t *reftag); +uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); + + +#endif /* HW_NVME_INTERNAL_H */ diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c new file mode 100644 index 00000000000..fb58d639504 --- /dev/null +++ b/hw/nvme/subsys.c @@ -0,0 +1,96 @@ +/* + * QEMU NVM Express Subsystem: nvme-subsys + * + * Copyright (c) 2021 Minwoo Im + * + * This code is licensed under the GNU GPL v2. Refer COPYING. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" + +#include "nvme.h" + +int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) +{ + NvmeSubsystem *subsys = n->subsys; + int cntlid, nsid; + + for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) { + if (!subsys->ctrls[cntlid]) { + break; + } + } + + if (cntlid == ARRAY_SIZE(subsys->ctrls)) { + error_setg(errp, "no more free controller id"); + return -1; + } + + subsys->ctrls[cntlid] = n; + + for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) { + NvmeNamespace *ns = subsys->namespaces[nsid]; + if (ns && ns->params.shared && !ns->params.detached) { + nvme_attach_ns(n, ns); + } + } + + return cntlid; +} + +void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n) +{ + subsys->ctrls[n->cntlid] = NULL; + n->cntlid = -1; +} + +static void nvme_subsys_setup(NvmeSubsystem *subsys) +{ + const char *nqn = subsys->params.nqn ? 
+ subsys->params.nqn : subsys->parent_obj.id; + + snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn), + "nqn.2019-08.org.qemu:%s", nqn); +} + +static void nvme_subsys_realize(DeviceState *dev, Error **errp) +{ + NvmeSubsystem *subsys = NVME_SUBSYS(dev); + + qbus_init(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, dev->id); + + nvme_subsys_setup(subsys); +} + +static Property nvme_subsystem_props[] = { + DEFINE_PROP_STRING("nqn", NvmeSubsystem, params.nqn), + DEFINE_PROP_END_OF_LIST(), +}; + +static void nvme_subsys_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + + dc->realize = nvme_subsys_realize; + dc->desc = "Virtual NVMe subsystem"; + dc->hotpluggable = false; + + device_class_set_props(dc, nvme_subsystem_props); +} + +static const TypeInfo nvme_subsys_info = { + .name = TYPE_NVME_SUBSYS, + .parent = TYPE_DEVICE, + .class_init = nvme_subsys_class_init, + .instance_size = sizeof(NvmeSubsystem), +}; + +static void nvme_subsys_register_types(void) +{ + type_register_static(&nvme_subsys_info); +} + +type_init(nvme_subsys_register_types) diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events new file mode 100644 index 00000000000..ff6cafd520d --- /dev/null +++ b/hw/nvme/trace-events @@ -0,0 +1,200 @@ +# successful events +pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" +pci_nvme_irq_pin(void) "pulsing IRQ pin" +pci_nvme_irq_masked(void) "IRQ is masked" +pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" +pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" +pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" +pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d" +pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64"" +pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid 0x%"PRIx32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" +pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" +pci_nvme_flush_ns(uint32_t nsid) "nsid 0x%"PRIx32"" +pci_nvme_format_set(uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" +pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" +pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" +pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_misc_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_dif_rw(uint8_t pract, uint8_t prinfo) "pract 0x%"PRIx8" prinfo 0x%"PRIx8"" +pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw_mdata_out_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw_check_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 
0x%"PRIx32"" +pci_nvme_dif_pract_generate_dif(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_check(uint8_t prinfo, uint16_t chksum_len) "prinfo 0x%"PRIx8" chksum_len %"PRIu16"" +pci_nvme_dif_prchk_disabled(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_prchk_guard(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16"" +pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "apptag 0x%"PRIx16" elbat 0x%"PRIx16" elbatm 0x%"PRIx16"" +pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32"" +pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8"" +pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_copy_out(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_verify(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_rw_complete_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d" +pci_nvme_dsm(uint32_t nr, uint32_t attr) "nr %"PRIu32" attr 0x%"PRIx32"" +pci_nvme_dsm_deallocate(uint64_t slba, uint32_t nlb) "slba %"PRIu64" nlb %"PRIu32"" +pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32"" +pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" +pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" +pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" +pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16"" +pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid %"PRIu16" cns 0x%"PRIx8" ctrlid %"PRIu16" csi 0x%"PRIx8"" +pci_nvme_identify_ctrl(void) "identify controller" +pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8"" +pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" +pci_nvme_identify_ctrl_list(uint8_t cns, uint16_t cntid) "cns 0x%"PRIx8" cntid %"PRIu16"" +pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8"" +pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32"" 
+pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8"" +pci_nvme_identify_cmd_set(void) "identify i/o command set" +pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32"" +pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64"" +pci_nvme_getfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t sel, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" sel 0x%"PRIx8" cdw11 0x%"PRIx32"" +pci_nvme_setfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t save, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" save 0x%"PRIx8" cdw11 0x%"PRIx32"" +pci_nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" +pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d" +pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" +pci_nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" +pci_nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" +pci_nvme_process_aers(int queued) "queued %d" +pci_nvme_aer(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aer_aerl_exceeded(void) "aerl exceeded" +pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8"" +pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +pci_nvme_ns_attachment(uint16_t cid, uint8_t sel) "cid %"PRIu16", sel=0x%"PRIx8"" +pci_nvme_ns_attachment_attach(uint16_t cntlid, uint32_t nsid) "cntlid=0x%"PRIx16", nsid=0x%"PRIx32"" +pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +pci_nvme_enqueue_event_noqueue(int queued) "queued %d" +pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" +pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" +pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint32_t dw0, uint32_t dw1, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" dw0 0x%"PRIx32" dw1 0x%"PRIx32" status 0x%"PRIx16"" +pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d" +pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d" +pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16"" +pci_nvme_mmio_doorbell_sq(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16" new_tail %"PRIu16"" +pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" +pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" +pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" +pci_nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" +pci_nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" +pci_nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" +pci_nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" +pci_nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" 
+pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded" +pci_nvme_mmio_stopped(void) "cleared controller enable bit" +pci_nvme_mmio_shutdown_set(void) "shutdown bit set" +pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" +pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_zns_zone_reset(uint64_t zslba) "zslba 0x%"PRIx64"" +pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32"" +pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32"" +pci_nvme_clear_ns_close(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Closed state" +pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Empty state" + +# error conditions +pci_nvme_err_mdts(size_t len) "len %zu" +pci_nvme_err_zasl(size_t len) "len %zu" +pci_nvme_err_req_status(uint16_t cid, uint32_t nsid, uint16_t status, uint8_t opc) "cid %"PRIu16" nsid %"PRIu32" status 0x%"PRIx16" opc 0x%"PRIx8"" +pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64"" +pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64"" +pci_nvme_err_cfs(void) "controller fatal status" +pci_nvme_err_aio(uint16_t cid, const char *errname, uint16_t status) "cid %"PRIu16" err '%s' status 0x%"PRIx16"" +pci_nvme_err_copy_invalid_format(uint8_t format) "format 0x%"PRIx8"" +pci_nvme_err_invalid_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" +pci_nvme_err_invalid_num_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" +pci_nvme_err_invalid_sgl_excess_length(uint32_t residual) "residual %"PRIu32"" +pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" +pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is not page aligned: 0x%"PRIx64"" +pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" +pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" +pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" +pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" +pci_nvme_err_invalid_log_page_offset(uint64_t ofs, uint64_t size) "must be <= %"PRIu64", got %"PRIu64"" +pci_nvme_err_cmb_invalid_cba(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" +pci_nvme_err_cmb_not_enabled(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" +pci_nvme_err_unaligned_zone_cmd(uint8_t action, uint64_t slba, uint64_t zslba) "unaligned zone op 0x%"PRIx32", got slba=%"PRIu64", zslba=%"PRIu64"" +pci_nvme_err_invalid_zone_state_transition(uint8_t action, uint64_t slba, uint8_t attrs) "action=0x%"PRIx8", slba=%"PRIu64", attrs=0x%"PRIx32"" +pci_nvme_err_write_not_at_wp(uint64_t slba, uint64_t zone, uint64_t wp) "writing at slba=%"PRIu64", zone=%"PRIu64", but wp=%"PRIu64"" +pci_nvme_err_append_not_at_start(uint64_t slba, uint64_t zone) 
"appending at slba=%"PRIu64", but zone=%"PRIu64"" +pci_nvme_err_zone_is_full(uint64_t zslba) "zslba 0x%"PRIx64"" +pci_nvme_err_zone_is_read_only(uint64_t zslba) "zslba 0x%"PRIx64"" +pci_nvme_err_zone_is_offline(uint64_t zslba) "zslba 0x%"PRIx64"" +pci_nvme_err_zone_boundary(uint64_t slba, uint32_t nlb, uint64_t zcap) "lba 0x%"PRIx64" nlb %"PRIu32" zcap 0x%"PRIx64"" +pci_nvme_err_zone_invalid_write(uint64_t slba, uint64_t wp) "lba 0x%"PRIx64" wp 0x%"PRIx64"" +pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" +pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" +pci_nvme_err_insuff_active_res(uint32_t max_active) "max_active=%"PRIu32" zone limit exceeded" +pci_nvme_err_insuff_open_res(uint32_t max_open) "max_open=%"PRIu32" zone limit exceeded" +pci_nvme_err_zd_extension_map_error(uint32_t zone_idx) "can't map descriptor extension for zone_idx=%"PRIu32"" +pci_nvme_err_invalid_iocsci(uint32_t idx) "unsupported command set combination index %"PRIu32"" +pci_nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" +pci_nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" +pci_nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" +pci_nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" +pci_nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" +pci_nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" +pci_nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" +pci_nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" +pci_nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" +pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" +pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" +pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" +pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" +pci_nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16"" +pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" +pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" +pci_nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" +pci_nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" +pci_nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" 
+pci_nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_css(uint8_t css) "nvme_start_ctrl failed because invalid command set selected:%u" +pci_nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" +pci_nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" +pci_nvme_err_startfail_zasl_too_small(uint32_t zasl, uint32_t pagesz) "nvme_start_ctrl failed because zone append size limit %"PRIu32" is too small, needs to be >= %"PRIu32"" +pci_nvme_err_startfail(void) "setting controller enable bit failed" +pci_nvme_err_invalid_mgmt_action(uint8_t action) "action=0x%"PRIx8"" + +# undefined behavior +pci_nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" +pci_nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" +pci_nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" +pci_nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" +pci_nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" +pci_nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" +pci_nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" +pci_nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" +pci_nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" +pci_nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" +pci_nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" +pci_nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" +pci_nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" +pci_nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" +pci_nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" +pci_nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" +pci_nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" +pci_nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" +pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", 
new_head=%"PRIu16", ignoring" +pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" +pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" +pci_nvme_ub_unknown_css_value(void) "unknown value in cc.css field" +pci_nvme_ub_too_many_mappings(void) "too many prp/sgl mappings" diff --git a/hw/nvme/trace.h b/hw/nvme/trace.h new file mode 100644 index 00000000000..b398ea107f5 --- /dev/null +++ b/hw/nvme/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_nvme.h" diff --git a/hw/nvram/Kconfig b/hw/nvram/Kconfig index e872fcb1941..24cfc18f8b1 100644 --- a/hw/nvram/Kconfig +++ b/hw/nvram/Kconfig @@ -15,3 +15,22 @@ config NMC93XX_EEPROM config CHRP_NVRAM bool + +config XLNX_EFUSE_CRC + bool + +config XLNX_EFUSE + bool + select XLNX_EFUSE_CRC + +config XLNX_EFUSE_VERSAL + bool + select XLNX_EFUSE + +config XLNX_EFUSE_ZYNQMP + bool + select XLNX_EFUSE + +config XLNX_BBRAM + bool + select XLNX_EFUSE_CRC diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c index 9b8dcca4ead..c06b30de112 100644 --- a/hw/nvram/fw_cfg.c +++ b/hw/nvram/fw_cfg.c @@ -878,6 +878,7 @@ static struct { { "etc/tpm/log", 150 }, { "etc/acpi/rsdp", 160 }, { "bootorder", 170 }, + { "etc/msr_feature_control", 180 }, #define FW_CFG_ORDER_OVERRIDE_LAST 200 }; diff --git a/hw/nvram/meson.build b/hw/nvram/meson.build index fd2951a860f..202a5466e63 100644 --- a/hw/nvram/meson.build +++ b/hw/nvram/meson.build @@ -9,5 +9,13 @@ softmmu_ss.add(when: 'CONFIG_AT24C', if_true: files('eeprom_at24c.c')) softmmu_ss.add(when: 'CONFIG_MAC_NVRAM', if_true: files('mac_nvram.c')) softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_otp.c')) softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_nvm.c')) +softmmu_ss.add(when: 'CONFIG_XLNX_EFUSE_CRC', if_true: files('xlnx-efuse-crc.c')) +softmmu_ss.add(when: 'CONFIG_XLNX_EFUSE', if_true: files('xlnx-efuse.c')) +softmmu_ss.add(when: 'CONFIG_XLNX_EFUSE_VERSAL', if_true: files( + 'xlnx-versal-efuse-cache.c', + 'xlnx-versal-efuse-ctrl.c')) +softmmu_ss.add(when: 'CONFIG_XLNX_EFUSE_ZYNQMP', if_true: files( + 'xlnx-zynqmp-efuse.c')) +softmmu_ss.add(when: 'CONFIG_XLNX_BBRAM', if_true: files('xlnx-bbram.c')) specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr_nvram.c')) diff --git a/hw/nvram/nrf51_nvm.c b/hw/nvram/nrf51_nvm.c index 7b3460d52da..7f1db8c4239 100644 --- a/hw/nvram/nrf51_nvm.c +++ b/hw/nvram/nrf51_nvm.c @@ -21,7 +21,6 @@ #include "qapi/error.h" #include "qemu/log.h" #include "qemu/module.h" -#include "exec/address-spaces.h" #include "hw/arm/nrf51.h" #include "hw/nvram/nrf51_nvm.h" #include "hw/qdev-properties.h" diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index 01f77520146..fbfdf47e268 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -26,14 +26,12 @@ #include "qemu/module.h" #include "qemu/units.h" #include "qapi/error.h" -#include "cpu.h" #include #include "sysemu/block-backend.h" #include "sysemu/device_tree.h" #include "sysemu/sysemu.h" #include "sysemu/runstate.h" -#include "hw/sysbus.h" #include "migration/vmstate.h" #include "hw/nvram/chrp_nvram.h" #include "hw/ppc/spapr.h" diff --git a/hw/nvram/trace-events b/hw/nvram/trace-events index e0231932959..5e33b24d47a 100644 --- a/hw/nvram/trace-events +++ b/hw/nvram/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# ds1225y.c nvram_read(uint32_t addr, uint32_t ret) "read addr %d: 0x%02x" diff --git a/hw/nvram/xlnx-bbram.c b/hw/nvram/xlnx-bbram.c new file mode 100644 index 00000000000..b70828e5bf1 --- /dev/null +++ b/hw/nvram/xlnx-bbram.c @@ -0,0 +1,545 @@ +/* + * QEMU model of the Xilinx BBRAM Battery Backed RAM + * + * Copyright (c) 2014-2021 Xilinx Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/nvram/xlnx-bbram.h" + +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "sysemu/blockdev.h" +#include "migration/vmstate.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "hw/nvram/xlnx-efuse.h" + +#ifndef XLNX_BBRAM_ERR_DEBUG +#define XLNX_BBRAM_ERR_DEBUG 0 +#endif + +REG32(BBRAM_STATUS, 0x0) + FIELD(BBRAM_STATUS, AES_CRC_PASS, 9, 1) + FIELD(BBRAM_STATUS, AES_CRC_DONE, 8, 1) + FIELD(BBRAM_STATUS, BBRAM_ZEROIZED, 4, 1) + FIELD(BBRAM_STATUS, PGM_MODE, 0, 1) +REG32(BBRAM_CTRL, 0x4) + FIELD(BBRAM_CTRL, ZEROIZE, 0, 1) +REG32(PGM_MODE, 0x8) +REG32(BBRAM_AES_CRC, 0xc) +REG32(BBRAM_0, 0x10) +REG32(BBRAM_1, 0x14) +REG32(BBRAM_2, 0x18) +REG32(BBRAM_3, 0x1c) +REG32(BBRAM_4, 0x20) +REG32(BBRAM_5, 0x24) +REG32(BBRAM_6, 0x28) +REG32(BBRAM_7, 0x2c) +REG32(BBRAM_8, 0x30) +REG32(BBRAM_SLVERR, 0x34) + FIELD(BBRAM_SLVERR, ENABLE, 0, 1) +REG32(BBRAM_ISR, 0x38) + FIELD(BBRAM_ISR, APB_SLVERR, 0, 1) +REG32(BBRAM_IMR, 0x3c) + FIELD(BBRAM_IMR, APB_SLVERR, 0, 1) +REG32(BBRAM_IER, 0x40) + FIELD(BBRAM_IER, APB_SLVERR, 0, 1) +REG32(BBRAM_IDR, 0x44) + FIELD(BBRAM_IDR, APB_SLVERR, 0, 1) +REG32(BBRAM_MSW_LOCK, 0x4c) + FIELD(BBRAM_MSW_LOCK, VAL, 0, 1) + +#define R_MAX (R_BBRAM_MSW_LOCK + 1) + +#define RAM_MAX (A_BBRAM_8 + 4 - A_BBRAM_0) + +#define BBRAM_PGM_MAGIC 0x757bdf0d + +QEMU_BUILD_BUG_ON(R_MAX != ARRAY_SIZE(((XlnxBBRam *)0)->regs)); + +static bool bbram_msw_locked(XlnxBBRam *s) +{ + return ARRAY_FIELD_EX32(s->regs, BBRAM_MSW_LOCK, VAL) != 0; +} + +static bool bbram_pgm_enabled(XlnxBBRam *s) +{ + return ARRAY_FIELD_EX32(s->regs, BBRAM_STATUS, PGM_MODE) != 0; +} + +static void bbram_bdrv_error(XlnxBBRam *s, int rc, gchar *detail) +{ + Error *errp; + + error_setg_errno(&errp, -rc, "%s: BBRAM backstore %s failed.", + blk_name(s->blk), detail); + error_report("%s", error_get_pretty(errp)); + error_free(errp); + + g_free(detail); +} + +static void bbram_bdrv_read(XlnxBBRam *s, Error **errp) +{ + uint32_t *ram = &s->regs[R_BBRAM_0]; + int nr = RAM_MAX; + + if (!s->blk) { + return; + } + + 
s->blk_ro = !blk_supports_write_perm(s->blk); + if (!s->blk_ro) { + int rc; + + rc = blk_set_perm(s->blk, + (BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE), + BLK_PERM_ALL, NULL); + if (rc) { + s->blk_ro = true; + } + } + if (s->blk_ro) { + warn_report("%s: Skip saving updates to read-only BBRAM backstore.", + blk_name(s->blk)); + } + + if (blk_pread(s->blk, 0, ram, nr) < 0) { + error_setg(errp, + "%s: Failed to read %u bytes from BBRAM backstore.", + blk_name(s->blk), nr); + return; + } + + /* Convert from little-endian backstore for each 32-bit word */ + nr /= 4; + while (nr--) { + ram[nr] = le32_to_cpu(ram[nr]); + } +} + +static void bbram_bdrv_sync(XlnxBBRam *s, uint64_t hwaddr) +{ + uint32_t le32; + unsigned offset; + int rc; + + assert(A_BBRAM_0 <= hwaddr && hwaddr <= A_BBRAM_8); + + /* Backstore is always in little-endian */ + le32 = cpu_to_le32(s->regs[hwaddr / 4]); + + /* Update zeroized flag */ + if (le32 && (hwaddr != A_BBRAM_8 || s->bbram8_wo)) { + ARRAY_FIELD_DP32(s->regs, BBRAM_STATUS, BBRAM_ZEROIZED, 0); + } + + if (!s->blk || s->blk_ro) { + return; + } + + offset = hwaddr - A_BBRAM_0; + rc = blk_pwrite(s->blk, offset, &le32, 4, 0); + if (rc < 0) { + bbram_bdrv_error(s, rc, g_strdup_printf("write to offset %u", offset)); + } +} + +static void bbram_bdrv_zero(XlnxBBRam *s) +{ + int rc; + + ARRAY_FIELD_DP32(s->regs, BBRAM_STATUS, BBRAM_ZEROIZED, 1); + + if (!s->blk || s->blk_ro) { + return; + } + + rc = blk_make_zero(s->blk, 0); + if (rc < 0) { + bbram_bdrv_error(s, rc, g_strdup("zeroizing")); + } + + /* Restore bbram8 if it is non-zero */ + if (s->regs[R_BBRAM_8]) { + bbram_bdrv_sync(s, A_BBRAM_8); + } +} + +static void bbram_zeroize(XlnxBBRam *s) +{ + int nr = RAM_MAX - (s->bbram8_wo ? 0 : 4); /* only wo bbram8 is cleared */ + + memset(&s->regs[R_BBRAM_0], 0, nr); + bbram_bdrv_zero(s); +} + +static void bbram_update_irq(XlnxBBRam *s) +{ + bool pending = s->regs[R_BBRAM_ISR] & ~s->regs[R_BBRAM_IMR]; + + qemu_set_irq(s->irq_bbram, pending); +} + +static void bbram_ctrl_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + uint32_t val = val64; + + if (val & R_BBRAM_CTRL_ZEROIZE_MASK) { + bbram_zeroize(s); + /* The bit is self clearing */ + s->regs[R_BBRAM_CTRL] &= ~R_BBRAM_CTRL_ZEROIZE_MASK; + } +} + +static void bbram_pgm_mode_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + uint32_t val = val64; + + if (val == BBRAM_PGM_MAGIC) { + bbram_zeroize(s); + + /* The status bit is cleared only by POR */ + ARRAY_FIELD_DP32(s->regs, BBRAM_STATUS, PGM_MODE, 1); + } +} + +static void bbram_aes_crc_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + uint32_t calc_crc; + + if (!bbram_pgm_enabled(s)) { + /* We are not in programming mode, don't do anything */ + return; + } + + /* Perform the AES integrity check */ + s->regs[R_BBRAM_STATUS] |= R_BBRAM_STATUS_AES_CRC_DONE_MASK; + + /* + * Set check status. 
+ * + * ZynqMP BBRAM check has a zero-u32 prepended; see: + * https://github.com/Xilinx/embeddedsw/blob/release-2019.2/lib/sw_services/xilskey/src/xilskey_bbramps_zynqmp.c#L311 + */ + calc_crc = xlnx_efuse_calc_crc(&s->regs[R_BBRAM_0], + (R_BBRAM_8 - R_BBRAM_0), s->crc_zpads); + + ARRAY_FIELD_DP32(s->regs, BBRAM_STATUS, AES_CRC_PASS, + (s->regs[R_BBRAM_AES_CRC] == calc_crc)); +} + +static uint64_t bbram_key_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + uint32_t original_data = *(uint32_t *) reg->data; + + if (bbram_pgm_enabled(s)) { + return val64; + } else { + /* We are not in programming mode, don't do anything */ + qemu_log_mask(LOG_GUEST_ERROR, + "Not in programming mode, dropping the write\n"); + return original_data; + } +} + +static void bbram_key_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + + bbram_bdrv_sync(s, reg->access->addr); +} + +static uint64_t bbram_wo_postr(RegisterInfo *reg, uint64_t val) +{ + return 0; +} + +static uint64_t bbram_r8_postr(RegisterInfo *reg, uint64_t val) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + + return s->bbram8_wo ? bbram_wo_postr(reg, val) : val; +} + +static bool bbram_r8_readonly(XlnxBBRam *s) +{ + return !bbram_pgm_enabled(s) || bbram_msw_locked(s); +} + +static uint64_t bbram_r8_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + + if (bbram_r8_readonly(s)) { + val64 = *(uint32_t *)reg->data; + } + + return val64; +} + +static void bbram_r8_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + + if (!bbram_r8_readonly(s)) { + bbram_bdrv_sync(s, A_BBRAM_8); + } +} + +static uint64_t bbram_msw_lock_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + + /* Never lock if bbram8 is wo; and, only POR can clear the lock */ + if (s->bbram8_wo) { + val64 = 0; + } else { + val64 |= s->regs[R_BBRAM_MSW_LOCK]; + } + + return val64; +} + +static void bbram_isr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + + bbram_update_irq(s); +} + +static uint64_t bbram_ier_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + uint32_t val = val64; + + s->regs[R_BBRAM_IMR] &= ~val; + bbram_update_irq(s); + return 0; +} + +static uint64_t bbram_idr_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + uint32_t val = val64; + + s->regs[R_BBRAM_IMR] |= val; + bbram_update_irq(s); + return 0; +} + +static RegisterAccessInfo bbram_ctrl_regs_info[] = { + { .name = "BBRAM_STATUS", .addr = A_BBRAM_STATUS, + .rsvd = 0xee, + .ro = 0x3ff, + },{ .name = "BBRAM_CTRL", .addr = A_BBRAM_CTRL, + .post_write = bbram_ctrl_postw, + },{ .name = "PGM_MODE", .addr = A_PGM_MODE, + .post_write = bbram_pgm_mode_postw, + },{ .name = "BBRAM_AES_CRC", .addr = A_BBRAM_AES_CRC, + .post_write = bbram_aes_crc_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_0", .addr = A_BBRAM_0, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_1", .addr = A_BBRAM_1, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_2", .addr = A_BBRAM_2, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_3", .addr = A_BBRAM_3, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = 
"BBRAM_4", .addr = A_BBRAM_4, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_5", .addr = A_BBRAM_5, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_6", .addr = A_BBRAM_6, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_7", .addr = A_BBRAM_7, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_8", .addr = A_BBRAM_8, + .pre_write = bbram_r8_prew, + .post_write = bbram_r8_postw, + .post_read = bbram_r8_postr, + },{ .name = "BBRAM_SLVERR", .addr = A_BBRAM_SLVERR, + .rsvd = ~1, + },{ .name = "BBRAM_ISR", .addr = A_BBRAM_ISR, + .w1c = 0x1, + .post_write = bbram_isr_postw, + },{ .name = "BBRAM_IMR", .addr = A_BBRAM_IMR, + .ro = 0x1, + },{ .name = "BBRAM_IER", .addr = A_BBRAM_IER, + .pre_write = bbram_ier_prew, + },{ .name = "BBRAM_IDR", .addr = A_BBRAM_IDR, + .pre_write = bbram_idr_prew, + },{ .name = "BBRAM_MSW_LOCK", .addr = A_BBRAM_MSW_LOCK, + .pre_write = bbram_msw_lock_prew, + .ro = ~R_BBRAM_MSW_LOCK_VAL_MASK, + } +}; + +static void bbram_ctrl_reset(DeviceState *dev) +{ + XlnxBBRam *s = XLNX_BBRAM(dev); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { + if (i < R_BBRAM_0 || i > R_BBRAM_8) { + register_reset(&s->regs_info[i]); + } + } + + bbram_update_irq(s); +} + +static const MemoryRegionOps bbram_ctrl_ops = { + .read = register_read_memory, + .write = register_write_memory, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void bbram_ctrl_realize(DeviceState *dev, Error **errp) +{ + XlnxBBRam *s = XLNX_BBRAM(dev); + + if (s->crc_zpads) { + s->bbram8_wo = true; + } + + bbram_bdrv_read(s, errp); +} + +static void bbram_ctrl_init(Object *obj) +{ + XlnxBBRam *s = XLNX_BBRAM(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + RegisterInfoArray *reg_array; + + reg_array = + register_init_block32(DEVICE(obj), bbram_ctrl_regs_info, + ARRAY_SIZE(bbram_ctrl_regs_info), + s->regs_info, s->regs, + &bbram_ctrl_ops, + XLNX_BBRAM_ERR_DEBUG, + R_MAX * 4); + + sysbus_init_mmio(sbd, ®_array->mem); + sysbus_init_irq(sbd, &s->irq_bbram); +} + +static void bbram_prop_set_drive(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + DeviceState *dev = DEVICE(obj); + + qdev_prop_drive.set(obj, v, name, opaque, errp); + + /* Fill initial data if backend is attached after realized */ + if (dev->realized) { + bbram_bdrv_read(XLNX_BBRAM(obj), errp); + } +} + +static void bbram_prop_get_drive(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + qdev_prop_drive.get(obj, v, name, opaque, errp); +} + +static void bbram_prop_release_drive(Object *obj, const char *name, + void *opaque) +{ + qdev_prop_drive.release(obj, name, opaque); +} + +static const PropertyInfo bbram_prop_drive = { + .name = "str", + .description = "Node name or ID of a block device to use as BBRAM backend", + .realized_set_allowed = true, + .get = bbram_prop_get_drive, + .set = bbram_prop_set_drive, + .release = bbram_prop_release_drive, +}; + +static const VMStateDescription vmstate_bbram_ctrl = { + .name = TYPE_XLNX_BBRAM, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(regs, XlnxBBRam, R_MAX), + VMSTATE_END_OF_LIST(), + } +}; + +static Property bbram_ctrl_props[] = { + DEFINE_PROP("drive", 
XlnxBBRam, blk, bbram_prop_drive, BlockBackend *), + DEFINE_PROP_UINT32("crc-zpads", XlnxBBRam, crc_zpads, 1), + DEFINE_PROP_END_OF_LIST(), +}; + +static void bbram_ctrl_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = bbram_ctrl_reset; + dc->realize = bbram_ctrl_realize; + dc->vmsd = &vmstate_bbram_ctrl; + device_class_set_props(dc, bbram_ctrl_props); +} + +static const TypeInfo bbram_ctrl_info = { + .name = TYPE_XLNX_BBRAM, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XlnxBBRam), + .class_init = bbram_ctrl_class_init, + .instance_init = bbram_ctrl_init, +}; + +static void bbram_ctrl_register_types(void) +{ + type_register_static(&bbram_ctrl_info); +} + +type_init(bbram_ctrl_register_types) diff --git a/hw/nvram/xlnx-efuse-crc.c b/hw/nvram/xlnx-efuse-crc.c new file mode 100644 index 00000000000..5a5cc13f39f --- /dev/null +++ b/hw/nvram/xlnx-efuse-crc.c @@ -0,0 +1,119 @@ +/* + * Xilinx eFuse/bbram CRC calculator + * + * Copyright (c) 2021 Xilinx Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "qemu/osdep.h" +#include "hw/nvram/xlnx-efuse.h" + +static uint32_t xlnx_efuse_u37_crc(uint32_t prev_crc, uint32_t data, + uint32_t addr) +{ + /* A table for 7-bit slicing */ + static const uint32_t crc_tab[128] = { + 0x00000000, 0xe13b70f7, 0xc79a971f, 0x26a1e7e8, + 0x8ad958cf, 0x6be22838, 0x4d43cfd0, 0xac78bf27, + 0x105ec76f, 0xf165b798, 0xd7c45070, 0x36ff2087, + 0x9a879fa0, 0x7bbcef57, 0x5d1d08bf, 0xbc267848, + 0x20bd8ede, 0xc186fe29, 0xe72719c1, 0x061c6936, + 0xaa64d611, 0x4b5fa6e6, 0x6dfe410e, 0x8cc531f9, + 0x30e349b1, 0xd1d83946, 0xf779deae, 0x1642ae59, + 0xba3a117e, 0x5b016189, 0x7da08661, 0x9c9bf696, + 0x417b1dbc, 0xa0406d4b, 0x86e18aa3, 0x67dafa54, + 0xcba24573, 0x2a993584, 0x0c38d26c, 0xed03a29b, + 0x5125dad3, 0xb01eaa24, 0x96bf4dcc, 0x77843d3b, + 0xdbfc821c, 0x3ac7f2eb, 0x1c661503, 0xfd5d65f4, + 0x61c69362, 0x80fde395, 0xa65c047d, 0x4767748a, + 0xeb1fcbad, 0x0a24bb5a, 0x2c855cb2, 0xcdbe2c45, + 0x7198540d, 0x90a324fa, 0xb602c312, 0x5739b3e5, + 0xfb410cc2, 0x1a7a7c35, 0x3cdb9bdd, 0xdde0eb2a, + 0x82f63b78, 0x63cd4b8f, 0x456cac67, 0xa457dc90, + 0x082f63b7, 0xe9141340, 0xcfb5f4a8, 0x2e8e845f, + 0x92a8fc17, 0x73938ce0, 0x55326b08, 0xb4091bff, + 0x1871a4d8, 0xf94ad42f, 0xdfeb33c7, 0x3ed04330, + 0xa24bb5a6, 0x4370c551, 0x65d122b9, 0x84ea524e, + 0x2892ed69, 0xc9a99d9e, 0xef087a76, 0x0e330a81, + 0xb21572c9, 0x532e023e, 0x758fe5d6, 0x94b49521, + 0x38cc2a06, 0xd9f75af1, 0xff56bd19, 0x1e6dcdee, + 0xc38d26c4, 0x22b65633, 0x0417b1db, 0xe52cc12c, + 0x49547e0b, 0xa86f0efc, 0x8ecee914, 0x6ff599e3, + 0xd3d3e1ab, 0x32e8915c, 0x144976b4, 0xf5720643, + 0x590ab964, 0xb831c993, 0x9e902e7b, 0x7fab5e8c, + 0xe330a81a, 0x020bd8ed, 0x24aa3f05, 0xc5914ff2, + 0x69e9f0d5, 0x88d28022, 0xae7367ca, 0x4f48173d, + 0xf36e6f75, 0x12551f82, 0x34f4f86a, 0xd5cf889d, + 0x79b737ba, 0x988c474d, 0xbe2da0a5, 0x5f16d052 + }; + + /* + * eFuse calculation is shown here: + * https://github.com/Xilinx/embeddedsw/blob/release-2019.2/lib/sw_services/xilskey/src/xilskey_utils.c#L1496 + * + * Each u32 word is appended a 5-bit value, for a total of 37 bits; see: + * https://github.com/Xilinx/embeddedsw/blob/release-2019.2/lib/sw_services/xilskey/src/xilskey_utils.c#L1356 + */ + uint32_t crc = prev_crc; + const unsigned rshf = 7; + const uint32_t im = (1 << rshf) - 1; + const uint32_t rm = (1 << (32 - rshf)) - 1; + const uint32_t i2 = (1 << 2) - 1; + const uint32_t r2 = (1 << 30) - 1; + + unsigned j; + uint32_t i, r; + uint64_t w; + + w = (uint64_t)(addr) << 32; + w |= data; + + /* Feed 35 bits, in 5 rounds, each a slice of 7 bits */ + for (j = 0; j < 5; j++) { + r = rm & (crc >> rshf); + i = im & (crc ^ w); + crc = crc_tab[i] ^ r; + + w >>= rshf; + } + + /* Feed the remaining 2 bits */ + r = r2 & (crc >> 2); + i = i2 & (crc ^ w); + crc = crc_tab[i << (rshf - 2)] ^ r; + + return crc; +} + +uint32_t xlnx_efuse_calc_crc(const uint32_t *data, unsigned u32_cnt, + unsigned zpads) +{ + uint32_t crc = 0; + unsigned index; + + for (index = zpads; index; index--) { + crc = xlnx_efuse_u37_crc(crc, 0, (index + u32_cnt)); + } + + for (index = u32_cnt; index; index--) { + crc = xlnx_efuse_u37_crc(crc, data[index - 1], index); + } + + return crc; +} diff --git a/hw/nvram/xlnx-efuse.c b/hw/nvram/xlnx-efuse.c new file mode 100644 index 00000000000..a0fd77b586d --- /dev/null +++ b/hw/nvram/xlnx-efuse.c @@ -0,0 +1,283 @@ +/* + * QEMU model of the EFUSE eFuse + * + * Copyright (c) 2015 Xilinx Inc. + * + * Written by Edgar E. 
Iglesias + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/nvram/xlnx-efuse.h" + +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "sysemu/blockdev.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" + +#define TBIT0_OFFSET 28 +#define TBIT1_OFFSET 29 +#define TBIT2_OFFSET 30 +#define TBIT3_OFFSET 31 +#define TBITS_PATTERN (0x0AU << TBIT0_OFFSET) +#define TBITS_MASK (0x0FU << TBIT0_OFFSET) + +bool xlnx_efuse_get_bit(XlnxEFuse *s, unsigned int bit) +{ + bool b = s->fuse32[bit / 32] & (1 << (bit % 32)); + return b; +} + +static int efuse_bytes(XlnxEFuse *s) +{ + return ROUND_UP((s->efuse_nr * s->efuse_size) / 8, 4); +} + +static int efuse_bdrv_read(XlnxEFuse *s, Error **errp) +{ + uint32_t *ram = s->fuse32; + int nr = efuse_bytes(s); + + if (!s->blk) { + return 0; + } + + s->blk_ro = !blk_supports_write_perm(s->blk); + if (!s->blk_ro) { + int rc; + + rc = blk_set_perm(s->blk, + (BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE), + BLK_PERM_ALL, NULL); + if (rc) { + s->blk_ro = true; + } + } + if (s->blk_ro) { + warn_report("%s: Skip saving updates to read-only eFUSE backstore.", + blk_name(s->blk)); + } + + if (blk_pread(s->blk, 0, ram, nr) < 0) { + error_setg(errp, "%s: Failed to read %u bytes from eFUSE backstore.", + blk_name(s->blk), nr); + return -1; + } + + /* Convert from little-endian backstore for each 32-bit row */ + nr /= 4; + while (nr--) { + ram[nr] = le32_to_cpu(ram[nr]); + } + + return 0; +} + +static void efuse_bdrv_sync(XlnxEFuse *s, unsigned int bit) +{ + unsigned int row_offset; + uint32_t le32; + + if (!s->blk || s->blk_ro) { + return; /* Silent on read-only backend to avoid message flood */ + } + + /* Backstore is always in little-endian */ + le32 = cpu_to_le32(xlnx_efuse_get_row(s, bit)); + + row_offset = (bit / 32) * 4; + if (blk_pwrite(s->blk, row_offset, &le32, 4, 0) < 0) { + error_report("%s: Failed to write offset %u of eFUSE backstore.", + blk_name(s->blk), row_offset); + } +} + +static int efuse_ro_bits_cmp(const void *a, const void *b) +{ + uint32_t i = *(const uint32_t *)a; + uint32_t j = *(const uint32_t *)b; + + return (i > j) - (i < j); +} + +static void efuse_ro_bits_sort(XlnxEFuse *s) +{ + uint32_t *ary = s->ro_bits; + const uint32_t cnt = s->ro_bits_cnt; + + if (ary && cnt > 1) { + qsort(ary, cnt, sizeof(ary[0]), efuse_ro_bits_cmp); + } +} + +static bool efuse_ro_bits_find(XlnxEFuse *s, uint32_t k) +{ + const uint32_t *ary = 
s->ro_bits; + const uint32_t cnt = s->ro_bits_cnt; + + if (!ary || !cnt) { + return false; + } + + return bsearch(&k, ary, cnt, sizeof(ary[0]), efuse_ro_bits_cmp) != NULL; +} + +bool xlnx_efuse_set_bit(XlnxEFuse *s, unsigned int bit) +{ + if (efuse_ro_bits_find(s, bit)) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + qemu_log_mask(LOG_GUEST_ERROR, "%s: WARN: " + "Ignored setting of readonly efuse bit<%u,%u>!\n", + path, (bit / 32), (bit % 32)); + return false; + } + + s->fuse32[bit / 32] |= 1 << (bit % 32); + efuse_bdrv_sync(s, bit); + return true; +} + +bool xlnx_efuse_k256_check(XlnxEFuse *s, uint32_t crc, unsigned start) +{ + uint32_t calc; + + /* A key always occupies multiple of whole rows */ + assert((start % 32) == 0); + + calc = xlnx_efuse_calc_crc(&s->fuse32[start / 32], (256 / 32), 0); + return calc == crc; +} + +uint32_t xlnx_efuse_tbits_check(XlnxEFuse *s) +{ + int nr; + uint32_t check = 0; + + for (nr = s->efuse_nr; nr-- > 0; ) { + int efuse_start_row_num = (s->efuse_size * nr) / 32; + uint32_t data = s->fuse32[efuse_start_row_num]; + + /* + * If the option is on, auto-init blank T-bits. + * (non-blank will still be reported as '0' in the check, e.g., + * for error-injection tests) + */ + if ((data & TBITS_MASK) == 0 && s->init_tbits) { + data |= TBITS_PATTERN; + + s->fuse32[efuse_start_row_num] = data; + efuse_bdrv_sync(s, (efuse_start_row_num * 32 + TBIT0_OFFSET)); + } + + check = (check << 1) | ((data & TBITS_MASK) == TBITS_PATTERN); + } + + return check; +} + +static void efuse_realize(DeviceState *dev, Error **errp) +{ + XlnxEFuse *s = XLNX_EFUSE(dev); + + /* Sort readonly-list for bsearch lookup */ + efuse_ro_bits_sort(s); + + if ((s->efuse_size % 32) != 0) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + error_setg(errp, + "%s.efuse-size: %u: property value not multiple of 32.", + path, s->efuse_size); + return; + } + + s->fuse32 = g_malloc0(efuse_bytes(s)); + if (efuse_bdrv_read(s, errp)) { + g_free(s->fuse32); + } +} + +static void efuse_prop_set_drive(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + DeviceState *dev = DEVICE(obj); + + qdev_prop_drive.set(obj, v, name, opaque, errp); + + /* Fill initial data if backend is attached after realized */ + if (dev->realized) { + efuse_bdrv_read(XLNX_EFUSE(obj), errp); + } +} + +static void efuse_prop_get_drive(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + qdev_prop_drive.get(obj, v, name, opaque, errp); +} + +static void efuse_prop_release_drive(Object *obj, const char *name, + void *opaque) +{ + qdev_prop_drive.release(obj, name, opaque); +} + +static const PropertyInfo efuse_prop_drive = { + .name = "str", + .description = "Node name or ID of a block device to use as eFUSE backend", + .realized_set_allowed = true, + .get = efuse_prop_get_drive, + .set = efuse_prop_set_drive, + .release = efuse_prop_release_drive, +}; + +static Property efuse_properties[] = { + DEFINE_PROP("drive", XlnxEFuse, blk, efuse_prop_drive, BlockBackend *), + DEFINE_PROP_UINT8("efuse-nr", XlnxEFuse, efuse_nr, 3), + DEFINE_PROP_UINT32("efuse-size", XlnxEFuse, efuse_size, 64 * 32), + DEFINE_PROP_BOOL("init-factory-tbits", XlnxEFuse, init_tbits, true), + DEFINE_PROP_ARRAY("read-only", XlnxEFuse, ro_bits_cnt, ro_bits, + qdev_prop_uint32, uint32_t), + DEFINE_PROP_END_OF_LIST(), +}; + +static void efuse_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = efuse_realize; + device_class_set_props(dc, 
efuse_properties); +} + +static const TypeInfo efuse_info = { + .name = TYPE_XLNX_EFUSE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(XlnxEFuse), + .class_init = efuse_class_init, +}; + +static void efuse_register_types(void) +{ + type_register_static(&efuse_info); +} +type_init(efuse_register_types) diff --git a/hw/nvram/xlnx-versal-efuse-cache.c b/hw/nvram/xlnx-versal-efuse-cache.c new file mode 100644 index 00000000000..eaec64d785e --- /dev/null +++ b/hw/nvram/xlnx-versal-efuse-cache.c @@ -0,0 +1,114 @@ +/* + * QEMU model of the EFuse_Cache + * + * Copyright (c) 2017 Xilinx Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/nvram/xlnx-versal-efuse.h" + +#include "qemu/log.h" +#include "hw/qdev-properties.h" + +#define MR_SIZE 0xC00 + +static uint64_t efuse_cache_read(void *opaque, hwaddr addr, unsigned size) +{ + XlnxVersalEFuseCache *s = XLNX_VERSAL_EFUSE_CACHE(opaque); + unsigned int w0 = QEMU_ALIGN_DOWN(addr * 8, 32); + unsigned int w1 = QEMU_ALIGN_DOWN((addr + size - 1) * 8, 32); + + uint64_t ret; + + assert(w0 == w1 || (w0 + 32) == w1); + + ret = xlnx_versal_efuse_read_row(s->efuse, w1, NULL); + if (w0 < w1) { + ret <<= 32; + ret |= xlnx_versal_efuse_read_row(s->efuse, w0, NULL); + } + + /* If 'addr' unaligned, the guest is always assumed to be little-endian. 
*/ + addr &= 3; + if (addr) { + ret >>= 8 * addr; + } + + return ret; +} + +static void efuse_cache_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + /* No Register Writes allowed */ + qemu_log_mask(LOG_GUEST_ERROR, "%s: efuse cache registers are read-only", + __func__); +} + +static const MemoryRegionOps efuse_cache_ops = { + .read = efuse_cache_read, + .write = efuse_cache_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 4, + }, +}; + +static void efuse_cache_init(Object *obj) +{ + XlnxVersalEFuseCache *s = XLNX_VERSAL_EFUSE_CACHE(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + memory_region_init_io(&s->iomem, obj, &efuse_cache_ops, s, + TYPE_XLNX_VERSAL_EFUSE_CACHE, MR_SIZE); + sysbus_init_mmio(sbd, &s->iomem); +} + +static Property efuse_cache_props[] = { + DEFINE_PROP_LINK("efuse", + XlnxVersalEFuseCache, efuse, + TYPE_XLNX_EFUSE, XlnxEFuse *), + + DEFINE_PROP_END_OF_LIST(), +}; + +static void efuse_cache_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, efuse_cache_props); +} + +static const TypeInfo efuse_cache_info = { + .name = TYPE_XLNX_VERSAL_EFUSE_CACHE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XlnxVersalEFuseCache), + .class_init = efuse_cache_class_init, + .instance_init = efuse_cache_init, +}; + +static void efuse_cache_register_types(void) +{ + type_register_static(&efuse_cache_info); +} + +type_init(efuse_cache_register_types) diff --git a/hw/nvram/xlnx-versal-efuse-ctrl.c b/hw/nvram/xlnx-versal-efuse-ctrl.c new file mode 100644 index 00000000000..b35ba65ab57 --- /dev/null +++ b/hw/nvram/xlnx-versal-efuse-ctrl.c @@ -0,0 +1,793 @@ +/* + * QEMU model of the Versal eFuse controller + * + * Copyright (c) 2020 Xilinx Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "hw/nvram/xlnx-versal-efuse.h" + +#include "qemu/log.h" +#include "qapi/error.h" +#include "migration/vmstate.h" +#include "hw/qdev-properties.h" + +#ifndef XLNX_VERSAL_EFUSE_CTRL_ERR_DEBUG +#define XLNX_VERSAL_EFUSE_CTRL_ERR_DEBUG 0 +#endif + +REG32(WR_LOCK, 0x0) + FIELD(WR_LOCK, LOCK, 0, 16) +REG32(CFG, 0x4) + FIELD(CFG, SLVERR_ENABLE, 5, 1) + FIELD(CFG, MARGIN_RD, 2, 1) + FIELD(CFG, PGM_EN, 1, 1) +REG32(STATUS, 0x8) + FIELD(STATUS, AES_USER_KEY_1_CRC_PASS, 11, 1) + FIELD(STATUS, AES_USER_KEY_1_CRC_DONE, 10, 1) + FIELD(STATUS, AES_USER_KEY_0_CRC_PASS, 9, 1) + FIELD(STATUS, AES_USER_KEY_0_CRC_DONE, 8, 1) + FIELD(STATUS, AES_CRC_PASS, 7, 1) + FIELD(STATUS, AES_CRC_DONE, 6, 1) + FIELD(STATUS, CACHE_DONE, 5, 1) + FIELD(STATUS, CACHE_LOAD, 4, 1) + FIELD(STATUS, EFUSE_2_TBIT, 2, 1) + FIELD(STATUS, EFUSE_1_TBIT, 1, 1) + FIELD(STATUS, EFUSE_0_TBIT, 0, 1) +REG32(EFUSE_PGM_ADDR, 0xc) + FIELD(EFUSE_PGM_ADDR, PAGE, 13, 4) + FIELD(EFUSE_PGM_ADDR, ROW, 5, 8) + FIELD(EFUSE_PGM_ADDR, COLUMN, 0, 5) +REG32(EFUSE_RD_ADDR, 0x10) + FIELD(EFUSE_RD_ADDR, PAGE, 13, 4) + FIELD(EFUSE_RD_ADDR, ROW, 5, 8) +REG32(EFUSE_RD_DATA, 0x14) +REG32(TPGM, 0x18) + FIELD(TPGM, VALUE, 0, 16) +REG32(TRD, 0x1c) + FIELD(TRD, VALUE, 0, 8) +REG32(TSU_H_PS, 0x20) + FIELD(TSU_H_PS, VALUE, 0, 8) +REG32(TSU_H_PS_CS, 0x24) + FIELD(TSU_H_PS_CS, VALUE, 0, 8) +REG32(TRDM, 0x28) + FIELD(TRDM, VALUE, 0, 8) +REG32(TSU_H_CS, 0x2c) + FIELD(TSU_H_CS, VALUE, 0, 8) +REG32(EFUSE_ISR, 0x30) + FIELD(EFUSE_ISR, APB_SLVERR, 31, 1) + FIELD(EFUSE_ISR, CACHE_PARITY_E2, 14, 1) + FIELD(EFUSE_ISR, CACHE_PARITY_E1, 13, 1) + FIELD(EFUSE_ISR, CACHE_PARITY_E0S, 12, 1) + FIELD(EFUSE_ISR, CACHE_PARITY_E0R, 11, 1) + FIELD(EFUSE_ISR, CACHE_APB_SLVERR, 10, 1) + FIELD(EFUSE_ISR, CACHE_REQ_ERROR, 9, 1) + FIELD(EFUSE_ISR, MAIN_REQ_ERROR, 8, 1) + FIELD(EFUSE_ISR, READ_ON_CACHE_LD, 7, 1) + FIELD(EFUSE_ISR, CACHE_FSM_ERROR, 6, 1) + FIELD(EFUSE_ISR, MAIN_FSM_ERROR, 5, 1) + FIELD(EFUSE_ISR, CACHE_ERROR, 4, 1) + FIELD(EFUSE_ISR, RD_ERROR, 3, 1) + FIELD(EFUSE_ISR, RD_DONE, 2, 1) + FIELD(EFUSE_ISR, PGM_ERROR, 1, 1) + FIELD(EFUSE_ISR, PGM_DONE, 0, 1) +REG32(EFUSE_IMR, 0x34) + FIELD(EFUSE_IMR, APB_SLVERR, 31, 1) + FIELD(EFUSE_IMR, CACHE_PARITY_E2, 14, 1) + FIELD(EFUSE_IMR, CACHE_PARITY_E1, 13, 1) + FIELD(EFUSE_IMR, CACHE_PARITY_E0S, 12, 1) + FIELD(EFUSE_IMR, CACHE_PARITY_E0R, 11, 1) + FIELD(EFUSE_IMR, CACHE_APB_SLVERR, 10, 1) + FIELD(EFUSE_IMR, CACHE_REQ_ERROR, 9, 1) + FIELD(EFUSE_IMR, MAIN_REQ_ERROR, 8, 1) + FIELD(EFUSE_IMR, READ_ON_CACHE_LD, 7, 1) + FIELD(EFUSE_IMR, CACHE_FSM_ERROR, 6, 1) + FIELD(EFUSE_IMR, MAIN_FSM_ERROR, 5, 1) + FIELD(EFUSE_IMR, CACHE_ERROR, 4, 1) + FIELD(EFUSE_IMR, RD_ERROR, 3, 1) + FIELD(EFUSE_IMR, RD_DONE, 2, 1) + FIELD(EFUSE_IMR, PGM_ERROR, 1, 1) + FIELD(EFUSE_IMR, PGM_DONE, 0, 1) +REG32(EFUSE_IER, 0x38) + FIELD(EFUSE_IER, APB_SLVERR, 31, 1) + FIELD(EFUSE_IER, CACHE_PARITY_E2, 14, 1) + FIELD(EFUSE_IER, CACHE_PARITY_E1, 13, 1) + FIELD(EFUSE_IER, CACHE_PARITY_E0S, 12, 1) + FIELD(EFUSE_IER, CACHE_PARITY_E0R, 11, 1) + FIELD(EFUSE_IER, CACHE_APB_SLVERR, 10, 1) + FIELD(EFUSE_IER, CACHE_REQ_ERROR, 9, 1) + FIELD(EFUSE_IER, MAIN_REQ_ERROR, 8, 1) + FIELD(EFUSE_IER, READ_ON_CACHE_LD, 7, 1) + FIELD(EFUSE_IER, CACHE_FSM_ERROR, 6, 1) + FIELD(EFUSE_IER, MAIN_FSM_ERROR, 5, 1) + FIELD(EFUSE_IER, CACHE_ERROR, 4, 1) + FIELD(EFUSE_IER, RD_ERROR, 3, 1) + FIELD(EFUSE_IER, RD_DONE, 2, 1) + FIELD(EFUSE_IER, PGM_ERROR, 1, 1) + FIELD(EFUSE_IER, PGM_DONE, 0, 1) +REG32(EFUSE_IDR, 0x3c) + FIELD(EFUSE_IDR, APB_SLVERR, 31, 1) + FIELD(EFUSE_IDR, 
CACHE_PARITY_E2, 14, 1) + FIELD(EFUSE_IDR, CACHE_PARITY_E1, 13, 1) + FIELD(EFUSE_IDR, CACHE_PARITY_E0S, 12, 1) + FIELD(EFUSE_IDR, CACHE_PARITY_E0R, 11, 1) + FIELD(EFUSE_IDR, CACHE_APB_SLVERR, 10, 1) + FIELD(EFUSE_IDR, CACHE_REQ_ERROR, 9, 1) + FIELD(EFUSE_IDR, MAIN_REQ_ERROR, 8, 1) + FIELD(EFUSE_IDR, READ_ON_CACHE_LD, 7, 1) + FIELD(EFUSE_IDR, CACHE_FSM_ERROR, 6, 1) + FIELD(EFUSE_IDR, MAIN_FSM_ERROR, 5, 1) + FIELD(EFUSE_IDR, CACHE_ERROR, 4, 1) + FIELD(EFUSE_IDR, RD_ERROR, 3, 1) + FIELD(EFUSE_IDR, RD_DONE, 2, 1) + FIELD(EFUSE_IDR, PGM_ERROR, 1, 1) + FIELD(EFUSE_IDR, PGM_DONE, 0, 1) +REG32(EFUSE_CACHE_LOAD, 0x40) + FIELD(EFUSE_CACHE_LOAD, LOAD, 0, 1) +REG32(EFUSE_PGM_LOCK, 0x44) + FIELD(EFUSE_PGM_LOCK, SPK_ID_LOCK, 0, 1) +REG32(EFUSE_AES_CRC, 0x48) +REG32(EFUSE_AES_USR_KEY0_CRC, 0x4c) +REG32(EFUSE_AES_USR_KEY1_CRC, 0x50) +REG32(EFUSE_PD, 0x54) +REG32(EFUSE_ANLG_OSC_SW_1LP, 0x60) +REG32(EFUSE_TEST_CTRL, 0x100) + +#define R_MAX (R_EFUSE_TEST_CTRL + 1) + +#define R_WR_LOCK_UNLOCK_PASSCODE (0xDF0D) + +/* + * eFuse layout references: + * https://github.com/Xilinx/embeddedsw/blob/release-2019.2/lib/sw_services/xilnvm/src/xnvm_efuse_hw.h + */ +#define BIT_POS_OF(A_) \ + ((uint32_t)((A_) & (R_EFUSE_PGM_ADDR_ROW_MASK | \ + R_EFUSE_PGM_ADDR_COLUMN_MASK))) + +#define BIT_POS(R_, C_) \ + ((uint32_t)((R_EFUSE_PGM_ADDR_ROW_MASK \ + & ((R_) << R_EFUSE_PGM_ADDR_ROW_SHIFT)) \ + | \ + (R_EFUSE_PGM_ADDR_COLUMN_MASK \ + & ((C_) << R_EFUSE_PGM_ADDR_COLUMN_SHIFT)))) + +#define EFUSE_TBIT_POS(A_) (BIT_POS_OF(A_) >= BIT_POS(0, 28)) + +#define EFUSE_ANCHOR_ROW (0) +#define EFUSE_ANCHOR_3_COL (27) +#define EFUSE_ANCHOR_1_COL (1) + +#define EFUSE_AES_KEY_START BIT_POS(12, 0) +#define EFUSE_AES_KEY_END BIT_POS(19, 31) +#define EFUSE_USER_KEY_0_START BIT_POS(20, 0) +#define EFUSE_USER_KEY_0_END BIT_POS(27, 31) +#define EFUSE_USER_KEY_1_START BIT_POS(28, 0) +#define EFUSE_USER_KEY_1_END BIT_POS(35, 31) + +#define EFUSE_RD_BLOCKED_START EFUSE_AES_KEY_START +#define EFUSE_RD_BLOCKED_END EFUSE_USER_KEY_1_END + +#define EFUSE_GLITCH_DET_WR_LK BIT_POS(4, 31) +#define EFUSE_PPK0_WR_LK BIT_POS(43, 6) +#define EFUSE_PPK1_WR_LK BIT_POS(43, 7) +#define EFUSE_PPK2_WR_LK BIT_POS(43, 8) +#define EFUSE_AES_WR_LK BIT_POS(43, 11) +#define EFUSE_USER_KEY_0_WR_LK BIT_POS(43, 13) +#define EFUSE_USER_KEY_1_WR_LK BIT_POS(43, 15) +#define EFUSE_PUF_SYN_LK BIT_POS(43, 16) +#define EFUSE_DNA_WR_LK BIT_POS(43, 27) +#define EFUSE_BOOT_ENV_WR_LK BIT_POS(43, 28) + +#define EFUSE_PGM_LOCKED_START BIT_POS(44, 0) +#define EFUSE_PGM_LOCKED_END BIT_POS(51, 31) + +#define EFUSE_PUF_PAGE (2) +#define EFUSE_PUF_SYN_START BIT_POS(129, 0) +#define EFUSE_PUF_SYN_END BIT_POS(255, 27) + +#define EFUSE_KEY_CRC_LK_ROW (43) +#define EFUSE_AES_KEY_CRC_LK_MASK ((1U << 9) | (1U << 10)) +#define EFUSE_USER_KEY_0_CRC_LK_MASK (1U << 12) +#define EFUSE_USER_KEY_1_CRC_LK_MASK (1U << 14) + +/* + * A handy macro to return value of an array element, + * or a specific default if given index is out of bound. + */ +#define ARRAY_GET(A_, I_, D_) \ + ((unsigned int)(I_) < ARRAY_SIZE(A_) ? 
(A_)[I_] : (D_)) + +QEMU_BUILD_BUG_ON(R_MAX != ARRAY_SIZE(((XlnxVersalEFuseCtrl *)0)->regs)); + +typedef struct XlnxEFuseLkSpec { + uint16_t row; + uint16_t lk_bit; +} XlnxEFuseLkSpec; + +static void efuse_imr_update_irq(XlnxVersalEFuseCtrl *s) +{ + bool pending = s->regs[R_EFUSE_ISR] & ~s->regs[R_EFUSE_IMR]; + qemu_set_irq(s->irq_efuse_imr, pending); +} + +static void efuse_isr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + efuse_imr_update_irq(s); +} + +static uint64_t efuse_ier_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + uint32_t val = val64; + + s->regs[R_EFUSE_IMR] &= ~val; + efuse_imr_update_irq(s); + return 0; +} + +static uint64_t efuse_idr_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + uint32_t val = val64; + + s->regs[R_EFUSE_IMR] |= val; + efuse_imr_update_irq(s); + return 0; +} + +static void efuse_status_tbits_sync(XlnxVersalEFuseCtrl *s) +{ + uint32_t check = xlnx_efuse_tbits_check(s->efuse); + uint32_t val = s->regs[R_STATUS]; + + val = FIELD_DP32(val, STATUS, EFUSE_0_TBIT, !!(check & (1 << 0))); + val = FIELD_DP32(val, STATUS, EFUSE_1_TBIT, !!(check & (1 << 1))); + val = FIELD_DP32(val, STATUS, EFUSE_2_TBIT, !!(check & (1 << 2))); + + s->regs[R_STATUS] = val; +} + +static void efuse_anchor_bits_check(XlnxVersalEFuseCtrl *s) +{ + unsigned page; + + if (!s->efuse || !s->efuse->init_tbits) { + return; + } + + for (page = 0; page < s->efuse->efuse_nr; page++) { + uint32_t row = 0, bit; + + row = FIELD_DP32(row, EFUSE_PGM_ADDR, PAGE, page); + row = FIELD_DP32(row, EFUSE_PGM_ADDR, ROW, EFUSE_ANCHOR_ROW); + + bit = FIELD_DP32(row, EFUSE_PGM_ADDR, COLUMN, EFUSE_ANCHOR_3_COL); + if (!xlnx_efuse_get_bit(s->efuse, bit)) { + xlnx_efuse_set_bit(s->efuse, bit); + } + + bit = FIELD_DP32(row, EFUSE_PGM_ADDR, COLUMN, EFUSE_ANCHOR_1_COL); + if (!xlnx_efuse_get_bit(s->efuse, bit)) { + xlnx_efuse_set_bit(s->efuse, bit); + } + } +} + +static void efuse_key_crc_check(RegisterInfo *reg, uint32_t crc, + uint32_t pass_mask, uint32_t done_mask, + unsigned first, uint32_t lk_mask) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + uint32_t r, lk_bits; + + /* + * To start, assume both DONE and PASS, and clear PASS by xor + * if CRC-check fails or CRC-check disabled by lock fuse. 
+ */ + r = s->regs[R_STATUS] | done_mask | pass_mask; + + lk_bits = xlnx_efuse_get_row(s->efuse, EFUSE_KEY_CRC_LK_ROW) & lk_mask; + if (lk_bits == 0 && xlnx_efuse_k256_check(s->efuse, crc, first)) { + pass_mask = 0; + } + + s->regs[R_STATUS] = r ^ pass_mask; +} + +static void efuse_data_sync(XlnxVersalEFuseCtrl *s) +{ + efuse_status_tbits_sync(s); +} + +static int efuse_lk_spec_cmp(const void *a, const void *b) +{ + uint16_t r1 = ((const XlnxEFuseLkSpec *)a)->row; + uint16_t r2 = ((const XlnxEFuseLkSpec *)b)->row; + + return (r1 > r2) - (r1 < r2); +} + +static void efuse_lk_spec_sort(XlnxVersalEFuseCtrl *s) +{ + XlnxEFuseLkSpec *ary = s->extra_pg0_lock_spec; + const uint32_t n8 = s->extra_pg0_lock_n16 * 2; + const uint32_t sz = sizeof(ary[0]); + const uint32_t cnt = n8 / sz; + + if (ary && cnt) { + qsort(ary, cnt, sz, efuse_lk_spec_cmp); + } +} + +static uint32_t efuse_lk_spec_find(XlnxVersalEFuseCtrl *s, uint32_t row) +{ + const XlnxEFuseLkSpec *ary = s->extra_pg0_lock_spec; + const uint32_t n8 = s->extra_pg0_lock_n16 * 2; + const uint32_t sz = sizeof(ary[0]); + const uint32_t cnt = n8 / sz; + const XlnxEFuseLkSpec *item = NULL; + + if (ary && cnt) { + XlnxEFuseLkSpec k = { .row = row, }; + + item = bsearch(&k, ary, cnt, sz, efuse_lk_spec_cmp); + } + + return item ? item->lk_bit : 0; +} + +static uint32_t efuse_bit_locked(XlnxVersalEFuseCtrl *s, uint32_t bit) +{ + /* Hard-coded locks */ + static const uint16_t pg0_hard_lock[] = { + [4] = EFUSE_GLITCH_DET_WR_LK, + [37] = EFUSE_BOOT_ENV_WR_LK, + + [8 ... 11] = EFUSE_DNA_WR_LK, + [12 ... 19] = EFUSE_AES_WR_LK, + [20 ... 27] = EFUSE_USER_KEY_0_WR_LK, + [28 ... 35] = EFUSE_USER_KEY_1_WR_LK, + [64 ... 71] = EFUSE_PPK0_WR_LK, + [72 ... 79] = EFUSE_PPK1_WR_LK, + [80 ... 87] = EFUSE_PPK2_WR_LK, + }; + + uint32_t row = FIELD_EX32(bit, EFUSE_PGM_ADDR, ROW); + uint32_t lk_bit = ARRAY_GET(pg0_hard_lock, row, 0); + + return lk_bit ? lk_bit : efuse_lk_spec_find(s, row); +} + +static bool efuse_pgm_locked(XlnxVersalEFuseCtrl *s, unsigned int bit) +{ + + unsigned int lock = 1; + + /* Global lock */ + if (!ARRAY_FIELD_EX32(s->regs, CFG, PGM_EN)) { + goto ret_lock; + } + + /* Row lock */ + switch (FIELD_EX32(bit, EFUSE_PGM_ADDR, PAGE)) { + case 0: + if (ARRAY_FIELD_EX32(s->regs, EFUSE_PGM_LOCK, SPK_ID_LOCK) && + bit >= EFUSE_PGM_LOCKED_START && bit <= EFUSE_PGM_LOCKED_END) { + goto ret_lock; + } + + lock = efuse_bit_locked(s, bit); + break; + case EFUSE_PUF_PAGE: + if (bit < EFUSE_PUF_SYN_START || bit > EFUSE_PUF_SYN_END) { + lock = 0; + goto ret_lock; + } + + lock = EFUSE_PUF_SYN_LK; + break; + default: + lock = 0; + goto ret_lock; + } + + /* Row lock by an efuse bit */ + if (lock) { + lock = xlnx_efuse_get_bit(s->efuse, lock); + } + + ret_lock: + return lock != 0; +} + +static void efuse_pgm_addr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + unsigned bit = val64; + bool ok = false; + + /* Always zero out PGM_ADDR because it is write-only */ + s->regs[R_EFUSE_PGM_ADDR] = 0; + + /* + * Indicate error if bit is write-protected (or read-only + * as guarded by efuse_set_bit()). + * + * Keep it simple by not modeling program timing. + * + * Note: model must NEVER clear the PGM_ERROR bit; it is + * up to guest to do so (or by reset). 
+ */ + if (efuse_pgm_locked(s, bit)) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Denied setting of efuse<%u, %u, %u>\n", + path, + FIELD_EX32(bit, EFUSE_PGM_ADDR, PAGE), + FIELD_EX32(bit, EFUSE_PGM_ADDR, ROW), + FIELD_EX32(bit, EFUSE_PGM_ADDR, COLUMN)); + } else if (xlnx_efuse_set_bit(s->efuse, bit)) { + ok = true; + if (EFUSE_TBIT_POS(bit)) { + efuse_status_tbits_sync(s); + } + } + + if (!ok) { + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, PGM_ERROR, 1); + } + + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, PGM_DONE, 1); + efuse_imr_update_irq(s); +} + +static void efuse_rd_addr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + unsigned bit = val64; + bool denied; + + /* Always zero out RD_ADDR because it is write-only */ + s->regs[R_EFUSE_RD_ADDR] = 0; + + /* + * Indicate error if row is read-blocked. + * + * Note: model must NEVER clear the RD_ERROR bit; it is + * up to guest to do so (or by reset). + */ + s->regs[R_EFUSE_RD_DATA] = xlnx_versal_efuse_read_row(s->efuse, + bit, &denied); + if (denied) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Denied reading of efuse<%u, %u>\n", + path, + FIELD_EX32(bit, EFUSE_RD_ADDR, PAGE), + FIELD_EX32(bit, EFUSE_RD_ADDR, ROW)); + + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, RD_ERROR, 1); + } + + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, RD_DONE, 1); + efuse_imr_update_irq(s); + return; +} + +static uint64_t efuse_cache_load_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + + if (val64 & R_EFUSE_CACHE_LOAD_LOAD_MASK) { + efuse_data_sync(s); + + ARRAY_FIELD_DP32(s->regs, STATUS, CACHE_DONE, 1); + efuse_imr_update_irq(s); + } + + return 0; +} + +static uint64_t efuse_pgm_lock_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + + /* Ignore all other bits */ + val64 = FIELD_EX32(val64, EFUSE_PGM_LOCK, SPK_ID_LOCK); + + /* Once the bit is written 1, only reset will clear it to 0 */ + val64 |= ARRAY_FIELD_EX32(s->regs, EFUSE_PGM_LOCK, SPK_ID_LOCK); + + return val64; +} + +static void efuse_aes_crc_postw(RegisterInfo *reg, uint64_t val64) +{ + efuse_key_crc_check(reg, val64, + R_STATUS_AES_CRC_PASS_MASK, + R_STATUS_AES_CRC_DONE_MASK, + EFUSE_AES_KEY_START, + EFUSE_AES_KEY_CRC_LK_MASK); +} + +static void efuse_aes_u0_crc_postw(RegisterInfo *reg, uint64_t val64) +{ + efuse_key_crc_check(reg, val64, + R_STATUS_AES_USER_KEY_0_CRC_PASS_MASK, + R_STATUS_AES_USER_KEY_0_CRC_DONE_MASK, + EFUSE_USER_KEY_0_START, + EFUSE_USER_KEY_0_CRC_LK_MASK); +} + +static void efuse_aes_u1_crc_postw(RegisterInfo *reg, uint64_t val64) +{ + efuse_key_crc_check(reg, val64, + R_STATUS_AES_USER_KEY_1_CRC_PASS_MASK, + R_STATUS_AES_USER_KEY_1_CRC_DONE_MASK, + EFUSE_USER_KEY_1_START, + EFUSE_USER_KEY_1_CRC_LK_MASK); +} + +static uint64_t efuse_wr_lock_prew(RegisterInfo *reg, uint64_t val) +{ + return val != R_WR_LOCK_UNLOCK_PASSCODE; +} + +static const RegisterAccessInfo efuse_ctrl_regs_info[] = { + { .name = "WR_LOCK", .addr = A_WR_LOCK, + .reset = 0x1, + .pre_write = efuse_wr_lock_prew, + },{ .name = "CFG", .addr = A_CFG, + .rsvd = 0x9, + },{ .name = "STATUS", .addr = A_STATUS, + .rsvd = 0x8, + .ro = 0xfff, + },{ .name = "EFUSE_PGM_ADDR", .addr = A_EFUSE_PGM_ADDR, + .post_write = efuse_pgm_addr_postw, + },{ .name = "EFUSE_RD_ADDR", .addr = A_EFUSE_RD_ADDR, + .rsvd = 0x1f, + .post_write = efuse_rd_addr_postw, + },{ 
.name = "EFUSE_RD_DATA", .addr = A_EFUSE_RD_DATA, + .ro = 0xffffffff, + },{ .name = "TPGM", .addr = A_TPGM, + },{ .name = "TRD", .addr = A_TRD, + .reset = 0x19, + },{ .name = "TSU_H_PS", .addr = A_TSU_H_PS, + .reset = 0xff, + },{ .name = "TSU_H_PS_CS", .addr = A_TSU_H_PS_CS, + .reset = 0x11, + },{ .name = "TRDM", .addr = A_TRDM, + .reset = 0x3a, + },{ .name = "TSU_H_CS", .addr = A_TSU_H_CS, + .reset = 0x16, + },{ .name = "EFUSE_ISR", .addr = A_EFUSE_ISR, + .rsvd = 0x7fff8000, + .w1c = 0x80007fff, + .post_write = efuse_isr_postw, + },{ .name = "EFUSE_IMR", .addr = A_EFUSE_IMR, + .reset = 0x80007fff, + .rsvd = 0x7fff8000, + .ro = 0xffffffff, + },{ .name = "EFUSE_IER", .addr = A_EFUSE_IER, + .rsvd = 0x7fff8000, + .pre_write = efuse_ier_prew, + },{ .name = "EFUSE_IDR", .addr = A_EFUSE_IDR, + .rsvd = 0x7fff8000, + .pre_write = efuse_idr_prew, + },{ .name = "EFUSE_CACHE_LOAD", .addr = A_EFUSE_CACHE_LOAD, + .pre_write = efuse_cache_load_prew, + },{ .name = "EFUSE_PGM_LOCK", .addr = A_EFUSE_PGM_LOCK, + .pre_write = efuse_pgm_lock_prew, + },{ .name = "EFUSE_AES_CRC", .addr = A_EFUSE_AES_CRC, + .post_write = efuse_aes_crc_postw, + },{ .name = "EFUSE_AES_USR_KEY0_CRC", .addr = A_EFUSE_AES_USR_KEY0_CRC, + .post_write = efuse_aes_u0_crc_postw, + },{ .name = "EFUSE_AES_USR_KEY1_CRC", .addr = A_EFUSE_AES_USR_KEY1_CRC, + .post_write = efuse_aes_u1_crc_postw, + },{ .name = "EFUSE_PD", .addr = A_EFUSE_PD, + .ro = 0xfffffffe, + },{ .name = "EFUSE_ANLG_OSC_SW_1LP", .addr = A_EFUSE_ANLG_OSC_SW_1LP, + },{ .name = "EFUSE_TEST_CTRL", .addr = A_EFUSE_TEST_CTRL, + .reset = 0x8, + } +}; + +static void efuse_ctrl_reg_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + RegisterInfoArray *reg_array = opaque; + XlnxVersalEFuseCtrl *s; + Object *dev; + + assert(reg_array != NULL); + + dev = reg_array->mem.owner; + assert(dev); + + s = XLNX_VERSAL_EFUSE_CTRL(dev); + + if (addr != A_WR_LOCK && s->regs[R_WR_LOCK]) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + qemu_log_mask(LOG_GUEST_ERROR, + "%s[reg_0x%02lx]: Attempt to write locked register.\n", + path, (long)addr); + } else { + register_write_memory(opaque, addr, data, size); + } +} + +static void efuse_ctrl_register_reset(RegisterInfo *reg) +{ + if (!reg->data || !reg->access) { + return; + } + + /* Reset must not trigger some registers' writers */ + switch (reg->access->addr) { + case A_EFUSE_AES_CRC: + case A_EFUSE_AES_USR_KEY0_CRC: + case A_EFUSE_AES_USR_KEY1_CRC: + *(uint32_t *)reg->data = reg->access->reset; + return; + } + + register_reset(reg); +} + +static void efuse_ctrl_reset(DeviceState *dev) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(dev); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { + efuse_ctrl_register_reset(&s->regs_info[i]); + } + + efuse_anchor_bits_check(s); + efuse_data_sync(s); + efuse_imr_update_irq(s); +} + +static const MemoryRegionOps efuse_ctrl_ops = { + .read = register_read_memory, + .write = efuse_ctrl_reg_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void efuse_ctrl_realize(DeviceState *dev, Error **errp) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(dev); + const uint32_t lks_sz = sizeof(XlnxEFuseLkSpec) / 2; + + if (!s->efuse) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + error_setg(errp, "%s.efuse: link property not connected to XLNX-EFUSE", + path); + return; + } + + /* Sort property-defined pgm-locks for bsearch lookup */ + if ((s->extra_pg0_lock_n16 % 
lks_sz) != 0) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + error_setg(errp, + "%s.pg0-lock: array property item-count not multiple of %u", + path, lks_sz); + return; + } + + efuse_lk_spec_sort(s); +} + +static void efuse_ctrl_init(Object *obj) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + RegisterInfoArray *reg_array; + + reg_array = + register_init_block32(DEVICE(obj), efuse_ctrl_regs_info, + ARRAY_SIZE(efuse_ctrl_regs_info), + s->regs_info, s->regs, + &efuse_ctrl_ops, + XLNX_VERSAL_EFUSE_CTRL_ERR_DEBUG, + R_MAX * 4); + + sysbus_init_mmio(sbd, ®_array->mem); + sysbus_init_irq(sbd, &s->irq_efuse_imr); +} + +static const VMStateDescription vmstate_efuse_ctrl = { + .name = TYPE_XLNX_VERSAL_EFUSE_CTRL, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(regs, XlnxVersalEFuseCtrl, R_MAX), + VMSTATE_END_OF_LIST(), + } +}; + +static Property efuse_ctrl_props[] = { + DEFINE_PROP_LINK("efuse", + XlnxVersalEFuseCtrl, efuse, + TYPE_XLNX_EFUSE, XlnxEFuse *), + DEFINE_PROP_ARRAY("pg0-lock", + XlnxVersalEFuseCtrl, extra_pg0_lock_n16, + extra_pg0_lock_spec, qdev_prop_uint16, uint16_t), + + DEFINE_PROP_END_OF_LIST(), +}; + +static void efuse_ctrl_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = efuse_ctrl_reset; + dc->realize = efuse_ctrl_realize; + dc->vmsd = &vmstate_efuse_ctrl; + device_class_set_props(dc, efuse_ctrl_props); +} + +static const TypeInfo efuse_ctrl_info = { + .name = TYPE_XLNX_VERSAL_EFUSE_CTRL, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XlnxVersalEFuseCtrl), + .class_init = efuse_ctrl_class_init, + .instance_init = efuse_ctrl_init, +}; + +static void efuse_ctrl_register_types(void) +{ + type_register_static(&efuse_ctrl_info); +} + +type_init(efuse_ctrl_register_types) + +/* + * Retrieve a row, with unreadable bits returned as 0. + */ +uint32_t xlnx_versal_efuse_read_row(XlnxEFuse *efuse, + uint32_t bit, bool *denied) +{ + bool dummy; + + if (!denied) { + denied = &dummy; + } + + if (bit >= EFUSE_RD_BLOCKED_START && bit <= EFUSE_RD_BLOCKED_END) { + *denied = true; + return 0; + } + + *denied = false; + return xlnx_efuse_get_row(efuse, bit); +} diff --git a/hw/nvram/xlnx-zynqmp-efuse.c b/hw/nvram/xlnx-zynqmp-efuse.c new file mode 100644 index 00000000000..228ba0bbfaf --- /dev/null +++ b/hw/nvram/xlnx-zynqmp-efuse.c @@ -0,0 +1,861 @@ +/* + * QEMU model of the ZynqMP eFuse + * + * Copyright (c) 2015 Xilinx Inc. + * + * Written by Edgar E. Iglesias + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/nvram/xlnx-zynqmp-efuse.h" + +#include "qemu/log.h" +#include "qapi/error.h" +#include "migration/vmstate.h" +#include "hw/qdev-properties.h" + +#ifndef ZYNQMP_EFUSE_ERR_DEBUG +#define ZYNQMP_EFUSE_ERR_DEBUG 0 +#endif + +REG32(WR_LOCK, 0x0) + FIELD(WR_LOCK, LOCK, 0, 16) +REG32(CFG, 0x4) + FIELD(CFG, SLVERR_ENABLE, 5, 1) + FIELD(CFG, MARGIN_RD, 2, 2) + FIELD(CFG, PGM_EN, 1, 1) + FIELD(CFG, EFUSE_CLK_SEL, 0, 1) +REG32(STATUS, 0x8) + FIELD(STATUS, AES_CRC_PASS, 7, 1) + FIELD(STATUS, AES_CRC_DONE, 6, 1) + FIELD(STATUS, CACHE_DONE, 5, 1) + FIELD(STATUS, CACHE_LOAD, 4, 1) + FIELD(STATUS, EFUSE_3_TBIT, 2, 1) + FIELD(STATUS, EFUSE_2_TBIT, 1, 1) + FIELD(STATUS, EFUSE_0_TBIT, 0, 1) +REG32(EFUSE_PGM_ADDR, 0xc) + FIELD(EFUSE_PGM_ADDR, EFUSE, 11, 2) + FIELD(EFUSE_PGM_ADDR, ROW, 5, 6) + FIELD(EFUSE_PGM_ADDR, COLUMN, 0, 5) +REG32(EFUSE_RD_ADDR, 0x10) + FIELD(EFUSE_RD_ADDR, EFUSE, 11, 2) + FIELD(EFUSE_RD_ADDR, ROW, 5, 6) +REG32(EFUSE_RD_DATA, 0x14) +REG32(TPGM, 0x18) + FIELD(TPGM, VALUE, 0, 16) +REG32(TRD, 0x1c) + FIELD(TRD, VALUE, 0, 8) +REG32(TSU_H_PS, 0x20) + FIELD(TSU_H_PS, VALUE, 0, 8) +REG32(TSU_H_PS_CS, 0x24) + FIELD(TSU_H_PS_CS, VALUE, 0, 8) +REG32(TSU_H_CS, 0x2c) + FIELD(TSU_H_CS, VALUE, 0, 4) +REG32(EFUSE_ISR, 0x30) + FIELD(EFUSE_ISR, APB_SLVERR, 31, 1) + FIELD(EFUSE_ISR, CACHE_ERROR, 4, 1) + FIELD(EFUSE_ISR, RD_ERROR, 3, 1) + FIELD(EFUSE_ISR, RD_DONE, 2, 1) + FIELD(EFUSE_ISR, PGM_ERROR, 1, 1) + FIELD(EFUSE_ISR, PGM_DONE, 0, 1) +REG32(EFUSE_IMR, 0x34) + FIELD(EFUSE_IMR, APB_SLVERR, 31, 1) + FIELD(EFUSE_IMR, CACHE_ERROR, 4, 1) + FIELD(EFUSE_IMR, RD_ERROR, 3, 1) + FIELD(EFUSE_IMR, RD_DONE, 2, 1) + FIELD(EFUSE_IMR, PGM_ERROR, 1, 1) + FIELD(EFUSE_IMR, PGM_DONE, 0, 1) +REG32(EFUSE_IER, 0x38) + FIELD(EFUSE_IER, APB_SLVERR, 31, 1) + FIELD(EFUSE_IER, CACHE_ERROR, 4, 1) + FIELD(EFUSE_IER, RD_ERROR, 3, 1) + FIELD(EFUSE_IER, RD_DONE, 2, 1) + FIELD(EFUSE_IER, PGM_ERROR, 1, 1) + FIELD(EFUSE_IER, PGM_DONE, 0, 1) +REG32(EFUSE_IDR, 0x3c) + FIELD(EFUSE_IDR, APB_SLVERR, 31, 1) + FIELD(EFUSE_IDR, CACHE_ERROR, 4, 1) + FIELD(EFUSE_IDR, RD_ERROR, 3, 1) + FIELD(EFUSE_IDR, RD_DONE, 2, 1) + FIELD(EFUSE_IDR, PGM_ERROR, 1, 1) + FIELD(EFUSE_IDR, PGM_DONE, 0, 1) +REG32(EFUSE_CACHE_LOAD, 0x40) + FIELD(EFUSE_CACHE_LOAD, LOAD, 0, 1) +REG32(EFUSE_PGM_LOCK, 0x44) + FIELD(EFUSE_PGM_LOCK, SPK_ID_LOCK, 0, 1) +REG32(EFUSE_AES_CRC, 0x48) +REG32(EFUSE_TBITS_PRGRMG_EN, 0x100) + FIELD(EFUSE_TBITS_PRGRMG_EN, TBITS_PRGRMG_EN, 3, 1) +REG32(DNA_0, 0x100c) +REG32(DNA_1, 0x1010) +REG32(DNA_2, 0x1014) +REG32(IPDISABLE, 0x1018) + FIELD(IPDISABLE, VCU_DIS, 8, 1) + FIELD(IPDISABLE, GPU_DIS, 5, 1) + FIELD(IPDISABLE, APU3_DIS, 3, 1) + FIELD(IPDISABLE, APU2_DIS, 2, 1) + FIELD(IPDISABLE, APU1_DIS, 1, 1) + FIELD(IPDISABLE, APU0_DIS, 0, 1) +REG32(SYSOSC_CTRL, 0x101c) + FIELD(SYSOSC_CTRL, SYSOSC_EN, 0, 1) +REG32(USER_0, 0x1020) +REG32(USER_1, 0x1024) +REG32(USER_2, 0x1028) +REG32(USER_3, 0x102c) +REG32(USER_4, 0x1030) +REG32(USER_5, 0x1034) +REG32(USER_6, 0x1038) +REG32(USER_7, 0x103c) +REG32(MISC_USER_CTRL, 0x1040) + FIELD(MISC_USER_CTRL, FPD_SC_EN_0, 14, 1) + FIELD(MISC_USER_CTRL, LPD_SC_EN_0, 11, 1) + FIELD(MISC_USER_CTRL, LBIST_EN, 10, 1) + FIELD(MISC_USER_CTRL, USR_WRLK_7, 7, 1) + FIELD(MISC_USER_CTRL, USR_WRLK_6, 6, 1) + 
FIELD(MISC_USER_CTRL, USR_WRLK_5, 5, 1) + FIELD(MISC_USER_CTRL, USR_WRLK_4, 4, 1) + FIELD(MISC_USER_CTRL, USR_WRLK_3, 3, 1) + FIELD(MISC_USER_CTRL, USR_WRLK_2, 2, 1) + FIELD(MISC_USER_CTRL, USR_WRLK_1, 1, 1) + FIELD(MISC_USER_CTRL, USR_WRLK_0, 0, 1) +REG32(ROM_RSVD, 0x1044) + FIELD(ROM_RSVD, PBR_BOOT_ERROR, 0, 3) +REG32(PUF_CHASH, 0x1050) +REG32(PUF_MISC, 0x1054) + FIELD(PUF_MISC, REGISTER_DIS, 31, 1) + FIELD(PUF_MISC, SYN_WRLK, 30, 1) + FIELD(PUF_MISC, SYN_INVLD, 29, 1) + FIELD(PUF_MISC, TEST2_DIS, 28, 1) + FIELD(PUF_MISC, UNUSED27, 27, 1) + FIELD(PUF_MISC, UNUSED26, 26, 1) + FIELD(PUF_MISC, UNUSED25, 25, 1) + FIELD(PUF_MISC, UNUSED24, 24, 1) + FIELD(PUF_MISC, AUX, 0, 24) +REG32(SEC_CTRL, 0x1058) + FIELD(SEC_CTRL, PPK1_INVLD, 30, 2) + FIELD(SEC_CTRL, PPK1_WRLK, 29, 1) + FIELD(SEC_CTRL, PPK0_INVLD, 27, 2) + FIELD(SEC_CTRL, PPK0_WRLK, 26, 1) + FIELD(SEC_CTRL, RSA_EN, 11, 15) + FIELD(SEC_CTRL, SEC_LOCK, 10, 1) + FIELD(SEC_CTRL, PROG_GATE_2, 9, 1) + FIELD(SEC_CTRL, PROG_GATE_1, 8, 1) + FIELD(SEC_CTRL, PROG_GATE_0, 7, 1) + FIELD(SEC_CTRL, DFT_DIS, 6, 1) + FIELD(SEC_CTRL, JTAG_DIS, 5, 1) + FIELD(SEC_CTRL, ERROR_DIS, 4, 1) + FIELD(SEC_CTRL, BBRAM_DIS, 3, 1) + FIELD(SEC_CTRL, ENC_ONLY, 2, 1) + FIELD(SEC_CTRL, AES_WRLK, 1, 1) + FIELD(SEC_CTRL, AES_RDLK, 0, 1) +REG32(SPK_ID, 0x105c) +REG32(PPK0_0, 0x10a0) +REG32(PPK0_1, 0x10a4) +REG32(PPK0_2, 0x10a8) +REG32(PPK0_3, 0x10ac) +REG32(PPK0_4, 0x10b0) +REG32(PPK0_5, 0x10b4) +REG32(PPK0_6, 0x10b8) +REG32(PPK0_7, 0x10bc) +REG32(PPK0_8, 0x10c0) +REG32(PPK0_9, 0x10c4) +REG32(PPK0_10, 0x10c8) +REG32(PPK0_11, 0x10cc) +REG32(PPK1_0, 0x10d0) +REG32(PPK1_1, 0x10d4) +REG32(PPK1_2, 0x10d8) +REG32(PPK1_3, 0x10dc) +REG32(PPK1_4, 0x10e0) +REG32(PPK1_5, 0x10e4) +REG32(PPK1_6, 0x10e8) +REG32(PPK1_7, 0x10ec) +REG32(PPK1_8, 0x10f0) +REG32(PPK1_9, 0x10f4) +REG32(PPK1_10, 0x10f8) +REG32(PPK1_11, 0x10fc) + +#define BIT_POS(ROW, COLUMN) (ROW * 32 + COLUMN) +#define R_MAX (R_PPK1_11 + 1) + +/* #define EFUSE_XOSC 26 */ + +/* + * eFUSE layout references: + * ZynqMP: UG1085 (v2.1) August 21, 2019, p.277, Table 12-13 + */ +#define EFUSE_AES_RDLK BIT_POS(22, 0) +#define EFUSE_AES_WRLK BIT_POS(22, 1) +#define EFUSE_ENC_ONLY BIT_POS(22, 2) +#define EFUSE_BBRAM_DIS BIT_POS(22, 3) +#define EFUSE_ERROR_DIS BIT_POS(22, 4) +#define EFUSE_JTAG_DIS BIT_POS(22, 5) +#define EFUSE_DFT_DIS BIT_POS(22, 6) +#define EFUSE_PROG_GATE_0 BIT_POS(22, 7) +#define EFUSE_PROG_GATE_1 BIT_POS(22, 8) +#define EFUSE_PROG_GATE_2 BIT_POS(22, 9) +#define EFUSE_SEC_LOCK BIT_POS(22, 10) +#define EFUSE_RSA_EN BIT_POS(22, 11) +#define EFUSE_RSA_EN14 BIT_POS(22, 25) +#define EFUSE_PPK0_WRLK BIT_POS(22, 26) +#define EFUSE_PPK0_INVLD BIT_POS(22, 27) +#define EFUSE_PPK0_INVLD_1 BIT_POS(22, 28) +#define EFUSE_PPK1_WRLK BIT_POS(22, 29) +#define EFUSE_PPK1_INVLD BIT_POS(22, 30) +#define EFUSE_PPK1_INVLD_1 BIT_POS(22, 31) + +/* Areas. 
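+ * Each START/END pair below gives the inclusive fuse-bit span of one area,
+ * using the same BIT_POS(row, column) = row * 32 + column encoding as the
+ * control bits above.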
*/ +#define EFUSE_TRIM_START BIT_POS(1, 0) +#define EFUSE_TRIM_END BIT_POS(1, 30) +#define EFUSE_DNA_START BIT_POS(3, 0) +#define EFUSE_DNA_END BIT_POS(5, 31) +#define EFUSE_AES_START BIT_POS(24, 0) +#define EFUSE_AES_END BIT_POS(31, 31) +#define EFUSE_ROM_START BIT_POS(17, 0) +#define EFUSE_ROM_END BIT_POS(17, 31) +#define EFUSE_IPDIS_START BIT_POS(6, 0) +#define EFUSE_IPDIS_END BIT_POS(6, 31) +#define EFUSE_USER_START BIT_POS(8, 0) +#define EFUSE_USER_END BIT_POS(15, 31) +#define EFUSE_BISR_START BIT_POS(32, 0) +#define EFUSE_BISR_END BIT_POS(39, 31) + +#define EFUSE_USER_CTRL_START BIT_POS(16, 0) +#define EFUSE_USER_CTRL_END BIT_POS(16, 16) +#define EFUSE_USER_CTRL_MASK ((uint32_t)MAKE_64BIT_MASK(0, 17)) + +#define EFUSE_PUF_CHASH_START BIT_POS(20, 0) +#define EFUSE_PUF_CHASH_END BIT_POS(20, 31) +#define EFUSE_PUF_MISC_START BIT_POS(21, 0) +#define EFUSE_PUF_MISC_END BIT_POS(21, 31) +#define EFUSE_PUF_SYN_WRLK BIT_POS(21, 30) + +#define EFUSE_SPK_START BIT_POS(23, 0) +#define EFUSE_SPK_END BIT_POS(23, 31) + +#define EFUSE_PPK0_START BIT_POS(40, 0) +#define EFUSE_PPK0_END BIT_POS(51, 31) +#define EFUSE_PPK1_START BIT_POS(52, 0) +#define EFUSE_PPK1_END BIT_POS(63, 31) + +#define EFUSE_CACHE_FLD(s, reg, field) \ + ARRAY_FIELD_DP32((s)->regs, reg, field, \ + (xlnx_efuse_get_row((s->efuse), EFUSE_ ## field) \ + >> (EFUSE_ ## field % 32))) + +#define EFUSE_CACHE_BIT(s, reg, field) \ + ARRAY_FIELD_DP32((s)->regs, reg, field, xlnx_efuse_get_bit((s->efuse), \ + EFUSE_ ## field)) + +#define FBIT_UNKNOWN (~0) + +QEMU_BUILD_BUG_ON(R_MAX != ARRAY_SIZE(((XlnxZynqMPEFuse *)0)->regs)); + +static void update_tbit_status(XlnxZynqMPEFuse *s) +{ + unsigned int check = xlnx_efuse_tbits_check(s->efuse); + uint32_t val = s->regs[R_STATUS]; + + val = FIELD_DP32(val, STATUS, EFUSE_0_TBIT, !!(check & (1 << 0))); + val = FIELD_DP32(val, STATUS, EFUSE_2_TBIT, !!(check & (1 << 1))); + val = FIELD_DP32(val, STATUS, EFUSE_3_TBIT, !!(check & (1 << 2))); + + s->regs[R_STATUS] = val; +} + +/* Update the u32 array from efuse bits. Slow but simple approach. */ +static void cache_sync_u32(XlnxZynqMPEFuse *s, unsigned int r_start, + unsigned int f_start, unsigned int f_end, + unsigned int f_written) +{ + uint32_t *u32 = &s->regs[r_start]; + unsigned int fbit, wbits = 0, u32_off = 0; + + /* Avoid working on bits that are not relevant. */ + if (f_written != FBIT_UNKNOWN + && (f_written < f_start || f_written > f_end)) { + return; + } + + for (fbit = f_start; fbit <= f_end; fbit++, wbits++) { + if (wbits == 32) { + /* Update the key offset. */ + u32_off += 1; + wbits = 0; + } + u32[u32_off] |= xlnx_efuse_get_bit(s->efuse, fbit) << wbits; + } +} + +/* + * Keep the syncs in bit order so we can bail out for the + * slower ones. + */ +static void zynqmp_efuse_sync_cache(XlnxZynqMPEFuse *s, unsigned int bit) +{ + EFUSE_CACHE_BIT(s, SEC_CTRL, AES_RDLK); + EFUSE_CACHE_BIT(s, SEC_CTRL, AES_WRLK); + EFUSE_CACHE_BIT(s, SEC_CTRL, ENC_ONLY); + EFUSE_CACHE_BIT(s, SEC_CTRL, BBRAM_DIS); + EFUSE_CACHE_BIT(s, SEC_CTRL, ERROR_DIS); + EFUSE_CACHE_BIT(s, SEC_CTRL, JTAG_DIS); + EFUSE_CACHE_BIT(s, SEC_CTRL, DFT_DIS); + EFUSE_CACHE_BIT(s, SEC_CTRL, PROG_GATE_0); + EFUSE_CACHE_BIT(s, SEC_CTRL, PROG_GATE_1); + EFUSE_CACHE_BIT(s, SEC_CTRL, PROG_GATE_2); + EFUSE_CACHE_BIT(s, SEC_CTRL, SEC_LOCK); + EFUSE_CACHE_BIT(s, SEC_CTRL, PPK0_WRLK); + EFUSE_CACHE_BIT(s, SEC_CTRL, PPK1_WRLK); + + EFUSE_CACHE_FLD(s, SEC_CTRL, RSA_EN); + EFUSE_CACHE_FLD(s, SEC_CTRL, PPK0_INVLD); + EFUSE_CACHE_FLD(s, SEC_CTRL, PPK1_INVLD); + + /* Update the tbits. 
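+ * (update_tbit_status() above mirrors the xlnx_efuse_tbits_check() result
+ * into the STATUS.EFUSE_{0,2,3}_TBIT fields.)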
*/ + update_tbit_status(s); + + /* Sync the various areas. */ + s->regs[R_MISC_USER_CTRL] = xlnx_efuse_get_row(s->efuse, + EFUSE_USER_CTRL_START) + & EFUSE_USER_CTRL_MASK; + s->regs[R_PUF_CHASH] = xlnx_efuse_get_row(s->efuse, EFUSE_PUF_CHASH_START); + s->regs[R_PUF_MISC] = xlnx_efuse_get_row(s->efuse, EFUSE_PUF_MISC_START); + + cache_sync_u32(s, R_DNA_0, EFUSE_DNA_START, EFUSE_DNA_END, bit); + + if (bit < EFUSE_AES_START) { + return; + } + + cache_sync_u32(s, R_ROM_RSVD, EFUSE_ROM_START, EFUSE_ROM_END, bit); + cache_sync_u32(s, R_IPDISABLE, EFUSE_IPDIS_START, EFUSE_IPDIS_END, bit); + cache_sync_u32(s, R_USER_0, EFUSE_USER_START, EFUSE_USER_END, bit); + cache_sync_u32(s, R_SPK_ID, EFUSE_SPK_START, EFUSE_SPK_END, bit); + cache_sync_u32(s, R_PPK0_0, EFUSE_PPK0_START, EFUSE_PPK0_END, bit); + cache_sync_u32(s, R_PPK1_0, EFUSE_PPK1_START, EFUSE_PPK1_END, bit); +} + +static void zynqmp_efuse_update_irq(XlnxZynqMPEFuse *s) +{ + bool pending = s->regs[R_EFUSE_ISR] & s->regs[R_EFUSE_IMR]; + qemu_set_irq(s->irq, pending); +} + +static void zynqmp_efuse_isr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(reg->opaque); + zynqmp_efuse_update_irq(s); +} + +static uint64_t zynqmp_efuse_ier_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(reg->opaque); + uint32_t val = val64; + + s->regs[R_EFUSE_IMR] |= val; + zynqmp_efuse_update_irq(s); + return 0; +} + +static uint64_t zynqmp_efuse_idr_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(reg->opaque); + uint32_t val = val64; + + s->regs[R_EFUSE_IMR] &= ~val; + zynqmp_efuse_update_irq(s); + return 0; +} + +static void zynqmp_efuse_pgm_addr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(reg->opaque); + unsigned bit = val64; + unsigned page = FIELD_EX32(bit, EFUSE_PGM_ADDR, EFUSE); + bool puf_prot = false; + const char *errmsg = NULL; + + /* Allow only valid array, and adjust for skipped array 1 */ + switch (page) { + case 0: + break; + case 2 ... 
3: + bit = FIELD_DP32(bit, EFUSE_PGM_ADDR, EFUSE, page - 1); + puf_prot = xlnx_efuse_get_bit(s->efuse, EFUSE_PUF_SYN_WRLK); + break; + default: + errmsg = "Invalid address"; + goto pgm_done; + } + + if (ARRAY_FIELD_EX32(s->regs, WR_LOCK, LOCK)) { + errmsg = "Array write-locked"; + goto pgm_done; + } + + if (!ARRAY_FIELD_EX32(s->regs, CFG, PGM_EN)) { + errmsg = "Array pgm-disabled"; + goto pgm_done; + } + + if (puf_prot) { + errmsg = "PUF_HD-store write-locked"; + goto pgm_done; + } + + if (ARRAY_FIELD_EX32(s->regs, SEC_CTRL, AES_WRLK) + && bit >= EFUSE_AES_START && bit <= EFUSE_AES_END) { + errmsg = "AES key-store Write-locked"; + goto pgm_done; + } + + if (!xlnx_efuse_set_bit(s->efuse, bit)) { + errmsg = "Write failed"; + } + + pgm_done: + if (!errmsg) { + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, PGM_ERROR, 0); + } else { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, PGM_ERROR, 1); + qemu_log_mask(LOG_GUEST_ERROR, + "%s - eFuse write error: %s; addr=0x%x\n", + path, errmsg, (unsigned)val64); + } + + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, PGM_DONE, 1); + zynqmp_efuse_update_irq(s); +} + +static void zynqmp_efuse_rd_addr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(reg->opaque); + g_autofree char *path = NULL; + + /* + * Grant reads only to allowed bits; reference sources: + * 1/ XilSKey - XilSKey_ZynqMp_EfusePs_ReadRow() + * 2/ UG1085, v2.0, table 12-13 + * (note: enumerates the masks as per described in + * references to avoid mental translation). + */ +#define COL_MASK(L_, H_) \ + ((uint32_t)MAKE_64BIT_MASK((L_), (1 + (H_) - (L_)))) + + static const uint32_t ary0_col_mask[] = { + /* XilSKey - XSK_ZYNQMP_EFUSEPS_TBITS_ROW */ + [0] = COL_MASK(28, 31), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_USR{0:7}_FUSE_ROW */ + [8] = COL_MASK(0, 31), [9] = COL_MASK(0, 31), + [10] = COL_MASK(0, 31), [11] = COL_MASK(0, 31), + [12] = COL_MASK(0, 31), [13] = COL_MASK(0, 31), + [14] = COL_MASK(0, 31), [15] = COL_MASK(0, 31), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_MISC_USR_CTRL_ROW */ + [16] = COL_MASK(0, 7) | COL_MASK(10, 16), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_PBR_BOOT_ERR_ROW */ + [17] = COL_MASK(0, 2), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_PUF_CHASH_ROW */ + [20] = COL_MASK(0, 31), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_PUF_AUX_ROW */ + [21] = COL_MASK(0, 23) | COL_MASK(29, 31), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_SEC_CTRL_ROW */ + [22] = COL_MASK(0, 31), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_SPK_ID_ROW */ + [23] = COL_MASK(0, 31), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_PPK0_START_ROW */ + [40] = COL_MASK(0, 31), [41] = COL_MASK(0, 31), + [42] = COL_MASK(0, 31), [43] = COL_MASK(0, 31), + [44] = COL_MASK(0, 31), [45] = COL_MASK(0, 31), + [46] = COL_MASK(0, 31), [47] = COL_MASK(0, 31), + [48] = COL_MASK(0, 31), [49] = COL_MASK(0, 31), + [50] = COL_MASK(0, 31), [51] = COL_MASK(0, 31), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_PPK1_START_ROW */ + [52] = COL_MASK(0, 31), [53] = COL_MASK(0, 31), + [54] = COL_MASK(0, 31), [55] = COL_MASK(0, 31), + [56] = COL_MASK(0, 31), [57] = COL_MASK(0, 31), + [58] = COL_MASK(0, 31), [59] = COL_MASK(0, 31), + [60] = COL_MASK(0, 31), [61] = COL_MASK(0, 31), + [62] = COL_MASK(0, 31), [63] = COL_MASK(0, 31), + }; + + uint32_t col_mask = COL_MASK(0, 31); +#undef COL_MASK + + uint32_t efuse_idx = s->regs[R_EFUSE_RD_ADDR]; + uint32_t efuse_ary = FIELD_EX32(efuse_idx, EFUSE_RD_ADDR, EFUSE); + uint32_t efuse_row = FIELD_EX32(efuse_idx, EFUSE_RD_ADDR, ROW); + + switch (efuse_ary) { + case 0: /* Various */ + if 
(efuse_row >= ARRAY_SIZE(ary0_col_mask)) { + goto denied; + } + + col_mask = ary0_col_mask[efuse_row]; + if (!col_mask) { + goto denied; + } + break; + case 2: /* PUF helper data, adjust for skipped array 1 */ + case 3: + val64 = FIELD_DP32(efuse_idx, EFUSE_RD_ADDR, EFUSE, efuse_ary - 1); + break; + default: + goto denied; + } + + s->regs[R_EFUSE_RD_DATA] = xlnx_efuse_get_row(s->efuse, val64) & col_mask; + + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, RD_ERROR, 0); + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, RD_DONE, 1); + zynqmp_efuse_update_irq(s); + return; + + denied: + path = object_get_canonical_path(OBJECT(s)); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Denied efuse read from array %u, row %u\n", + path, efuse_ary, efuse_row); + + s->regs[R_EFUSE_RD_DATA] = 0; + + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, RD_ERROR, 1); + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, RD_DONE, 0); + zynqmp_efuse_update_irq(s); +} + +static void zynqmp_efuse_aes_crc_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(reg->opaque); + bool ok; + + ok = xlnx_efuse_k256_check(s->efuse, (uint32_t)val64, EFUSE_AES_START); + + ARRAY_FIELD_DP32(s->regs, STATUS, AES_CRC_PASS, (ok ? 1 : 0)); + ARRAY_FIELD_DP32(s->regs, STATUS, AES_CRC_DONE, 1); + + s->regs[R_EFUSE_AES_CRC] = 0; /* crc value is write-only */ +} + +static uint64_t zynqmp_efuse_cache_load_prew(RegisterInfo *reg, + uint64_t valu64) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(reg->opaque); + + if (valu64 & R_EFUSE_CACHE_LOAD_LOAD_MASK) { + zynqmp_efuse_sync_cache(s, FBIT_UNKNOWN); + ARRAY_FIELD_DP32(s->regs, STATUS, CACHE_DONE, 1); + zynqmp_efuse_update_irq(s); + } + + return 0; +} + +static uint64_t zynqmp_efuse_wr_lock_prew(RegisterInfo *reg, uint64_t val) +{ + return val == 0xDF0D ? 0 : 1; +} + +static RegisterAccessInfo zynqmp_efuse_regs_info[] = { + { .name = "WR_LOCK", .addr = A_WR_LOCK, + .reset = 0x1, + .pre_write = zynqmp_efuse_wr_lock_prew, + },{ .name = "CFG", .addr = A_CFG, + },{ .name = "STATUS", .addr = A_STATUS, + .rsvd = 0x8, + .ro = 0xff, + },{ .name = "EFUSE_PGM_ADDR", .addr = A_EFUSE_PGM_ADDR, + .post_write = zynqmp_efuse_pgm_addr_postw + },{ .name = "EFUSE_RD_ADDR", .addr = A_EFUSE_RD_ADDR, + .rsvd = 0x1f, + .post_write = zynqmp_efuse_rd_addr_postw, + },{ .name = "EFUSE_RD_DATA", .addr = A_EFUSE_RD_DATA, + .ro = 0xffffffff, + },{ .name = "TPGM", .addr = A_TPGM, + },{ .name = "TRD", .addr = A_TRD, + .reset = 0x1b, + },{ .name = "TSU_H_PS", .addr = A_TSU_H_PS, + .reset = 0xff, + },{ .name = "TSU_H_PS_CS", .addr = A_TSU_H_PS_CS, + .reset = 0xb, + },{ .name = "TSU_H_CS", .addr = A_TSU_H_CS, + .reset = 0x7, + },{ .name = "EFUSE_ISR", .addr = A_EFUSE_ISR, + .rsvd = 0x7fffffe0, + .w1c = 0x8000001f, + .post_write = zynqmp_efuse_isr_postw, + },{ .name = "EFUSE_IMR", .addr = A_EFUSE_IMR, + .reset = 0x8000001f, + .rsvd = 0x7fffffe0, + .ro = 0xffffffff, + },{ .name = "EFUSE_IER", .addr = A_EFUSE_IER, + .rsvd = 0x7fffffe0, + .pre_write = zynqmp_efuse_ier_prew, + },{ .name = "EFUSE_IDR", .addr = A_EFUSE_IDR, + .rsvd = 0x7fffffe0, + .pre_write = zynqmp_efuse_idr_prew, + },{ .name = "EFUSE_CACHE_LOAD", .addr = A_EFUSE_CACHE_LOAD, + .pre_write = zynqmp_efuse_cache_load_prew, + },{ .name = "EFUSE_PGM_LOCK", .addr = A_EFUSE_PGM_LOCK, + },{ .name = "EFUSE_AES_CRC", .addr = A_EFUSE_AES_CRC, + .post_write = zynqmp_efuse_aes_crc_postw, + },{ .name = "EFUSE_TBITS_PRGRMG_EN", .addr = A_EFUSE_TBITS_PRGRMG_EN, + .reset = R_EFUSE_TBITS_PRGRMG_EN_TBITS_PRGRMG_EN_MASK, + },{ .name = "DNA_0", .addr = A_DNA_0, + .ro = 0xffffffff, + },{ .name = "DNA_1", .addr 
= A_DNA_1, + .ro = 0xffffffff, + },{ .name = "DNA_2", .addr = A_DNA_2, + .ro = 0xffffffff, + },{ .name = "IPDISABLE", .addr = A_IPDISABLE, + .ro = 0xffffffff, + },{ .name = "SYSOSC_CTRL", .addr = A_SYSOSC_CTRL, + .ro = 0xffffffff, + },{ .name = "USER_0", .addr = A_USER_0, + .ro = 0xffffffff, + },{ .name = "USER_1", .addr = A_USER_1, + .ro = 0xffffffff, + },{ .name = "USER_2", .addr = A_USER_2, + .ro = 0xffffffff, + },{ .name = "USER_3", .addr = A_USER_3, + .ro = 0xffffffff, + },{ .name = "USER_4", .addr = A_USER_4, + .ro = 0xffffffff, + },{ .name = "USER_5", .addr = A_USER_5, + .ro = 0xffffffff, + },{ .name = "USER_6", .addr = A_USER_6, + .ro = 0xffffffff, + },{ .name = "USER_7", .addr = A_USER_7, + .ro = 0xffffffff, + },{ .name = "MISC_USER_CTRL", .addr = A_MISC_USER_CTRL, + .ro = 0xffffffff, + },{ .name = "ROM_RSVD", .addr = A_ROM_RSVD, + .ro = 0xffffffff, + },{ .name = "PUF_CHASH", .addr = A_PUF_CHASH, + .ro = 0xffffffff, + },{ .name = "PUF_MISC", .addr = A_PUF_MISC, + .ro = 0xffffffff, + },{ .name = "SEC_CTRL", .addr = A_SEC_CTRL, + .ro = 0xffffffff, + },{ .name = "SPK_ID", .addr = A_SPK_ID, + .ro = 0xffffffff, + },{ .name = "PPK0_0", .addr = A_PPK0_0, + .ro = 0xffffffff, + },{ .name = "PPK0_1", .addr = A_PPK0_1, + .ro = 0xffffffff, + },{ .name = "PPK0_2", .addr = A_PPK0_2, + .ro = 0xffffffff, + },{ .name = "PPK0_3", .addr = A_PPK0_3, + .ro = 0xffffffff, + },{ .name = "PPK0_4", .addr = A_PPK0_4, + .ro = 0xffffffff, + },{ .name = "PPK0_5", .addr = A_PPK0_5, + .ro = 0xffffffff, + },{ .name = "PPK0_6", .addr = A_PPK0_6, + .ro = 0xffffffff, + },{ .name = "PPK0_7", .addr = A_PPK0_7, + .ro = 0xffffffff, + },{ .name = "PPK0_8", .addr = A_PPK0_8, + .ro = 0xffffffff, + },{ .name = "PPK0_9", .addr = A_PPK0_9, + .ro = 0xffffffff, + },{ .name = "PPK0_10", .addr = A_PPK0_10, + .ro = 0xffffffff, + },{ .name = "PPK0_11", .addr = A_PPK0_11, + .ro = 0xffffffff, + },{ .name = "PPK1_0", .addr = A_PPK1_0, + .ro = 0xffffffff, + },{ .name = "PPK1_1", .addr = A_PPK1_1, + .ro = 0xffffffff, + },{ .name = "PPK1_2", .addr = A_PPK1_2, + .ro = 0xffffffff, + },{ .name = "PPK1_3", .addr = A_PPK1_3, + .ro = 0xffffffff, + },{ .name = "PPK1_4", .addr = A_PPK1_4, + .ro = 0xffffffff, + },{ .name = "PPK1_5", .addr = A_PPK1_5, + .ro = 0xffffffff, + },{ .name = "PPK1_6", .addr = A_PPK1_6, + .ro = 0xffffffff, + },{ .name = "PPK1_7", .addr = A_PPK1_7, + .ro = 0xffffffff, + },{ .name = "PPK1_8", .addr = A_PPK1_8, + .ro = 0xffffffff, + },{ .name = "PPK1_9", .addr = A_PPK1_9, + .ro = 0xffffffff, + },{ .name = "PPK1_10", .addr = A_PPK1_10, + .ro = 0xffffffff, + },{ .name = "PPK1_11", .addr = A_PPK1_11, + .ro = 0xffffffff, + } +}; + +static void zynqmp_efuse_reg_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + RegisterInfoArray *reg_array = opaque; + XlnxZynqMPEFuse *s; + Object *dev; + + assert(reg_array != NULL); + + dev = reg_array->mem.owner; + assert(dev); + + s = XLNX_ZYNQMP_EFUSE(dev); + + if (addr != A_WR_LOCK && s->regs[R_WR_LOCK]) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + qemu_log_mask(LOG_GUEST_ERROR, + "%s[reg_0x%02lx]: Attempt to write locked register.\n", + path, (long)addr); + } else { + register_write_memory(opaque, addr, data, size); + } +} + +static const MemoryRegionOps zynqmp_efuse_ops = { + .read = register_read_memory, + .write = zynqmp_efuse_reg_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void zynqmp_efuse_register_reset(RegisterInfo *reg) +{ + if (!reg->data || !reg->access) { + 
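+        /* Nothing to do for map entries without a backing register. */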
return; + } + + /* Reset must not trigger some registers' writers */ + switch (reg->access->addr) { + case A_EFUSE_AES_CRC: + *(uint32_t *)reg->data = reg->access->reset; + return; + } + + register_reset(reg); +} + +static void zynqmp_efuse_reset(DeviceState *dev) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(dev); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { + zynqmp_efuse_register_reset(&s->regs_info[i]); + } + + zynqmp_efuse_sync_cache(s, FBIT_UNKNOWN); + ARRAY_FIELD_DP32(s->regs, STATUS, CACHE_DONE, 1); + zynqmp_efuse_update_irq(s); +} + +static void zynqmp_efuse_realize(DeviceState *dev, Error **errp) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(dev); + + if (!s->efuse) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + error_setg(errp, "%s.efuse: link property not connected to XLNX-EFUSE", + path); + return; + } + + s->efuse->dev = dev; +} + +static void zynqmp_efuse_init(Object *obj) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + RegisterInfoArray *reg_array; + + reg_array = + register_init_block32(DEVICE(obj), zynqmp_efuse_regs_info, + ARRAY_SIZE(zynqmp_efuse_regs_info), + s->regs_info, s->regs, + &zynqmp_efuse_ops, + ZYNQMP_EFUSE_ERR_DEBUG, + R_MAX * 4); + + sysbus_init_mmio(sbd, &reg_array->mem); + sysbus_init_irq(sbd, &s->irq); +} + +static const VMStateDescription vmstate_efuse = { + .name = TYPE_XLNX_ZYNQMP_EFUSE, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(regs, XlnxZynqMPEFuse, R_MAX), + VMSTATE_END_OF_LIST(), + } +}; + +static Property zynqmp_efuse_props[] = { + DEFINE_PROP_LINK("efuse", + XlnxZynqMPEFuse, efuse, + TYPE_XLNX_EFUSE, XlnxEFuse *), + + DEFINE_PROP_END_OF_LIST(), +}; + +static void zynqmp_efuse_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = zynqmp_efuse_reset; + dc->realize = zynqmp_efuse_realize; + dc->vmsd = &vmstate_efuse; + device_class_set_props(dc, zynqmp_efuse_props); +} + + +static const TypeInfo efuse_info = { + .name = TYPE_XLNX_ZYNQMP_EFUSE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XlnxZynqMPEFuse), + .class_init = zynqmp_efuse_class_init, + .instance_init = zynqmp_efuse_init, +}; + +static void efuse_register_types(void) +{ + type_register_static(&efuse_info); +} + +type_init(efuse_register_types) diff --git a/hw/openrisc/openrisc_sim.c b/hw/openrisc/openrisc_sim.c index 39f1d344ae9..73fe383c2de 100644 --- a/hw/openrisc/openrisc_sim.c +++ b/hw/openrisc/openrisc_sim.c @@ -29,7 +29,6 @@ #include "net/net.h" #include "hw/loader.h" #include "hw/qdev-properties.h" -#include "exec/address-spaces.h" #include "sysemu/sysemu.h" #include "hw/sysbus.h" #include "sysemu/qtest.h" diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c index ec9907917eb..20099a8ae31 100644 --- a/hw/pci-bridge/gen_pcie_root_port.c +++ b/hw/pci-bridge/gen_pcie_root_port.c @@ -28,6 +28,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(GenPCIERootPort, GEN_PCIE_ROOT_PORT) (GEN_PCIE_ROOT_PORT_AER_OFFSET + PCI_ERR_SIZEOF) #define GEN_PCIE_ROOT_PORT_MSIX_NR_VECTOR 1 +#define GEN_PCIE_ROOT_DEFAULT_IO_RANGE 4096 struct GenPCIERootPort { /*< private >*/ @@ -75,6 +76,7 @@ static bool gen_rp_test_migrate_msix(void *opaque, int version_id) static void gen_rp_realize(DeviceState *dev, Error **errp) { PCIDevice *d = PCI_DEVICE(dev); + PCIESlot *s = PCIE_SLOT(d); GenPCIERootPort *grp = GEN_PCIE_ROOT_PORT(d); PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(d); Error 
*local_err = NULL; @@ -85,6 +87,9 @@ static void gen_rp_realize(DeviceState *dev, Error **errp) return; } + if (grp->res_reserve.io == -1 && s->hotplug && !s->native_hotplug) { + grp->res_reserve.io = GEN_PCIE_ROOT_DEFAULT_IO_RANGE; + } int rc = pci_bridge_qemu_reserve_cap_init(d, 0, grp->res_reserve, errp); diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index aedded10642..10e6e7c2ab0 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -57,6 +57,7 @@ struct PXBDev { uint8_t bus_nr; uint16_t numa_node; + bool bypass_iommu; }; static PXBDev *convert_to_pxb(PCIDevice *dev) @@ -244,7 +245,7 @@ static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp) } else { bus = pci_root_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS); bds = qdev_new("pci-bridge"); - bds->id = dev_name; + bds->id = g_strdup(dev_name); qdev_prop_set_uint8(bds, PCI_BRIDGE_DEV_PROP_CHASSIS_NR, pxb->bus_nr); qdev_prop_set_bit(bds, PCI_BRIDGE_DEV_PROP_SHPC, false); } @@ -255,6 +256,7 @@ static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp) bus->map_irq = pxb_map_irq_fn; PCI_HOST_BRIDGE(ds)->bus = bus; + PCI_HOST_BRIDGE(ds)->bypass_iommu = pxb->bypass_iommu; pxb_register_bus(dev, bus, &local_err); if (local_err) { @@ -301,6 +303,7 @@ static Property pxb_dev_properties[] = { /* Note: 0 is not a legal PXB bus number. */ DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0), DEFINE_PROP_UINT16("numa_node", PXBDev, numa_node, NUMA_NODE_UNASSIGNED), + DEFINE_PROP_BOOL("bypass_iommu", PXBDev, bypass_iommu, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/pci-host/Kconfig b/hw/pci-host/Kconfig index 2ccc96f02ce..2b5f7d58cc5 100644 --- a/hw/pci-host/Kconfig +++ b/hw/pci-host/Kconfig @@ -6,7 +6,7 @@ config XEN_IGD_PASSTHROUGH default y depends on XEN && PCI_I440FX -config PREP_PCI +config RAVEN_PCI bool select PCI select OR_IRQ @@ -72,3 +72,8 @@ config REMOTE_PCIHOST config SH_PCI bool select PCI + +config MV64361 + bool + select PCI + select I8259 diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c index 2a2db7cea69..a57e81e3a97 100644 --- a/hw/pci-host/bonito.c +++ b/hw/pci-host/bonito.c @@ -49,10 +49,10 @@ #include "migration/vmstate.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" -#include "exec/address-spaces.h" #include "hw/misc/unimp.h" #include "hw/registerfields.h" #include "qom/object.h" +#include "trace.h" /* #define DEBUG_BONITO */ @@ -186,7 +186,8 @@ FIELD(BONGENCFG, PCIQUEUE, 12, 1) #define BONITO_PCICONF_IDSEL_OFFSET 11 #define BONITO_PCICONF_FUN_MASK 0x700 /* [10:8] */ #define BONITO_PCICONF_FUN_OFFSET 8 -#define BONITO_PCICONF_REG_MASK 0xFC +#define BONITO_PCICONF_REG_MASK_DS (~3) /* Per datasheet */ +#define BONITO_PCICONF_REG_MASK_HW 0xff /* As seen running PMON */ #define BONITO_PCICONF_REG_OFFSET 0 @@ -465,7 +466,7 @@ static uint32_t bonito_sbridge_pciaddr(void *opaque, hwaddr addr) BONITO_PCICONF_IDSEL_OFFSET; devno = ctz32(idsel); funno = (cfgaddr & BONITO_PCICONF_FUN_MASK) >> BONITO_PCICONF_FUN_OFFSET; - regno = (cfgaddr & BONITO_PCICONF_REG_MASK) >> BONITO_PCICONF_REG_OFFSET; + regno = (cfgaddr & BONITO_PCICONF_REG_MASK_HW) >> BONITO_PCICONF_REG_OFFSET; if (idsel == 0) { error_report("error in bonito pci config address 0x" TARGET_FMT_plx @@ -496,6 +497,9 @@ static void bonito_spciconf_write(void *opaque, hwaddr addr, uint64_t val, if (pciaddr == 0xffffffff) { return; } + if (addr & ~BONITO_PCICONF_REG_MASK_DS) { + trace_bonito_spciconf_small_access(addr, size); + } /* set 
the pci address in s->config_reg */ phb->config_reg = (pciaddr) | (1u << 31); @@ -522,6 +526,9 @@ static uint64_t bonito_spciconf_read(void *opaque, hwaddr addr, unsigned size) if (pciaddr == 0xffffffff) { return MAKE_64BIT_MASK(0, size * 8); } + if (addr & ~BONITO_PCICONF_REG_MASK_DS) { + trace_bonito_spciconf_small_access(addr, size); + } /* set the pci address in s->config_reg */ phb->config_reg = (pciaddr) | (1u << 31); diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c index 0f01f13a6ed..e7e162a00ab 100644 --- a/hw/pci-host/gpex-acpi.c +++ b/hw/pci-host/gpex-acpi.c @@ -112,26 +112,10 @@ static void acpi_dsdt_add_pci_osc(Aml *dev) UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D"); ifctx = aml_if(aml_equal(aml_arg(0), UUID)); ifctx1 = aml_if(aml_equal(aml_arg(2), aml_int(0))); - uint8_t byte_list[] = { - 0x1 << 0 /* support for functions other than function 0 */ | - 0x1 << 5 /* support for function 5 */ - }; - buf = aml_buffer(ARRAY_SIZE(byte_list), byte_list); + uint8_t byte_list[1] = {1}; + buf = aml_buffer(1, byte_list); aml_append(ifctx1, aml_return(buf)); aml_append(ifctx, ifctx1); - - /* - * PCI Firmware Specification 3.1 - * 4.6.5. _DSM for Ignoring PCI Boot Configurations - */ - /* Arg2: Function Index: 5 */ - ifctx1 = aml_if(aml_equal(aml_arg(2), aml_int(5))); - /* - * 0 - The operating system must not ignore the PCI configuration that - * firmware has done at boot time. - */ - aml_append(ifctx1, aml_return(aml_int(0))); - aml_append(ifctx, ifctx1); aml_append(method, ifctx); byte_list[0] = 0; diff --git a/hw/pci-host/gpex.c b/hw/pci-host/gpex.c index 2bdbe7b4561..a6752fac5e8 100644 --- a/hw/pci-host/gpex.c +++ b/hw/pci-host/gpex.c @@ -83,12 +83,51 @@ static void gpex_host_realize(DeviceState *dev, Error **errp) int i; pcie_host_mmcfg_init(pex, PCIE_MMCFG_SIZE_MAX); + sysbus_init_mmio(sbd, &pex->mmio); + + /* + * Note that the MemoryRegions io_mmio and io_ioport that we pass + * to pci_register_root_bus() are not the same as the + * MemoryRegions io_mmio_window and io_ioport_window that we + * expose as SysBus MRs. The difference is in the behaviour of + * accesses to addresses where no PCI device has been mapped. + * + * io_mmio and io_ioport are the underlying PCI view of the PCI + * address space, and when a PCI device does a bus master access + * to a bad address this is reported back to it as a transaction + * failure. + * + * io_mmio_window and io_ioport_window implement "unmapped + * addresses read as -1 and ignore writes"; this is traditional + * x86 PC behaviour, which is not mandated by the PCI spec proper + * but expected by much PCI-using guest software, including Linux. + * + * In the interests of not being unnecessarily surprising, we + * implement it in the gpex PCI host controller, by providing the + * _window MRs, which are containers with io ops that implement + * the 'background' behaviour and which hold the real PCI MRs as + * subregions. 
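+     *
+     * (Guest-visible effect, as an illustration: with the default value of
+     * the "allow-unmapped-accesses" property defined further below, a read
+     * from an address in these windows that no PCI device claims returns -1
+     * and a write is discarded; with the property set to false such
+     * accesses abort as before.)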
+ */ memory_region_init(&s->io_mmio, OBJECT(s), "gpex_mmio", UINT64_MAX); memory_region_init(&s->io_ioport, OBJECT(s), "gpex_ioport", 64 * 1024); - sysbus_init_mmio(sbd, &pex->mmio); - sysbus_init_mmio(sbd, &s->io_mmio); - sysbus_init_mmio(sbd, &s->io_ioport); + if (s->allow_unmapped_accesses) { + memory_region_init_io(&s->io_mmio_window, OBJECT(s), + &unassigned_io_ops, OBJECT(s), + "gpex_mmio_window", UINT64_MAX); + memory_region_init_io(&s->io_ioport_window, OBJECT(s), + &unassigned_io_ops, OBJECT(s), + "gpex_ioport_window", 64 * 1024); + + memory_region_add_subregion(&s->io_mmio_window, 0, &s->io_mmio); + memory_region_add_subregion(&s->io_ioport_window, 0, &s->io_ioport); + sysbus_init_mmio(sbd, &s->io_mmio_window); + sysbus_init_mmio(sbd, &s->io_ioport_window); + } else { + sysbus_init_mmio(sbd, &s->io_mmio); + sysbus_init_mmio(sbd, &s->io_ioport); + } + for (i = 0; i < GPEX_NUM_IRQS; i++) { sysbus_init_irq(sbd, &s->irq[i]); s->irq_num[i] = -1; @@ -108,6 +147,16 @@ static const char *gpex_host_root_bus_path(PCIHostState *host_bridge, return "0000:00"; } +static Property gpex_host_properties[] = { + /* + * Permit CPU accesses to unmapped areas of the PIO and MMIO windows + * (discarding writes and returning -1 for reads) rather than aborting. + */ + DEFINE_PROP_BOOL("allow-unmapped-accesses", GPEXHost, + allow_unmapped_accesses, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void gpex_host_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -117,6 +166,7 @@ static void gpex_host_class_init(ObjectClass *klass, void *data) dc->realize = gpex_host_realize; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->fw_name = "pci"; + device_class_set_props(dc, gpex_host_properties); } static void gpex_host_initfn(Object *obj) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index 28c9bae8994..e08716142b6 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -314,14 +314,6 @@ PCIBus *i440fx_init(const char *host_type, const char *pci_type, return b; } -PCIBus *find_i440fx(void) -{ - PCIHostState *s = OBJECT_CHECK(PCIHostState, - object_resolve_path("/machine/i440fx", NULL), - TYPE_PCI_HOST_BRIDGE); - return s ? 
s->bus : NULL; -} - static void i440fx_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); diff --git a/hw/pci-host/meson.build b/hw/pci-host/meson.build index 87a896973e7..4c4f39c15c6 100644 --- a/hw/pci-host/meson.build +++ b/hw/pci-host/meson.build @@ -3,7 +3,7 @@ pci_ss.add(when: 'CONFIG_PAM', if_true: files('pam.c')) pci_ss.add(when: 'CONFIG_PCI_BONITO', if_true: files('bonito.c')) pci_ss.add(when: 'CONFIG_PCI_EXPRESS_DESIGNWARE', if_true: files('designware.c')) pci_ss.add(when: 'CONFIG_PCI_EXPRESS_GENERIC_BRIDGE', if_true: files('gpex.c')) -pci_ss.add(when: 'CONFIG_ACPI', if_true: files('gpex-acpi.c')) +pci_ss.add(when: ['CONFIG_PCI_EXPRESS_GENERIC_BRIDGE', 'CONFIG_ACPI'], if_true: files('gpex-acpi.c')) pci_ss.add(when: 'CONFIG_PCI_EXPRESS_Q35', if_true: files('q35.c')) pci_ss.add(when: 'CONFIG_PCI_EXPRESS_XILINX', if_true: files('xilinx-pcie.c')) pci_ss.add(when: 'CONFIG_PCI_I440FX', if_true: files('i440fx.c')) @@ -13,12 +13,14 @@ pci_ss.add(when: 'CONFIG_REMOTE_PCIHOST', if_true: files('remote.c')) pci_ss.add(when: 'CONFIG_SH_PCI', if_true: files('sh_pci.c')) # PPC devices -pci_ss.add(when: 'CONFIG_PREP_PCI', if_true: files('prep.c')) +pci_ss.add(when: 'CONFIG_RAVEN_PCI', if_true: files('raven.c')) pci_ss.add(when: 'CONFIG_GRACKLE_PCI', if_true: files('grackle.c')) # NewWorld PowerMac pci_ss.add(when: 'CONFIG_UNIN_PCI', if_true: files('uninorth.c')) # PowerPC E500 boards pci_ss.add(when: 'CONFIG_PPCE500_PCI', if_true: files('ppce500.c')) +# Pegasos2 +pci_ss.add(when: 'CONFIG_MV64361', if_true: files('mv64361.c')) # ARM devices pci_ss.add(when: 'CONFIG_VERSATILE_PCI', if_true: files('versatile.c')) diff --git a/hw/pci-host/mv64361.c b/hw/pci-host/mv64361.c new file mode 100644 index 00000000000..00b3ff7d909 --- /dev/null +++ b/hw/pci-host/mv64361.c @@ -0,0 +1,951 @@ +/* + * Marvell Discovery II MV64361 System Controller for + * QEMU PowerPC CHRP (Genesi/bPlan Pegasos II) hardware System Emulator + * + * Copyright (c) 2018-2020 BALATON Zoltan + * + * This work is licensed under the GNU GPL license version 2 or later. 
+ * + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "hw/hw.h" +#include "hw/sysbus.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" +#include "hw/irq.h" +#include "hw/intc/i8259.h" +#include "hw/qdev-properties.h" +#include "exec/address-spaces.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "trace.h" +#include "hw/pci-host/mv64361.h" +#include "mv643xx.h" + +#define TYPE_MV64361_PCI_BRIDGE "mv64361-pcibridge" + +static void mv64361_pcibridge_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->vendor_id = PCI_VENDOR_ID_MARVELL; + k->device_id = PCI_DEVICE_ID_MARVELL_MV6436X; + k->class_id = PCI_CLASS_BRIDGE_HOST; + /* + * PCI-facing part of the host bridge, + * not usable without the host-facing part + */ + dc->user_creatable = false; +} + +static const TypeInfo mv64361_pcibridge_info = { + .name = TYPE_MV64361_PCI_BRIDGE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PCIDevice), + .class_init = mv64361_pcibridge_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { }, + }, +}; + + +#define TYPE_MV64361_PCI "mv64361-pcihost" +OBJECT_DECLARE_SIMPLE_TYPE(MV64361PCIState, MV64361_PCI) + +struct MV64361PCIState { + PCIHostState parent_obj; + + uint8_t index; + MemoryRegion io; + MemoryRegion mem; + qemu_irq irq[PCI_NUM_PINS]; + + uint32_t io_base; + uint32_t io_size; + uint32_t mem_base[4]; + uint32_t mem_size[4]; + uint64_t remap[5]; +}; + +static int mv64361_pcihost_map_irq(PCIDevice *pci_dev, int n) +{ + return (n + PCI_SLOT(pci_dev->devfn)) % PCI_NUM_PINS; +} + +static void mv64361_pcihost_set_irq(void *opaque, int n, int level) +{ + MV64361PCIState *s = opaque; + qemu_set_irq(s->irq[n], level); +} + +static void mv64361_pcihost_realize(DeviceState *dev, Error **errp) +{ + MV64361PCIState *s = MV64361_PCI(dev); + PCIHostState *h = PCI_HOST_BRIDGE(dev); + char *name; + + name = g_strdup_printf("pci%d-io", s->index); + memory_region_init(&s->io, OBJECT(dev), name, 0x10000); + g_free(name); + name = g_strdup_printf("pci%d-mem", s->index); + memory_region_init(&s->mem, OBJECT(dev), name, 1ULL << 32); + g_free(name); + name = g_strdup_printf("pci.%d", s->index); + h->bus = pci_register_root_bus(dev, name, mv64361_pcihost_set_irq, + mv64361_pcihost_map_irq, dev, + &s->mem, &s->io, 0, 4, TYPE_PCI_BUS); + g_free(name); + pci_create_simple(h->bus, 0, TYPE_MV64361_PCI_BRIDGE); +} + +static Property mv64361_pcihost_props[] = { + DEFINE_PROP_UINT8("index", MV64361PCIState, index, 0), + DEFINE_PROP_END_OF_LIST() +}; + +static void mv64361_pcihost_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = mv64361_pcihost_realize; + device_class_set_props(dc, mv64361_pcihost_props); + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); +} + +static const TypeInfo mv64361_pcihost_info = { + .name = TYPE_MV64361_PCI, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_size = sizeof(MV64361PCIState), + .class_init = mv64361_pcihost_class_init, +}; + +static void mv64361_pci_register_types(void) +{ + type_register_static(&mv64361_pcihost_info); + type_register_static(&mv64361_pcibridge_info); +} + +type_init(mv64361_pci_register_types) + + +OBJECT_DECLARE_SIMPLE_TYPE(MV64361State, MV64361) + +struct MV64361State { + SysBusDevice parent_obj; + + MemoryRegion regs; + MV64361PCIState pci[2]; + MemoryRegion cpu_win[19]; + qemu_irq cpu_irq; + + 
/* registers state */ + uint32_t cpu_conf; + uint32_t regs_base; + uint32_t base_addr_enable; + uint64_t main_int_cr; + uint64_t cpu0_int_mask; + uint32_t gpp_io; + uint32_t gpp_level; + uint32_t gpp_value; + uint32_t gpp_int_cr; + uint32_t gpp_int_mask; + bool gpp_int_level; +}; + +enum mv64361_irq_cause { + MV64361_IRQ_DEVERR = 1, + MV64361_IRQ_DMAERR = 2, + MV64361_IRQ_CPUERR = 3, + MV64361_IRQ_IDMA0 = 4, + MV64361_IRQ_IDMA1 = 5, + MV64361_IRQ_IDMA2 = 6, + MV64361_IRQ_IDMA3 = 7, + MV64361_IRQ_TIMER0 = 8, + MV64361_IRQ_TIMER1 = 9, + MV64361_IRQ_TIMER2 = 10, + MV64361_IRQ_TIMER3 = 11, + MV64361_IRQ_PCI0 = 12, + MV64361_IRQ_SRAMERR = 13, + MV64361_IRQ_GBEERR = 14, + MV64361_IRQ_CERR = 15, + MV64361_IRQ_PCI1 = 16, + MV64361_IRQ_DRAMERR = 17, + MV64361_IRQ_WDNMI = 18, + MV64361_IRQ_WDE = 19, + MV64361_IRQ_PCI0IN = 20, + MV64361_IRQ_PCI0OUT = 21, + MV64361_IRQ_PCI1IN = 22, + MV64361_IRQ_PCI1OUT = 23, + MV64361_IRQ_P1_GPP0_7 = 24, + MV64361_IRQ_P1_GPP8_15 = 25, + MV64361_IRQ_P1_GPP16_23 = 26, + MV64361_IRQ_P1_GPP24_31 = 27, + MV64361_IRQ_P1_CPU_DB = 28, + /* 29-31: reserved */ + MV64361_IRQ_GBE0 = 32, + MV64361_IRQ_GBE1 = 33, + MV64361_IRQ_GBE2 = 34, + /* 35: reserved */ + MV64361_IRQ_SDMA0 = 36, + MV64361_IRQ_TWSI = 37, + MV64361_IRQ_SDMA1 = 38, + MV64361_IRQ_BRG = 39, + MV64361_IRQ_MPSC0 = 40, + MV64361_IRQ_MPSC1 = 41, + MV64361_IRQ_G0RX = 42, + MV64361_IRQ_G0TX = 43, + MV64361_IRQ_G0MISC = 44, + MV64361_IRQ_G1RX = 45, + MV64361_IRQ_G1TX = 46, + MV64361_IRQ_G1MISC = 47, + MV64361_IRQ_G2RX = 48, + MV64361_IRQ_G2TX = 49, + MV64361_IRQ_G2MISC = 50, + /* 51-55: reserved */ + MV64361_IRQ_P0_GPP0_7 = 56, + MV64361_IRQ_P0_GPP8_15 = 57, + MV64361_IRQ_P0_GPP16_23 = 58, + MV64361_IRQ_P0_GPP24_31 = 59, + MV64361_IRQ_P0_CPU_DB = 60, + /* 61-63: reserved */ +}; + +PCIBus *mv64361_get_pci_bus(DeviceState *dev, int n) +{ + MV64361State *mv = MV64361(dev); + return PCI_HOST_BRIDGE(&mv->pci[n])->bus; +} + +static void unmap_region(MemoryRegion *mr) +{ + if (memory_region_is_mapped(mr)) { + memory_region_del_subregion(get_system_memory(), mr); + object_unparent(OBJECT(mr)); + } +} + +static void map_pci_region(MemoryRegion *mr, MemoryRegion *parent, + struct Object *owner, const char *name, + hwaddr poffs, uint64_t size, hwaddr moffs) +{ + memory_region_init_alias(mr, owner, name, parent, poffs, size); + memory_region_add_subregion(get_system_memory(), moffs, mr); + trace_mv64361_region_map(name, poffs, size, moffs); +} + +static void set_mem_windows(MV64361State *s, uint32_t val) +{ + MV64361PCIState *p; + MemoryRegion *mr; + uint32_t mask; + int i; + + val &= 0x1fffff; + for (mask = 1, i = 0; i < 21; i++, mask <<= 1) { + if ((val & mask) != (s->base_addr_enable & mask)) { + trace_mv64361_region_enable(!(val & mask) ? 
"enable" : "disable", i); + /* + * 0-3 are SDRAM chip selects but we map all RAM directly + * 4-7 are device chip selects (not sure what those are) + * 8 is Boot device (ROM) chip select but we map that directly too + */ + if (i == 9) { + p = &s->pci[0]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->io, OBJECT(s), "pci0-io-win", + p->remap[4], (p->io_size + 1) << 16, + (p->io_base & 0xfffff) << 16); + } + } else if (i == 10) { + p = &s->pci[0]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci0-mem0-win", + p->remap[0], (p->mem_size[0] + 1) << 16, + (p->mem_base[0] & 0xfffff) << 16); + } + } else if (i == 11) { + p = &s->pci[0]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci0-mem1-win", + p->remap[1], (p->mem_size[1] + 1) << 16, + (p->mem_base[1] & 0xfffff) << 16); + } + } else if (i == 12) { + p = &s->pci[0]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci0-mem2-win", + p->remap[2], (p->mem_size[2] + 1) << 16, + (p->mem_base[2] & 0xfffff) << 16); + } + } else if (i == 13) { + p = &s->pci[0]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci0-mem3-win", + p->remap[3], (p->mem_size[3] + 1) << 16, + (p->mem_base[3] & 0xfffff) << 16); + } + } else if (i == 14) { + p = &s->pci[1]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->io, OBJECT(s), "pci1-io-win", + p->remap[4], (p->io_size + 1) << 16, + (p->io_base & 0xfffff) << 16); + } + } else if (i == 15) { + p = &s->pci[1]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci1-mem0-win", + p->remap[0], (p->mem_size[0] + 1) << 16, + (p->mem_base[0] & 0xfffff) << 16); + } + } else if (i == 16) { + p = &s->pci[1]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci1-mem1-win", + p->remap[1], (p->mem_size[1] + 1) << 16, + (p->mem_base[1] & 0xfffff) << 16); + } + } else if (i == 17) { + p = &s->pci[1]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci1-mem2-win", + p->remap[2], (p->mem_size[2] + 1) << 16, + (p->mem_base[2] & 0xfffff) << 16); + } + } else if (i == 18) { + p = &s->pci[1]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci1-mem3-win", + p->remap[3], (p->mem_size[3] + 1) << 16, + (p->mem_base[3] & 0xfffff) << 16); + } + /* 19 is integrated SRAM */ + } else if (i == 20) { + mr = &s->regs; + unmap_region(mr); + if (!(val & mask)) { + memory_region_add_subregion(get_system_memory(), + (s->regs_base & 0xfffff) << 16, mr); + } + } + } + } + s->base_addr_enable = val; +} + +static void mv64361_update_irq(void *opaque, int n, int level) +{ + MV64361State *s = opaque; + uint64_t val = s->main_int_cr; + + if (level) { + val |= BIT_ULL(n); + } else { + val &= ~BIT_ULL(n); + } + if ((s->main_int_cr & s->cpu0_int_mask) != (val & s->cpu0_int_mask)) { + qemu_set_irq(s->cpu_irq, level); + } + s->main_int_cr = val; +} + +static uint64_t mv64361_read(void *opaque, hwaddr addr, unsigned int size) +{ + MV64361State *s = MV64361(opaque); + uint32_t ret = 0; + + switch (addr) { + case MV64340_CPU_CONFIG: + ret = s->cpu_conf; + break; + case MV64340_PCI_0_IO_BASE_ADDR: + ret = 
s->pci[0].io_base; + break; + case MV64340_PCI_0_IO_SIZE: + ret = s->pci[0].io_size; + break; + case MV64340_PCI_0_IO_ADDR_REMAP: + ret = s->pci[0].remap[4] >> 16; + break; + case MV64340_PCI_0_MEMORY0_BASE_ADDR: + ret = s->pci[0].mem_base[0]; + break; + case MV64340_PCI_0_MEMORY0_SIZE: + ret = s->pci[0].mem_size[0]; + break; + case MV64340_PCI_0_MEMORY0_LOW_ADDR_REMAP: + ret = (s->pci[0].remap[0] & 0xffff0000) >> 16; + break; + case MV64340_PCI_0_MEMORY0_HIGH_ADDR_REMAP: + ret = s->pci[0].remap[0] >> 32; + break; + case MV64340_PCI_0_MEMORY1_BASE_ADDR: + ret = s->pci[0].mem_base[1]; + break; + case MV64340_PCI_0_MEMORY1_SIZE: + ret = s->pci[0].mem_size[1]; + break; + case MV64340_PCI_0_MEMORY1_LOW_ADDR_REMAP: + ret = (s->pci[0].remap[1] & 0xffff0000) >> 16; + break; + case MV64340_PCI_0_MEMORY1_HIGH_ADDR_REMAP: + ret = s->pci[0].remap[1] >> 32; + break; + case MV64340_PCI_0_MEMORY2_BASE_ADDR: + ret = s->pci[0].mem_base[2]; + break; + case MV64340_PCI_0_MEMORY2_SIZE: + ret = s->pci[0].mem_size[2]; + break; + case MV64340_PCI_0_MEMORY2_LOW_ADDR_REMAP: + ret = (s->pci[0].remap[2] & 0xffff0000) >> 16; + break; + case MV64340_PCI_0_MEMORY2_HIGH_ADDR_REMAP: + ret = s->pci[0].remap[2] >> 32; + break; + case MV64340_PCI_0_MEMORY3_BASE_ADDR: + ret = s->pci[0].mem_base[3]; + break; + case MV64340_PCI_0_MEMORY3_SIZE: + ret = s->pci[0].mem_size[3]; + break; + case MV64340_PCI_0_MEMORY3_LOW_ADDR_REMAP: + ret = (s->pci[0].remap[3] & 0xffff0000) >> 16; + break; + case MV64340_PCI_0_MEMORY3_HIGH_ADDR_REMAP: + ret = s->pci[0].remap[3] >> 32; + break; + case MV64340_PCI_1_IO_BASE_ADDR: + ret = s->pci[1].io_base; + break; + case MV64340_PCI_1_IO_SIZE: + ret = s->pci[1].io_size; + break; + case MV64340_PCI_1_IO_ADDR_REMAP: + ret = s->pci[1].remap[4] >> 16; + break; + case MV64340_PCI_1_MEMORY0_BASE_ADDR: + ret = s->pci[1].mem_base[0]; + break; + case MV64340_PCI_1_MEMORY0_SIZE: + ret = s->pci[1].mem_size[0]; + break; + case MV64340_PCI_1_MEMORY0_LOW_ADDR_REMAP: + ret = (s->pci[1].remap[0] & 0xffff0000) >> 16; + break; + case MV64340_PCI_1_MEMORY0_HIGH_ADDR_REMAP: + ret = s->pci[1].remap[0] >> 32; + break; + case MV64340_PCI_1_MEMORY1_BASE_ADDR: + ret = s->pci[1].mem_base[1]; + break; + case MV64340_PCI_1_MEMORY1_SIZE: + ret = s->pci[1].mem_size[1]; + break; + case MV64340_PCI_1_MEMORY1_LOW_ADDR_REMAP: + ret = (s->pci[1].remap[1] & 0xffff0000) >> 16; + break; + case MV64340_PCI_1_MEMORY1_HIGH_ADDR_REMAP: + ret = s->pci[1].remap[1] >> 32; + break; + case MV64340_PCI_1_MEMORY2_BASE_ADDR: + ret = s->pci[1].mem_base[2]; + break; + case MV64340_PCI_1_MEMORY2_SIZE: + ret = s->pci[1].mem_size[2]; + break; + case MV64340_PCI_1_MEMORY2_LOW_ADDR_REMAP: + ret = (s->pci[1].remap[2] & 0xffff0000) >> 16; + break; + case MV64340_PCI_1_MEMORY2_HIGH_ADDR_REMAP: + ret = s->pci[1].remap[2] >> 32; + break; + case MV64340_PCI_1_MEMORY3_BASE_ADDR: + ret = s->pci[1].mem_base[3]; + break; + case MV64340_PCI_1_MEMORY3_SIZE: + ret = s->pci[1].mem_size[3]; + break; + case MV64340_PCI_1_MEMORY3_LOW_ADDR_REMAP: + ret = (s->pci[1].remap[3] & 0xffff0000) >> 16; + break; + case MV64340_PCI_1_MEMORY3_HIGH_ADDR_REMAP: + ret = s->pci[1].remap[3] >> 32; + break; + case MV64340_INTERNAL_SPACE_BASE_ADDR: + ret = s->regs_base; + break; + case MV64340_BASE_ADDR_ENABLE: + ret = s->base_addr_enable; + break; + case MV64340_PCI_0_CONFIG_ADDR: + ret = pci_host_conf_le_ops.read(PCI_HOST_BRIDGE(&s->pci[0]), 0, size); + break; + case MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG ... 
+ MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG + 3: + ret = pci_host_data_le_ops.read(PCI_HOST_BRIDGE(&s->pci[0]), + addr - MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG, size); + break; + case MV64340_PCI_1_CONFIG_ADDR: + ret = pci_host_conf_le_ops.read(PCI_HOST_BRIDGE(&s->pci[1]), 0, size); + break; + case MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG ... + MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG + 3: + ret = pci_host_data_le_ops.read(PCI_HOST_BRIDGE(&s->pci[1]), + addr - MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG, size); + break; + case MV64340_PCI_1_INTERRUPT_ACKNOWLEDGE_VIRTUAL_REG: + /* FIXME: Should this be sent via the PCI bus somehow? */ + if (s->gpp_int_level && (s->gpp_value & BIT(31))) { + ret = pic_read_irq(isa_pic); + } + break; + case MV64340_MAIN_INTERRUPT_CAUSE_LOW: + ret = s->main_int_cr; + break; + case MV64340_MAIN_INTERRUPT_CAUSE_HIGH: + ret = s->main_int_cr >> 32; + break; + case MV64340_CPU_INTERRUPT0_MASK_LOW: + ret = s->cpu0_int_mask; + break; + case MV64340_CPU_INTERRUPT0_MASK_HIGH: + ret = s->cpu0_int_mask >> 32; + break; + case MV64340_CPU_INTERRUPT0_SELECT_CAUSE: + ret = s->main_int_cr; + if (s->main_int_cr & s->cpu0_int_mask) { + if (!(s->main_int_cr & s->cpu0_int_mask & 0xffffffff)) { + ret = s->main_int_cr >> 32 | BIT(30); + } else if ((s->main_int_cr & s->cpu0_int_mask) >> 32) { + ret |= BIT(31); + } + } + break; + case MV64340_CUNIT_ARBITER_CONTROL_REG: + ret = 0x11ff0000 | (s->gpp_int_level << 10); + break; + case MV64340_GPP_IO_CONTROL: + ret = s->gpp_io; + break; + case MV64340_GPP_LEVEL_CONTROL: + ret = s->gpp_level; + break; + case MV64340_GPP_VALUE: + ret = s->gpp_value; + break; + case MV64340_GPP_VALUE_SET: + case MV64340_GPP_VALUE_CLEAR: + ret = 0; + break; + case MV64340_GPP_INTERRUPT_CAUSE: + ret = s->gpp_int_cr; + break; + case MV64340_GPP_INTERRUPT_MASK0: + case MV64340_GPP_INTERRUPT_MASK1: + ret = s->gpp_int_mask; + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: Unimplemented register read 0x%" + HWADDR_PRIx "\n", __func__, addr); + break; + } + if (addr != MV64340_PCI_1_INTERRUPT_ACKNOWLEDGE_VIRTUAL_REG) { + trace_mv64361_reg_read(addr, ret); + } + return ret; +} + +static void warn_swap_bit(uint64_t val) +{ + if ((val & 0x3000000ULL) >> 24 != 1) { + qemu_log_mask(LOG_UNIMP, "%s: Data swap not implemented", __func__); + } +} + +static void mv64361_set_pci_mem_remap(MV64361State *s, int bus, int idx, + uint64_t val, bool high) +{ + if (high) { + s->pci[bus].remap[idx] = val; + } else { + s->pci[bus].remap[idx] &= 0xffffffff00000000ULL; + s->pci[bus].remap[idx] |= (val & 0xffffULL) << 16; + } +} + +static void mv64361_write(void *opaque, hwaddr addr, uint64_t val, + unsigned int size) +{ + MV64361State *s = MV64361(opaque); + + trace_mv64361_reg_write(addr, val); + switch (addr) { + case MV64340_CPU_CONFIG: + s->cpu_conf = val & 0xe4e3bffULL; + s->cpu_conf |= BIT(23); + break; + case MV64340_PCI_0_IO_BASE_ADDR: + s->pci[0].io_base = val & 0x30fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + s->pci[0].remap[4] = (val & 0xffffULL) << 16; + } + break; + case MV64340_PCI_0_IO_SIZE: + s->pci[0].io_size = val & 0xffffULL; + break; + case MV64340_PCI_0_IO_ADDR_REMAP: + s->pci[0].remap[4] = (val & 0xffffULL) << 16; + break; + case MV64340_PCI_0_MEMORY0_BASE_ADDR: + s->pci[0].mem_base[0] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 0, 0, val, false); + } + break; + case MV64340_PCI_0_MEMORY0_SIZE: + s->pci[0].mem_size[0] = val & 0xffffULL; + break; + case MV64340_PCI_0_MEMORY0_LOW_ADDR_REMAP: + 
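+    /* LOW and HIGH remap writes share one body via mv64361_set_pci_mem_remap() */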
case MV64340_PCI_0_MEMORY0_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 0, 0, val, + (addr == MV64340_PCI_0_MEMORY0_HIGH_ADDR_REMAP)); + break; + case MV64340_PCI_0_MEMORY1_BASE_ADDR: + s->pci[0].mem_base[1] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 0, 1, val, false); + } + break; + case MV64340_PCI_0_MEMORY1_SIZE: + s->pci[0].mem_size[1] = val & 0xffffULL; + break; + case MV64340_PCI_0_MEMORY1_LOW_ADDR_REMAP: + case MV64340_PCI_0_MEMORY1_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 0, 1, val, + (addr == MV64340_PCI_0_MEMORY1_HIGH_ADDR_REMAP)); + break; + case MV64340_PCI_0_MEMORY2_BASE_ADDR: + s->pci[0].mem_base[2] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 0, 2, val, false); + } + break; + case MV64340_PCI_0_MEMORY2_SIZE: + s->pci[0].mem_size[2] = val & 0xffffULL; + break; + case MV64340_PCI_0_MEMORY2_LOW_ADDR_REMAP: + case MV64340_PCI_0_MEMORY2_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 0, 2, val, + (addr == MV64340_PCI_0_MEMORY2_HIGH_ADDR_REMAP)); + break; + case MV64340_PCI_0_MEMORY3_BASE_ADDR: + s->pci[0].mem_base[3] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 0, 3, val, false); + } + break; + case MV64340_PCI_0_MEMORY3_SIZE: + s->pci[0].mem_size[3] = val & 0xffffULL; + break; + case MV64340_PCI_0_MEMORY3_LOW_ADDR_REMAP: + case MV64340_PCI_0_MEMORY3_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 0, 3, val, + (addr == MV64340_PCI_0_MEMORY3_HIGH_ADDR_REMAP)); + break; + case MV64340_PCI_1_IO_BASE_ADDR: + s->pci[1].io_base = val & 0x30fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + s->pci[1].remap[4] = (val & 0xffffULL) << 16; + } + break; + case MV64340_PCI_1_IO_SIZE: + s->pci[1].io_size = val & 0xffffULL; + break; + case MV64340_PCI_1_MEMORY0_BASE_ADDR: + s->pci[1].mem_base[0] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 1, 0, val, false); + } + break; + case MV64340_PCI_1_MEMORY0_SIZE: + s->pci[1].mem_size[0] = val & 0xffffULL; + break; + case MV64340_PCI_1_MEMORY0_LOW_ADDR_REMAP: + case MV64340_PCI_1_MEMORY0_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 1, 0, val, + (addr == MV64340_PCI_1_MEMORY0_HIGH_ADDR_REMAP)); + break; + case MV64340_PCI_1_MEMORY1_BASE_ADDR: + s->pci[1].mem_base[1] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 1, 1, val, false); + } + break; + case MV64340_PCI_1_MEMORY1_SIZE: + s->pci[1].mem_size[1] = val & 0xffffULL; + break; + case MV64340_PCI_1_MEMORY1_LOW_ADDR_REMAP: + case MV64340_PCI_1_MEMORY1_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 1, 1, val, + (addr == MV64340_PCI_1_MEMORY1_HIGH_ADDR_REMAP)); + break; + case MV64340_PCI_1_MEMORY2_BASE_ADDR: + s->pci[1].mem_base[2] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 1, 2, val, false); + } + break; + case MV64340_PCI_1_MEMORY2_SIZE: + s->pci[1].mem_size[2] = val & 0xffffULL; + break; + case MV64340_PCI_1_MEMORY2_LOW_ADDR_REMAP: + case MV64340_PCI_1_MEMORY2_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 1, 2, val, + (addr == MV64340_PCI_1_MEMORY2_HIGH_ADDR_REMAP)); + break; + case MV64340_PCI_1_MEMORY3_BASE_ADDR: + s->pci[1].mem_base[3] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 1, 3, val, false); + } + break; + case 
MV64340_PCI_1_MEMORY3_SIZE: + s->pci[1].mem_size[3] = val & 0xffffULL; + break; + case MV64340_PCI_1_MEMORY3_LOW_ADDR_REMAP: + case MV64340_PCI_1_MEMORY3_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 1, 3, val, + (addr == MV64340_PCI_1_MEMORY3_HIGH_ADDR_REMAP)); + break; + case MV64340_INTERNAL_SPACE_BASE_ADDR: + s->regs_base = val & 0xfffffULL; + break; + case MV64340_BASE_ADDR_ENABLE: + set_mem_windows(s, val); + break; + case MV64340_PCI_0_CONFIG_ADDR: + pci_host_conf_le_ops.write(PCI_HOST_BRIDGE(&s->pci[0]), 0, val, size); + break; + case MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG ... + MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG + 3: + pci_host_data_le_ops.write(PCI_HOST_BRIDGE(&s->pci[0]), + addr - MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG, val, size); + break; + case MV64340_PCI_1_CONFIG_ADDR: + pci_host_conf_le_ops.write(PCI_HOST_BRIDGE(&s->pci[1]), 0, val, size); + break; + case MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG ... + MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG + 3: + pci_host_data_le_ops.write(PCI_HOST_BRIDGE(&s->pci[1]), + addr - MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG, val, size); + break; + case MV64340_CPU_INTERRUPT0_MASK_LOW: + s->cpu0_int_mask &= 0xffffffff00000000ULL; + s->cpu0_int_mask |= val & 0xffffffffULL; + break; + case MV64340_CPU_INTERRUPT0_MASK_HIGH: + s->cpu0_int_mask &= 0xffffffffULL; + s->cpu0_int_mask |= val << 32; + break; + case MV64340_CUNIT_ARBITER_CONTROL_REG: + s->gpp_int_level = !!(val & BIT(10)); + break; + case MV64340_GPP_IO_CONTROL: + s->gpp_io = val; + break; + case MV64340_GPP_LEVEL_CONTROL: + s->gpp_level = val; + break; + case MV64340_GPP_VALUE: + s->gpp_value &= ~s->gpp_io; + s->gpp_value |= val & s->gpp_io; + break; + case MV64340_GPP_VALUE_SET: + s->gpp_value |= val & s->gpp_io; + break; + case MV64340_GPP_VALUE_CLEAR: + s->gpp_value &= ~(val & s->gpp_io); + break; + case MV64340_GPP_INTERRUPT_CAUSE: + if (!s->gpp_int_level && val != s->gpp_int_cr) { + int i; + uint32_t ch = s->gpp_int_cr ^ val; + s->gpp_int_cr = val; + for (i = 0; i < 4; i++) { + if ((ch & 0xff << i) && !(val & 0xff << i)) { + mv64361_update_irq(opaque, MV64361_IRQ_P0_GPP0_7 + i, 0); + } + } + } else { + s->gpp_int_cr = val; + } + break; + case MV64340_GPP_INTERRUPT_MASK0: + case MV64340_GPP_INTERRUPT_MASK1: + s->gpp_int_mask = val; + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: Unimplemented register write 0x%" + HWADDR_PRIx " = %"PRIx64"\n", __func__, addr, val); + break; + } +} + +static const MemoryRegionOps mv64361_ops = { + .read = mv64361_read, + .write = mv64361_write, + .valid.min_access_size = 1, + .valid.max_access_size = 4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void mv64361_gpp_irq(void *opaque, int n, int level) +{ + MV64361State *s = opaque; + uint32_t mask = BIT(n); + uint32_t val = s->gpp_value & ~mask; + + if (s->gpp_level & mask) { + level = !level; + } + val |= level << n; + if (val > s->gpp_value) { + s->gpp_value = val; + s->gpp_int_cr |= mask; + if (s->gpp_int_mask & mask) { + mv64361_update_irq(opaque, MV64361_IRQ_P0_GPP0_7 + n / 8, 1); + } + } else if (val < s->gpp_value) { + int b = n / 8; + s->gpp_value = val; + if (s->gpp_int_level && !(val & 0xff << b)) { + mv64361_update_irq(opaque, MV64361_IRQ_P0_GPP0_7 + b, 0); + } + } +} + +static void mv64361_realize(DeviceState *dev, Error **errp) +{ + MV64361State *s = MV64361(dev); + int i; + + s->base_addr_enable = 0x1fffff; + memory_region_init_io(&s->regs, OBJECT(s), &mv64361_ops, s, + TYPE_MV64361, 0x10000); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->regs); + for (i = 0; i < 2; i++) { + g_autofree 
char *name = g_strdup_printf("pcihost%d", i); + object_initialize_child(OBJECT(dev), name, &s->pci[i], + TYPE_MV64361_PCI); + DeviceState *pci = DEVICE(&s->pci[i]); + qdev_prop_set_uint8(pci, "index", i); + sysbus_realize_and_unref(SYS_BUS_DEVICE(pci), &error_fatal); + } + sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->cpu_irq); + qdev_init_gpio_in_named(dev, mv64361_gpp_irq, "gpp", 32); + /* FIXME: PCI IRQ connections may be board specific */ + for (i = 0; i < PCI_NUM_PINS; i++) { + s->pci[1].irq[i] = qdev_get_gpio_in_named(dev, "gpp", 12 + i); + } +} + +static void mv64361_reset(DeviceState *dev) +{ + MV64361State *s = MV64361(dev); + int i, j; + + /* + * These values may be board specific + * Real chip supports init from an eprom but that's not modelled + */ + set_mem_windows(s, 0x1fffff); + s->cpu_conf = 0x28000ff; + s->regs_base = 0x100f100; + s->pci[0].io_base = 0x100f800; + s->pci[0].io_size = 0xff; + s->pci[0].mem_base[0] = 0x100c000; + s->pci[0].mem_size[0] = 0x1fff; + s->pci[0].mem_base[1] = 0x100f900; + s->pci[0].mem_size[1] = 0xff; + s->pci[0].mem_base[2] = 0x100f400; + s->pci[0].mem_size[2] = 0x1ff; + s->pci[0].mem_base[3] = 0x100f600; + s->pci[0].mem_size[3] = 0x1ff; + s->pci[1].io_base = 0x100fe00; + s->pci[1].io_size = 0xff; + s->pci[1].mem_base[0] = 0x1008000; + s->pci[1].mem_size[0] = 0x3fff; + s->pci[1].mem_base[1] = 0x100fd00; + s->pci[1].mem_size[1] = 0xff; + s->pci[1].mem_base[2] = 0x1002600; + s->pci[1].mem_size[2] = 0x1ff; + s->pci[1].mem_base[3] = 0x100ff80; + s->pci[1].mem_size[3] = 0x7f; + for (i = 0; i < 2; i++) { + for (j = 0; j < 4; j++) { + s->pci[i].remap[j] = s->pci[i].mem_base[j] << 16; + } + } + s->pci[0].remap[1] = 0; + s->pci[1].remap[1] = 0; + set_mem_windows(s, 0xfbfff); +} + +static void mv64361_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = mv64361_realize; + dc->reset = mv64361_reset; +} + +static const TypeInfo mv64361_type_info = { + .name = TYPE_MV64361, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(MV64361State), + .class_init = mv64361_class_init, +}; + +static void mv64361_register_types(void) +{ + type_register_static(&mv64361_type_info); +} + +type_init(mv64361_register_types) diff --git a/hw/pci-host/mv643xx.h b/hw/pci-host/mv643xx.h new file mode 100644 index 00000000000..cd26a43f188 --- /dev/null +++ b/hw/pci-host/mv643xx.h @@ -0,0 +1,918 @@ +/* + * mv643xx.h - MV-643XX Internal registers definition file. + * + * Copyright 2002 Momentum Computer, Inc. + * Author: Matthew Dharm + * Copyright 2002 GALILEO TECHNOLOGY, LTD. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ +#ifndef ASM_MV643XX_H +#define ASM_MV643XX_H + +/****************************************/ +/* Processor Address Space */ +/****************************************/ + +/* DDR SDRAM BAR and size registers */ + +#define MV64340_CS_0_BASE_ADDR 0x008 +#define MV64340_CS_0_SIZE 0x010 +#define MV64340_CS_1_BASE_ADDR 0x208 +#define MV64340_CS_1_SIZE 0x210 +#define MV64340_CS_2_BASE_ADDR 0x018 +#define MV64340_CS_2_SIZE 0x020 +#define MV64340_CS_3_BASE_ADDR 0x218 +#define MV64340_CS_3_SIZE 0x220 + +/* Devices BAR and size registers */ + +#define MV64340_DEV_CS0_BASE_ADDR 0x028 +#define MV64340_DEV_CS0_SIZE 0x030 +#define MV64340_DEV_CS1_BASE_ADDR 0x228 +#define MV64340_DEV_CS1_SIZE 0x230 +#define MV64340_DEV_CS2_BASE_ADDR 0x248 +#define MV64340_DEV_CS2_SIZE 0x250 +#define MV64340_DEV_CS3_BASE_ADDR 0x038 +#define MV64340_DEV_CS3_SIZE 0x040 +#define MV64340_BOOTCS_BASE_ADDR 0x238 +#define MV64340_BOOTCS_SIZE 0x240 + +/* PCI 0 BAR and size registers */ + +#define MV64340_PCI_0_IO_BASE_ADDR 0x048 +#define MV64340_PCI_0_IO_SIZE 0x050 +#define MV64340_PCI_0_MEMORY0_BASE_ADDR 0x058 +#define MV64340_PCI_0_MEMORY0_SIZE 0x060 +#define MV64340_PCI_0_MEMORY1_BASE_ADDR 0x080 +#define MV64340_PCI_0_MEMORY1_SIZE 0x088 +#define MV64340_PCI_0_MEMORY2_BASE_ADDR 0x258 +#define MV64340_PCI_0_MEMORY2_SIZE 0x260 +#define MV64340_PCI_0_MEMORY3_BASE_ADDR 0x280 +#define MV64340_PCI_0_MEMORY3_SIZE 0x288 + +/* PCI 1 BAR and size registers */ +#define MV64340_PCI_1_IO_BASE_ADDR 0x090 +#define MV64340_PCI_1_IO_SIZE 0x098 +#define MV64340_PCI_1_MEMORY0_BASE_ADDR 0x0a0 +#define MV64340_PCI_1_MEMORY0_SIZE 0x0a8 +#define MV64340_PCI_1_MEMORY1_BASE_ADDR 0x0b0 +#define MV64340_PCI_1_MEMORY1_SIZE 0x0b8 +#define MV64340_PCI_1_MEMORY2_BASE_ADDR 0x2a0 +#define MV64340_PCI_1_MEMORY2_SIZE 0x2a8 +#define MV64340_PCI_1_MEMORY3_BASE_ADDR 0x2b0 +#define MV64340_PCI_1_MEMORY3_SIZE 0x2b8 + +/* SRAM base address */ +#define MV64340_INTEGRATED_SRAM_BASE_ADDR 0x268 + +/* internal registers space base address */ +#define MV64340_INTERNAL_SPACE_BASE_ADDR 0x068 + +/* Enables the CS , DEV_CS , PCI 0 and PCI 1 windows above */ +#define MV64340_BASE_ADDR_ENABLE 0x278 + +/****************************************/ +/* PCI remap registers */ +/****************************************/ + + /* PCI 0 */ +#define MV64340_PCI_0_IO_ADDR_REMAP 0x0f0 +#define MV64340_PCI_0_MEMORY0_LOW_ADDR_REMAP 0x0f8 +#define MV64340_PCI_0_MEMORY0_HIGH_ADDR_REMAP 0x320 +#define MV64340_PCI_0_MEMORY1_LOW_ADDR_REMAP 0x100 +#define MV64340_PCI_0_MEMORY1_HIGH_ADDR_REMAP 0x328 +#define MV64340_PCI_0_MEMORY2_LOW_ADDR_REMAP 0x2f8 +#define MV64340_PCI_0_MEMORY2_HIGH_ADDR_REMAP 0x330 +#define MV64340_PCI_0_MEMORY3_LOW_ADDR_REMAP 0x300 +#define MV64340_PCI_0_MEMORY3_HIGH_ADDR_REMAP 0x338 + /* PCI 1 */ +#define MV64340_PCI_1_IO_ADDR_REMAP 0x108 +#define MV64340_PCI_1_MEMORY0_LOW_ADDR_REMAP 0x110 +#define MV64340_PCI_1_MEMORY0_HIGH_ADDR_REMAP 0x340 +#define MV64340_PCI_1_MEMORY1_LOW_ADDR_REMAP 0x118 +#define MV64340_PCI_1_MEMORY1_HIGH_ADDR_REMAP 0x348 +#define MV64340_PCI_1_MEMORY2_LOW_ADDR_REMAP 0x310 +#define MV64340_PCI_1_MEMORY2_HIGH_ADDR_REMAP 0x350 +#define MV64340_PCI_1_MEMORY3_LOW_ADDR_REMAP 0x318 +#define MV64340_PCI_1_MEMORY3_HIGH_ADDR_REMAP 0x358 + +#define MV64340_CPU_PCI_0_HEADERS_RETARGET_CONTROL 0x3b0 +#define MV64340_CPU_PCI_0_HEADERS_RETARGET_BASE 0x3b8 +#define MV64340_CPU_PCI_1_HEADERS_RETARGET_CONTROL 0x3c0 +#define MV64340_CPU_PCI_1_HEADERS_RETARGET_BASE 0x3c8 +#define MV64340_CPU_GE_HEADERS_RETARGET_CONTROL 0x3d0 +#define MV64340_CPU_GE_HEADERS_RETARGET_BASE 
0x3d8 +#define MV64340_CPU_IDMA_HEADERS_RETARGET_CONTROL 0x3e0 +#define MV64340_CPU_IDMA_HEADERS_RETARGET_BASE 0x3e8 + +/****************************************/ +/* CPU Control Registers */ +/****************************************/ + +#define MV64340_CPU_CONFIG 0x000 +#define MV64340_CPU_MODE 0x120 +#define MV64340_CPU_MASTER_CONTROL 0x160 +#define MV64340_CPU_CROSS_BAR_CONTROL_LOW 0x150 +#define MV64340_CPU_CROSS_BAR_CONTROL_HIGH 0x158 +#define MV64340_CPU_CROSS_BAR_TIMEOUT 0x168 + +/****************************************/ +/* SMP RegisterS */ +/****************************************/ + +#define MV64340_SMP_WHO_AM_I 0x200 +#define MV64340_SMP_CPU0_DOORBELL 0x214 +#define MV64340_SMP_CPU0_DOORBELL_CLEAR 0x21C +#define MV64340_SMP_CPU1_DOORBELL 0x224 +#define MV64340_SMP_CPU1_DOORBELL_CLEAR 0x22C +#define MV64340_SMP_CPU0_DOORBELL_MASK 0x234 +#define MV64340_SMP_CPU1_DOORBELL_MASK 0x23C +#define MV64340_SMP_SEMAPHOR0 0x244 +#define MV64340_SMP_SEMAPHOR1 0x24c +#define MV64340_SMP_SEMAPHOR2 0x254 +#define MV64340_SMP_SEMAPHOR3 0x25c +#define MV64340_SMP_SEMAPHOR4 0x264 +#define MV64340_SMP_SEMAPHOR5 0x26c +#define MV64340_SMP_SEMAPHOR6 0x274 +#define MV64340_SMP_SEMAPHOR7 0x27c + +/****************************************/ +/* CPU Sync Barrier Register */ +/****************************************/ + +#define MV64340_CPU_0_SYNC_BARRIER_TRIGGER 0x0c0 +#define MV64340_CPU_0_SYNC_BARRIER_VIRTUAL 0x0c8 +#define MV64340_CPU_1_SYNC_BARRIER_TRIGGER 0x0d0 +#define MV64340_CPU_1_SYNC_BARRIER_VIRTUAL 0x0d8 + +/****************************************/ +/* CPU Access Protect */ +/****************************************/ + +#define MV64340_CPU_PROTECT_WINDOW_0_BASE_ADDR 0x180 +#define MV64340_CPU_PROTECT_WINDOW_0_SIZE 0x188 +#define MV64340_CPU_PROTECT_WINDOW_1_BASE_ADDR 0x190 +#define MV64340_CPU_PROTECT_WINDOW_1_SIZE 0x198 +#define MV64340_CPU_PROTECT_WINDOW_2_BASE_ADDR 0x1a0 +#define MV64340_CPU_PROTECT_WINDOW_2_SIZE 0x1a8 +#define MV64340_CPU_PROTECT_WINDOW_3_BASE_ADDR 0x1b0 +#define MV64340_CPU_PROTECT_WINDOW_3_SIZE 0x1b8 + + +/****************************************/ +/* CPU Error Report */ +/****************************************/ + +#define MV64340_CPU_ERROR_ADDR_LOW 0x070 +#define MV64340_CPU_ERROR_ADDR_HIGH 0x078 +#define MV64340_CPU_ERROR_DATA_LOW 0x128 +#define MV64340_CPU_ERROR_DATA_HIGH 0x130 +#define MV64340_CPU_ERROR_PARITY 0x138 +#define MV64340_CPU_ERROR_CAUSE 0x140 +#define MV64340_CPU_ERROR_MASK 0x148 + +/****************************************/ +/* CPU Interface Debug Registers */ +/****************************************/ + +#define MV64340_PUNIT_SLAVE_DEBUG_LOW 0x360 +#define MV64340_PUNIT_SLAVE_DEBUG_HIGH 0x368 +#define MV64340_PUNIT_MASTER_DEBUG_LOW 0x370 +#define MV64340_PUNIT_MASTER_DEBUG_HIGH 0x378 +#define MV64340_PUNIT_MMASK 0x3e4 + +/****************************************/ +/* Integrated SRAM Registers */ +/****************************************/ + +#define MV64340_SRAM_CONFIG 0x380 +#define MV64340_SRAM_TEST_MODE 0X3F4 +#define MV64340_SRAM_ERROR_CAUSE 0x388 +#define MV64340_SRAM_ERROR_ADDR 0x390 +#define MV64340_SRAM_ERROR_ADDR_HIGH 0X3F8 +#define MV64340_SRAM_ERROR_DATA_LOW 0x398 +#define MV64340_SRAM_ERROR_DATA_HIGH 0x3a0 +#define MV64340_SRAM_ERROR_DATA_PARITY 0x3a8 + +/****************************************/ +/* SDRAM Configuration */ +/****************************************/ + +#define MV64340_SDRAM_CONFIG 0x1400 +#define MV64340_D_UNIT_CONTROL_LOW 0x1404 +#define MV64340_D_UNIT_CONTROL_HIGH 0x1424 +#define MV64340_SDRAM_TIMING_CONTROL_LOW 
0x1408 +#define MV64340_SDRAM_TIMING_CONTROL_HIGH 0x140c +#define MV64340_SDRAM_ADDR_CONTROL 0x1410 +#define MV64340_SDRAM_OPEN_PAGES_CONTROL 0x1414 +#define MV64340_SDRAM_OPERATION 0x1418 +#define MV64340_SDRAM_MODE 0x141c +#define MV64340_EXTENDED_DRAM_MODE 0x1420 +#define MV64340_SDRAM_CROSS_BAR_CONTROL_LOW 0x1430 +#define MV64340_SDRAM_CROSS_BAR_CONTROL_HIGH 0x1434 +#define MV64340_SDRAM_CROSS_BAR_TIMEOUT 0x1438 +#define MV64340_SDRAM_ADDR_CTRL_PADS_CALIBRATION 0x14c0 +#define MV64340_SDRAM_DATA_PADS_CALIBRATION 0x14c4 + +/****************************************/ +/* SDRAM Error Report */ +/****************************************/ + +#define MV64340_SDRAM_ERROR_DATA_LOW 0x1444 +#define MV64340_SDRAM_ERROR_DATA_HIGH 0x1440 +#define MV64340_SDRAM_ERROR_ADDR 0x1450 +#define MV64340_SDRAM_RECEIVED_ECC 0x1448 +#define MV64340_SDRAM_CALCULATED_ECC 0x144c +#define MV64340_SDRAM_ECC_CONTROL 0x1454 +#define MV64340_SDRAM_ECC_ERROR_COUNTER 0x1458 + +/******************************************/ +/* Controlled Delay Line (CDL) Registers */ +/******************************************/ + +#define MV64340_DFCDL_CONFIG0 0x1480 +#define MV64340_DFCDL_CONFIG1 0x1484 +#define MV64340_DLL_WRITE 0x1488 +#define MV64340_DLL_READ 0x148c +#define MV64340_SRAM_ADDR 0x1490 +#define MV64340_SRAM_DATA0 0x1494 +#define MV64340_SRAM_DATA1 0x1498 +#define MV64340_SRAM_DATA2 0x149c +#define MV64340_DFCL_PROBE 0x14a0 + +/******************************************/ +/* Debug Registers */ +/******************************************/ + +#define MV64340_DUNIT_DEBUG_LOW 0x1460 +#define MV64340_DUNIT_DEBUG_HIGH 0x1464 +#define MV64340_DUNIT_MMASK 0X1b40 + +/****************************************/ +/* Device Parameters */ +/****************************************/ + +#define MV64340_DEVICE_BANK0_PARAMETERS 0x45c +#define MV64340_DEVICE_BANK1_PARAMETERS 0x460 +#define MV64340_DEVICE_BANK2_PARAMETERS 0x464 +#define MV64340_DEVICE_BANK3_PARAMETERS 0x468 +#define MV64340_DEVICE_BOOT_BANK_PARAMETERS 0x46c +#define MV64340_DEVICE_INTERFACE_CONTROL 0x4c0 +#define MV64340_DEVICE_INTERFACE_CROSS_BAR_CONTROL_LOW 0x4c8 +#define MV64340_DEVICE_INTERFACE_CROSS_BAR_CONTROL_HIGH 0x4cc +#define MV64340_DEVICE_INTERFACE_CROSS_BAR_TIMEOUT 0x4c4 + +/****************************************/ +/* Device interrupt registers */ +/****************************************/ + +#define MV64340_DEVICE_INTERRUPT_CAUSE 0x4d0 +#define MV64340_DEVICE_INTERRUPT_MASK 0x4d4 +#define MV64340_DEVICE_ERROR_ADDR 0x4d8 +#define MV64340_DEVICE_ERROR_DATA 0x4dc +#define MV64340_DEVICE_ERROR_PARITY 0x4e0 + +/****************************************/ +/* Device debug registers */ +/****************************************/ + +#define MV64340_DEVICE_DEBUG_LOW 0x4e4 +#define MV64340_DEVICE_DEBUG_HIGH 0x4e8 +#define MV64340_RUNIT_MMASK 0x4f0 + +/****************************************/ +/* PCI Slave Address Decoding registers */ +/****************************************/ + +#define MV64340_PCI_0_CS_0_BANK_SIZE 0xc08 +#define MV64340_PCI_1_CS_0_BANK_SIZE 0xc88 +#define MV64340_PCI_0_CS_1_BANK_SIZE 0xd08 +#define MV64340_PCI_1_CS_1_BANK_SIZE 0xd88 +#define MV64340_PCI_0_CS_2_BANK_SIZE 0xc0c +#define MV64340_PCI_1_CS_2_BANK_SIZE 0xc8c +#define MV64340_PCI_0_CS_3_BANK_SIZE 0xd0c +#define MV64340_PCI_1_CS_3_BANK_SIZE 0xd8c +#define MV64340_PCI_0_DEVCS_0_BANK_SIZE 0xc10 +#define MV64340_PCI_1_DEVCS_0_BANK_SIZE 0xc90 +#define MV64340_PCI_0_DEVCS_1_BANK_SIZE 0xd10 +#define MV64340_PCI_1_DEVCS_1_BANK_SIZE 0xd90 +#define MV64340_PCI_0_DEVCS_2_BANK_SIZE 0xd18 +#define 
MV64340_PCI_1_DEVCS_2_BANK_SIZE 0xd98 +#define MV64340_PCI_0_DEVCS_3_BANK_SIZE 0xc14 +#define MV64340_PCI_1_DEVCS_3_BANK_SIZE 0xc94 +#define MV64340_PCI_0_DEVCS_BOOT_BANK_SIZE 0xd14 +#define MV64340_PCI_1_DEVCS_BOOT_BANK_SIZE 0xd94 +#define MV64340_PCI_0_P2P_MEM0_BAR_SIZE 0xd1c +#define MV64340_PCI_1_P2P_MEM0_BAR_SIZE 0xd9c +#define MV64340_PCI_0_P2P_MEM1_BAR_SIZE 0xd20 +#define MV64340_PCI_1_P2P_MEM1_BAR_SIZE 0xda0 +#define MV64340_PCI_0_P2P_I_O_BAR_SIZE 0xd24 +#define MV64340_PCI_1_P2P_I_O_BAR_SIZE 0xda4 +#define MV64340_PCI_0_CPU_BAR_SIZE 0xd28 +#define MV64340_PCI_1_CPU_BAR_SIZE 0xda8 +#define MV64340_PCI_0_INTERNAL_SRAM_BAR_SIZE 0xe00 +#define MV64340_PCI_1_INTERNAL_SRAM_BAR_SIZE 0xe80 +#define MV64340_PCI_0_EXPANSION_ROM_BAR_SIZE 0xd2c +#define MV64340_PCI_1_EXPANSION_ROM_BAR_SIZE 0xd9c +#define MV64340_PCI_0_BASE_ADDR_REG_ENABLE 0xc3c +#define MV64340_PCI_1_BASE_ADDR_REG_ENABLE 0xcbc +#define MV64340_PCI_0_CS_0_BASE_ADDR_REMAP 0xc48 +#define MV64340_PCI_1_CS_0_BASE_ADDR_REMAP 0xcc8 +#define MV64340_PCI_0_CS_1_BASE_ADDR_REMAP 0xd48 +#define MV64340_PCI_1_CS_1_BASE_ADDR_REMAP 0xdc8 +#define MV64340_PCI_0_CS_2_BASE_ADDR_REMAP 0xc4c +#define MV64340_PCI_1_CS_2_BASE_ADDR_REMAP 0xccc +#define MV64340_PCI_0_CS_3_BASE_ADDR_REMAP 0xd4c +#define MV64340_PCI_1_CS_3_BASE_ADDR_REMAP 0xdcc +#define MV64340_PCI_0_CS_0_BASE_HIGH_ADDR_REMAP 0xF04 +#define MV64340_PCI_1_CS_0_BASE_HIGH_ADDR_REMAP 0xF84 +#define MV64340_PCI_0_CS_1_BASE_HIGH_ADDR_REMAP 0xF08 +#define MV64340_PCI_1_CS_1_BASE_HIGH_ADDR_REMAP 0xF88 +#define MV64340_PCI_0_CS_2_BASE_HIGH_ADDR_REMAP 0xF0C +#define MV64340_PCI_1_CS_2_BASE_HIGH_ADDR_REMAP 0xF8C +#define MV64340_PCI_0_CS_3_BASE_HIGH_ADDR_REMAP 0xF10 +#define MV64340_PCI_1_CS_3_BASE_HIGH_ADDR_REMAP 0xF90 +#define MV64340_PCI_0_DEVCS_0_BASE_ADDR_REMAP 0xc50 +#define MV64340_PCI_1_DEVCS_0_BASE_ADDR_REMAP 0xcd0 +#define MV64340_PCI_0_DEVCS_1_BASE_ADDR_REMAP 0xd50 +#define MV64340_PCI_1_DEVCS_1_BASE_ADDR_REMAP 0xdd0 +#define MV64340_PCI_0_DEVCS_2_BASE_ADDR_REMAP 0xd58 +#define MV64340_PCI_1_DEVCS_2_BASE_ADDR_REMAP 0xdd8 +#define MV64340_PCI_0_DEVCS_3_BASE_ADDR_REMAP 0xc54 +#define MV64340_PCI_1_DEVCS_3_BASE_ADDR_REMAP 0xcd4 +#define MV64340_PCI_0_DEVCS_BOOTCS_BASE_ADDR_REMAP 0xd54 +#define MV64340_PCI_1_DEVCS_BOOTCS_BASE_ADDR_REMAP 0xdd4 +#define MV64340_PCI_0_P2P_MEM0_BASE_ADDR_REMAP_LOW 0xd5c +#define MV64340_PCI_1_P2P_MEM0_BASE_ADDR_REMAP_LOW 0xddc +#define MV64340_PCI_0_P2P_MEM0_BASE_ADDR_REMAP_HIGH 0xd60 +#define MV64340_PCI_1_P2P_MEM0_BASE_ADDR_REMAP_HIGH 0xde0 +#define MV64340_PCI_0_P2P_MEM1_BASE_ADDR_REMAP_LOW 0xd64 +#define MV64340_PCI_1_P2P_MEM1_BASE_ADDR_REMAP_LOW 0xde4 +#define MV64340_PCI_0_P2P_MEM1_BASE_ADDR_REMAP_HIGH 0xd68 +#define MV64340_PCI_1_P2P_MEM1_BASE_ADDR_REMAP_HIGH 0xde8 +#define MV64340_PCI_0_P2P_I_O_BASE_ADDR_REMAP 0xd6c +#define MV64340_PCI_1_P2P_I_O_BASE_ADDR_REMAP 0xdec +#define MV64340_PCI_0_CPU_BASE_ADDR_REMAP_LOW 0xd70 +#define MV64340_PCI_1_CPU_BASE_ADDR_REMAP_LOW 0xdf0 +#define MV64340_PCI_0_CPU_BASE_ADDR_REMAP_HIGH 0xd74 +#define MV64340_PCI_1_CPU_BASE_ADDR_REMAP_HIGH 0xdf4 +#define MV64340_PCI_0_INTEGRATED_SRAM_BASE_ADDR_REMAP 0xf00 +#define MV64340_PCI_1_INTEGRATED_SRAM_BASE_ADDR_REMAP 0xf80 +#define MV64340_PCI_0_EXPANSION_ROM_BASE_ADDR_REMAP 0xf38 +#define MV64340_PCI_1_EXPANSION_ROM_BASE_ADDR_REMAP 0xfb8 +#define MV64340_PCI_0_ADDR_DECODE_CONTROL 0xd3c +#define MV64340_PCI_1_ADDR_DECODE_CONTROL 0xdbc +#define MV64340_PCI_0_HEADERS_RETARGET_CONTROL 0xF40 +#define MV64340_PCI_1_HEADERS_RETARGET_CONTROL 0xFc0 +#define 
MV64340_PCI_0_HEADERS_RETARGET_BASE 0xF44 +#define MV64340_PCI_1_HEADERS_RETARGET_BASE 0xFc4 +#define MV64340_PCI_0_HEADERS_RETARGET_HIGH 0xF48 +#define MV64340_PCI_1_HEADERS_RETARGET_HIGH 0xFc8 + +/***********************************/ +/* PCI Control Register Map */ +/***********************************/ + +#define MV64340_PCI_0_DLL_STATUS_AND_COMMAND 0x1d20 +#define MV64340_PCI_1_DLL_STATUS_AND_COMMAND 0x1da0 +#define MV64340_PCI_0_MPP_PADS_DRIVE_CONTROL 0x1d1C +#define MV64340_PCI_1_MPP_PADS_DRIVE_CONTROL 0x1d9C +#define MV64340_PCI_0_COMMAND 0xc00 +#define MV64340_PCI_1_COMMAND 0xc80 +#define MV64340_PCI_0_MODE 0xd00 +#define MV64340_PCI_1_MODE 0xd80 +#define MV64340_PCI_0_RETRY 0xc04 +#define MV64340_PCI_1_RETRY 0xc84 +#define MV64340_PCI_0_READ_BUFFER_DISCARD_TIMER 0xd04 +#define MV64340_PCI_1_READ_BUFFER_DISCARD_TIMER 0xd84 +#define MV64340_PCI_0_MSI_TRIGGER_TIMER 0xc38 +#define MV64340_PCI_1_MSI_TRIGGER_TIMER 0xcb8 +#define MV64340_PCI_0_ARBITER_CONTROL 0x1d00 +#define MV64340_PCI_1_ARBITER_CONTROL 0x1d80 +#define MV64340_PCI_0_CROSS_BAR_CONTROL_LOW 0x1d08 +#define MV64340_PCI_1_CROSS_BAR_CONTROL_LOW 0x1d88 +#define MV64340_PCI_0_CROSS_BAR_CONTROL_HIGH 0x1d0c +#define MV64340_PCI_1_CROSS_BAR_CONTROL_HIGH 0x1d8c +#define MV64340_PCI_0_CROSS_BAR_TIMEOUT 0x1d04 +#define MV64340_PCI_1_CROSS_BAR_TIMEOUT 0x1d84 +#define MV64340_PCI_0_SYNC_BARRIER_TRIGGER_REG 0x1D18 +#define MV64340_PCI_1_SYNC_BARRIER_TRIGGER_REG 0x1D98 +#define MV64340_PCI_0_SYNC_BARRIER_VIRTUAL_REG 0x1d10 +#define MV64340_PCI_1_SYNC_BARRIER_VIRTUAL_REG 0x1d90 +#define MV64340_PCI_0_P2P_CONFIG 0x1d14 +#define MV64340_PCI_1_P2P_CONFIG 0x1d94 + +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_0_LOW 0x1e00 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_0_HIGH 0x1e04 +#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_0 0x1e08 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_1_LOW 0x1e10 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_1_HIGH 0x1e14 +#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_1 0x1e18 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_2_LOW 0x1e20 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_2_HIGH 0x1e24 +#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_2 0x1e28 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_3_LOW 0x1e30 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_3_HIGH 0x1e34 +#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_3 0x1e38 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_4_LOW 0x1e40 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_4_HIGH 0x1e44 +#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_4 0x1e48 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_5_LOW 0x1e50 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_5_HIGH 0x1e54 +#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_5 0x1e58 + +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_0_LOW 0x1e80 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_0_HIGH 0x1e84 +#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_0 0x1e88 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_1_LOW 0x1e90 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_1_HIGH 0x1e94 +#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_1 0x1e98 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_2_LOW 0x1ea0 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_2_HIGH 0x1ea4 +#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_2 0x1ea8 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_3_LOW 0x1eb0 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_3_HIGH 0x1eb4 +#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_3 0x1eb8 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_4_LOW 0x1ec0 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_4_HIGH 0x1ec4 +#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_4 0x1ec8 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_5_LOW 0x1ed0 +#define 
MV64340_PCI_1_ACCESS_CONTROL_BASE_5_HIGH 0x1ed4 +#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_5 0x1ed8 + +/****************************************/ +/* PCI Configuration Access Registers */ +/****************************************/ + +#define MV64340_PCI_0_CONFIG_ADDR 0xcf8 +#define MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG 0xcfc +#define MV64340_PCI_1_CONFIG_ADDR 0xc78 +#define MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG 0xc7c +#define MV64340_PCI_0_INTERRUPT_ACKNOWLEDGE_VIRTUAL_REG 0xc34 +#define MV64340_PCI_1_INTERRUPT_ACKNOWLEDGE_VIRTUAL_REG 0xcb4 + +/****************************************/ +/* PCI Error Report Registers */ +/****************************************/ + +#define MV64340_PCI_0_SERR_MASK 0xc28 +#define MV64340_PCI_1_SERR_MASK 0xca8 +#define MV64340_PCI_0_ERROR_ADDR_LOW 0x1d40 +#define MV64340_PCI_1_ERROR_ADDR_LOW 0x1dc0 +#define MV64340_PCI_0_ERROR_ADDR_HIGH 0x1d44 +#define MV64340_PCI_1_ERROR_ADDR_HIGH 0x1dc4 +#define MV64340_PCI_0_ERROR_ATTRIBUTE 0x1d48 +#define MV64340_PCI_1_ERROR_ATTRIBUTE 0x1dc8 +#define MV64340_PCI_0_ERROR_COMMAND 0x1d50 +#define MV64340_PCI_1_ERROR_COMMAND 0x1dd0 +#define MV64340_PCI_0_ERROR_CAUSE 0x1d58 +#define MV64340_PCI_1_ERROR_CAUSE 0x1dd8 +#define MV64340_PCI_0_ERROR_MASK 0x1d5c +#define MV64340_PCI_1_ERROR_MASK 0x1ddc + +/****************************************/ +/* PCI Debug Registers */ +/****************************************/ + +#define MV64340_PCI_0_MMASK 0X1D24 +#define MV64340_PCI_1_MMASK 0X1DA4 + +/*********************************************/ +/* PCI Configuration, Function 0, Registers */ +/*********************************************/ + +#define MV64340_PCI_DEVICE_AND_VENDOR_ID 0x000 +#define MV64340_PCI_STATUS_AND_COMMAND 0x004 +#define MV64340_PCI_CLASS_CODE_AND_REVISION_ID 0x008 +#define MV64340_PCI_BIST_HEADER_TYPE_LATENCY_TIMER_CACHE_LINE 0x00C + +#define MV64340_PCI_SCS_0_BASE_ADDR_LOW 0x010 +#define MV64340_PCI_SCS_0_BASE_ADDR_HIGH 0x014 +#define MV64340_PCI_SCS_1_BASE_ADDR_LOW 0x018 +#define MV64340_PCI_SCS_1_BASE_ADDR_HIGH 0x01C +#define MV64340_PCI_INTERNAL_REG_MEM_MAPPED_BASE_ADDR_LOW 0x020 +#define MV64340_PCI_INTERNAL_REG_MEM_MAPPED_BASE_ADDR_HIGH 0x024 +#define MV64340_PCI_SUBSYSTEM_ID_AND_SUBSYSTEM_VENDOR_ID 0x02c +#define MV64340_PCI_EXPANSION_ROM_BASE_ADDR_REG 0x030 +#define MV64340_PCI_CAPABILTY_LIST_POINTER 0x034 +#define MV64340_PCI_INTERRUPT_PIN_AND_LINE 0x03C + /* capability list */ +#define MV64340_PCI_POWER_MANAGEMENT_CAPABILITY 0x040 +#define MV64340_PCI_POWER_MANAGEMENT_STATUS_AND_CONTROL 0x044 +#define MV64340_PCI_VPD_ADDR 0x048 +#define MV64340_PCI_VPD_DATA 0x04c +#define MV64340_PCI_MSI_MESSAGE_CONTROL 0x050 +#define MV64340_PCI_MSI_MESSAGE_ADDR 0x054 +#define MV64340_PCI_MSI_MESSAGE_UPPER_ADDR 0x058 +#define MV64340_PCI_MSI_MESSAGE_DATA 0x05c +#define MV64340_PCI_X_COMMAND 0x060 +#define MV64340_PCI_X_STATUS 0x064 +#define MV64340_PCI_COMPACT_PCI_HOT_SWAP 0x068 + +/***********************************************/ +/* PCI Configuration, Function 1, Registers */ +/***********************************************/ + +#define MV64340_PCI_SCS_2_BASE_ADDR_LOW 0x110 +#define MV64340_PCI_SCS_2_BASE_ADDR_HIGH 0x114 +#define MV64340_PCI_SCS_3_BASE_ADDR_LOW 0x118 +#define MV64340_PCI_SCS_3_BASE_ADDR_HIGH 0x11c +#define MV64340_PCI_INTERNAL_SRAM_BASE_ADDR_LOW 0x120 +#define MV64340_PCI_INTERNAL_SRAM_BASE_ADDR_HIGH 0x124 + +/***********************************************/ +/* PCI Configuration, Function 2, Registers */ +/***********************************************/ + +#define MV64340_PCI_DEVCS_0_BASE_ADDR_LOW 
0x210 +#define MV64340_PCI_DEVCS_0_BASE_ADDR_HIGH 0x214 +#define MV64340_PCI_DEVCS_1_BASE_ADDR_LOW 0x218 +#define MV64340_PCI_DEVCS_1_BASE_ADDR_HIGH 0x21c +#define MV64340_PCI_DEVCS_2_BASE_ADDR_LOW 0x220 +#define MV64340_PCI_DEVCS_2_BASE_ADDR_HIGH 0x224 + +/***********************************************/ +/* PCI Configuration, Function 3, Registers */ +/***********************************************/ + +#define MV64340_PCI_DEVCS_3_BASE_ADDR_LOW 0x310 +#define MV64340_PCI_DEVCS_3_BASE_ADDR_HIGH 0x314 +#define MV64340_PCI_BOOT_CS_BASE_ADDR_LOW 0x318 +#define MV64340_PCI_BOOT_CS_BASE_ADDR_HIGH 0x31c +#define MV64340_PCI_CPU_BASE_ADDR_LOW 0x220 +#define MV64340_PCI_CPU_BASE_ADDR_HIGH 0x224 + +/***********************************************/ +/* PCI Configuration, Function 4, Registers */ +/***********************************************/ + +#define MV64340_PCI_P2P_MEM0_BASE_ADDR_LOW 0x410 +#define MV64340_PCI_P2P_MEM0_BASE_ADDR_HIGH 0x414 +#define MV64340_PCI_P2P_MEM1_BASE_ADDR_LOW 0x418 +#define MV64340_PCI_P2P_MEM1_BASE_ADDR_HIGH 0x41c +#define MV64340_PCI_P2P_I_O_BASE_ADDR 0x420 +#define MV64340_PCI_INTERNAL_REGS_I_O_MAPPED_BASE_ADDR 0x424 + +/****************************************/ +/* Messaging Unit Registers (I20) */ +/****************************************/ + +#define MV64340_I2O_INBOUND_MESSAGE_REG0_PCI_0_SIDE 0x010 +#define MV64340_I2O_INBOUND_MESSAGE_REG1_PCI_0_SIDE 0x014 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_PCI_0_SIDE 0x018 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_PCI_0_SIDE 0x01C +#define MV64340_I2O_INBOUND_DOORBELL_REG_PCI_0_SIDE 0x020 +#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_PCI_0_SIDE 0x024 +#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_PCI_0_SIDE 0x028 +#define MV64340_I2O_OUTBOUND_DOORBELL_REG_PCI_0_SIDE 0x02C +#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_PCI_0_SIDE 0x030 +#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_PCI_0_SIDE 0x034 +#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_0_SIDE 0x040 +#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_0_SIDE 0x044 +#define MV64340_I2O_QUEUE_CONTROL_REG_PCI_0_SIDE 0x050 +#define MV64340_I2O_QUEUE_BASE_ADDR_REG_PCI_0_SIDE 0x054 +#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_PCI_0_SIDE 0x060 +#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_PCI_0_SIDE 0x064 +#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_PCI_0_SIDE 0x068 +#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_PCI_0_SIDE 0x06C +#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_PCI_0_SIDE 0x070 +#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_PCI_0_SIDE 0x074 +#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_PCI_0_SIDE 0x0F8 +#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_PCI_0_SIDE 0x0FC + +#define MV64340_I2O_INBOUND_MESSAGE_REG0_PCI_1_SIDE 0x090 +#define MV64340_I2O_INBOUND_MESSAGE_REG1_PCI_1_SIDE 0x094 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_PCI_1_SIDE 0x098 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_PCI_1_SIDE 0x09C +#define MV64340_I2O_INBOUND_DOORBELL_REG_PCI_1_SIDE 0x0A0 +#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_PCI_1_SIDE 0x0A4 +#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_PCI_1_SIDE 0x0A8 +#define MV64340_I2O_OUTBOUND_DOORBELL_REG_PCI_1_SIDE 0x0AC +#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_PCI_1_SIDE 0x0B0 +#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_PCI_1_SIDE 0x0B4 +#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_1_SIDE 0x0C0 +#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_1_SIDE 0x0C4 +#define MV64340_I2O_QUEUE_CONTROL_REG_PCI_1_SIDE 0x0D0 +#define 
MV64340_I2O_QUEUE_BASE_ADDR_REG_PCI_1_SIDE 0x0D4 +#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_PCI_1_SIDE 0x0E0 +#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_PCI_1_SIDE 0x0E4 +#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_PCI_1_SIDE 0x0E8 +#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_PCI_1_SIDE 0x0EC +#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_PCI_1_SIDE 0x0F0 +#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_PCI_1_SIDE 0x0F4 +#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_PCI_1_SIDE 0x078 +#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_PCI_1_SIDE 0x07C + +#define MV64340_I2O_INBOUND_MESSAGE_REG0_CPU0_SIDE 0x1C10 +#define MV64340_I2O_INBOUND_MESSAGE_REG1_CPU0_SIDE 0x1C14 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_CPU0_SIDE 0x1C18 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_CPU0_SIDE 0x1C1C +#define MV64340_I2O_INBOUND_DOORBELL_REG_CPU0_SIDE 0x1C20 +#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_CPU0_SIDE 0x1C24 +#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_CPU0_SIDE 0x1C28 +#define MV64340_I2O_OUTBOUND_DOORBELL_REG_CPU0_SIDE 0x1C2C +#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_CPU0_SIDE 0x1C30 +#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_CPU0_SIDE 0x1C34 +#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_CPU0_SIDE 0x1C40 +#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_CPU0_SIDE 0x1C44 +#define MV64340_I2O_QUEUE_CONTROL_REG_CPU0_SIDE 0x1C50 +#define MV64340_I2O_QUEUE_BASE_ADDR_REG_CPU0_SIDE 0x1C54 +#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_CPU0_SIDE 0x1C60 +#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_CPU0_SIDE 0x1C64 +#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_CPU0_SIDE 0x1C68 +#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_CPU0_SIDE 0x1C6C +#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_CPU0_SIDE 0x1C70 +#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_CPU0_SIDE 0x1C74 +#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_CPU0_SIDE 0x1CF8 +#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_CPU0_SIDE 0x1CFC +#define MV64340_I2O_INBOUND_MESSAGE_REG0_CPU1_SIDE 0x1C90 +#define MV64340_I2O_INBOUND_MESSAGE_REG1_CPU1_SIDE 0x1C94 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_CPU1_SIDE 0x1C98 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_CPU1_SIDE 0x1C9C +#define MV64340_I2O_INBOUND_DOORBELL_REG_CPU1_SIDE 0x1CA0 +#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_CPU1_SIDE 0x1CA4 +#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_CPU1_SIDE 0x1CA8 +#define MV64340_I2O_OUTBOUND_DOORBELL_REG_CPU1_SIDE 0x1CAC +#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_CPU1_SIDE 0x1CB0 +#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_CPU1_SIDE 0x1CB4 +#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_CPU1_SIDE 0x1CC0 +#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_CPU1_SIDE 0x1CC4 +#define MV64340_I2O_QUEUE_CONTROL_REG_CPU1_SIDE 0x1CD0 +#define MV64340_I2O_QUEUE_BASE_ADDR_REG_CPU1_SIDE 0x1CD4 +#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_CPU1_SIDE 0x1CE0 +#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_CPU1_SIDE 0x1CE4 +#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_CPU1_SIDE 0x1CE8 +#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_CPU1_SIDE 0x1CEC +#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_CPU1_SIDE 0x1CF0 +#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_CPU1_SIDE 0x1CF4 +#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_CPU1_SIDE 0x1C78 +#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_CPU1_SIDE 0x1C7C + +/****************************************/ +/* Ethernet Unit Registers */ 
+/****************************************/ + +/*******************************************/ +/* CUNIT Registers */ +/*******************************************/ + + /* Address Decoding Register Map */ + +#define MV64340_CUNIT_BASE_ADDR_REG0 0xf200 +#define MV64340_CUNIT_BASE_ADDR_REG1 0xf208 +#define MV64340_CUNIT_BASE_ADDR_REG2 0xf210 +#define MV64340_CUNIT_BASE_ADDR_REG3 0xf218 +#define MV64340_CUNIT_SIZE0 0xf204 +#define MV64340_CUNIT_SIZE1 0xf20c +#define MV64340_CUNIT_SIZE2 0xf214 +#define MV64340_CUNIT_SIZE3 0xf21c +#define MV64340_CUNIT_HIGH_ADDR_REMAP_REG0 0xf240 +#define MV64340_CUNIT_HIGH_ADDR_REMAP_REG1 0xf244 +#define MV64340_CUNIT_BASE_ADDR_ENABLE_REG 0xf250 +#define MV64340_MPSC0_ACCESS_PROTECTION_REG 0xf254 +#define MV64340_MPSC1_ACCESS_PROTECTION_REG 0xf258 +#define MV64340_CUNIT_INTERNAL_SPACE_BASE_ADDR_REG 0xf25C + + /* Error Report Registers */ + +#define MV64340_CUNIT_INTERRUPT_CAUSE_REG 0xf310 +#define MV64340_CUNIT_INTERRUPT_MASK_REG 0xf314 +#define MV64340_CUNIT_ERROR_ADDR 0xf318 + + /* Cunit Control Registers */ + +#define MV64340_CUNIT_ARBITER_CONTROL_REG 0xf300 +#define MV64340_CUNIT_CONFIG_REG 0xb40c +#define MV64340_CUNIT_CRROSBAR_TIMEOUT_REG 0xf304 + + /* Cunit Debug Registers */ + +#define MV64340_CUNIT_DEBUG_LOW 0xf340 +#define MV64340_CUNIT_DEBUG_HIGH 0xf344 +#define MV64340_CUNIT_MMASK 0xf380 + + /* MPSCs Clocks Routing Registers */ + +#define MV64340_MPSC_ROUTING_REG 0xb400 +#define MV64340_MPSC_RX_CLOCK_ROUTING_REG 0xb404 +#define MV64340_MPSC_TX_CLOCK_ROUTING_REG 0xb408 + + /* MPSCs Interrupts Registers */ + +#define MV64340_MPSC_CAUSE_REG(port) (0xb804 + (port << 3)) +#define MV64340_MPSC_MASK_REG(port) (0xb884 + (port << 3)) + +#define MV64340_MPSC_MAIN_CONFIG_LOW(port) (0x8000 + (port << 12)) +#define MV64340_MPSC_MAIN_CONFIG_HIGH(port) (0x8004 + (port << 12)) +#define MV64340_MPSC_PROTOCOL_CONFIG(port) (0x8008 + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG1(port) (0x800c + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG2(port) (0x8010 + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG3(port) (0x8014 + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG4(port) (0x8018 + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG5(port) (0x801c + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG6(port) (0x8020 + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG7(port) (0x8024 + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG8(port) (0x8028 + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG9(port) (0x802c + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG10(port) (0x8030 + (port << 12)) + + /* MPSC0 Registers */ + + +/***************************************/ +/* SDMA Registers */ +/***************************************/ + +#define MV64340_SDMA_CONFIG_REG(channel) (0x4000 + (channel << 13)) +#define MV64340_SDMA_COMMAND_REG(channel) (0x4008 + (channel << 13)) +#define MV64340_SDMA_CURRENT_RX_DESCRIPTOR_POINTER(channel) (0x4810 + (channel << 13)) +#define MV64340_SDMA_CURRENT_TX_DESCRIPTOR_POINTER(channel) (0x4c10 + (channel << 13)) +#define MV64340_SDMA_FIRST_TX_DESCRIPTOR_POINTER(channel) (0x4c14 + (channel << 13)) + +#define MV64340_SDMA_CAUSE_REG 0xb800 +#define MV64340_SDMA_MASK_REG 0xb880 + +/* BRG Interrupts */ + +#define MV64340_BRG_CONFIG_REG(brg) (0xb200 + (brg << 3)) +#define MV64340_BRG_BAUDE_TUNING_REG(brg) (0xb208 + (brg << 3)) +#define MV64340_BRG_CAUSE_REG 0xb834 +#define MV64340_BRG_MASK_REG 0xb8b4 + +/****************************************/ +/* DMA Channel Control */ +/****************************************/ + +#define MV64340_DMA_CHANNEL0_CONTROL 
0x840 +#define MV64340_DMA_CHANNEL0_CONTROL_HIGH 0x880 +#define MV64340_DMA_CHANNEL1_CONTROL 0x844 +#define MV64340_DMA_CHANNEL1_CONTROL_HIGH 0x884 +#define MV64340_DMA_CHANNEL2_CONTROL 0x848 +#define MV64340_DMA_CHANNEL2_CONTROL_HIGH 0x888 +#define MV64340_DMA_CHANNEL3_CONTROL 0x84C +#define MV64340_DMA_CHANNEL3_CONTROL_HIGH 0x88C + + +/****************************************/ +/* IDMA Registers */ +/****************************************/ + +#define MV64340_DMA_CHANNEL0_BYTE_COUNT 0x800 +#define MV64340_DMA_CHANNEL1_BYTE_COUNT 0x804 +#define MV64340_DMA_CHANNEL2_BYTE_COUNT 0x808 +#define MV64340_DMA_CHANNEL3_BYTE_COUNT 0x80C +#define MV64340_DMA_CHANNEL0_SOURCE_ADDR 0x810 +#define MV64340_DMA_CHANNEL1_SOURCE_ADDR 0x814 +#define MV64340_DMA_CHANNEL2_SOURCE_ADDR 0x818 +#define MV64340_DMA_CHANNEL3_SOURCE_ADDR 0x81c +#define MV64340_DMA_CHANNEL0_DESTINATION_ADDR 0x820 +#define MV64340_DMA_CHANNEL1_DESTINATION_ADDR 0x824 +#define MV64340_DMA_CHANNEL2_DESTINATION_ADDR 0x828 +#define MV64340_DMA_CHANNEL3_DESTINATION_ADDR 0x82C +#define MV64340_DMA_CHANNEL0_NEXT_DESCRIPTOR_POINTER 0x830 +#define MV64340_DMA_CHANNEL1_NEXT_DESCRIPTOR_POINTER 0x834 +#define MV64340_DMA_CHANNEL2_NEXT_DESCRIPTOR_POINTER 0x838 +#define MV64340_DMA_CHANNEL3_NEXT_DESCRIPTOR_POINTER 0x83C +#define MV64340_DMA_CHANNEL0_CURRENT_DESCRIPTOR_POINTER 0x870 +#define MV64340_DMA_CHANNEL1_CURRENT_DESCRIPTOR_POINTER 0x874 +#define MV64340_DMA_CHANNEL2_CURRENT_DESCRIPTOR_POINTER 0x878 +#define MV64340_DMA_CHANNEL3_CURRENT_DESCRIPTOR_POINTER 0x87C + + /* IDMA Address Decoding Base Address Registers */ + +#define MV64340_DMA_BASE_ADDR_REG0 0xa00 +#define MV64340_DMA_BASE_ADDR_REG1 0xa08 +#define MV64340_DMA_BASE_ADDR_REG2 0xa10 +#define MV64340_DMA_BASE_ADDR_REG3 0xa18 +#define MV64340_DMA_BASE_ADDR_REG4 0xa20 +#define MV64340_DMA_BASE_ADDR_REG5 0xa28 +#define MV64340_DMA_BASE_ADDR_REG6 0xa30 +#define MV64340_DMA_BASE_ADDR_REG7 0xa38 + + /* IDMA Address Decoding Size Address Register */ + +#define MV64340_DMA_SIZE_REG0 0xa04 +#define MV64340_DMA_SIZE_REG1 0xa0c +#define MV64340_DMA_SIZE_REG2 0xa14 +#define MV64340_DMA_SIZE_REG3 0xa1c +#define MV64340_DMA_SIZE_REG4 0xa24 +#define MV64340_DMA_SIZE_REG5 0xa2c +#define MV64340_DMA_SIZE_REG6 0xa34 +#define MV64340_DMA_SIZE_REG7 0xa3C + + /* IDMA Address Decoding High Address Remap and Access Protection Registers */ + +#define MV64340_DMA_HIGH_ADDR_REMAP_REG0 0xa60 +#define MV64340_DMA_HIGH_ADDR_REMAP_REG1 0xa64 +#define MV64340_DMA_HIGH_ADDR_REMAP_REG2 0xa68 +#define MV64340_DMA_HIGH_ADDR_REMAP_REG3 0xa6C +#define MV64340_DMA_BASE_ADDR_ENABLE_REG 0xa80 +#define MV64340_DMA_CHANNEL0_ACCESS_PROTECTION_REG 0xa70 +#define MV64340_DMA_CHANNEL1_ACCESS_PROTECTION_REG 0xa74 +#define MV64340_DMA_CHANNEL2_ACCESS_PROTECTION_REG 0xa78 +#define MV64340_DMA_CHANNEL3_ACCESS_PROTECTION_REG 0xa7c +#define MV64340_DMA_ARBITER_CONTROL 0x860 +#define MV64340_DMA_CROSS_BAR_TIMEOUT 0x8d0 + + /* IDMA Headers Retarget Registers */ + +#define MV64340_DMA_HEADERS_RETARGET_CONTROL 0xa84 +#define MV64340_DMA_HEADERS_RETARGET_BASE 0xa88 + + /* IDMA Interrupt Register */ + +#define MV64340_DMA_INTERRUPT_CAUSE_REG 0x8c0 +#define MV64340_DMA_INTERRUPT_CAUSE_MASK 0x8c4 +#define MV64340_DMA_ERROR_ADDR 0x8c8 +#define MV64340_DMA_ERROR_SELECT 0x8cc + + /* IDMA Debug Register ( for internal use ) */ + +#define MV64340_DMA_DEBUG_LOW 0x8e0 +#define MV64340_DMA_DEBUG_HIGH 0x8e4 +#define MV64340_DMA_SPARE 0xA8C + +/****************************************/ +/* Timer_Counter */ +/****************************************/ + 
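As an aside on the SDMA, MPSC and BRG macros a few sections above: each takes a channel or port index and folds it into the register offset with a shift, so consecutive channels get register blocks at a fixed stride. The standalone sketch below is illustrative only and not part of this header; the unprefixed macro names are local to the example, but the arithmetic mirrors the definitions above.

#include <stdio.h>

/* Local copies of the addressing pattern used by the header above:
 * each SDMA channel occupies a 0x2000-byte block (channel << 13),
 * each MPSC port a 0x1000-byte block (port << 12). */
#define SDMA_CONFIG_REG(channel)    (0x4000 + ((channel) << 13))
#define SDMA_COMMAND_REG(channel)   (0x4008 + ((channel) << 13))
#define MPSC_MAIN_CONFIG_LOW(port)  (0x8000 + ((port) << 12))

int main(void)
{
    for (int i = 0; i < 2; i++) {
        printf("channel/port %d: SDMA config 0x%04x, SDMA command 0x%04x, "
               "MPSC main config low 0x%04x\n",
               i, SDMA_CONFIG_REG(i), SDMA_COMMAND_REG(i),
               MPSC_MAIN_CONFIG_LOW(i));
    }
    return 0;
}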
+#define MV64340_TIMER_COUNTER0 0x850 +#define MV64340_TIMER_COUNTER1 0x854 +#define MV64340_TIMER_COUNTER2 0x858 +#define MV64340_TIMER_COUNTER3 0x85C +#define MV64340_TIMER_COUNTER_0_3_CONTROL 0x864 +#define MV64340_TIMER_COUNTER_0_3_INTERRUPT_CAUSE 0x868 +#define MV64340_TIMER_COUNTER_0_3_INTERRUPT_MASK 0x86c + +/****************************************/ +/* Watchdog registers */ +/****************************************/ + +#define MV64340_WATCHDOG_CONFIG_REG 0xb410 +#define MV64340_WATCHDOG_VALUE_REG 0xb414 + +/****************************************/ +/* I2C Registers */ +/****************************************/ + +#define MV64XXX_I2C_OFFSET 0xc000 +#define MV64XXX_I2C_REG_BLOCK_SIZE 0x0020 + +/****************************************/ +/* GPP Interface Registers */ +/****************************************/ + +#define MV64340_GPP_IO_CONTROL 0xf100 +#define MV64340_GPP_LEVEL_CONTROL 0xf110 +#define MV64340_GPP_VALUE 0xf104 +#define MV64340_GPP_INTERRUPT_CAUSE 0xf108 +#define MV64340_GPP_INTERRUPT_MASK0 0xf10c +#define MV64340_GPP_INTERRUPT_MASK1 0xf114 +#define MV64340_GPP_VALUE_SET 0xf118 +#define MV64340_GPP_VALUE_CLEAR 0xf11c + +/****************************************/ +/* Interrupt Controller Registers */ +/****************************************/ + +/****************************************/ +/* Interrupts */ +/****************************************/ + +#define MV64340_MAIN_INTERRUPT_CAUSE_LOW 0x004 +#define MV64340_MAIN_INTERRUPT_CAUSE_HIGH 0x00c +#define MV64340_CPU_INTERRUPT0_MASK_LOW 0x014 +#define MV64340_CPU_INTERRUPT0_MASK_HIGH 0x01c +#define MV64340_CPU_INTERRUPT0_SELECT_CAUSE 0x024 +#define MV64340_CPU_INTERRUPT1_MASK_LOW 0x034 +#define MV64340_CPU_INTERRUPT1_MASK_HIGH 0x03c +#define MV64340_CPU_INTERRUPT1_SELECT_CAUSE 0x044 +#define MV64340_INTERRUPT0_MASK_0_LOW 0x054 +#define MV64340_INTERRUPT0_MASK_0_HIGH 0x05c +#define MV64340_INTERRUPT0_SELECT_CAUSE 0x064 +#define MV64340_INTERRUPT1_MASK_0_LOW 0x074 +#define MV64340_INTERRUPT1_MASK_0_HIGH 0x07c +#define MV64340_INTERRUPT1_SELECT_CAUSE 0x084 + +/****************************************/ +/* MPP Interface Registers */ +/****************************************/ + +#define MV64340_MPP_CONTROL0 0xf000 +#define MV64340_MPP_CONTROL1 0xf004 +#define MV64340_MPP_CONTROL2 0xf008 +#define MV64340_MPP_CONTROL3 0xf00c + +/****************************************/ +/* Serial Initialization registers */ +/****************************************/ + +#define MV64340_SERIAL_INIT_LAST_DATA 0xf324 +#define MV64340_SERIAL_INIT_CONTROL 0xf328 +#define MV64340_SERIAL_INIT_STATUS 0xf32c + +#endif /* ASM_MV643XX_H */ diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c index 54f57c660a9..5c375a9f285 100644 --- a/hw/pci-host/pnv_phb4.c +++ b/hw/pci-host/pnv_phb4.c @@ -392,7 +392,7 @@ static void pnv_phb4_ioda_write(PnvPHB4 *phb, uint64_t val) v &= 0xffffffffffff0000ull; v |= 0x000000000000cfffull & val; } - *tptr = val; + *tptr = v; break; } case IODA3_TBL_MBT: diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c index 5ad1424b31a..89c1b53dd72 100644 --- a/hw/pci-host/ppce500.c +++ b/hw/pci-host/ppce500.c @@ -415,7 +415,6 @@ static const VMStateDescription vmstate_ppce500_pci = { } }; -#include "exec/address-spaces.h" static void e500_pcihost_bridge_realize(PCIDevice *d, Error **errp) { diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c deleted file mode 100644 index 0a9162fba97..00000000000 --- a/hw/pci-host/prep.c +++ /dev/null @@ -1,443 +0,0 @@ -/* - * QEMU PREP PCI host - * - * Copyright (c) 2006 Fabrice 
Bellard - * Copyright (c) 2011-2013 Andreas Färber - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "qemu/datadir.h" -#include "qemu/units.h" -#include "qemu/log.h" -#include "qapi/error.h" -#include "hw/pci/pci.h" -#include "hw/pci/pci_bus.h" -#include "hw/pci/pci_host.h" -#include "hw/qdev-properties.h" -#include "migration/vmstate.h" -#include "hw/intc/i8259.h" -#include "hw/irq.h" -#include "hw/loader.h" -#include "hw/or-irq.h" -#include "exec/address-spaces.h" -#include "elf.h" -#include "qom/object.h" - -#define TYPE_RAVEN_PCI_DEVICE "raven" -#define TYPE_RAVEN_PCI_HOST_BRIDGE "raven-pcihost" - -OBJECT_DECLARE_SIMPLE_TYPE(RavenPCIState, RAVEN_PCI_DEVICE) - -struct RavenPCIState { - PCIDevice dev; - - uint32_t elf_machine; - char *bios_name; - MemoryRegion bios; -}; - -typedef struct PRePPCIState PREPPCIState; -DECLARE_INSTANCE_CHECKER(PREPPCIState, RAVEN_PCI_HOST_BRIDGE, - TYPE_RAVEN_PCI_HOST_BRIDGE) - -struct PRePPCIState { - PCIHostState parent_obj; - - qemu_or_irq *or_irq; - qemu_irq pci_irqs[PCI_NUM_PINS]; - PCIBus pci_bus; - AddressSpace pci_io_as; - MemoryRegion pci_io; - MemoryRegion pci_io_non_contiguous; - MemoryRegion pci_memory; - MemoryRegion pci_intack; - MemoryRegion bm; - MemoryRegion bm_ram_alias; - MemoryRegion bm_pci_memory_alias; - AddressSpace bm_as; - RavenPCIState pci_dev; - - int contiguous_map; - bool is_legacy_prep; -}; - -#define BIOS_SIZE (1 * MiB) - -static inline uint32_t raven_pci_io_config(hwaddr addr) -{ - int i; - - for (i = 0; i < 11; i++) { - if ((addr & (1 << (11 + i))) != 0) { - break; - } - } - return (addr & 0x7ff) | (i << 11); -} - -static void raven_pci_io_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - PREPPCIState *s = opaque; - PCIHostState *phb = PCI_HOST_BRIDGE(s); - pci_data_write(phb->bus, raven_pci_io_config(addr), val, size); -} - -static uint64_t raven_pci_io_read(void *opaque, hwaddr addr, - unsigned int size) -{ - PREPPCIState *s = opaque; - PCIHostState *phb = PCI_HOST_BRIDGE(s); - return pci_data_read(phb->bus, raven_pci_io_config(addr), size); -} - -static const MemoryRegionOps raven_pci_io_ops = { - .read = raven_pci_io_read, - .write = raven_pci_io_write, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static uint64_t raven_intack_read(void *opaque, hwaddr addr, - unsigned int size) -{ - return pic_read_irq(isa_pic); -} - -static void raven_intack_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ 
- qemu_log_mask(LOG_UNIMP, "%s not implemented\n", __func__); -} - -static const MemoryRegionOps raven_intack_ops = { - .read = raven_intack_read, - .write = raven_intack_write, - .valid = { - .max_access_size = 1, - }, -}; - -static inline hwaddr raven_io_address(PREPPCIState *s, - hwaddr addr) -{ - if (s->contiguous_map == 0) { - /* 64 KB contiguous space for IOs */ - addr &= 0xFFFF; - } else { - /* 8 MB non-contiguous space for IOs */ - addr = (addr & 0x1F) | ((addr & 0x007FFF000) >> 7); - } - - /* FIXME: handle endianness switch */ - - return addr; -} - -static uint64_t raven_io_read(void *opaque, hwaddr addr, - unsigned int size) -{ - PREPPCIState *s = opaque; - uint8_t buf[4]; - - addr = raven_io_address(s, addr); - address_space_read(&s->pci_io_as, addr + 0x80000000, - MEMTXATTRS_UNSPECIFIED, buf, size); - - if (size == 1) { - return buf[0]; - } else if (size == 2) { - return lduw_le_p(buf); - } else if (size == 4) { - return ldl_le_p(buf); - } else { - g_assert_not_reached(); - } -} - -static void raven_io_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - PREPPCIState *s = opaque; - uint8_t buf[4]; - - addr = raven_io_address(s, addr); - - if (size == 1) { - buf[0] = val; - } else if (size == 2) { - stw_le_p(buf, val); - } else if (size == 4) { - stl_le_p(buf, val); - } else { - g_assert_not_reached(); - } - - address_space_write(&s->pci_io_as, addr + 0x80000000, - MEMTXATTRS_UNSPECIFIED, buf, size); -} - -static const MemoryRegionOps raven_io_ops = { - .read = raven_io_read, - .write = raven_io_write, - .endianness = DEVICE_LITTLE_ENDIAN, - .impl.max_access_size = 4, - .valid.unaligned = true, -}; - -static int raven_map_irq(PCIDevice *pci_dev, int irq_num) -{ - return (irq_num + (pci_dev->devfn >> 3)) & 1; -} - -static void raven_set_irq(void *opaque, int irq_num, int level) -{ - PREPPCIState *s = opaque; - - qemu_set_irq(s->pci_irqs[irq_num], level); -} - -static AddressSpace *raven_pcihost_set_iommu(PCIBus *bus, void *opaque, - int devfn) -{ - PREPPCIState *s = opaque; - - return &s->bm_as; -} - -static void raven_change_gpio(void *opaque, int n, int level) -{ - PREPPCIState *s = opaque; - - s->contiguous_map = level; -} - -static void raven_pcihost_realizefn(DeviceState *d, Error **errp) -{ - SysBusDevice *dev = SYS_BUS_DEVICE(d); - PCIHostState *h = PCI_HOST_BRIDGE(dev); - PREPPCIState *s = RAVEN_PCI_HOST_BRIDGE(dev); - MemoryRegion *address_space_mem = get_system_memory(); - int i; - - if (s->is_legacy_prep) { - for (i = 0; i < PCI_NUM_PINS; i++) { - sysbus_init_irq(dev, &s->pci_irqs[i]); - } - } else { - /* According to PReP specification section 6.1.6 "System Interrupt - * Assignments", all PCI interrupts are routed via IRQ 15 */ - s->or_irq = OR_IRQ(object_new(TYPE_OR_IRQ)); - object_property_set_int(OBJECT(s->or_irq), "num-lines", PCI_NUM_PINS, - &error_fatal); - qdev_realize(DEVICE(s->or_irq), NULL, &error_fatal); - sysbus_init_irq(dev, &s->or_irq->out_irq); - - for (i = 0; i < PCI_NUM_PINS; i++) { - s->pci_irqs[i] = qdev_get_gpio_in(DEVICE(s->or_irq), i); - } - } - - qdev_init_gpio_in(d, raven_change_gpio, 1); - - pci_bus_irqs(&s->pci_bus, raven_set_irq, raven_map_irq, s, PCI_NUM_PINS); - - memory_region_init_io(&h->conf_mem, OBJECT(h), &pci_host_conf_le_ops, s, - "pci-conf-idx", 4); - memory_region_add_subregion(&s->pci_io, 0xcf8, &h->conf_mem); - - memory_region_init_io(&h->data_mem, OBJECT(h), &pci_host_data_le_ops, s, - "pci-conf-data", 4); - memory_region_add_subregion(&s->pci_io, 0xcfc, &h->data_mem); - - memory_region_init_io(&h->mmcfg, 
OBJECT(s), &raven_pci_io_ops, s, - "pciio", 0x00400000); - memory_region_add_subregion(address_space_mem, 0x80800000, &h->mmcfg); - - memory_region_init_io(&s->pci_intack, OBJECT(s), &raven_intack_ops, s, - "pci-intack", 1); - memory_region_add_subregion(address_space_mem, 0xbffffff0, &s->pci_intack); - - /* TODO Remove once realize propagates to child devices. */ - qdev_realize(DEVICE(&s->pci_dev), BUS(&s->pci_bus), errp); -} - -static void raven_pcihost_initfn(Object *obj) -{ - PCIHostState *h = PCI_HOST_BRIDGE(obj); - PREPPCIState *s = RAVEN_PCI_HOST_BRIDGE(obj); - MemoryRegion *address_space_mem = get_system_memory(); - DeviceState *pci_dev; - - memory_region_init(&s->pci_io, obj, "pci-io", 0x3f800000); - memory_region_init_io(&s->pci_io_non_contiguous, obj, &raven_io_ops, s, - "pci-io-non-contiguous", 0x00800000); - memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000); - address_space_init(&s->pci_io_as, &s->pci_io, "raven-io"); - - /* CPU address space */ - memory_region_add_subregion(address_space_mem, 0x80000000, &s->pci_io); - memory_region_add_subregion_overlap(address_space_mem, 0x80000000, - &s->pci_io_non_contiguous, 1); - memory_region_add_subregion(address_space_mem, 0xc0000000, &s->pci_memory); - pci_root_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), DEVICE(obj), NULL, - &s->pci_memory, &s->pci_io, 0, TYPE_PCI_BUS); - - /* Bus master address space */ - memory_region_init(&s->bm, obj, "bm-raven", 4 * GiB); - memory_region_init_alias(&s->bm_pci_memory_alias, obj, "bm-pci-memory", - &s->pci_memory, 0, - memory_region_size(&s->pci_memory)); - memory_region_init_alias(&s->bm_ram_alias, obj, "bm-system", - get_system_memory(), 0, 0x80000000); - memory_region_add_subregion(&s->bm, 0 , &s->bm_pci_memory_alias); - memory_region_add_subregion(&s->bm, 0x80000000, &s->bm_ram_alias); - address_space_init(&s->bm_as, &s->bm, "raven-bm"); - pci_setup_iommu(&s->pci_bus, raven_pcihost_set_iommu, s); - - h->bus = &s->pci_bus; - - object_initialize(&s->pci_dev, sizeof(s->pci_dev), TYPE_RAVEN_PCI_DEVICE); - pci_dev = DEVICE(&s->pci_dev); - object_property_set_int(OBJECT(&s->pci_dev), "addr", PCI_DEVFN(0, 0), - NULL); - qdev_prop_set_bit(pci_dev, "multifunction", false); -} - -static void raven_realize(PCIDevice *d, Error **errp) -{ - RavenPCIState *s = RAVEN_PCI_DEVICE(d); - char *filename; - int bios_size = -1; - - d->config[0x0C] = 0x08; // cache_line_size - d->config[0x0D] = 0x10; // latency_timer - d->config[0x34] = 0x00; // capabilities_pointer - - memory_region_init_rom_nomigrate(&s->bios, OBJECT(s), "bios", BIOS_SIZE, - &error_fatal); - memory_region_add_subregion(get_system_memory(), (uint32_t)(-BIOS_SIZE), - &s->bios); - if (s->bios_name) { - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, s->bios_name); - if (filename) { - if (s->elf_machine != EM_NONE) { - bios_size = load_elf(filename, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, 1, s->elf_machine, - 0, 0); - } - if (bios_size < 0) { - bios_size = get_image_size(filename); - if (bios_size > 0 && bios_size <= BIOS_SIZE) { - hwaddr bios_addr; - bios_size = (bios_size + 0xfff) & ~0xfff; - bios_addr = (uint32_t)(-BIOS_SIZE); - bios_size = load_image_targphys(filename, bios_addr, - bios_size); - } - } - } - g_free(filename); - if (bios_size < 0 || bios_size > BIOS_SIZE) { - memory_region_del_subregion(get_system_memory(), &s->bios); - error_setg(errp, "Could not load bios image '%s'", s->bios_name); - return; - } - } - - vmstate_register_ram_global(&s->bios); -} - -static const VMStateDescription vmstate_raven = { - .name = 
"raven", - .version_id = 0, - .minimum_version_id = 0, - .fields = (VMStateField[]) { - VMSTATE_PCI_DEVICE(dev, RavenPCIState), - VMSTATE_END_OF_LIST() - }, -}; - -static void raven_class_init(ObjectClass *klass, void *data) -{ - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - DeviceClass *dc = DEVICE_CLASS(klass); - - k->realize = raven_realize; - k->vendor_id = PCI_VENDOR_ID_MOTOROLA; - k->device_id = PCI_DEVICE_ID_MOTOROLA_RAVEN; - k->revision = 0x00; - k->class_id = PCI_CLASS_BRIDGE_HOST; - dc->desc = "PReP Host Bridge - Motorola Raven"; - dc->vmsd = &vmstate_raven; - /* - * Reason: PCI-facing part of the host bridge, not usable without - * the host-facing part, which can't be device_add'ed, yet. - */ - dc->user_creatable = false; -} - -static const TypeInfo raven_info = { - .name = TYPE_RAVEN_PCI_DEVICE, - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(RavenPCIState), - .class_init = raven_class_init, - .interfaces = (InterfaceInfo[]) { - { INTERFACE_CONVENTIONAL_PCI_DEVICE }, - { }, - }, -}; - -static Property raven_pcihost_properties[] = { - DEFINE_PROP_UINT32("elf-machine", PREPPCIState, pci_dev.elf_machine, - EM_NONE), - DEFINE_PROP_STRING("bios-name", PREPPCIState, pci_dev.bios_name), - /* Temporary workaround until legacy prep machine is removed */ - DEFINE_PROP_BOOL("is-legacy-prep", PREPPCIState, is_legacy_prep, - false), - DEFINE_PROP_END_OF_LIST() -}; - -static void raven_pcihost_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); - dc->realize = raven_pcihost_realizefn; - device_class_set_props(dc, raven_pcihost_properties); - dc->fw_name = "pci"; -} - -static const TypeInfo raven_pcihost_info = { - .name = TYPE_RAVEN_PCI_HOST_BRIDGE, - .parent = TYPE_PCI_HOST_BRIDGE, - .instance_size = sizeof(PREPPCIState), - .instance_init = raven_pcihost_initfn, - .class_init = raven_pcihost_class_init, -}; - -static void raven_register_types(void) -{ - type_register_static(&raven_pcihost_info); - type_register_static(&raven_info); -} - -type_init(raven_register_types) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 2eb729dff58..ab5a47aff56 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -29,6 +29,7 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "hw/i386/pc.h" #include "hw/pci-host/q35.h" #include "hw/qdev-properties.h" @@ -64,6 +65,8 @@ static void q35_host_realize(DeviceState *dev, Error **errp) s->mch.address_space_io, 0, TYPE_PCIE_BUS); PC_MACHINE(qdev_get_machine())->bus = pci->bus; + pci->bypass_iommu = + PC_MACHINE(qdev_get_machine())->default_bus_bypass_iommu; qdev_realize(DEVICE(&s->mch), BUS(pci->bus), &error_fatal); } @@ -318,6 +321,8 @@ static void mch_update_pciexbar(MCHPCIState *mch) addr_mask |= MCH_HOST_BRIDGE_PCIEXBAR_64ADMSK; break; case MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_RVD: + qemu_log_mask(LOG_GUEST_ERROR, "Q35: Reserved PCIEXBAR LENGTH\n"); + return; default: abort(); } diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c new file mode 100644 index 00000000000..6e514f75eb8 --- /dev/null +++ b/hw/pci-host/raven.c @@ -0,0 +1,445 @@ +/* + * QEMU PREP PCI host + * + * Copyright (c) 2006 Fabrice Bellard + * Copyright (c) 2011-2013 Andreas Färber + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qemu/units.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci_host.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" +#include "hw/intc/i8259.h" +#include "hw/irq.h" +#include "hw/loader.h" +#include "hw/or-irq.h" +#include "elf.h" +#include "qom/object.h" + +#define TYPE_RAVEN_PCI_DEVICE "raven" +#define TYPE_RAVEN_PCI_HOST_BRIDGE "raven-pcihost" + +OBJECT_DECLARE_SIMPLE_TYPE(RavenPCIState, RAVEN_PCI_DEVICE) + +struct RavenPCIState { + PCIDevice dev; + + uint32_t elf_machine; + char *bios_name; + MemoryRegion bios; +}; + +typedef struct PRePPCIState PREPPCIState; +DECLARE_INSTANCE_CHECKER(PREPPCIState, RAVEN_PCI_HOST_BRIDGE, + TYPE_RAVEN_PCI_HOST_BRIDGE) + +struct PRePPCIState { + PCIHostState parent_obj; + + qemu_or_irq *or_irq; + qemu_irq pci_irqs[PCI_NUM_PINS]; + PCIBus pci_bus; + AddressSpace pci_io_as; + MemoryRegion pci_io; + MemoryRegion pci_io_non_contiguous; + MemoryRegion pci_memory; + MemoryRegion pci_intack; + MemoryRegion bm; + MemoryRegion bm_ram_alias; + MemoryRegion bm_pci_memory_alias; + AddressSpace bm_as; + RavenPCIState pci_dev; + + int contiguous_map; + bool is_legacy_prep; +}; + +#define BIOS_SIZE (1 * MiB) + +#define PCI_IO_BASE_ADDR 0x80000000 /* Physical address on main bus */ + +static inline uint32_t raven_pci_io_config(hwaddr addr) +{ + int i; + + for (i = 0; i < 11; i++) { + if ((addr & (1 << (11 + i))) != 0) { + break; + } + } + return (addr & 0x7ff) | (i << 11); +} + +static void raven_pci_io_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + PREPPCIState *s = opaque; + PCIHostState *phb = PCI_HOST_BRIDGE(s); + pci_data_write(phb->bus, raven_pci_io_config(addr), val, size); +} + +static uint64_t raven_pci_io_read(void *opaque, hwaddr addr, + unsigned int size) +{ + PREPPCIState *s = opaque; + PCIHostState *phb = PCI_HOST_BRIDGE(s); + return pci_data_read(phb->bus, raven_pci_io_config(addr), size); +} + +static const MemoryRegionOps raven_pci_io_ops = { + .read = raven_pci_io_read, + .write = raven_pci_io_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static uint64_t raven_intack_read(void *opaque, hwaddr addr, + unsigned int size) +{ + return pic_read_irq(isa_pic); +} + +static void raven_intack_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + qemu_log_mask(LOG_UNIMP, "%s not implemented\n", __func__); +} + +static const MemoryRegionOps raven_intack_ops = { + .read = raven_intack_read, + .write = raven_intack_write, + .valid = { + .max_access_size = 1, + }, +}; + +static inline hwaddr raven_io_address(PREPPCIState *s, + hwaddr addr) +{ + if 
(s->contiguous_map == 0) { + /* 64 KB contiguous space for IOs */ + addr &= 0xFFFF; + } else { + /* 8 MB non-contiguous space for IOs */ + addr = (addr & 0x1F) | ((addr & 0x007FFF000) >> 7); + } + + /* FIXME: handle endianness switch */ + + return addr; +} + +static uint64_t raven_io_read(void *opaque, hwaddr addr, + unsigned int size) +{ + PREPPCIState *s = opaque; + uint8_t buf[4]; + + addr = raven_io_address(s, addr); + address_space_read(&s->pci_io_as, addr + PCI_IO_BASE_ADDR, + MEMTXATTRS_UNSPECIFIED, buf, size); + + if (size == 1) { + return buf[0]; + } else if (size == 2) { + return lduw_le_p(buf); + } else if (size == 4) { + return ldl_le_p(buf); + } else { + g_assert_not_reached(); + } +} + +static void raven_io_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + PREPPCIState *s = opaque; + uint8_t buf[4]; + + addr = raven_io_address(s, addr); + + if (size == 1) { + buf[0] = val; + } else if (size == 2) { + stw_le_p(buf, val); + } else if (size == 4) { + stl_le_p(buf, val); + } else { + g_assert_not_reached(); + } + + address_space_write(&s->pci_io_as, addr + PCI_IO_BASE_ADDR, + MEMTXATTRS_UNSPECIFIED, buf, size); +} + +static const MemoryRegionOps raven_io_ops = { + .read = raven_io_read, + .write = raven_io_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl.max_access_size = 4, + .valid.unaligned = true, +}; + +static int raven_map_irq(PCIDevice *pci_dev, int irq_num) +{ + return (irq_num + (pci_dev->devfn >> 3)) & 1; +} + +static void raven_set_irq(void *opaque, int irq_num, int level) +{ + PREPPCIState *s = opaque; + + qemu_set_irq(s->pci_irqs[irq_num], level); +} + +static AddressSpace *raven_pcihost_set_iommu(PCIBus *bus, void *opaque, + int devfn) +{ + PREPPCIState *s = opaque; + + return &s->bm_as; +} + +static void raven_change_gpio(void *opaque, int n, int level) +{ + PREPPCIState *s = opaque; + + s->contiguous_map = level; +} + +static void raven_pcihost_realizefn(DeviceState *d, Error **errp) +{ + SysBusDevice *dev = SYS_BUS_DEVICE(d); + PCIHostState *h = PCI_HOST_BRIDGE(dev); + PREPPCIState *s = RAVEN_PCI_HOST_BRIDGE(dev); + MemoryRegion *address_space_mem = get_system_memory(); + int i; + + if (s->is_legacy_prep) { + for (i = 0; i < PCI_NUM_PINS; i++) { + sysbus_init_irq(dev, &s->pci_irqs[i]); + } + } else { + /* According to PReP specification section 6.1.6 "System Interrupt + * Assignments", all PCI interrupts are routed via IRQ 15 */ + s->or_irq = OR_IRQ(object_new(TYPE_OR_IRQ)); + object_property_set_int(OBJECT(s->or_irq), "num-lines", PCI_NUM_PINS, + &error_fatal); + qdev_realize(DEVICE(s->or_irq), NULL, &error_fatal); + sysbus_init_irq(dev, &s->or_irq->out_irq); + + for (i = 0; i < PCI_NUM_PINS; i++) { + s->pci_irqs[i] = qdev_get_gpio_in(DEVICE(s->or_irq), i); + } + } + + qdev_init_gpio_in(d, raven_change_gpio, 1); + + pci_bus_irqs(&s->pci_bus, raven_set_irq, raven_map_irq, s, PCI_NUM_PINS); + + memory_region_init_io(&h->conf_mem, OBJECT(h), &pci_host_conf_le_ops, s, + "pci-conf-idx", 4); + memory_region_add_subregion(&s->pci_io, 0xcf8, &h->conf_mem); + + memory_region_init_io(&h->data_mem, OBJECT(h), &pci_host_data_le_ops, s, + "pci-conf-data", 4); + memory_region_add_subregion(&s->pci_io, 0xcfc, &h->data_mem); + + memory_region_init_io(&h->mmcfg, OBJECT(s), &raven_pci_io_ops, s, + "pciio", 0x00400000); + memory_region_add_subregion(address_space_mem, 0x80800000, &h->mmcfg); + + memory_region_init_io(&s->pci_intack, OBJECT(s), &raven_intack_ops, s, + "pci-intack", 1); + memory_region_add_subregion(address_space_mem, 0xbffffff0, 
&s->pci_intack); + + /* TODO Remove once realize propagates to child devices. */ + qdev_realize(DEVICE(&s->pci_dev), BUS(&s->pci_bus), errp); +} + +static void raven_pcihost_initfn(Object *obj) +{ + PCIHostState *h = PCI_HOST_BRIDGE(obj); + PREPPCIState *s = RAVEN_PCI_HOST_BRIDGE(obj); + MemoryRegion *address_space_mem = get_system_memory(); + DeviceState *pci_dev; + + memory_region_init(&s->pci_io, obj, "pci-io", 0x3f800000); + memory_region_init_io(&s->pci_io_non_contiguous, obj, &raven_io_ops, s, + "pci-io-non-contiguous", 0x00800000); + memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000); + address_space_init(&s->pci_io_as, &s->pci_io, "raven-io"); + + /* CPU address space */ + memory_region_add_subregion(address_space_mem, PCI_IO_BASE_ADDR, + &s->pci_io); + memory_region_add_subregion_overlap(address_space_mem, PCI_IO_BASE_ADDR, + &s->pci_io_non_contiguous, 1); + memory_region_add_subregion(address_space_mem, 0xc0000000, &s->pci_memory); + pci_root_bus_init(&s->pci_bus, sizeof(s->pci_bus), DEVICE(obj), NULL, + &s->pci_memory, &s->pci_io, 0, TYPE_PCI_BUS); + + /* Bus master address space */ + memory_region_init(&s->bm, obj, "bm-raven", 4 * GiB); + memory_region_init_alias(&s->bm_pci_memory_alias, obj, "bm-pci-memory", + &s->pci_memory, 0, + memory_region_size(&s->pci_memory)); + memory_region_init_alias(&s->bm_ram_alias, obj, "bm-system", + get_system_memory(), 0, 0x80000000); + memory_region_add_subregion(&s->bm, 0 , &s->bm_pci_memory_alias); + memory_region_add_subregion(&s->bm, 0x80000000, &s->bm_ram_alias); + address_space_init(&s->bm_as, &s->bm, "raven-bm"); + pci_setup_iommu(&s->pci_bus, raven_pcihost_set_iommu, s); + + h->bus = &s->pci_bus; + + object_initialize(&s->pci_dev, sizeof(s->pci_dev), TYPE_RAVEN_PCI_DEVICE); + pci_dev = DEVICE(&s->pci_dev); + object_property_set_int(OBJECT(&s->pci_dev), "addr", PCI_DEVFN(0, 0), + NULL); + qdev_prop_set_bit(pci_dev, "multifunction", false); +} + +static void raven_realize(PCIDevice *d, Error **errp) +{ + RavenPCIState *s = RAVEN_PCI_DEVICE(d); + char *filename; + int bios_size = -1; + + d->config[0x0C] = 0x08; // cache_line_size + d->config[0x0D] = 0x10; // latency_timer + d->config[0x34] = 0x00; // capabilities_pointer + + memory_region_init_rom_nomigrate(&s->bios, OBJECT(s), "bios", BIOS_SIZE, + &error_fatal); + memory_region_add_subregion(get_system_memory(), (uint32_t)(-BIOS_SIZE), + &s->bios); + if (s->bios_name) { + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, s->bios_name); + if (filename) { + if (s->elf_machine != EM_NONE) { + bios_size = load_elf(filename, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, 1, s->elf_machine, + 0, 0); + } + if (bios_size < 0) { + bios_size = get_image_size(filename); + if (bios_size > 0 && bios_size <= BIOS_SIZE) { + hwaddr bios_addr; + bios_size = (bios_size + 0xfff) & ~0xfff; + bios_addr = (uint32_t)(-BIOS_SIZE); + bios_size = load_image_targphys(filename, bios_addr, + bios_size); + } + } + } + g_free(filename); + if (bios_size < 0 || bios_size > BIOS_SIZE) { + memory_region_del_subregion(get_system_memory(), &s->bios); + error_setg(errp, "Could not load bios image '%s'", s->bios_name); + return; + } + } + + vmstate_register_ram_global(&s->bios); +} + +static const VMStateDescription vmstate_raven = { + .name = "raven", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(dev, RavenPCIState), + VMSTATE_END_OF_LIST() + }, +}; + +static void raven_class_init(ObjectClass *klass, void *data) +{ + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + 
DeviceClass *dc = DEVICE_CLASS(klass); + + k->realize = raven_realize; + k->vendor_id = PCI_VENDOR_ID_MOTOROLA; + k->device_id = PCI_DEVICE_ID_MOTOROLA_RAVEN; + k->revision = 0x00; + k->class_id = PCI_CLASS_BRIDGE_HOST; + dc->desc = "PReP Host Bridge - Motorola Raven"; + dc->vmsd = &vmstate_raven; + /* + * Reason: PCI-facing part of the host bridge, not usable without + * the host-facing part, which can't be device_add'ed, yet. + */ + dc->user_creatable = false; +} + +static const TypeInfo raven_info = { + .name = TYPE_RAVEN_PCI_DEVICE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(RavenPCIState), + .class_init = raven_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { }, + }, +}; + +static Property raven_pcihost_properties[] = { + DEFINE_PROP_UINT32("elf-machine", PREPPCIState, pci_dev.elf_machine, + EM_NONE), + DEFINE_PROP_STRING("bios-name", PREPPCIState, pci_dev.bios_name), + /* Temporary workaround until legacy prep machine is removed */ + DEFINE_PROP_BOOL("is-legacy-prep", PREPPCIState, is_legacy_prep, + false), + DEFINE_PROP_END_OF_LIST() +}; + +static void raven_pcihost_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + dc->realize = raven_pcihost_realizefn; + device_class_set_props(dc, raven_pcihost_properties); + dc->fw_name = "pci"; +} + +static const TypeInfo raven_pcihost_info = { + .name = TYPE_RAVEN_PCI_HOST_BRIDGE, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_size = sizeof(PREPPCIState), + .instance_init = raven_pcihost_initfn, + .class_init = raven_pcihost_class_init, +}; + +static void raven_register_types(void) +{ + type_register_static(&raven_pcihost_info); + type_register_static(&raven_info); +} + +type_init(raven_register_types) diff --git a/hw/pci-host/sabre.c b/hw/pci-host/sabre.c index f41a0cc3013..949ecc21f2b 100644 --- a/hw/pci-host/sabre.c +++ b/hw/pci-host/sabre.c @@ -34,7 +34,6 @@ #include "hw/irq.h" #include "hw/pci-bridge/simba.h" #include "hw/pci-host/sabre.h" -#include "exec/address-spaces.h" #include "qapi/error.h" #include "qemu/log.h" #include "qemu/module.h" diff --git a/hw/pci-host/sh_pci.c b/hw/pci-host/sh_pci.c index 734892f47c7..719d6ca2a6d 100644 --- a/hw/pci-host/sh_pci.c +++ b/hw/pci-host/sh_pci.c @@ -30,7 +30,6 @@ #include "hw/pci/pci_host.h" #include "qemu/bswap.h" #include "qemu/module.h" -#include "exec/address-spaces.h" #include "qom/object.h" #define TYPE_SH_PCI_HOST_BRIDGE "sh_pci" @@ -50,13 +49,12 @@ struct SHPCIState { uint32_t iobr; }; -static void sh_pci_reg_write (void *p, hwaddr addr, uint64_t val, - unsigned size) +static void sh_pci_reg_write(void *p, hwaddr addr, uint64_t val, unsigned size) { SHPCIState *pcic = p; PCIHostState *phb = PCI_HOST_BRIDGE(pcic); - switch(addr) { + switch (addr) { case 0 ... 0xfc: stl_le_p(pcic->dev->config + addr, val); break; @@ -76,13 +74,12 @@ static void sh_pci_reg_write (void *p, hwaddr addr, uint64_t val, } } -static uint64_t sh_pci_reg_read (void *p, hwaddr addr, - unsigned size) +static uint64_t sh_pci_reg_read(void *p, hwaddr addr, unsigned size) { SHPCIState *pcic = p; PCIHostState *phb = PCI_HOST_BRIDGE(pcic); - switch(addr) { + switch (addr) { case 0 ... 0xfc: return ldl_le_p(pcic->dev->config + addr); case 0x1c0: diff --git a/hw/pci-host/trace-events b/hw/pci-host/trace-events index 7d8063ac421..630e9fcc5e7 100644 --- a/hw/pci-host/trace-events +++ b/hw/pci-host/trace-events @@ -1,8 +1,17 @@ -# See docs/devel/tracing.txt for syntax documentation. 
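The trace points added below follow the standard trace-events pattern: each entry declares an event name, its typed arguments, and a printf-style format string, and the build generates a matching trace_<name>() helper for device code to call. A minimal sketch of how the bonito event declared below could be emitted from C (the wrapper function and the size check are illustrative, not part of this patch):

/* illustrative caller for the bonito_spciconf_small_access trace event */
#include "qemu/osdep.h"
#include "trace.h"   /* generated from this directory's trace-events file */

static void spciconf_log_small_access(uint64_t addr, unsigned size)
{
    if (size < 4) {
        /* a no-op or an emitted event, depending on the trace backend */
        trace_bonito_spciconf_small_access(addr, size);
    }
}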
+# See docs/devel/tracing.rst for syntax documentation. + +# bonito.c +bonito_spciconf_small_access(uint64_t addr, unsigned size) "PCI config address is smaller then 32-bit, addr: 0x%"PRIx64", size: %u" # grackle.c grackle_set_irq(int irq_num, int level) "set_irq num %d level %d" +# mv64361.c +mv64361_region_map(const char *name, uint64_t poffs, uint64_t size, uint64_t moffs) "Mapping %s 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 +mv64361_region_enable(const char *op, int num) "Should %s region %d" +mv64361_reg_read(uint64_t addr, uint32_t val) "0x%"PRIx64" -> 0x%x" +mv64361_reg_write(uint64_t addr, uint64_t val) "0x%"PRIx64" <- 0x%"PRIx64 + # sabre.c sabre_set_request(int irq_num) "request irq %d" sabre_clear_request(int irq_num) "clear request irq %d" diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c index 3553277f941..f66384fa02d 100644 --- a/hw/pci-host/versatile.c +++ b/hw/pci-host/versatile.c @@ -405,9 +405,9 @@ static void pci_vpb_realize(DeviceState *dev, Error **errp) memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 4 * GiB); memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 4 * GiB); - pci_root_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), dev, "pci", - &s->pci_mem_space, &s->pci_io_space, - PCI_DEVFN(11, 0), TYPE_PCI_BUS); + pci_root_bus_init(&s->pci_bus, sizeof(s->pci_bus), dev, "pci", + &s->pci_mem_space, &s->pci_io_space, + PCI_DEVFN(11, 0), TYPE_PCI_BUS); h->bus = &s->pci_bus; object_initialize(&s->pci_dev, sizeof(s->pci_dev), TYPE_VERSATILE_PCI_HOST); diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 8f35e13a0cb..e5993c1ef52 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -45,7 +45,6 @@ #include "trace.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" -#include "exec/address-spaces.h" #include "hw/hotplug.h" #include "hw/boards.h" #include "qapi/error.h" @@ -417,10 +416,26 @@ const char *pci_root_bus_path(PCIDevice *dev) return rootbus->qbus.name; } -static void pci_root_bus_init(PCIBus *bus, DeviceState *parent, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, - uint8_t devfn_min) +bool pci_bus_bypass_iommu(PCIBus *bus) +{ + PCIBus *rootbus = bus; + PCIHostState *host_bridge; + + if (!pci_bus_is_root(bus)) { + rootbus = pci_device_root_bus(bus->parent_dev); + } + + host_bridge = PCI_HOST_BRIDGE(rootbus->qbus.parent); + + assert(host_bridge->bus == rootbus); + + return host_bridge->bypass_iommu; +} + +static void pci_root_bus_internal_init(PCIBus *bus, DeviceState *parent, + MemoryRegion *address_space_mem, + MemoryRegion *address_space_io, + uint8_t devfn_min) { assert(PCI_FUNC(devfn_min) == 0); bus->devfn_min = devfn_min; @@ -445,15 +460,15 @@ bool pci_bus_is_express(PCIBus *bus) return object_dynamic_cast(OBJECT(bus), TYPE_PCIE_BUS); } -void pci_root_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent, - const char *name, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, - uint8_t devfn_min, const char *typename) +void pci_root_bus_init(PCIBus *bus, size_t bus_size, DeviceState *parent, + const char *name, + MemoryRegion *address_space_mem, + MemoryRegion *address_space_io, + uint8_t devfn_min, const char *typename) { - qbus_create_inplace(bus, bus_size, typename, parent, name); - pci_root_bus_init(bus, parent, address_space_mem, address_space_io, - devfn_min); + qbus_init(bus, bus_size, typename, parent, name); + pci_root_bus_internal_init(bus, parent, address_space_mem, + address_space_io, devfn_min); } PCIBus *pci_root_bus_new(DeviceState *parent, const char *name, @@ -463,9 +478,9 @@ 
PCIBus *pci_root_bus_new(DeviceState *parent, const char *name, { PCIBus *bus; - bus = PCI_BUS(qbus_create(typename, parent, name)); - pci_root_bus_init(bus, parent, address_space_mem, address_space_io, - devfn_min); + bus = PCI_BUS(qbus_new(typename, parent, name)); + pci_root_bus_internal_init(bus, parent, address_space_mem, + address_space_io, devfn_min); return bus; } @@ -522,6 +537,22 @@ int pci_bus_num(PCIBus *s) return PCI_BUS_GET_CLASS(s)->bus_num(s); } +/* Returns the min and max bus numbers of a PCI bus hierarchy */ +void pci_bus_range(PCIBus *bus, int *min_bus, int *max_bus) +{ + int i; + *min_bus = *max_bus = pci_bus_num(bus); + + for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) { + PCIDevice *dev = bus->devices[i]; + + if (dev && PCI_DEVICE_GET_CLASS(dev)->is_bridge) { + *min_bus = MIN(*min_bus, dev->config[PCI_SECONDARY_BUS]); + *max_bus = MAX(*max_bus, dev->config[PCI_SUBORDINATE_BUS]); + } + } +} + int pci_bus_numa_node(PCIBus *bus) { return PCI_BUS_GET_CLASS(bus)->numa_node(bus); @@ -1349,6 +1380,9 @@ static void pci_update_mappings(PCIDevice *d) continue; new_addr = pci_bar_address(d, i, r->type, r->size); + if (!d->has_power) { + new_addr = PCI_BAR_UNMAPPED; + } /* This bar isn't changed */ if (new_addr == r->addr) @@ -1433,8 +1467,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int if (range_covers_byte(addr, l, PCI_COMMAND)) { pci_update_irq_disabled(d, was_irq_disabled); memory_region_set_enabled(&d->bus_master_enable_region, - pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER); + (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->has_power); } msi_write_config(d, addr, val_in, l); @@ -1623,11 +1657,9 @@ static const pci_class_desc pci_class_descriptions[] = { 0, NULL} }; -static void pci_for_each_device_under_bus_reverse(PCIBus *bus, - void (*fn)(PCIBus *b, - PCIDevice *d, - void *opaque), - void *opaque) +void pci_for_each_device_under_bus_reverse(PCIBus *bus, + pci_bus_dev_fn fn, + void *opaque) { PCIDevice *d; int devfn; @@ -1641,8 +1673,7 @@ static void pci_for_each_device_under_bus_reverse(PCIBus *bus, } void pci_for_each_device_reverse(PCIBus *bus, int bus_num, - void (*fn)(PCIBus *b, PCIDevice *d, void *opaque), - void *opaque) + pci_bus_dev_fn fn, void *opaque) { bus = pci_find_bus_nr(bus, bus_num); @@ -1651,10 +1682,8 @@ void pci_for_each_device_reverse(PCIBus *bus, int bus_num, } } -static void pci_for_each_device_under_bus(PCIBus *bus, - void (*fn)(PCIBus *b, PCIDevice *d, - void *opaque), - void *opaque) +void pci_for_each_device_under_bus(PCIBus *bus, + pci_bus_dev_fn fn, void *opaque) { PCIDevice *d; int devfn; @@ -1668,8 +1697,7 @@ static void pci_for_each_device_under_bus(PCIBus *bus, } void pci_for_each_device(PCIBus *bus, int bus_num, - void (*fn)(PCIBus *b, PCIDevice *d, void *opaque), - void *opaque) + pci_bus_dev_fn fn, void *opaque) { bus = pci_find_bus_nr(bus, bus_num); @@ -2047,10 +2075,8 @@ static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num) return NULL; } -void pci_for_each_bus_depth_first(PCIBus *bus, - void *(*begin)(PCIBus *bus, void *parent_state), - void (*end)(PCIBus *bus, void *state), - void *parent_state) +void pci_for_each_bus_depth_first(PCIBus *bus, pci_bus_ret_fn begin, + pci_bus_fn end, void *parent_state) { PCIBus *sec; void *state; @@ -2159,6 +2185,8 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) pci_qdev_unrealize(DEVICE(pci_dev)); return; } + + pci_set_power(pci_dev, true); } PCIDevice *pci_new_multifunction(int devfn, bool multifunction, @@ 
-2719,7 +2747,7 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) iommu_bus = parent_bus; } - if (iommu_bus && iommu_bus->iommu_fn) { + if (!pci_bus_bypass_iommu(bus) && iommu_bus && iommu_bus->iommu_fn) { return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn); } return &address_space_memory; @@ -2830,6 +2858,22 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) return msg; } +void pci_set_power(PCIDevice *d, bool state) +{ + if (d->has_power == state) { + return; + } + + d->has_power = state; + pci_update_mappings(d); + memory_region_set_enabled(&d->bus_master_enable_region, + (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->has_power); + if (!d->has_power) { + pci_device_reset(d); + } +} + static const TypeInfo pci_device_type_info = { .name = TYPE_PCI_DEVICE, .parent = TYPE_DEVICE, diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c index 3789c17edc2..da34c8ebcd1 100644 --- a/hw/pci/pci_bridge.c +++ b/hw/pci/pci_bridge.c @@ -374,8 +374,8 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename) br->bus_name = dev->qdev.id; } - qbus_create_inplace(sec_bus, sizeof(br->sec_bus), typename, DEVICE(dev), - br->bus_name); + qbus_init(sec_bus, sizeof(br->sec_bus), typename, DEVICE(dev), + br->bus_name); sec_bus->parent_dev = dev; sec_bus->map_irq = br->map_irq ? br->map_irq : pci_swizzle_map_irq_fn; sec_bus->address_space_mem = &br->address_space_mem; @@ -448,11 +448,11 @@ int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset, PCIBridgeQemuCap cap = { .len = cap_len, .type = REDHAT_PCI_CAP_RESOURCE_RESERVE, - .bus_res = res_reserve.bus, - .io = res_reserve.io, - .mem = res_reserve.mem_non_pref, - .mem_pref_32 = res_reserve.mem_pref_32, - .mem_pref_64 = res_reserve.mem_pref_64 + .bus_res = cpu_to_le32(res_reserve.bus), + .io = cpu_to_le64(res_reserve.io), + .mem = cpu_to_le32(res_reserve.mem_non_pref), + .mem_pref_32 = cpu_to_le32(res_reserve.mem_pref_32), + .mem_pref_64 = cpu_to_le64(res_reserve.mem_pref_64) }; int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index 8ca5fadcbd4..7beafd40a8e 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -74,7 +74,8 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. */ - if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { + if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || + !pci_dev->has_power) { return; } @@ -97,7 +98,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. 
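 * A function whose slot power has been switched off (see pci_set_power()) is
 * treated the same way below: configuration writes are discarded and the
 * matching read path returns all ones, so the device appears absent to the
 * guest until power is restored.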
*/ - if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { + if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || + !pci_dev->has_power) { return ~0x0; } @@ -222,6 +224,7 @@ const VMStateDescription vmstate_pcihost = { static Property pci_host_properties_common[] = { DEFINE_PROP_BOOL("x-config-reg-migration-enabled", PCIHostState, mig_enabled, true), + DEFINE_PROP_BOOL("bypass-iommu", PCIHostState, bypass_iommu, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index fd0fa157e81..d7d73a31e4c 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -366,6 +366,29 @@ static void hotplug_event_clear(PCIDevice *dev) } } +static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + bool *power = opaque; + + pci_set_power(dev, *power); +} + +static void pcie_cap_update_power(PCIDevice *hotplug_dev) +{ + uint8_t *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap; + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(hotplug_dev)); + uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP); + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); + bool power = true; + + if (sltcap & PCI_EXP_SLTCAP_PCP) { + power = (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_ON; + } + + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), + pcie_set_power_device, &power); +} + /* * A PCI Express Hot-Plug Event has occurred, so update slot status register * and notify OS of the event if necessary. @@ -434,6 +457,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_DLLLA); } + pcie_cap_update_power(hotplug_pdev); return; } @@ -451,6 +475,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, } pcie_cap_slot_event(hotplug_pdev, PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); + pcie_cap_update_power(hotplug_pdev); } } @@ -472,6 +497,25 @@ static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) object_unparent(OBJECT(dev)); } +static void pcie_cap_slot_do_unplug(PCIDevice *dev) +{ + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); + + pci_for_each_device_under_bus(sec_bus, pcie_unplug_device, NULL); + + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); + if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || + (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDC); +} + void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -481,6 +525,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); /* Check if hot-unplug is disabled on the slot */ if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { @@ -496,7 +541,15 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + if ((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_BLINK) { + error_setg(errp, "Hot-unplug failed: " + "guest is busy (power indicator blinking)"); + return; + } + dev->pending_deleted_event = true; + dev->pending_deleted_expires_ms = + 
qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 5000; /* 5 secs */ /* In case user cancel the operation of multi-function hot-add, * remove the function that is unexposed to guest individually, @@ -509,6 +562,16 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + if (((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF) && + ((sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_OFF)) { + /* slot is powered off -> unplug without round-trip to the guest */ + pcie_cap_slot_do_unplug(hotplug_pdev); + hotplug_event_notify(hotplug_pdev); + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_ABP); + return; + } + pcie_cap_slot_push_attention_button(hotplug_pdev); } @@ -529,7 +592,13 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) PCI_EXP_SLTCAP_PIP | PCI_EXP_SLTCAP_AIP | PCI_EXP_SLTCAP_ABP); - if (s->hotplug) { + + /* + * Enable native hot-plug on all hot-plugged bridges unless + * hot-plug is disabled on the slot. + */ + if (s->hotplug && + (s->native_hotplug || DEVICE(dev)->hotplugged)) { pci_long_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCAP, PCI_EXP_SLTCAP_HPS | PCI_EXP_SLTCAP_HPC); @@ -619,6 +688,7 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_ABP); + pcie_cap_update_power(dev); hotplug_event_update_event_status(dev); } @@ -637,7 +707,6 @@ void pcie_cap_slot_write_config(PCIDevice *dev, uint32_t pos = dev->exp.exp_cap; uint8_t *exp_cap = dev->config + pos; uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); - uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) { /* @@ -687,20 +756,9 @@ void pcie_cap_slot_write_config(PCIDevice *dev, (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF && (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) || (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) { - PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - pcie_unplug_device, NULL); - - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDS); - if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || - (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNKSTA_DLLLA); - } - pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDC); + pcie_cap_slot_do_unplug(dev); } + pcie_cap_update_power(dev); hotplug_event_notify(dev); @@ -727,6 +785,7 @@ int pcie_cap_slot_post_load(void *opaque, int version_id) { PCIDevice *dev = opaque; hotplug_event_update_event_status(dev); + pcie_cap_update_power(dev); return 0; } @@ -870,8 +929,8 @@ void pcie_add_capability(PCIDevice *dev, uint16_t offset, uint16_t size) { assert(offset >= PCI_CONFIG_SPACE_SIZE); - assert(offset < offset + size); - assert(offset + size <= PCIE_CONFIG_SPACE_SIZE); + assert(offset < (uint16_t)(offset + size)); + assert((uint16_t)(offset + size) <= PCIE_CONFIG_SPACE_SIZE); assert(size >= 8); assert(pci_is_express(dev)); diff --git a/hw/pci/pcie_host.c b/hw/pci/pcie_host.c index 3534006f993..5abbe832202 100644 --- a/hw/pci/pcie_host.c +++ b/hw/pci/pcie_host.c @@ -23,7 +23,6 @@ #include "hw/pci/pci.h" #include "hw/pci/pcie_host.h" #include "qemu/module.h" -#include "exec/address-spaces.h" /* a helper function to get a PCIDevice for a given mmconfig address */ static inline PCIDevice *pcie_dev_find_by_mmcfg_addr(PCIBus *s, diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index eb563ad4354..e95c1e5519c 100644 --- a/hw/pci/pcie_port.c +++ 
b/hw/pci/pcie_port.c @@ -148,6 +148,7 @@ static Property pcie_slot_props[] = { DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0), DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0), DEFINE_PROP_BOOL("hotplug", PCIESlot, hotplug, true), + DEFINE_PROP_BOOL("x-native-hotplug", PCIESlot, native_hotplug, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/hw/pci/trace-events b/hw/pci/trace-events index def4b3926d4..fc777d0b5e6 100644 --- a/hw/pci/trace-events +++ b/hw/pci/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # pci.c pci_update_mappings_del(void *d, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "d=%p %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 diff --git a/hw/pcmcia/meson.build b/hw/pcmcia/meson.build index ab50bd325d6..51f2512b8ed 100644 --- a/hw/pcmcia/meson.build +++ b/hw/pcmcia/meson.build @@ -1,2 +1,2 @@ -softmmu_ss.add(files('pcmcia.c')) +softmmu_ss.add(when: 'CONFIG_PCMCIA', if_true: files('pcmcia.c')) softmmu_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c')) diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig index d11dc30509d..400511c6b70 100644 --- a/hw/ppc/Kconfig +++ b/hw/ppc/Kconfig @@ -3,6 +3,7 @@ config PSERIES imply PCI_DEVICES imply TEST_DEVICES imply VIRTIO_VGA + imply NVDIMM select DIMM select PCI select SPAPR_VSCSI @@ -12,6 +13,7 @@ config PSERIES select MSI_NONBROKEN select FDT_PPC select CHRP_NVRAM + select VOF config SPAPR_RNG bool @@ -68,12 +70,23 @@ config SAM460EX select USB_OHCI select FDT_PPC +config PEGASOS2 + bool + select MV64361 + select VT82C686 + select IDE_VIA + select SMBUS_EEPROM + select VOF +# This should come with VT82C686 + select ACPI_X86 + imply ATI_VGA + config PREP bool imply PCI_DEVICES imply TEST_DEVICES select CS4231A - select PREP_PCI + select RAVEN_PCI select I82378 select LSI_SCSI_PCI select M48T59 @@ -118,6 +131,7 @@ config E500 select SERIAL select MPC_I2C select FDT_PPC + select DS1338 config VIRTEX bool @@ -134,3 +148,6 @@ config FW_CFG_PPC config FDT_PPC bool + +config VOF + bool diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 79467ac5123..960e7efcd31 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -25,7 +25,6 @@ #include "qemu/config-file.h" #include "hw/char/serial.h" #include "hw/pci/pci.h" -#include "hw/boards.h" #include "sysemu/sysemu.h" #include "sysemu/kvm.h" #include "sysemu/reset.h" @@ -39,7 +38,6 @@ #include "hw/loader.h" #include "elf.h" #include "hw/sysbus.h" -#include "exec/address-spaces.h" #include "qemu/host-utils.h" #include "qemu/option.h" #include "hw/pci-host/ppce500.h" @@ -1008,7 +1006,7 @@ void ppce500_init(MachineState *machine) /* Platform Bus Device */ if (pmc->has_platform_bus) { dev = qdev_new(TYPE_PLATFORM_BUS_DEVICE); - dev->id = TYPE_PLATFORM_BUS_DEVICE; + dev->id = g_strdup(TYPE_PLATFORM_BUS_DEVICE); qdev_prop_set_uint32(dev, "num_irqs", pmc->platform_bus_num_irqs); qdev_prop_set_uint32(dev, "mmio_size", pmc->platform_bus_size); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 21759628466..7bb7ac39975 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -58,7 +58,6 @@ #include "hw/pci/pci.h" #include "net/net.h" #include "sysemu/sysemu.h" -#include "hw/boards.h" #include "hw/nvram/fw_cfg.h" #include "hw/char/escc.h" #include "hw/misc/macio/macio.h" @@ -71,7 +70,6 @@ #include "sysemu/reset.h" #include "kvm_ppc.h" #include "hw/usb.h" -#include "exec/address-spaces.h" #include "hw/sysbus.h" 
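In the e500.c hunk above, the platform bus device's id is switched from pointing at a string literal to a g_strdup() copy. A plausible reading, not stated in the patch itself, is that the qdev core frees DeviceState::id when the device is finalized, so the field must own heap-allocated memory; a minimal sketch of the resulting pattern (the function name is illustrative):

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/sysbus.h"
#include "hw/platform-bus.h"

static void create_platform_bus_sketch(void)
{
    DeviceState *dev = qdev_new(TYPE_PLATFORM_BUS_DEVICE);

    /* the id string is owned (and later freed) by the device, not by us */
    dev->id = g_strdup(TYPE_PLATFORM_BUS_DEVICE);
    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
}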
#include "trace.h" @@ -157,6 +155,10 @@ static void ppc_core99_init(MachineState *machine) } /* allocate RAM */ + if (machine->ram_size > 2 * GiB) { + error_report("RAM size more than 2 GiB is not supported"); + exit(1); + } memory_region_add_subregion(get_system_memory(), 0, machine->ram); /* allocate and load firmware ROM */ diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index 963d247f5f5..de2be960e6c 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -38,7 +38,6 @@ #include "hw/isa/isa.h" #include "hw/pci/pci.h" #include "hw/pci/pci_host.h" -#include "hw/boards.h" #include "hw/nvram/fw_cfg.h" #include "hw/char/escc.h" #include "hw/misc/macio/macio.h" @@ -49,7 +48,6 @@ #include "sysemu/kvm.h" #include "sysemu/reset.h" #include "kvm_ppc.h" -#include "exec/address-spaces.h" #define MAX_IDE_BUS 2 #define CFG_ADDR 0xf0000510 diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build index 218631c883b..aa4c8e6a2ea 100644 --- a/hw/ppc/meson.build +++ b/hw/ppc/meson.build @@ -29,6 +29,9 @@ ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files( 'spapr_numa.c', 'pef.c', )) +ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_TCG'], if_true: files( + 'spapr_softmmu.c', +)) ppc_ss.add(when: 'CONFIG_SPAPR_RNG', if_true: files('spapr_rng.c')) ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_LINUX'], if_true: files( 'spapr_pci_vfio.c', @@ -78,5 +81,10 @@ ppc_ss.add(when: 'CONFIG_E500', if_true: files( )) # PowerPC 440 Xilinx ML507 reference board. ppc_ss.add(when: 'CONFIG_VIRTEX', if_true: files('virtex_ml507.c')) +# Pegasos2 +ppc_ss.add(when: 'CONFIG_PEGASOS2', if_true: files('pegasos2.c')) + +ppc_ss.add(when: 'CONFIG_VOF', if_true: files('vof.c')) +ppc_ss.add(when: ['CONFIG_VOF', 'CONFIG_PSERIES'], if_true: files('spapr_vof.c')) hw_arch += {'ppc': ppc_ss} diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c index 573be3ed79b..cc44d5e3396 100644 --- a/hw/ppc/pef.c +++ b/hw/ppc/pef.c @@ -41,7 +41,7 @@ struct PefGuest { ConfidentialGuestSupport parent_obj; }; -static int kvmppc_svm_init(Error **errp) +static int kvmppc_svm_init(ConfidentialGuestSupport *cgs, Error **errp) { #ifdef CONFIG_KVM static Error *pef_mig_blocker; @@ -65,6 +65,8 @@ static int kvmppc_svm_init(Error **errp) /* NB: This can fail if --only-migratable is used */ migrate_add_blocker(pef_mig_blocker, &error_fatal); + cgs->ready = true; + return 0; #else g_assert_not_reached(); @@ -102,7 +104,7 @@ int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) return -1; } - return kvmppc_svm_init(errp); + return kvmppc_svm_init(cgs, errp); } int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c new file mode 100644 index 00000000000..298e6b93e2d --- /dev/null +++ b/hw/ppc/pegasos2.c @@ -0,0 +1,952 @@ +/* + * QEMU PowerPC CHRP (Genesi/bPlan Pegasos II) hardware System Emulator + * + * Copyright (c) 2018-2021 BALATON Zoltan + * + * This work is licensed under the GNU GPL license version 2 or later. 
+ * + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "hw/hw.h" +#include "hw/ppc/ppc.h" +#include "hw/sysbus.h" +#include "hw/pci/pci_host.h" +#include "hw/irq.h" +#include "hw/pci-host/mv64361.h" +#include "hw/isa/vt82c686.h" +#include "hw/ide/pci.h" +#include "hw/i2c/smbus_eeprom.h" +#include "hw/qdev-properties.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" +#include "sysemu/qtest.h" +#include "hw/boards.h" +#include "hw/loader.h" +#include "hw/fw-path-provider.h" +#include "elf.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "sysemu/kvm.h" +#include "kvm_ppc.h" +#include "exec/address-spaces.h" +#include "qom/qom-qobject.h" +#include "qapi/qmp/qdict.h" +#include "trace.h" +#include "qemu/datadir.h" +#include "sysemu/device_tree.h" +#include "hw/ppc/vof.h" + +#include + +#define PROM_FILENAME "vof.bin" +#define PROM_ADDR 0xfff00000 +#define PROM_SIZE 0x80000 + +#define KVMPPC_HCALL_BASE 0xf000 +#define KVMPPC_H_RTAS (KVMPPC_HCALL_BASE + 0x0) +#define KVMPPC_H_VOF_CLIENT (KVMPPC_HCALL_BASE + 0x5) + +#define H_SUCCESS 0 +#define H_PRIVILEGE -3 /* Caller not privileged */ +#define H_PARAMETER -4 /* Parameter invalid, out-of-range or conflicting */ + +#define BUS_FREQ_HZ 133333333 + +#define PCI0_CFG_ADDR 0xcf8 +#define PCI0_MEM_BASE 0xc0000000 +#define PCI0_MEM_SIZE 0x20000000 +#define PCI0_IO_BASE 0xf8000000 +#define PCI0_IO_SIZE 0x10000 + +#define PCI1_CFG_ADDR 0xc78 +#define PCI1_MEM_BASE 0x80000000 +#define PCI1_MEM_SIZE 0x40000000 +#define PCI1_IO_BASE 0xfe000000 +#define PCI1_IO_SIZE 0x10000 + +#define TYPE_PEGASOS2_MACHINE MACHINE_TYPE_NAME("pegasos2") +OBJECT_DECLARE_TYPE(Pegasos2MachineState, MachineClass, PEGASOS2_MACHINE) + +struct Pegasos2MachineState { + MachineState parent_obj; + PowerPCCPU *cpu; + DeviceState *mv; + Vof *vof; + void *fdt_blob; + uint64_t kernel_addr; + uint64_t kernel_entry; + uint64_t kernel_size; +}; + +static void *build_fdt(MachineState *machine, int *fdt_size); + +static void pegasos2_cpu_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + Pegasos2MachineState *pm = PEGASOS2_MACHINE(current_machine); + + cpu_reset(CPU(cpu)); + cpu->env.spr[SPR_HID1] = 7ULL << 28; + if (pm->vof) { + cpu->env.gpr[1] = 2 * VOF_STACK_SIZE - 0x20; + cpu->env.nip = 0x100; + } +} + +static void pegasos2_init(MachineState *machine) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); + CPUPPCState *env; + MemoryRegion *rom = g_new(MemoryRegion, 1); + PCIBus *pci_bus; + PCIDevice *dev; + I2CBus *i2c_bus; + const char *fwname = machine->firmware ?: PROM_FILENAME; + char *filename; + int sz; + uint8_t *spd_data; + + /* init CPU */ + pm->cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); + env = &pm->cpu->env; + if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) { + error_report("Incompatible CPU, only 6xx bus supported"); + exit(1); + } + + /* Set time-base frequency */ + cpu_ppc_tb_init(env, BUS_FREQ_HZ / 4); + qemu_register_reset(pegasos2_cpu_reset, pm->cpu); + + /* RAM */ + if (machine->ram_size > 2 * GiB) { + error_report("RAM size more than 2 GiB is not supported"); + exit(1); + } + memory_region_add_subregion(get_system_memory(), 0, machine->ram); + + /* allocate and load firmware */ + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, fwname); + if (!filename) { + error_report("Could not find firmware '%s'", fwname); + exit(1); + } + if (!machine->firmware && !pm->vof) { + pm->vof = g_malloc0(sizeof(*pm->vof)); + } + memory_region_init_rom(rom, NULL, "pegasos2.rom", PROM_SIZE, 
&error_fatal); + memory_region_add_subregion(get_system_memory(), PROM_ADDR, rom); + sz = load_elf(filename, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1, + PPC_ELF_MACHINE, 0, 0); + if (sz <= 0) { + sz = load_image_targphys(filename, pm->vof ? 0 : PROM_ADDR, PROM_SIZE); + } + if (sz <= 0 || sz > PROM_SIZE) { + error_report("Could not load firmware '%s'", filename); + exit(1); + } + g_free(filename); + if (pm->vof) { + pm->vof->fw_size = sz; + } + + /* Marvell Discovery II system controller */ + pm->mv = DEVICE(sysbus_create_simple(TYPE_MV64361, -1, + ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT])); + pci_bus = mv64361_get_pci_bus(pm->mv, 1); + + /* VIA VT8231 South Bridge (multifunction PCI device) */ + /* VT8231 function 0: PCI-to-ISA Bridge */ + dev = pci_create_simple_multifunction(pci_bus, PCI_DEVFN(12, 0), true, + TYPE_VT8231_ISA); + qdev_connect_gpio_out(DEVICE(dev), 0, + qdev_get_gpio_in_named(pm->mv, "gpp", 31)); + + /* VT8231 function 1: IDE Controller */ + dev = pci_create_simple(pci_bus, PCI_DEVFN(12, 1), "via-ide"); + pci_ide_create_devs(dev); + + /* VT8231 function 2-3: USB Ports */ + pci_create_simple(pci_bus, PCI_DEVFN(12, 2), "vt82c686b-usb-uhci"); + pci_create_simple(pci_bus, PCI_DEVFN(12, 3), "vt82c686b-usb-uhci"); + + /* VT8231 function 4: Power Management Controller */ + dev = pci_create_simple(pci_bus, PCI_DEVFN(12, 4), TYPE_VT8231_PM); + i2c_bus = I2C_BUS(qdev_get_child_bus(DEVICE(dev), "i2c")); + spd_data = spd_data_generate(DDR, machine->ram_size); + smbus_eeprom_init_one(i2c_bus, 0x57, spd_data); + + /* VT8231 function 5-6: AC97 Audio & Modem */ + pci_create_simple(pci_bus, PCI_DEVFN(12, 5), TYPE_VIA_AC97); + pci_create_simple(pci_bus, PCI_DEVFN(12, 6), TYPE_VIA_MC97); + + /* other PC hardware */ + pci_vga_init(pci_bus); + + if (machine->kernel_filename) { + sz = load_elf(machine->kernel_filename, NULL, NULL, NULL, + &pm->kernel_entry, &pm->kernel_addr, NULL, NULL, 1, + PPC_ELF_MACHINE, 0, 0); + if (sz <= 0) { + error_report("Could not load kernel '%s'", + machine->kernel_filename); + exit(1); + } + pm->kernel_size = sz; + if (!pm->vof) { + warn_report("Option -kernel may be ineffective with -bios."); + } + } else if (pm->vof && !qtest_enabled()) { + warn_report("Using Virtual OpenFirmware but no -kernel option."); + } + + if (!pm->vof && machine->kernel_cmdline && machine->kernel_cmdline[0]) { + warn_report("Option -append may be ineffective with -bios."); + } +} + +static uint32_t pegasos2_mv_reg_read(Pegasos2MachineState *pm, + uint32_t addr, uint32_t len) +{ + MemoryRegion *r = sysbus_mmio_get_region(SYS_BUS_DEVICE(pm->mv), 0); + uint64_t val = 0xffffffffULL; + memory_region_dispatch_read(r, addr, &val, size_memop(len) | MO_LE, + MEMTXATTRS_UNSPECIFIED); + return val; +} + +static void pegasos2_mv_reg_write(Pegasos2MachineState *pm, uint32_t addr, + uint32_t len, uint32_t val) +{ + MemoryRegion *r = sysbus_mmio_get_region(SYS_BUS_DEVICE(pm->mv), 0); + memory_region_dispatch_write(r, addr, val, size_memop(len) | MO_LE, + MEMTXATTRS_UNSPECIFIED); +} + +static uint32_t pegasos2_pci_config_read(Pegasos2MachineState *pm, int bus, + uint32_t addr, uint32_t len) +{ + hwaddr pcicfg = bus ? PCI1_CFG_ADDR : PCI0_CFG_ADDR; + uint64_t val = 0xffffffffULL; + + if (len <= 4) { + pegasos2_mv_reg_write(pm, pcicfg, 4, addr | BIT(31)); + val = pegasos2_mv_reg_read(pm, pcicfg + 4, len); + } + return val; +} + +static void pegasos2_pci_config_write(Pegasos2MachineState *pm, int bus, + uint32_t addr, uint32_t len, uint32_t val) +{ + hwaddr pcicfg = bus ? 
PCI1_CFG_ADDR : PCI0_CFG_ADDR; + + pegasos2_mv_reg_write(pm, pcicfg, 4, addr | BIT(31)); + pegasos2_mv_reg_write(pm, pcicfg + 4, len, val); +} + +static void pegasos2_machine_reset(MachineState *machine) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); + void *fdt; + uint64_t d[2]; + int sz; + + qemu_devices_reset(); + if (!pm->vof) { + return; /* Firmware should set up machine so nothing to do */ + } + + /* Otherwise, set up devices that board firmware would normally do */ + pegasos2_mv_reg_write(pm, 0, 4, 0x28020ff); + pegasos2_mv_reg_write(pm, 0x278, 4, 0xa31fc); + pegasos2_mv_reg_write(pm, 0xf300, 4, 0x11ff0400); + pegasos2_mv_reg_write(pm, 0xf10c, 4, 0x80000000); + pegasos2_mv_reg_write(pm, 0x1c, 4, 0x8000000); + pegasos2_pci_config_write(pm, 0, PCI_COMMAND, 2, PCI_COMMAND_IO | + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + pegasos2_pci_config_write(pm, 1, PCI_COMMAND, 2, PCI_COMMAND_IO | + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | + PCI_INTERRUPT_LINE, 2, 0x9); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | + 0x50, 1, 0x2); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + PCI_INTERRUPT_LINE, 2, 0x109); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + PCI_CLASS_PROG, 1, 0xf); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + 0x40, 1, 0xb); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + 0x50, 4, 0x17171717); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + PCI_COMMAND, 2, 0x87); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 2) << 8) | + PCI_INTERRUPT_LINE, 2, 0x409); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 3) << 8) | + PCI_INTERRUPT_LINE, 2, 0x409); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + PCI_INTERRUPT_LINE, 2, 0x9); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + 0x48, 4, 0xf00); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + 0x40, 4, 0x558020); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + 0x90, 4, 0xd00); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 5) << 8) | + PCI_INTERRUPT_LINE, 2, 0x309); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 6) << 8) | + PCI_INTERRUPT_LINE, 2, 0x309); + + /* Device tree and VOF set up */ + vof_init(pm->vof, machine->ram_size, &error_fatal); + if (vof_claim(pm->vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE) == -1) { + error_report("Memory allocation for stack failed"); + exit(1); + } + if (pm->kernel_size && + vof_claim(pm->vof, pm->kernel_addr, pm->kernel_size, 0) == -1) { + error_report("Memory for kernel is in use"); + exit(1); + } + fdt = build_fdt(machine, &sz); + /* FIXME: VOF assumes entry is same as load address */ + d[0] = cpu_to_be64(pm->kernel_entry); + d[1] = cpu_to_be64(pm->kernel_size - (pm->kernel_entry - pm->kernel_addr)); + qemu_fdt_setprop(fdt, "/chosen", "qemu,boot-kernel", d, sizeof(d)); + + qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); + g_free(pm->fdt_blob); + pm->fdt_blob = fdt; + + vof_build_dt(fdt, pm->vof); + vof_client_open_store(fdt, pm->vof, "/chosen", "stdout", "/failsafe"); + pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine); +} + +enum pegasos2_rtas_tokens { + RTAS_RESTART_RTAS = 0, + RTAS_NVRAM_FETCH = 1, + RTAS_NVRAM_STORE = 2, + RTAS_GET_TIME_OF_DAY = 3, + RTAS_SET_TIME_OF_DAY = 4, + RTAS_EVENT_SCAN = 6, + RTAS_CHECK_EXCEPTION = 7, + RTAS_READ_PCI_CONFIG = 8, + RTAS_WRITE_PCI_CONFIG = 9, + RTAS_DISPLAY_CHARACTER = 10, + RTAS_SET_INDICATOR = 11, + RTAS_POWER_OFF = 17, + 
RTAS_SUSPEND = 18, + RTAS_HIBERNATE = 19, + RTAS_SYSTEM_REBOOT = 20, +}; + +static target_ulong pegasos2_rtas(PowerPCCPU *cpu, Pegasos2MachineState *pm, + target_ulong args_real) +{ + AddressSpace *as = CPU(cpu)->as; + uint32_t token = ldl_be_phys(as, args_real); + uint32_t nargs = ldl_be_phys(as, args_real + 4); + uint32_t nrets = ldl_be_phys(as, args_real + 8); + uint32_t args = args_real + 12; + uint32_t rets = args_real + 12 + nargs * 4; + + if (nrets < 1) { + qemu_log_mask(LOG_GUEST_ERROR, "Too few return values in RTAS call\n"); + return H_PARAMETER; + } + switch (token) { + case RTAS_GET_TIME_OF_DAY: + { + QObject *qo = object_property_get_qobject(qdev_get_machine(), + "rtc-time", &error_fatal); + QDict *qd = qobject_to(QDict, qo); + + if (nargs != 0 || nrets != 8 || !qd) { + stl_be_phys(as, rets, -1); + qobject_unref(qo); + return H_PARAMETER; + } + + stl_be_phys(as, rets, 0); + stl_be_phys(as, rets + 4, qdict_get_int(qd, "tm_year") + 1900); + stl_be_phys(as, rets + 8, qdict_get_int(qd, "tm_mon") + 1); + stl_be_phys(as, rets + 12, qdict_get_int(qd, "tm_mday")); + stl_be_phys(as, rets + 16, qdict_get_int(qd, "tm_hour")); + stl_be_phys(as, rets + 20, qdict_get_int(qd, "tm_min")); + stl_be_phys(as, rets + 24, qdict_get_int(qd, "tm_sec")); + stl_be_phys(as, rets + 28, 0); + qobject_unref(qo); + return H_SUCCESS; + } + case RTAS_READ_PCI_CONFIG: + { + uint32_t addr, len, val; + + if (nargs != 2 || nrets != 2) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + addr = ldl_be_phys(as, args); + len = ldl_be_phys(as, args + 4); + val = pegasos2_pci_config_read(pm, !(addr >> 24), + addr & 0x0fffffff, len); + stl_be_phys(as, rets, 0); + stl_be_phys(as, rets + 4, val); + return H_SUCCESS; + } + case RTAS_WRITE_PCI_CONFIG: + { + uint32_t addr, len, val; + + if (nargs != 3 || nrets != 1) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + addr = ldl_be_phys(as, args); + len = ldl_be_phys(as, args + 4); + val = ldl_be_phys(as, args + 8); + pegasos2_pci_config_write(pm, !(addr >> 24), + addr & 0x0fffffff, len, val); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + } + case RTAS_DISPLAY_CHARACTER: + if (nargs != 1 || nrets != 1) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + qemu_log_mask(LOG_UNIMP, "%c", ldl_be_phys(as, args)); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + case RTAS_POWER_OFF: + { + if (nargs != 2 || nrets != 1) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + } + default: + qemu_log_mask(LOG_UNIMP, "Unknown RTAS token %u (args=%u, rets=%u)\n", + token, nargs, nrets); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + } +} + +static void pegasos2_hypercall(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(vhyp); + CPUPPCState *env = &cpu->env; + + /* The TCG path should also be holding the BQL at this point */ + g_assert(qemu_mutex_iothread_locked()); + + if (msr_pr) { + qemu_log_mask(LOG_GUEST_ERROR, "Hypercall made with MSR[PR]=1\n"); + env->gpr[3] = H_PRIVILEGE; + } else if (env->gpr[3] == KVMPPC_H_RTAS) { + env->gpr[3] = pegasos2_rtas(cpu, pm, env->gpr[4]); + } else if (env->gpr[3] == KVMPPC_H_VOF_CLIENT) { + int ret = vof_client_call(MACHINE(pm), pm->vof, pm->fdt_blob, + env->gpr[4]); + env->gpr[3] = (ret ? 
H_PARAMETER : H_SUCCESS); + } else { + qemu_log_mask(LOG_GUEST_ERROR, "Unsupported hypercall " TARGET_FMT_lx + "\n", env->gpr[3]); + env->gpr[3] = -1; + } +} + +static void vhyp_nop(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) +{ +} + +static target_ulong vhyp_encode_hpt_for_kvm_pr(PPCVirtualHypervisor *vhyp) +{ + return POWERPC_CPU(current_cpu)->env.spr[SPR_SDR1]; +} + +static bool pegasos2_setprop(MachineState *ms, const char *path, + const char *propname, void *val, int vallen) +{ + return true; +} + +static void pegasos2_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc); + VofMachineIfClass *vmc = VOF_MACHINE_CLASS(oc); + + mc->desc = "Genesi/bPlan Pegasos II"; + mc->init = pegasos2_init; + mc->reset = pegasos2_machine_reset; + mc->block_default_type = IF_IDE; + mc->default_boot_order = "cd"; + mc->default_display = "std"; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("7400_v2.9"); + mc->default_ram_id = "pegasos2.ram"; + mc->default_ram_size = 512 * MiB; + + vhc->hypercall = pegasos2_hypercall; + vhc->cpu_exec_enter = vhyp_nop; + vhc->cpu_exec_exit = vhyp_nop; + vhc->encode_hpt_for_kvm_pr = vhyp_encode_hpt_for_kvm_pr; + + vmc->setprop = pegasos2_setprop; +} + +static const TypeInfo pegasos2_machine_info = { + .name = TYPE_PEGASOS2_MACHINE, + .parent = TYPE_MACHINE, + .class_init = pegasos2_machine_class_init, + .instance_size = sizeof(Pegasos2MachineState), + .interfaces = (InterfaceInfo[]) { + { TYPE_PPC_VIRTUAL_HYPERVISOR }, + { TYPE_VOF_MACHINE_IF }, + { } + }, +}; + +static void pegasos2_machine_register_types(void) +{ + type_register_static(&pegasos2_machine_info); +} + +type_init(pegasos2_machine_register_types) + +/* FDT creation for passing to firmware */ + +typedef struct { + void *fdt; + const char *path; +} FDTInfo; + +/* We do everything in reverse order so it comes out right in the tree */ + +static void dt_ide(PCIBus *bus, PCIDevice *d, FDTInfo *fi) +{ + qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "spi"); +} + +static void dt_usb(PCIBus *bus, PCIDevice *d, FDTInfo *fi) +{ + qemu_fdt_setprop_cell(fi->fdt, fi->path, "#size-cells", 0); + qemu_fdt_setprop_cell(fi->fdt, fi->path, "#address-cells", 1); + qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "usb"); +} + +static void dt_isa(PCIBus *bus, PCIDevice *d, FDTInfo *fi) +{ + GString *name = g_string_sized_new(64); + uint32_t cells[3]; + + qemu_fdt_setprop_cell(fi->fdt, fi->path, "#size-cells", 1); + qemu_fdt_setprop_cell(fi->fdt, fi->path, "#address-cells", 2); + qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "isa"); + qemu_fdt_setprop_string(fi->fdt, fi->path, "name", "isa"); + + /* addional devices */ + g_string_printf(name, "%s/lpt@i3bc", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(7); + cells[1] = 0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x3bc); + cells[2] = cpu_to_be32(8); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "lpt"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "lpt"); + + g_string_printf(name, "%s/fdc@i3f0", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(6); + cells[1] = 
0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x3f0); + cells[2] = cpu_to_be32(8); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "fdc"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "fdc"); + + g_string_printf(name, "%s/timer@i40", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x40); + cells[2] = cpu_to_be32(8); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "timer"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "timer"); + + g_string_printf(name, "%s/rtc@i70", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_string(fi->fdt, name->str, "compatible", "ds1385-rtc"); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(8); + cells[1] = 0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x70); + cells[2] = cpu_to_be32(2); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "rtc"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "rtc"); + + g_string_printf(name, "%s/keyboard@i60", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + cells[0] = cpu_to_be32(1); + cells[1] = 0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x60); + cells[2] = cpu_to_be32(5); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "keyboard"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "keyboard"); + + g_string_printf(name, "%s/8042@i60", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "#interrupt-cells", 2); + qemu_fdt_setprop_cell(fi->fdt, name->str, "#size-cells", 0); + qemu_fdt_setprop_cell(fi->fdt, name->str, "#address-cells", 1); + qemu_fdt_setprop_string(fi->fdt, name->str, "interrupt-controller", ""); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x60); + cells[2] = cpu_to_be32(5); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", ""); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "8042"); + + g_string_printf(name, "%s/serial@i2f8", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(3); + cells[1] = 0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x2f8); + cells[2] = cpu_to_be32(8); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "serial"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "serial"); + + g_string_free(name, TRUE); +} + +static struct { + const char *id; + const char *name; + void (*dtf)(PCIBus *bus, PCIDevice *d, FDTInfo *fi); +} device_map[] = { + { 
"pci11ab,6460", "host", NULL }, + { "pci1106,8231", "isa", dt_isa }, + { "pci1106,571", "ide", dt_ide }, + { "pci1106,3044", "firewire", NULL }, + { "pci1106,3038", "usb", dt_usb }, + { "pci1106,8235", "other", NULL }, + { "pci1106,3058", "sound", NULL }, + { NULL, NULL } +}; + +static void add_pci_device(PCIBus *bus, PCIDevice *d, void *opaque) +{ + FDTInfo *fi = opaque; + GString *node = g_string_new(NULL); + uint32_t cells[(PCI_NUM_REGIONS + 1) * 5]; + int i, j; + const char *name = NULL; + g_autofree const gchar *pn = g_strdup_printf("pci%x,%x", + pci_get_word(&d->config[PCI_VENDOR_ID]), + pci_get_word(&d->config[PCI_DEVICE_ID])); + + for (i = 0; device_map[i].id; i++) { + if (!strcmp(pn, device_map[i].id)) { + name = device_map[i].name; + break; + } + } + g_string_printf(node, "%s/%s@%x", fi->path, (name ?: pn), + PCI_SLOT(d->devfn)); + if (PCI_FUNC(d->devfn)) { + g_string_append_printf(node, ",%x", PCI_FUNC(d->devfn)); + } + + qemu_fdt_add_subnode(fi->fdt, node->str); + if (device_map[i].dtf) { + FDTInfo cfi = { fi->fdt, node->str }; + device_map[i].dtf(bus, d, &cfi); + } + cells[0] = cpu_to_be32(d->devfn << 8); + cells[1] = 0; + cells[2] = 0; + cells[3] = 0; + cells[4] = 0; + j = 5; + for (i = 0; i < PCI_NUM_REGIONS; i++) { + if (!d->io_regions[i].size) { + continue; + } + cells[j] = cpu_to_be32(d->devfn << 8 | (PCI_BASE_ADDRESS_0 + i * 4)); + if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { + cells[j] |= cpu_to_be32(1 << 24); + } else { + cells[j] |= cpu_to_be32(2 << 24); + if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_PREFETCH) { + cells[j] |= cpu_to_be32(4 << 28); + } + } + cells[j + 1] = 0; + cells[j + 2] = 0; + cells[j + 3] = cpu_to_be32(d->io_regions[i].size >> 32); + cells[j + 4] = cpu_to_be32(d->io_regions[i].size); + j += 5; + } + qemu_fdt_setprop(fi->fdt, node->str, "reg", cells, j * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, node->str, "name", name ?: pn); + if (pci_get_byte(&d->config[PCI_INTERRUPT_PIN])) { + qemu_fdt_setprop_cell(fi->fdt, node->str, "interrupts", + pci_get_byte(&d->config[PCI_INTERRUPT_PIN])); + } + /* Pegasos2 firmware has subsystem-id amd subsystem-vendor-id swapped */ + qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-vendor-id", + pci_get_word(&d->config[PCI_SUBSYSTEM_ID])); + qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-id", + pci_get_word(&d->config[PCI_SUBSYSTEM_VENDOR_ID])); + cells[0] = pci_get_long(&d->config[PCI_CLASS_REVISION]); + qemu_fdt_setprop_cell(fi->fdt, node->str, "class-code", cells[0] >> 8); + qemu_fdt_setprop_cell(fi->fdt, node->str, "revision-id", cells[0] & 0xff); + qemu_fdt_setprop_cell(fi->fdt, node->str, "device-id", + pci_get_word(&d->config[PCI_DEVICE_ID])); + qemu_fdt_setprop_cell(fi->fdt, node->str, "vendor-id", + pci_get_word(&d->config[PCI_VENDOR_ID])); + + g_string_free(node, TRUE); +} + +static void *build_fdt(MachineState *machine, int *fdt_size) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); + PowerPCCPU *cpu = pm->cpu; + PCIBus *pci_bus; + FDTInfo fi; + uint32_t cells[16]; + void *fdt = create_device_tree(fdt_size); + + fi.fdt = fdt; + + /* root node */ + qemu_fdt_setprop_string(fdt, "/", "CODEGEN,description", + "Pegasos CHRP PowerPC System"); + qemu_fdt_setprop_string(fdt, "/", "CODEGEN,board", "Pegasos2"); + qemu_fdt_setprop_string(fdt, "/", "CODEGEN,vendor", "bplan GmbH"); + qemu_fdt_setprop_string(fdt, "/", "revision", "2B"); + qemu_fdt_setprop_string(fdt, "/", "model", "Pegasos2"); + qemu_fdt_setprop_string(fdt, "/", "device_type", "chrp"); + 
qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 1); + qemu_fdt_setprop_string(fdt, "/", "name", "bplan,Pegasos2"); + + /* pci@c0000000 */ + qemu_fdt_add_subnode(fdt, "/pci@c0000000"); + cells[0] = 0; + cells[1] = 0; + qemu_fdt_setprop(fdt, "/pci@c0000000", "bus-range", + cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "pci-bridge-number", 1); + cells[0] = cpu_to_be32(PCI0_MEM_BASE); + cells[1] = cpu_to_be32(PCI0_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@c0000000", "reg", cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(0x01000000); + cells[1] = 0; + cells[2] = 0; + cells[3] = cpu_to_be32(PCI0_IO_BASE); + cells[4] = 0; + cells[5] = cpu_to_be32(PCI0_IO_SIZE); + cells[6] = cpu_to_be32(0x02000000); + cells[7] = 0; + cells[8] = cpu_to_be32(PCI0_MEM_BASE); + cells[9] = cpu_to_be32(PCI0_MEM_BASE); + cells[10] = 0; + cells[11] = cpu_to_be32(PCI0_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@c0000000", "ranges", + cells, 12 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#size-cells", 2); + qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#address-cells", 3); + qemu_fdt_setprop_string(fdt, "/pci@c0000000", "device_type", "pci"); + qemu_fdt_setprop_string(fdt, "/pci@c0000000", "name", "pci"); + + fi.path = "/pci@c0000000"; + pci_bus = mv64361_get_pci_bus(pm->mv, 0); + pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi); + + /* pci@80000000 */ + qemu_fdt_add_subnode(fdt, "/pci@80000000"); + cells[0] = 0; + cells[1] = 0; + qemu_fdt_setprop(fdt, "/pci@80000000", "bus-range", + cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "pci-bridge-number", 0); + cells[0] = cpu_to_be32(PCI1_MEM_BASE); + cells[1] = cpu_to_be32(PCI1_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@80000000", "reg", cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "8259-interrupt-acknowledge", + 0xf1000cb4); + cells[0] = cpu_to_be32(0x01000000); + cells[1] = 0; + cells[2] = 0; + cells[3] = cpu_to_be32(PCI1_IO_BASE); + cells[4] = 0; + cells[5] = cpu_to_be32(PCI1_IO_SIZE); + cells[6] = cpu_to_be32(0x02000000); + cells[7] = 0; + cells[8] = cpu_to_be32(PCI1_MEM_BASE); + cells[9] = cpu_to_be32(PCI1_MEM_BASE); + cells[10] = 0; + cells[11] = cpu_to_be32(PCI1_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@80000000", "ranges", + cells, 12 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#size-cells", 2); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#address-cells", 3); + qemu_fdt_setprop_string(fdt, "/pci@80000000", "device_type", "pci"); + qemu_fdt_setprop_string(fdt, "/pci@80000000", "name", "pci"); + + fi.path = "/pci@80000000"; + pci_bus = mv64361_get_pci_bus(pm->mv, 1); + pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi); + + qemu_fdt_add_subnode(fdt, "/failsafe"); + qemu_fdt_setprop_string(fdt, "/failsafe", "device_type", "serial"); + qemu_fdt_setprop_string(fdt, "/failsafe", "name", "failsafe"); + + qemu_fdt_add_subnode(fdt, "/rtas"); + qemu_fdt_setprop_cell(fdt, "/rtas", "system-reboot", RTAS_SYSTEM_REBOOT); + qemu_fdt_setprop_cell(fdt, "/rtas", "hibernate", RTAS_HIBERNATE); + qemu_fdt_setprop_cell(fdt, "/rtas", "suspend", RTAS_SUSPEND); + qemu_fdt_setprop_cell(fdt, "/rtas", "power-off", RTAS_POWER_OFF); + qemu_fdt_setprop_cell(fdt, "/rtas", "set-indicator", RTAS_SET_INDICATOR); + qemu_fdt_setprop_cell(fdt, "/rtas", "display-character", + RTAS_DISPLAY_CHARACTER); + qemu_fdt_setprop_cell(fdt, "/rtas", "write-pci-config", + RTAS_WRITE_PCI_CONFIG); + qemu_fdt_setprop_cell(fdt, "/rtas", "read-pci-config", + 
RTAS_READ_PCI_CONFIG); + /* Pegasos2 firmware misspells check-exception and guests use that */ + qemu_fdt_setprop_cell(fdt, "/rtas", "check-execption", + RTAS_CHECK_EXCEPTION); + qemu_fdt_setprop_cell(fdt, "/rtas", "event-scan", RTAS_EVENT_SCAN); + qemu_fdt_setprop_cell(fdt, "/rtas", "set-time-of-day", + RTAS_SET_TIME_OF_DAY); + qemu_fdt_setprop_cell(fdt, "/rtas", "get-time-of-day", + RTAS_GET_TIME_OF_DAY); + qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-store", RTAS_NVRAM_STORE); + qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-fetch", RTAS_NVRAM_FETCH); + qemu_fdt_setprop_cell(fdt, "/rtas", "restart-rtas", RTAS_RESTART_RTAS); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-error-log-max", 0); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-event-scan-rate", 0); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-display-device", 0); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-size", 20); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-version", 1); + + /* cpus */ + qemu_fdt_add_subnode(fdt, "/cpus"); + qemu_fdt_setprop_cell(fdt, "/cpus", "#cpus", 1); + qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 1); + qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0); + qemu_fdt_setprop_string(fdt, "/cpus", "name", "cpus"); + + /* FIXME Get CPU name from CPU object */ + const char *cp = "/cpus/PowerPC,G4"; + qemu_fdt_add_subnode(fdt, cp); + qemu_fdt_setprop_cell(fdt, cp, "l2cr", 0); + qemu_fdt_setprop_cell(fdt, cp, "d-cache-size", 0x8000); + qemu_fdt_setprop_cell(fdt, cp, "d-cache-block-size", + cpu->env.dcache_line_size); + qemu_fdt_setprop_cell(fdt, cp, "d-cache-line-size", + cpu->env.dcache_line_size); + qemu_fdt_setprop_cell(fdt, cp, "i-cache-size", 0x8000); + qemu_fdt_setprop_cell(fdt, cp, "i-cache-block-size", + cpu->env.icache_line_size); + qemu_fdt_setprop_cell(fdt, cp, "i-cache-line-size", + cpu->env.icache_line_size); + if (cpu->env.id_tlbs) { + qemu_fdt_setprop_cell(fdt, cp, "i-tlb-sets", cpu->env.nb_ways); + qemu_fdt_setprop_cell(fdt, cp, "i-tlb-size", cpu->env.tlb_per_way); + qemu_fdt_setprop_cell(fdt, cp, "d-tlb-sets", cpu->env.nb_ways); + qemu_fdt_setprop_cell(fdt, cp, "d-tlb-size", cpu->env.tlb_per_way); + qemu_fdt_setprop_string(fdt, cp, "tlb-split", ""); + } + qemu_fdt_setprop_cell(fdt, cp, "tlb-sets", cpu->env.nb_ways); + qemu_fdt_setprop_cell(fdt, cp, "tlb-size", cpu->env.nb_tlb); + qemu_fdt_setprop_string(fdt, cp, "state", "running"); + if (cpu->env.insns_flags & PPC_ALTIVEC) { + qemu_fdt_setprop_string(fdt, cp, "altivec", ""); + qemu_fdt_setprop_string(fdt, cp, "data-streams", ""); + } + /* + * FIXME What flags do data-streams, external-control and + * performance-monitor depend on? 
+ */ + qemu_fdt_setprop_string(fdt, cp, "external-control", ""); + if (cpu->env.insns_flags & PPC_FLOAT_FSQRT) { + qemu_fdt_setprop_string(fdt, cp, "general-purpose", ""); + } + qemu_fdt_setprop_string(fdt, cp, "performance-monitor", ""); + if (cpu->env.insns_flags & PPC_FLOAT_FRES) { + qemu_fdt_setprop_string(fdt, cp, "graphics", ""); + } + qemu_fdt_setprop_cell(fdt, cp, "reservation-granule-size", 4); + qemu_fdt_setprop_cell(fdt, cp, "timebase-frequency", + cpu->env.tb_env->tb_freq); + qemu_fdt_setprop_cell(fdt, cp, "bus-frequency", BUS_FREQ_HZ); + qemu_fdt_setprop_cell(fdt, cp, "clock-frequency", BUS_FREQ_HZ * 7.5); + qemu_fdt_setprop_cell(fdt, cp, "cpu-version", cpu->env.spr[SPR_PVR]); + cells[0] = 0; + cells[1] = 0; + qemu_fdt_setprop(fdt, cp, "reg", cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_string(fdt, cp, "device_type", "cpu"); + qemu_fdt_setprop_string(fdt, cp, "name", strrchr(cp, '/') + 1); + + /* memory */ + qemu_fdt_add_subnode(fdt, "/memory@0"); + cells[0] = 0; + cells[1] = cpu_to_be32(machine->ram_size); + qemu_fdt_setprop(fdt, "/memory@0", "reg", cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_string(fdt, "/memory@0", "device_type", "memory"); + qemu_fdt_setprop_string(fdt, "/memory@0", "name", "memory"); + + qemu_fdt_add_subnode(fdt, "/chosen"); + qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", + machine->kernel_cmdline ?: ""); + qemu_fdt_setprop_string(fdt, "/chosen", "name", "chosen"); + + qemu_fdt_add_subnode(fdt, "/openprom"); + qemu_fdt_setprop_string(fdt, "/openprom", "model", "Pegasos2,1.1"); + + return fdt; +} diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 77af846cdfe..71e45515f13 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -32,14 +32,12 @@ #include "sysemu/device_tree.h" #include "sysemu/hw_accel.h" #include "target/ppc/cpu.h" -#include "qemu/log.h" #include "hw/ppc/fdt.h" #include "hw/ppc/ppc.h" #include "hw/ppc/pnv.h" #include "hw/ppc/pnv_core.h" #include "hw/loader.h" #include "hw/nmi.h" -#include "exec/address-spaces.h" #include "qapi/visitor.h" #include "monitor/monitor.h" #include "hw/intc/intc.h" @@ -53,7 +51,6 @@ #include "hw/ppc/pnv_pnor.h" #include "hw/isa/isa.h" -#include "hw/boards.h" #include "hw/char/serial.h" #include "hw/rtc/mc146818rtc.h" @@ -199,7 +196,7 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt) _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); - if (env->spr_cb[SPR_PURR].oea_read) { + if (ppc_has_spr(cpu, SPR_PURR)) { _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); } @@ -713,6 +710,25 @@ static void pnv_chip_power10_pic_print_info(PnvChip *chip, Monitor *mon) pnv_psi_pic_print_info(&chip10->psi, mon); } +/* Always give the first 1GB to chip 0 else we won't boot */ +static uint64_t pnv_chip_get_ram_size(PnvMachineState *pnv, int chip_id) +{ + MachineState *machine = MACHINE(pnv); + uint64_t ram_per_chip; + + assert(machine->ram_size >= 1 * GiB); + + ram_per_chip = machine->ram_size / pnv->num_chips; + if (ram_per_chip >= 1 * GiB) { + return QEMU_ALIGN_DOWN(ram_per_chip, 1 * MiB); + } + + assert(pnv->num_chips > 1); + + ram_per_chip = (machine->ram_size - 1 * GiB) / (pnv->num_chips - 1); + return chip_id == 0 ? 
1 * GiB : QEMU_ALIGN_DOWN(ram_per_chip, 1 * MiB); +} + static void pnv_init(MachineState *machine) { const char *bios_name = machine->firmware ?: FW_FILE_NAME; @@ -720,6 +736,7 @@ static void pnv_init(MachineState *machine) MachineClass *mc = MACHINE_GET_CLASS(machine); char *fw_filename; long fw_size; + uint64_t chip_ram_start = 0; int i; char *chip_typename; DriveInfo *pnor = drive_get(IF_MTD, 0, 0); @@ -812,9 +829,10 @@ static void pnv_init(MachineState *machine) * TODO: should we decide on how many chips we can create based * on #cores and Venice vs. Murano vs. Naples chip type etc..., */ - if (!is_power_of_2(pnv->num_chips) || pnv->num_chips > 4) { + if (!is_power_of_2(pnv->num_chips) || pnv->num_chips > 16) { error_report("invalid number of chips: '%d'", pnv->num_chips); - error_printf("Try '-smp sockets=N'. Valid values are : 1, 2 or 4.\n"); + error_printf( + "Try '-smp sockets=N'. Valid values are : 1, 2, 4, 8 and 16.\n"); exit(1); } @@ -822,22 +840,20 @@ static void pnv_init(MachineState *machine) for (i = 0; i < pnv->num_chips; i++) { char chip_name[32]; Object *chip = OBJECT(qdev_new(chip_typename)); + uint64_t chip_ram_size = pnv_chip_get_ram_size(pnv, i); pnv->chips[i] = PNV_CHIP(chip); - /* - * TODO: put all the memory in one node on chip 0 until we find a - * way to specify different ranges for each chip - */ - if (i == 0) { - object_property_set_int(chip, "ram-size", machine->ram_size, - &error_fatal); - } + /* Distribute RAM among the chips */ + object_property_set_int(chip, "ram-start", chip_ram_start, + &error_fatal); + object_property_set_int(chip, "ram-size", chip_ram_size, + &error_fatal); + chip_ram_start += chip_ram_size; - snprintf(chip_name, sizeof(chip_name), "chip[%d]", PNV_CHIP_HWID(i)); + snprintf(chip_name, sizeof(chip_name), "chip[%d]", i); object_property_add_child(OBJECT(pnv), chip_name, chip); - object_property_set_int(chip, "chip-id", PNV_CHIP_HWID(i), - &error_fatal); + object_property_set_int(chip, "chip-id", i, &error_fatal); object_property_set_int(chip, "nr-cores", machine->smp.cores, &error_fatal); object_property_set_int(chip, "nr-threads", machine->smp.threads, @@ -1354,10 +1370,10 @@ static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp) sizeof(*eq), TYPE_PNV_QUAD, &error_fatal, NULL); - object_property_set_int(OBJECT(eq), "id", core_id, &error_fatal); + object_property_set_int(OBJECT(eq), "quad-id", core_id, &error_fatal); qdev_realize(DEVICE(eq), NULL, &error_fatal); - pnv_xscom_add_subregion(chip, PNV9_XSCOM_EQ_BASE(eq->id), + pnv_xscom_add_subregion(chip, PNV9_XSCOM_EQ_BASE(eq->quad_id), &eq->xscom_regs); } } @@ -1919,7 +1935,7 @@ static void pnv_machine_power10_class_init(ObjectClass *oc, void *data) static const char compat[] = "qemu,powernv10\0ibm,powernv"; mc->desc = "IBM PowerNV (Non-Virtualized) POWER10"; - mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power10_v1.0"); + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power10_v2.0"); pmc->compat = compat; pmc->compat_size = sizeof(compat); diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index bd2bf2e0447..19e8eb885f7 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -29,6 +29,7 @@ #include "hw/ppc/pnv_xscom.h" #include "hw/ppc/xics.h" #include "hw/qdev-properties.h" +#include "helper_regs.h" static const char *pnv_core_cpu_typename(PnvCore *pc) { @@ -55,8 +56,8 @@ static void pnv_core_cpu_reset(PnvCore *pc, PowerPCCPU *cpu) env->gpr[3] = PNV_FDT_ADDR; env->nip = 0x10; env->msr |= MSR_HVB; /* Hypervisor mode */ - env->spr[SPR_HRMOR] = pc->hrmor; + 
hreg_compute_hflags(env); pcc->intc_reset(pc->chip, cpu); } @@ -346,7 +347,7 @@ static const TypeInfo pnv_core_infos[] = { DEFINE_PNV_CORE_TYPE(power8, "power8_v2.0"), DEFINE_PNV_CORE_TYPE(power8, "power8nvl_v1.0"), DEFINE_PNV_CORE_TYPE(power9, "power9_v2.0"), - DEFINE_PNV_CORE_TYPE(power10, "power10_v1.0"), + DEFINE_PNV_CORE_TYPE(power10, "power10_v2.0"), }; DEFINE_TYPES(pnv_core_infos) @@ -406,13 +407,13 @@ static void pnv_quad_realize(DeviceState *dev, Error **errp) PnvQuad *eq = PNV_QUAD(dev); char name[32]; - snprintf(name, sizeof(name), "xscom-quad.%d", eq->id); + snprintf(name, sizeof(name), "xscom-quad.%d", eq->quad_id); pnv_xscom_region_init(&eq->xscom_regs, OBJECT(dev), &pnv_quad_xscom_ops, eq, name, PNV9_XSCOM_EQ_SIZE); } static Property pnv_quad_properties[] = { - DEFINE_PROP_UINT32("id", PnvQuad, id, 0), + DEFINE_PROP_UINT32("quad-id", PnvQuad, quad_id, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/ppc/pnv_pnor.c b/hw/ppc/pnv_pnor.c index 4b455de1ea7..83ecccca28d 100644 --- a/hw/ppc/pnv_pnor.c +++ b/hw/ppc/pnv_pnor.c @@ -10,7 +10,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "qemu/log.h" #include "qemu/units.h" #include "sysemu/block-backend.h" #include "sysemu/blockdev.h" @@ -37,7 +36,7 @@ static void pnv_pnor_update(PnvPnor *s, int offset, int size) int offset_end; int ret; - if (s->blk) { + if (!s->blk || !blk_is_writable(s->blk)) { return; } diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c index 3e868c8c8da..cd9a2c5952a 100644 --- a/hw/ppc/pnv_psi.c +++ b/hw/ppc/pnv_psi.c @@ -26,7 +26,6 @@ #include "qapi/error.h" #include "monitor/monitor.h" -#include "exec/address-spaces.h" #include "hw/ppc/fdt.h" #include "hw/ppc/pnv.h" @@ -466,7 +465,7 @@ static void pnv_psi_reset(DeviceState *dev) static void pnv_psi_reset_handler(void *dev) { - device_legacy_reset(DEVICE(dev)); + device_cold_reset(DEVICE(dev)); } static void pnv_psi_realize(DeviceState *dev, Error **errp) @@ -710,7 +709,7 @@ static void pnv_psi_p9_mmio_write(void *opaque, hwaddr addr, break; case PSIHB9_INTERRUPT_CONTROL: if (val & PSIHB9_IRQ_RESET) { - device_legacy_reset(DEVICE(&psi9->source)); + device_cold_reset(DEVICE(&psi9->source)); } psi->regs[reg] = val; break; diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c index be7018e8ac5..9ce018dbc27 100644 --- a/hw/ppc/pnv_xscom.c +++ b/hw/ppc/pnv_xscom.c @@ -284,6 +284,12 @@ int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset, _FDT(xscom_offset); g_free(name); _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,chip-id", chip->chip_id))); + /* + * On P10, the xscom bus id has been deprecated and the chip id is + * calculated from the "Primary topology table index". See skiboot. + */ + _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,primary-topology-index", + chip->chip_id))); _FDT((fdt_setprop_cell(fdt, xscom_offset, "#address-cells", 1))); _FDT((fdt_setprop_cell(fdt, xscom_offset, "#size-cells", 1))); _FDT((fdt_setprop(fdt, xscom_offset, "reg", reg, sizeof(reg)))); diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index bf28d6bfc8d..e8127599c90 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -23,7 +23,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "hw/irq.h" #include "hw/ppc/ppc.h" #include "hw/ppc/ppc_e500.h" @@ -38,22 +37,6 @@ #include "migration/vmstate.h" #include "trace.h" -//#define PPC_DEBUG_IRQ -//#define PPC_DEBUG_TB - -#ifdef PPC_DEBUG_IRQ -# define LOG_IRQ(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__) -#else -# define LOG_IRQ(...) 
do { } while (0) -#endif - - -#ifdef PPC_DEBUG_TB -# define LOG_TB(...) qemu_log(__VA_ARGS__) -#else -# define LOG_TB(...) do { } while (0) -#endif - static void cpu_ppc_tb_stop (CPUPPCState *env); static void cpu_ppc_tb_start (CPUPPCState *env); @@ -87,9 +70,8 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level) } - LOG_IRQ("%s: %p n_IRQ %d level %d => pending %08" PRIx32 - "req %08x\n", __func__, env, n_IRQ, level, - env->pending_interrupts, CPU(cpu)->interrupt_request); + trace_ppc_irq_set_exit(env, n_IRQ, level, env->pending_interrupts, + CPU(cpu)->interrupt_request); if (locked) { qemu_mutex_unlock_iothread(); @@ -103,8 +85,8 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) CPUPPCState *env = &cpu->env; int cur_level; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - env, pin, level); + trace_ppc_irq_set(env, pin, level); + cur_level = (env->irq_input_state >> pin) & 1; /* Don't generate spurious events */ if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { @@ -113,8 +95,7 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) switch (pin) { case PPC6xx_INPUT_TBEN: /* Level sensitive - active high */ - LOG_IRQ("%s: %s the time base\n", - __func__, level ? "start" : "stop"); + trace_ppc_irq_set_state("time base", level); if (level) { cpu_ppc_tb_start(env); } else { @@ -123,14 +104,12 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) break; case PPC6xx_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; case PPC6xx_INPUT_SMI: /* Level sensitive - active high */ - LOG_IRQ("%s: set the SMI IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("SMI IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_SMI, level); break; case PPC6xx_INPUT_MCP: @@ -139,8 +118,7 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) * 603/604/740/750: check HID0[EMCP] */ if (cur_level == 1 && level == 0) { - LOG_IRQ("%s: raise machine check state\n", - __func__); + trace_ppc_irq_set_state("machine check", 1); ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1); } break; @@ -149,26 +127,23 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) /* XXX: TODO: relay the signal to CKSTP_OUT pin */ /* XXX: Note that the only way to restart the CPU is to reset it */ if (level) { - LOG_IRQ("%s: stop the CPU\n", __func__); + trace_ppc_irq_cpu("stop"); cs->halted = 1; } break; case PPC6xx_INPUT_HRESET: /* Level sensitive - active low */ if (level) { - LOG_IRQ("%s: reset the CPU\n", __func__); + trace_ppc_irq_reset("CPU"); cpu_interrupt(cs, CPU_INTERRUPT_RESET); } break; case PPC6xx_INPUT_SRESET: - LOG_IRQ("%s: set the RESET IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("RESET IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level); break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; + g_assert_not_reached(); } if (level) env->irq_input_state |= 1 << pin; @@ -193,8 +168,8 @@ static void ppc970_set_irq(void *opaque, int pin, int level) CPUPPCState *env = &cpu->env; int cur_level; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - env, pin, level); + trace_ppc_irq_set(env, pin, level); + cur_level = (env->irq_input_state >> pin) & 1; /* Don't generate spurious events */ if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { @@ -203,14 +178,12 @@ static void ppc970_set_irq(void 
*opaque, int pin, int level) switch (pin) { case PPC970_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; case PPC970_INPUT_THINT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the SMI IRQ state to %d\n", __func__, - level); + trace_ppc_irq_set_state("SMI IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_THERM, level); break; case PPC970_INPUT_MCP: @@ -219,8 +192,7 @@ static void ppc970_set_irq(void *opaque, int pin, int level) * 603/604/740/750: check HID0[EMCP] */ if (cur_level == 1 && level == 0) { - LOG_IRQ("%s: raise machine check state\n", - __func__); + trace_ppc_irq_set_state("machine check", 1); ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1); } break; @@ -228,10 +200,10 @@ static void ppc970_set_irq(void *opaque, int pin, int level) /* Level sensitive - active low */ /* XXX: TODO: relay the signal to CKSTP_OUT pin */ if (level) { - LOG_IRQ("%s: stop the CPU\n", __func__); + trace_ppc_irq_cpu("stop"); cs->halted = 1; } else { - LOG_IRQ("%s: restart the CPU\n", __func__); + trace_ppc_irq_cpu("restart"); cs->halted = 0; qemu_cpu_kick(cs); } @@ -243,19 +215,15 @@ static void ppc970_set_irq(void *opaque, int pin, int level) } break; case PPC970_INPUT_SRESET: - LOG_IRQ("%s: set the RESET IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("RESET IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level); break; case PPC970_INPUT_TBEN: - LOG_IRQ("%s: set the TBEN state to %d\n", __func__, - level); + trace_ppc_irq_set_state("TBEN IRQ", level); /* XXX: TODO */ break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; + g_assert_not_reached(); } if (level) env->irq_input_state |= 1 << pin; @@ -277,20 +245,16 @@ static void power7_set_irq(void *opaque, int pin, int level) { PowerPCCPU *cpu = opaque; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - &cpu->env, pin, level); + trace_ppc_irq_set(&cpu->env, pin, level); switch (pin) { case POWER7_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; + g_assert_not_reached(); } } @@ -307,25 +271,21 @@ static void power9_set_irq(void *opaque, int pin, int level) { PowerPCCPU *cpu = opaque; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - &cpu->env, pin, level); + trace_ppc_irq_set(&cpu->env, pin, level); switch (pin) { case POWER9_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; case POWER9_INPUT_HINT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("HV external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_HVIRT, level); break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); + g_assert_not_reached(); return; } } @@ -376,6 +336,8 @@ void store_40x_dbcr0(CPUPPCState *env, uint32_t val) { PowerPCCPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + switch ((val >> 28) & 0x3) { case 0x0: /* No action */ @@ -393,6 +355,8 @@ void 
store_40x_dbcr0(CPUPPCState *env, uint32_t val) ppc40x_system_reset(cpu); break; } + + qemu_mutex_unlock_iothread(); } /* PowerPC 40x internal IRQ controller */ @@ -402,8 +366,8 @@ static void ppc40x_set_irq(void *opaque, int pin, int level) CPUPPCState *env = &cpu->env; int cur_level; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - env, pin, level); + trace_ppc_irq_set(env, pin, level); + cur_level = (env->irq_input_state >> pin) & 1; /* Don't generate spurious events */ if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { @@ -412,57 +376,51 @@ static void ppc40x_set_irq(void *opaque, int pin, int level) switch (pin) { case PPC40x_INPUT_RESET_SYS: if (level) { - LOG_IRQ("%s: reset the PowerPC system\n", - __func__); + trace_ppc_irq_reset("system"); ppc40x_system_reset(cpu); } break; case PPC40x_INPUT_RESET_CHIP: if (level) { - LOG_IRQ("%s: reset the PowerPC chip\n", __func__); + trace_ppc_irq_reset("chip"); ppc40x_chip_reset(cpu); } break; case PPC40x_INPUT_RESET_CORE: /* XXX: TODO: update DBSR[MRR] */ if (level) { - LOG_IRQ("%s: reset the PowerPC core\n", __func__); + trace_ppc_irq_reset("core"); ppc40x_core_reset(cpu); } break; case PPC40x_INPUT_CINT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the critical IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("critical IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level); break; case PPC40x_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; case PPC40x_INPUT_HALT: /* Level sensitive - active low */ if (level) { - LOG_IRQ("%s: stop the CPU\n", __func__); + trace_ppc_irq_cpu("stop"); cs->halted = 1; } else { - LOG_IRQ("%s: restart the CPU\n", __func__); + trace_ppc_irq_cpu("restart"); cs->halted = 0; qemu_cpu_kick(cs); } break; case PPC40x_INPUT_DEBUG: /* Level sensitive - active high */ - LOG_IRQ("%s: set the debug pin state to %d\n", - __func__, level); + trace_ppc_irq_set_state("debug pin", level); ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level); break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; + g_assert_not_reached(); } if (level) env->irq_input_state |= 1 << pin; @@ -486,47 +444,41 @@ static void ppce500_set_irq(void *opaque, int pin, int level) CPUPPCState *env = &cpu->env; int cur_level; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - env, pin, level); + trace_ppc_irq_set(env, pin, level); + cur_level = (env->irq_input_state >> pin) & 1; /* Don't generate spurious events */ if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { switch (pin) { case PPCE500_INPUT_MCK: if (level) { - LOG_IRQ("%s: reset the PowerPC system\n", - __func__); + trace_ppc_irq_reset("system"); qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; case PPCE500_INPUT_RESET_CORE: if (level) { - LOG_IRQ("%s: reset the PowerPC core\n", __func__); + trace_ppc_irq_reset("core"); ppc_set_irq(cpu, PPC_INTERRUPT_MCK, level); } break; case PPCE500_INPUT_CINT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the critical IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("critical IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level); break; case PPCE500_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the core IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("core IRQ", level); ppc_set_irq(cpu, 
PPC_INTERRUPT_EXT, level); break; case PPCE500_INPUT_DEBUG: /* Level sensitive - active high */ - LOG_IRQ("%s: set the debug pin state to %d\n", - __func__, level); + trace_ppc_irq_set_state("debug pin", level); ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level); break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; + g_assert_not_reached(); } if (level) env->irq_input_state |= 1 << pin; @@ -577,7 +529,7 @@ uint64_t cpu_ppc_load_tbl (CPUPPCState *env) } tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); - LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb); + trace_ppc_tb_load(tb); return tb; } @@ -588,7 +540,7 @@ static inline uint32_t _cpu_ppc_load_tbu(CPUPPCState *env) uint64_t tb; tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); - LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb); + trace_ppc_tb_load(tb); return tb >> 32; } @@ -608,8 +560,7 @@ static inline void cpu_ppc_store_tb(ppc_tb_t *tb_env, uint64_t vmclk, *tb_offsetp = value - muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND); - LOG_TB("%s: tb %016" PRIx64 " offset %08" PRIx64 "\n", - __func__, value, *tb_offsetp); + trace_ppc_tb_store(value, *tb_offsetp); } void cpu_ppc_store_tbl (CPUPPCState *env, uint32_t value) @@ -645,7 +596,7 @@ uint64_t cpu_ppc_load_atbl (CPUPPCState *env) uint64_t tb; tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); - LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb); + trace_ppc_tb_load(tb); return tb; } @@ -656,7 +607,7 @@ uint32_t cpu_ppc_load_atbu (CPUPPCState *env) uint64_t tb; tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); - LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb); + trace_ppc_tb_load(tb); return tb >> 32; } @@ -775,7 +726,7 @@ static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next) } else { decr = -muldiv64(-diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND); } - LOG_TB("%s: %016" PRIx64 "\n", __func__, decr); + trace_ppc_decr_load(decr); return decr; } @@ -834,7 +785,7 @@ uint64_t cpu_ppc_load_purr (CPUPPCState *env) static inline void cpu_ppc_decr_excp(PowerPCCPU *cpu) { /* Raise it */ - LOG_TB("raise decrementer exception\n"); + trace_ppc_decr_excp("raise"); ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 1); } @@ -848,7 +799,7 @@ static inline void cpu_ppc_hdecr_excp(PowerPCCPU *cpu) CPUPPCState *env = &cpu->env; /* Raise it */ - LOG_TB("raise hv decrementer exception\n"); + trace_ppc_decr_excp("raise HV"); /* The architecture specifies that we don't deliver HDEC * interrupts in a PM state. 
Not only they don't cause a @@ -874,17 +825,14 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, CPUPPCState *env = &cpu->env; ppc_tb_t *tb_env = env->tb_env; uint64_t now, next; - bool negative; + int64_t signed_value; + int64_t signed_decr; /* Truncate value to decr_width and sign extend for simplicity */ - value &= ((1ULL << nr_bits) - 1); - negative = !!(value & (1ULL << (nr_bits - 1))); - if (negative) { - value |= (0xFFFFFFFFULL << nr_bits); - } + signed_value = sextract64(value, 0, nr_bits); + signed_decr = sextract64(decr, 0, nr_bits); - LOG_TB("%s: " TARGET_FMT_lx " => " TARGET_FMT_lx "\n", __func__, - decr, value); + trace_ppc_decr_store(nr_bits, decr, value); if (kvm_enabled()) { /* KVM handles decrementer exceptions, we don't need our own timer */ @@ -905,15 +853,15 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, * an edge interrupt, so raise it here too. */ if ((value < 3) || - ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && negative) || - ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && negative - && !(decr & (1ULL << (nr_bits - 1))))) { + ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && signed_value < 0) || + ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && signed_value < 0 + && signed_decr >= 0)) { (*raise_excp)(cpu); return; } /* On MSB level based systems a 0 for the MSB stops interrupt delivery */ - if (!negative && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) { + if (signed_value >= 0 && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) { (*lower_excp)(cpu); } @@ -1212,9 +1160,8 @@ static void cpu_4xx_fit_cb (void *opaque) if ((env->spr[SPR_40x_TCR] >> 23) & 0x1) { ppc_set_irq(cpu, PPC_INTERRUPT_FIT, 1); } - LOG_TB("%s: ir %d TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx "\n", __func__, - (int)((env->spr[SPR_40x_TCR] >> 23) & 0x1), - env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); + trace_ppc4xx_fit((int)((env->spr[SPR_40x_TCR] >> 23) & 0x1), + env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); } /* Programmable interval timer */ @@ -1228,11 +1175,10 @@ static void start_stop_pit (CPUPPCState *env, ppc_tb_t *tb_env, int is_excp) !((env->spr[SPR_40x_TCR] >> 26) & 0x1) || (is_excp && !((env->spr[SPR_40x_TCR] >> 22) & 0x1))) { /* Stop PIT */ - LOG_TB("%s: stop PIT\n", __func__); + trace_ppc4xx_pit_stop(); timer_del(tb_env->decr_timer); } else { - LOG_TB("%s: start PIT %016" PRIx64 "\n", - __func__, ppc40x_timer->pit_reload); + trace_ppc4xx_pit_start(ppc40x_timer->pit_reload); now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); next = now + muldiv64(ppc40x_timer->pit_reload, NANOSECONDS_PER_SECOND, tb_env->decr_freq); @@ -1261,9 +1207,7 @@ static void cpu_4xx_pit_cb (void *opaque) ppc_set_irq(cpu, ppc40x_timer->decr_excp, 1); } start_stop_pit(env, tb_env, 1); - LOG_TB("%s: ar %d ir %d TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx " " - "%016" PRIx64 "\n", __func__, - (int)((env->spr[SPR_40x_TCR] >> 22) & 0x1), + trace_ppc4xx_pit((int)((env->spr[SPR_40x_TCR] >> 22) & 0x1), (int)((env->spr[SPR_40x_TCR] >> 26) & 0x1), env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR], ppc40x_timer->pit_reload); @@ -1303,8 +1247,7 @@ static void cpu_4xx_wdt_cb (void *opaque) next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->decr_freq); if (next == now) next++; - LOG_TB("%s: TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx "\n", __func__, - env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); + trace_ppc4xx_wdt(env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); switch ((env->spr[SPR_40x_TSR] >> 30) & 0x3) { case 0x0: case 0x1: @@ -1347,7 +1290,7 @@ void store_40x_pit (CPUPPCState *env, 
target_ulong val) tb_env = env->tb_env; ppc40x_timer = tb_env->opaque; - LOG_TB("%s val" TARGET_FMT_lx "\n", __func__, val); + trace_ppc40x_store_pit(val); ppc40x_timer->pit_reload = val; start_stop_pit(env, tb_env, 0); } @@ -1362,8 +1305,7 @@ static void ppc_40x_set_tb_clk (void *opaque, uint32_t freq) CPUPPCState *env = opaque; ppc_tb_t *tb_env = env->tb_env; - LOG_TB("%s set new frequency to %" PRIu32 "\n", __func__, - freq); + trace_ppc40x_set_tb_clk(freq); tb_env->tb_freq = freq; tb_env->decr_freq = freq; /* XXX: we should also update all timers */ @@ -1382,7 +1324,7 @@ clk_setup_cb ppc_40x_timers_init (CPUPPCState *env, uint32_t freq, tb_env->tb_freq = freq; tb_env->decr_freq = freq; tb_env->opaque = ppc40x_timer; - LOG_TB("%s freq %" PRIu32 "\n", __func__, freq); + trace_ppc40x_timers_init(freq); if (ppc40x_timer != NULL) { /* We use decr timer for PIT */ tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_4xx_pit_cb, env); diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c index 8f77887fb18..972a7a4a3e5 100644 --- a/hw/ppc/ppc405_boards.c +++ b/hw/ppc/ppc405_boards.c @@ -34,15 +34,12 @@ #include "ppc405.h" #include "hw/rtc/m48t59.h" #include "hw/block/flash.h" -#include "sysemu/sysemu.h" #include "sysemu/qtest.h" #include "sysemu/reset.h" #include "sysemu/block-backend.h" #include "hw/boards.h" -#include "qemu/log.h" #include "qemu/error-report.h" #include "hw/loader.h" -#include "exec/address-spaces.h" #include "qemu/cutils.h" #define BIOS_FILENAME "ppc405_rom.bin" diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c index fe047074a17..e632c408bdf 100644 --- a/hw/ppc/ppc405_uc.c +++ b/hw/ppc/ppc405_uc.c @@ -34,7 +34,6 @@ #include "qemu/timer.h" #include "sysemu/reset.h" #include "sysemu/sysemu.h" -#include "qemu/log.h" #include "exec/address-spaces.h" #include "hw/intc/ppc-uic.h" #include "hw/qdev-properties.h" diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c index b7539aa7210..7fb620b9a05 100644 --- a/hw/ppc/ppc440_bamboo.c +++ b/hw/ppc/ppc440_bamboo.c @@ -25,7 +25,6 @@ #include "sysemu/device_tree.h" #include "hw/loader.h" #include "elf.h" -#include "exec/address-spaces.h" #include "hw/char/serial.h" #include "hw/ppc/ppc.h" #include "ppc405.h" diff --git a/hw/ppc/ppc440_pcix.c b/hw/ppc/ppc440_pcix.c index 91cbcd0504a..788d25514a4 100644 --- a/hw/ppc/ppc440_pcix.c +++ b/hw/ppc/ppc440_pcix.c @@ -28,7 +28,6 @@ #include "hw/ppc/ppc4xx.h" #include "hw/pci/pci.h" #include "hw/pci/pci_host.h" -#include "exec/address-spaces.h" #include "trace.h" #include "qom/object.h" diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c index f6f89058ab7..993e3ba955d 100644 --- a/hw/ppc/ppc440_uc.c +++ b/hw/ppc/ppc440_uc.c @@ -14,9 +14,7 @@ #include "qapi/error.h" #include "qemu/log.h" #include "qemu/module.h" -#include "cpu.h" #include "hw/irq.h" -#include "exec/address-spaces.h" #include "exec/memory.h" #include "hw/ppc/ppc.h" #include "hw/qdev-properties.h" diff --git a/hw/ppc/ppc4xx_devs.c b/hw/ppc/ppc4xx_devs.c index fe9d4f7155e..980c48944fc 100644 --- a/hw/ppc/ppc4xx_devs.c +++ b/hw/ppc/ppc4xx_devs.c @@ -29,7 +29,6 @@ #include "hw/irq.h" #include "hw/ppc/ppc.h" #include "hw/ppc/ppc4xx.h" -#include "hw/boards.h" #include "hw/intc/ppc-uic.h" #include "hw/qdev-properties.h" #include "qemu/log.h" diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c index e8789f64e80..304a29349c2 100644 --- a/hw/ppc/ppc4xx_pci.c +++ b/hw/ppc/ppc4xx_pci.c @@ -28,7 +28,6 @@ #include "sysemu/reset.h" #include "hw/pci/pci.h" #include "hw/pci/pci_host.h" -#include "exec/address-spaces.h" 
#include "trace.h" #include "qom/object.h" @@ -49,12 +48,14 @@ OBJECT_DECLARE_SIMPLE_TYPE(PPC4xxPCIState, PPC4xx_PCI_HOST_BRIDGE) #define PPC4xx_PCI_NR_PMMS 3 #define PPC4xx_PCI_NR_PTMS 2 +#define PPC4xx_PCI_NUM_DEVS 5 + struct PPC4xxPCIState { PCIHostState parent_obj; struct PCIMasterMap pmm[PPC4xx_PCI_NR_PMMS]; struct PCITargetMap ptm[PPC4xx_PCI_NR_PTMS]; - qemu_irq irq[PCI_NUM_PINS]; + qemu_irq irq[PPC4xx_PCI_NUM_DEVS]; MemoryRegion container; MemoryRegion iomem; @@ -247,7 +248,7 @@ static int ppc4xx_pci_map_irq(PCIDevice *pci_dev, int irq_num) trace_ppc4xx_pci_map_irq(pci_dev->devfn, irq_num, slot); - return slot - 1; + return slot > 0 ? slot - 1 : PPC4xx_PCI_NUM_DEVS - 1; } static void ppc4xx_pci_set_irq(void *opaque, int irq_num, int level) @@ -255,7 +256,7 @@ static void ppc4xx_pci_set_irq(void *opaque, int irq_num, int level) qemu_irq *pci_irqs = opaque; trace_ppc4xx_pci_set_irq(irq_num); - assert(irq_num >= 0); + assert(irq_num >= 0 && irq_num < PPC4xx_PCI_NUM_DEVS); qemu_set_irq(pci_irqs[irq_num], level); } diff --git a/hw/ppc/ppc_booke.c b/hw/ppc/ppc_booke.c index 974c0c8a752..10b643861f2 100644 --- a/hw/ppc/ppc_booke.c +++ b/hw/ppc/ppc_booke.c @@ -28,7 +28,6 @@ #include "qemu/timer.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" -#include "qemu/log.h" #include "hw/loader.h" #include "kvm_ppc.h" diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index f1b1efdcef9..25a2e86b421 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -24,12 +24,10 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "hw/rtc/m48t59.h" #include "hw/char/serial.h" #include "hw/block/fdc.h" #include "net/net.h" -#include "sysemu/sysemu.h" #include "hw/isa/isa.h" #include "hw/pci/pci.h" #include "hw/pci/pci_host.h" @@ -38,15 +36,12 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/log.h" -#include "hw/irq.h" #include "hw/loader.h" #include "hw/rtc/mc146818rtc.h" #include "hw/isa/pc87312.h" #include "hw/qdev-properties.h" -#include "sysemu/arch_init.h" #include "sysemu/kvm.h" #include "sysemu/reset.h" -#include "exec/address-spaces.h" #include "trace.h" #include "elf.h" #include "qemu/units.h" diff --git a/hw/ppc/rs6000_mc.c b/hw/ppc/rs6000_mc.c index 4db5b51a2dd..c0bc212e924 100644 --- a/hw/ppc/rs6000_mc.c +++ b/hw/ppc/rs6000_mc.c @@ -23,7 +23,6 @@ #include "hw/qdev-properties.h" #include "migration/vmstate.h" #include "exec/address-spaces.h" -#include "hw/boards.h" #include "qapi/error.h" #include "trace.h" #include "qom/object.h" diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c index 0c6baf77e8c..0737234d66e 100644 --- a/hw/ppc/sam460ex.c +++ b/hw/ppc/sam460ex.c @@ -24,7 +24,6 @@ #include "sysemu/block-backend.h" #include "hw/loader.h" #include "elf.h" -#include "exec/address-spaces.h" #include "exec/memory.h" #include "ppc440.h" #include "ppc405.h" diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index e4be00b732a..3b5fd749be8 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -29,6 +29,7 @@ #include "qemu/datadir.h" #include "qapi/error.h" #include "qapi/qapi-events-machine.h" +#include "qapi/qapi-events-qdev.h" #include "qapi/visitor.h" #include "sysemu/sysemu.h" #include "sysemu/hostmem.h" @@ -54,7 +55,6 @@ #include "cpu-models.h" #include "hw/core/cpu.h" -#include "hw/boards.h" #include "hw/ppc/ppc.h" #include "hw/loader.h" @@ -70,7 +70,6 @@ #include "hw/virtio/virtio-scsi.h" #include "hw/virtio/vhost-scsi-common.h" -#include "exec/address-spaces.h" #include "exec/ram_addr.h" #include "hw/usb.h" #include "qemu/config-file.h" @@ -100,9 +99,10 @@ * * We load our kernel at 4M, 
leaving space for SLOF initial image */ -#define RTAS_MAX_ADDR 0x80000000 /* RTAS must stay below that */ +#define FDT_MAX_ADDR 0x80000000 /* FDT must stay below that */ #define FW_MAX_SIZE 0x400000 #define FW_FILE_NAME "slof.bin" +#define FW_FILE_NAME_VOF "vof.bin" #define FW_OVERHEAD 0x2800000 #define KERNEL_LOAD_ADDR FW_MAX_SIZE @@ -705,10 +705,10 @@ static void spapr_dt_cpu(CPUState *cs, void *fdt, int offset, _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); - if (env->spr_cb[SPR_PURR].oea_read) { + if (ppc_has_spr(cpu, SPR_PURR)) { _FDT((fdt_setprop_cell(fdt, offset, "ibm,purr", 1))); } - if (env->spr_cb[SPR_SPURR].oea_read) { + if (ppc_has_spr(cpu, SPR_SPURR)) { _FDT((fdt_setprop_cell(fdt, offset, "ibm,spurr", 1))); } @@ -882,6 +882,10 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) add_str(hypertas, "hcall-copy"); add_str(hypertas, "hcall-debug"); add_str(hypertas, "hcall-vphn"); + if (spapr_get_cap(spapr, SPAPR_CAP_RPT_INVALIDATE) == SPAPR_CAP_ON) { + add_str(hypertas, "hcall-rpt-invalidate"); + } + add_str(qemu_hypertas, "hcall-memop1"); if (!kvm_enabled() || kvmppc_spapr_use_multitce()) { @@ -921,9 +925,13 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) * * The extra 8 bytes is required because Linux's FWNMI error log check * is off-by-one. + * + * RTAS_MIN_SIZE is required for the RTAS blob itself. */ - _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_ERROR_LOG_MAX + - ms->smp.max_cpus * sizeof(uint64_t)*2 + sizeof(uint64_t))); + _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_MIN_SIZE + + RTAS_ERROR_LOG_MAX + + ms->smp.max_cpus * sizeof(uint64_t) * 2 + + sizeof(uint64_t))); _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)); _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate", @@ -981,6 +989,7 @@ static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt, */ val[1] = SPAPR_OV5_XIVE_LEGACY; /* XICS */ val[3] = 0x00; /* Hash */ + spapr_check_mmu_mode(false); } else if (kvm_enabled()) { if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) { val[3] = 0x80; /* OV5_MMU_BOTH */ @@ -1006,7 +1015,7 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset) _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen")); if (reset) { - const char *boot_device = machine->boot_order; + const char *boot_device = spapr->boot_device; char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); size_t cb = 0; char *bootlist = get_boot_devices_list(&cb); @@ -1405,7 +1414,7 @@ void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, kvmppc_write_hpte(ptex, pte0, pte1); } else { if (pte0 & HPTE64_V_VALID) { - stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1); + stq_p(spapr->htab + offset + HPTE64_DW1, pte1); /* * When setting valid, we write PTE1 first.
This ensures * proper synchronization with the reading code in @@ -1421,7 +1430,7 @@ void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, * ppc_hash64_pteg_search() */ smp_wmb(); - stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1); + stq_p(spapr->htab + offset + HPTE64_DW1, pte1); } } } @@ -1429,7 +1438,7 @@ void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, static void spapr_hpte_set_c(PPCVirtualHypervisor *vhyp, hwaddr ptex, uint64_t pte1) { - hwaddr offset = ptex * HASH_PTE_SIZE_64 + 15; + hwaddr offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C; SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); if (!spapr->htab) { @@ -1445,7 +1454,7 @@ static void spapr_hpte_set_c(PPCVirtualHypervisor *vhyp, hwaddr ptex, static void spapr_hpte_set_r(PPCVirtualHypervisor *vhyp, hwaddr ptex, uint64_t pte1) { - hwaddr offset = ptex * HASH_PTE_SIZE_64 + 14; + hwaddr offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R; SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); if (!spapr->htab) { @@ -1558,6 +1567,22 @@ void spapr_setup_hpt(SpaprMachineState *spapr) } } +void spapr_check_mmu_mode(bool guest_radix) +{ + if (guest_radix) { + if (kvm_enabled() && !kvmppc_has_cap_mmu_radix()) { + error_report("Guest requested unavailable MMU mode (radix)."); + exit(EXIT_FAILURE); + } + } else { + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() + && !kvmppc_has_cap_mmu_hash_v3()) { + error_report("Guest requested unavailable MMU mode (hash)."); + exit(EXIT_FAILURE); + } + } +} + static void spapr_machine_reset(MachineState *machine) { SpaprMachineState *spapr = SPAPR_MACHINE(machine); @@ -1617,29 +1642,36 @@ static void spapr_machine_reset(MachineState *machine) spapr_clear_pending_events(spapr); /* - * We place the device tree and RTAS just below either the top of the RMA, + * We place the device tree just below either the top of the RMA, * or just below 2GB, whichever is lower, so that it can be * processed with 32-bit real mode code if necessary */ - fdt_addr = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FDT_MAX_SIZE; + fdt_addr = MIN(spapr->rma_size, FDT_MAX_ADDR) - FDT_MAX_SIZE; fdt = spapr_build_fdt(spapr, true, FDT_MAX_SIZE); + if (spapr->vof) { + spapr_vof_reset(spapr, fdt, &error_fatal); + /* + * Do not pack the FDT as the client may change properties. + * VOF client does not expect the FDT so we do not load it to the VM. 
+ */ + } else { + rc = fdt_pack(fdt); + /* Should only fail if we've built a corrupted tree */ + assert(rc == 0); - rc = fdt_pack(fdt); - - /* Should only fail if we've built a corrupted tree */ - assert(rc == 0); - - /* Load the fdt */ + spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, + 0, fdt_addr, 0); + cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); + } qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); - cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); + g_free(spapr->fdt_blob); spapr->fdt_size = fdt_totalsize(fdt); spapr->fdt_initial_size = spapr->fdt_size; spapr->fdt_blob = fdt; /* Set up the entry state */ - spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, 0, fdt_addr, 0); first_ppc_cpu->env.gpr[5] = 0; spapr->fwnmi_system_reset_addr = -1; @@ -2003,6 +2035,7 @@ static const VMStateDescription vmstate_spapr = { &vmstate_spapr_cap_ccf_assist, &vmstate_spapr_cap_fwnmi, &vmstate_spapr_fwnmi, + &vmstate_spapr_cap_rpt_invalidate, NULL } }; @@ -2361,8 +2394,10 @@ static SaveVMHandlers savevm_htab_handlers = { static void spapr_boot_set(void *opaque, const char *boot_device, Error **errp) { - MachineState *machine = MACHINE(opaque); - machine->boot_order = g_strdup(boot_device); + SpaprMachineState *spapr = SPAPR_MACHINE(opaque); + + g_free(spapr->boot_device); + spapr->boot_device = g_strdup(boot_device); } static void spapr_create_lmb_dr_connectors(SpaprMachineState *spapr) @@ -2640,7 +2675,8 @@ static void spapr_machine_init(MachineState *machine) SpaprMachineState *spapr = SPAPR_MACHINE(machine); SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); MachineClass *mc = MACHINE_GET_CLASS(machine); - const char *bios_name = machine->firmware ?: FW_FILE_NAME; + const char *bios_default = spapr->vof ? FW_FILE_NAME_VOF : FW_FILE_NAME; + const char *bios_name = machine->firmware ?: bios_default; const char *kernel_filename = machine->kernel_filename; const char *initrd_filename = machine->initrd_filename; PCIHostState *phb; @@ -2694,7 +2730,7 @@ static void spapr_machine_init(MachineState *machine) spapr->rma_size = spapr_rma_size(spapr, &error_fatal); /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */ - load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD; + load_limit = MIN(spapr->rma_size, FDT_MAX_ADDR) - FW_OVERHEAD; /* * VSMT must be set in order to be able to compute VCPU ids, ie to @@ -2717,6 +2753,11 @@ static void spapr_machine_init(MachineState *machine) spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY); + /* Do not advertise FORM2 NUMA support for pseries-6.1 and older */ + if (!smc->pre_6_2_numa_affinity) { + spapr_ovec_set(spapr->ov5, OV5_FORM2_AFFINITY); + } + /* advertise support for dedicated HP event source to guests */ if (spapr->use_hotplug_event_source) { spapr_ovec_set(spapr->ov5, OV5_HP_EVT); @@ -2738,39 +2779,6 @@ static void spapr_machine_init(MachineState *machine) /* init CPUs */ spapr_init_cpus(spapr); - /* - * check we don't have a memory-less/cpu-less NUMA node - * Firmware relies on the existing memory/cpu topology to provide the - * NUMA topology to the kernel. - * And the linux kernel needs to know the NUMA topology at start - * to be able to hotplug CPUs later. 
- */ - if (machine->numa_state->num_nodes) { - for (i = 0; i < machine->numa_state->num_nodes; ++i) { - /* check for memory-less node */ - if (machine->numa_state->nodes[i].node_mem == 0) { - CPUState *cs; - int found = 0; - /* check for cpu-less node */ - CPU_FOREACH(cs) { - PowerPCCPU *cpu = POWERPC_CPU(cs); - if (cpu->node_id == i) { - found = 1; - break; - } - } - /* memory-less and cpu-less node */ - if (!found) { - error_report( - "Memory-less/cpu-less nodes are not supported (node %d)", - i); - exit(1); - } - } - } - - } - spapr->gpu_numa_id = spapr_numa_initial_nvgpu_numa_id(machine); /* Init numa_assoc_array */ @@ -2997,6 +3005,10 @@ static void spapr_machine_init(MachineState *machine) } qemu_cond_init(&spapr->fwnmi_machine_check_interlock_cond); + if (spapr->vof) { + spapr->vof->fw_size = fw_size; /* for claim() on itself */ + spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, spapr_h_vof_client); + } } #define DEFAULT_KVM_TYPE "auto" @@ -3089,7 +3101,7 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, */ if (strcmp("usb-host", qdev_fw_name(dev)) == 0) { USBDevice *usbdev = CAST(USBDevice, dev, TYPE_USB_DEVICE); - if (usb_host_dev_is_scsi_storage(usbdev)) { + if (usb_device_is_scsi_storage(usbdev)) { return g_strdup_printf("storage@%s/disk", usbdev->port->path); } } @@ -3187,6 +3199,28 @@ static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp) } } +static bool spapr_get_vof(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + return spapr->vof != NULL; +} + +static void spapr_set_vof(Object *obj, bool value, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + if (spapr->vof) { + vof_cleanup(spapr->vof); + g_free(spapr->vof); + spapr->vof = NULL; + } + if (!value) { + return; + } + spapr->vof = g_malloc0(sizeof(*spapr->vof)); +} + static char *spapr_get_ic_mode(Object *obj, Error **errp) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); @@ -3312,6 +3346,11 @@ static void spapr_instance_init(Object *obj) stringify(KERNEL_LOAD_ADDR) " for -kernel is the default"); spapr->kernel_addr = KERNEL_LOAD_ADDR; + + object_property_add_bool(obj, "x-vof", spapr_get_vof, spapr_set_vof); + object_property_set_description(obj, "x-vof", + "Enable Virtual Open Firmware (experimental)"); + /* The machine class defines the default interrupt controller mode */ spapr->irq = smc->irq; object_property_add_str(obj, "ic-mode", spapr_get_ic_mode, @@ -3620,11 +3659,18 @@ void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev) /* * Tell QAPI that something happened and the memory - * hotunplug wasn't successful. + * hotunplug wasn't successful. Keep sending + * MEM_UNPLUG_ERROR even while sending + * DEVICE_UNPLUG_GUEST_ERROR until the deprecation of + * MEM_UNPLUG_ERROR is due. */ qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest " "for device %s", dev->id); - qapi_event_send_mem_unplug_error(dev->id, qapi_error); + + qapi_event_send_mem_unplug_error(dev->id ? : "", qapi_error); + + qapi_event_send_device_unplug_guest_error(!!dev->id, dev->id, + dev->canonical_path); } /* Callback to be called during DRC release. 
*/ @@ -4475,6 +4521,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) XICSFabricClass *xic = XICS_FABRIC_CLASS(oc); InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc); XiveFabricClass *xfc = XIVE_FABRIC_CLASS(oc); + VofMachineIfClass *vmc = VOF_MACHINE_CLASS(oc); mc->desc = "pSeries Logical Partition (PAPR compliant)"; mc->ignore_boot_device_suffixes = true; @@ -4487,7 +4534,16 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) mc->init = spapr_machine_init; mc->reset = spapr_machine_reset; mc->block_default_type = IF_SCSI; - mc->max_cpus = 1024; + + /* + * Setting max_cpus to INT32_MAX. Both KVM and TCG max_cpus values + * should be limited by the host capability instead of hardcoded. + * max_cpus for KVM guests will be checked in kvm_init(), and TCG + * guests are welcome to have as many CPUs as the host are capable + * of emulate. + */ + mc->max_cpus = INT32_MAX; + mc->no_parallel = 1; mc->default_boot_order = ""; mc->default_ram_size = 512 * MiB; @@ -4547,6 +4603,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON; smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON; smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON; + smc->default_caps.caps[SPAPR_CAP_RPT_INVALIDATE] = SPAPR_CAP_OFF; spapr_caps_add_properties(smc); smc->irq = &spapr_irq_dual; smc->dr_phb_enabled = true; @@ -4554,6 +4611,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->smp_threads_vsmt = true; smc->nr_xirqs = SPAPR_NR_XIRQS; xfc->match_nvt = spapr_match_nvt; + vmc->client_architecture_support = spapr_vof_client_architecture_support; + vmc->quiesce = spapr_vof_quiesce; + vmc->setprop = spapr_vof_setprop; } static const TypeInfo spapr_machine_info = { @@ -4573,6 +4633,7 @@ static const TypeInfo spapr_machine_info = { { TYPE_XICS_FABRIC }, { TYPE_INTERRUPT_STATS_PROVIDER }, { TYPE_XIVE_FABRIC }, + { TYPE_VOF_MACHINE_IF }, { } }, }; @@ -4604,15 +4665,41 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +/* + * pseries-6.2 + */ +static void spapr_machine_6_2_class_options(MachineClass *mc) +{ + /* Defaults for the latest behaviour inherited from the base class */ +} + +DEFINE_SPAPR_MACHINE(6_2, "6.2", true); + +/* + * pseries-6.1 + */ +static void spapr_machine_6_1_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_6_2_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len); + smc->pre_6_2_numa_affinity = true; + mc->smp_props.prefer_sockets = true; +} + +DEFINE_SPAPR_MACHINE(6_1, "6.1", false); + /* * pseries-6.0 */ static void spapr_machine_6_0_class_options(MachineClass *mc) { - /* Defaults for the latest behaviour inherited from the base class */ + spapr_machine_6_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len); } -DEFINE_SPAPR_MACHINE(6_0, "6.0", true); +DEFINE_SPAPR_MACHINE(6_0, "6.0", false); /* * pseries-5.2 diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index 9ea7ddd1e9a..ed7c077a0d9 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -371,6 +371,65 @@ static bool spapr_pagesize_cb(void *opaque, uint32_t seg_pshift, return true; } +static void ppc_hash64_filter_pagesizes(PowerPCCPU *cpu, + bool (*cb)(void *, uint32_t, uint32_t), + void *opaque) +{ + PPCHash64Options *opts = cpu->hash64_opts; + int i; + int n = 0; + bool 
ci_largepage = false; + + assert(opts); + + n = 0; + for (i = 0; i < ARRAY_SIZE(opts->sps); i++) { + PPCHash64SegmentPageSizes *sps = &opts->sps[i]; + int j; + int m = 0; + + assert(n <= i); + + if (!sps->page_shift) { + break; + } + + for (j = 0; j < ARRAY_SIZE(sps->enc); j++) { + PPCHash64PageSize *ps = &sps->enc[j]; + + assert(m <= j); + if (!ps->page_shift) { + break; + } + + if (cb(opaque, sps->page_shift, ps->page_shift)) { + if (ps->page_shift >= 16) { + ci_largepage = true; + } + sps->enc[m++] = *ps; + } + } + + /* Clear rest of the row */ + for (j = m; j < ARRAY_SIZE(sps->enc); j++) { + memset(&sps->enc[j], 0, sizeof(sps->enc[j])); + } + + if (m) { + n++; + } + } + + /* Clear the rest of the table */ + for (i = n; i < ARRAY_SIZE(opts->sps); i++) { + memset(&opts->sps[i], 0, sizeof(opts->sps[i])); + } + + if (!ci_largepage) { + opts->flags &= ~PPC_HASH64_CI_LARGEPAGE; + } +} + static void cap_hpt_maxpagesize_cpu_apply(SpaprMachineState *spapr, PowerPCCPU *cpu, uint8_t val, Error **errp) @@ -523,6 +582,37 @@ static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val, } } +static void cap_rpt_invalidate_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) +{ + ERRP_GUARD(); + + if (!val) { + /* capability disabled by default */ + return; + } + + if (tcg_enabled()) { + error_setg(errp, "No H_RPT_INVALIDATE support in TCG"); + error_append_hint(errp, + "Try appending -machine cap-rpt-invalidate=off\n"); + } else if (kvm_enabled()) { + if (!kvmppc_has_cap_mmu_radix()) { + error_setg(errp, "H_RPT_INVALIDATE only supported on Radix"); + return; + } + + if (!kvmppc_has_cap_rpt_invalidate()) { + error_setg(errp, + "KVM implementation does not support H_RPT_INVALIDATE"); + error_append_hint(errp, + "Try appending -machine cap-rpt-invalidate=off\n"); + } else { + kvmppc_enable_h_rpt_invalidate(); + } + } +} + SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = { [SPAPR_CAP_HTM] = { .name = "htm", @@ -631,6 +721,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = { .type = "bool", .apply = cap_fwnmi_apply, }, + [SPAPR_CAP_RPT_INVALIDATE] = { + .name = "rpt-invalidate", + .description = "Allow H_RPT_INVALIDATE", + .index = SPAPR_CAP_RPT_INVALIDATE, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_rpt_invalidate_apply, + }, }; static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr, @@ -771,6 +870,7 @@ SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV); SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER); SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST); SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI); +SPAPR_CAP_MIG_STATE(rpt_invalidate, SPAPR_CAP_RPT_INVALIDATE); void spapr_caps_init(SpaprMachineState *spapr) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 4f316a6f9d3..58e7341cb78 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -382,6 +382,7 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { DEFINE_SPAPR_CPU_CORE_TYPE("power9_v1.0"), DEFINE_SPAPR_CPU_CORE_TYPE("power9_v2.0"), DEFINE_SPAPR_CPU_CORE_TYPE("power10_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("power10_v2.0"), #ifdef CONFIG_KVM DEFINE_SPAPR_CPU_CORE_TYPE("host"), #endif diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 9e16505fa1b..f8ac0a10df1 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -13,11 +13,12 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qapi/qmp/qnull.h" -#include "cpu.h" #include "qemu/cutils.h" #include "hw/ppc/spapr_drc.h" #include 
"qom/object.h" #include "migration/vmstate.h" +#include "qapi/error.h" +#include "qapi/qapi-events-qdev.h" #include "qapi/visitor.h" #include "qemu/error-report.h" #include "hw/ppc/spapr.h" /* for RTAS return codes */ @@ -151,9 +152,34 @@ static uint32_t drc_isolate_logical(SpaprDrc *drc) static uint32_t drc_unisolate_logical(SpaprDrc *drc) { + SpaprMachineState *spapr = NULL; + switch (drc->state) { case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + /* + * Unisolating a logical DRC that was marked for unplug + * means that the kernel is refusing the removal. + */ + if (drc->unplug_requested && drc->dev) { + if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) { + spapr = SPAPR_MACHINE(qdev_get_machine()); + + spapr_memory_unplug_rollback(spapr, drc->dev); + } + + drc->unplug_requested = false; + + if (drc->dev->id) { + error_report("Device hotunplug rejected by the guest " + "for device %s", drc->dev->id); + } + + qapi_event_send_device_unplug_guest_error(!!drc->dev->id, + drc->dev->id, + drc->dev->canonical_path); + } + return RTAS_OUT_SUCCESS; /* Nothing to do */ case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: break; /* see below */ diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index d51daedfa6e..630e86282c9 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -27,7 +27,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "sysemu/device_tree.h" #include "sysemu/runstate.h" @@ -873,7 +872,6 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); CPUState *cs = CPU(cpu); int ret; - Error *local_err = NULL; if (spapr->fwnmi_machine_check_addr == -1) { /* Non-FWNMI case, deliver it like an architected CPU interrupt. */ @@ -913,16 +911,17 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) } } - ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err); + /* + * Try to block migration while FWNMI is being handled, so the + * machine check handler runs where the information passed to it + * actually makes sense. This shouldn't actually block migration, + * only delay it slightly, assuming migration is retried. If the + * attempt to block fails, carry on. Unfortunately, it always + * fails when running with -only-migrate. A proper interface to + * delay migration completion for a bit could avoid that. + */ + ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, NULL); if (ret == -EBUSY) { - /* - * We don't want to abort so we let the migration to continue. - * In a rare case, the machine check handler will run on the target. - * Though this is not preferable, it is better than aborting - * the migration or killing the VM. It is okay to call - * migrate_del_blocker on a blocker that was not added (which the - * nmi-interlock handler would do when it's called after this). 
- */ warn_report("Received a fwnmi while migration was in progress"); } @@ -935,7 +934,6 @@ static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr, uint32_t nret, target_ulong rets) { uint32_t mask, buf, len, event_len; - uint64_t xinfo; SpaprEventLogEntry *event; struct rtas_error_log header; int i; @@ -945,13 +943,9 @@ static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr, return; } - xinfo = rtas_ld(args, 1); mask = rtas_ld(args, 2); buf = rtas_ld(args, 4); len = rtas_ld(args, 5); - if (nargs == 7) { - xinfo |= (uint64_t)rtas_ld(args, 6) << 32; - } event = rtas_event_log_dequeue(spapr, mask); if (!event) { diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 7b5cd3553c2..222c1b6bbdb 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -7,7 +7,6 @@ #include "qemu/main-loop.h" #include "qemu/module.h" #include "qemu/error-report.h" -#include "cpu.h" #include "exec/exec-all.h" #include "helper_regs.h" #include "hw/ppc/spapr.h" @@ -18,27 +17,11 @@ #include "kvm_ppc.h" #include "hw/ppc/fdt.h" #include "hw/ppc/spapr_ovec.h" +#include "hw/ppc/spapr_numa.h" #include "mmu-book3s-v3.h" #include "hw/mem/memory-device.h" -static bool has_spr(PowerPCCPU *cpu, int spr) -{ - /* We can test whether the SPR is defined by checking for a valid name */ - return cpu->env.spr_cb[spr].name != NULL; -} - -static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex) -{ - /* - * hash value/pteg group index is normalized by HPT mask - */ - if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) { - return false; - } - return true; -} - -static bool is_ram_address(SpaprMachineState *spapr, hwaddr addr) +bool is_ram_address(SpaprMachineState *spapr, hwaddr addr) { MachineState *machine = MACHINE(spapr); DeviceMemoryState *dms = machine->device_memory; @@ -54,355 +37,6 @@ static bool is_ram_address(SpaprMachineState *spapr, hwaddr addr) return false; } -static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong opcode, target_ulong *args) -{ - target_ulong flags = args[0]; - target_ulong ptex = args[1]; - target_ulong pteh = args[2]; - target_ulong ptel = args[3]; - unsigned apshift; - target_ulong raddr; - target_ulong slot; - const ppc_hash_pte64_t *hptes; - - apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel); - if (!apshift) { - /* Bad page size encoding */ - return H_PARAMETER; - } - - raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1); - - if (is_ram_address(spapr, raddr)) { - /* Regular RAM - should have WIMG=0010 */ - if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) { - return H_PARAMETER; - } - } else { - target_ulong wimg_flags; - /* Looks like an IO address */ - /* FIXME: What WIMG combinations could be sensible for IO? - * For now we allow WIMG=010x, but are there others? */ - /* FIXME: Should we check against registered IO addresses? 
*/ - wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M)); - - if (wimg_flags != HPTE64_R_I && - wimg_flags != (HPTE64_R_I | HPTE64_R_M)) { - return H_PARAMETER; - } - } - - pteh &= ~0x60ULL; - - if (!valid_ptex(cpu, ptex)) { - return H_PARAMETER; - } - - slot = ptex & 7ULL; - ptex = ptex & ~7ULL; - - if (likely((flags & H_EXACT) == 0)) { - hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); - for (slot = 0; slot < 8; slot++) { - if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) { - break; - } - } - ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); - if (slot == 8) { - return H_PTEG_FULL; - } - } else { - hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1); - if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) { - ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1); - return H_PTEG_FULL; - } - ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); - } - - spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel); - - args[0] = ptex + slot; - return H_SUCCESS; -} - -typedef enum { - REMOVE_SUCCESS = 0, - REMOVE_NOT_FOUND = 1, - REMOVE_PARM = 2, - REMOVE_HW = 3, -} RemoveResult; - -static RemoveResult remove_hpte(PowerPCCPU *cpu - , target_ulong ptex, - target_ulong avpn, - target_ulong flags, - target_ulong *vp, target_ulong *rp) -{ - const ppc_hash_pte64_t *hptes; - target_ulong v, r; - - if (!valid_ptex(cpu, ptex)) { - return REMOVE_PARM; - } - - hptes = ppc_hash64_map_hptes(cpu, ptex, 1); - v = ppc_hash64_hpte0(cpu, hptes, 0); - r = ppc_hash64_hpte1(cpu, hptes, 0); - ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); - - if ((v & HPTE64_V_VALID) == 0 || - ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || - ((flags & H_ANDCOND) && (v & avpn) != 0)) { - return REMOVE_NOT_FOUND; - } - *vp = v; - *rp = r; - spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0); - ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); - return REMOVE_SUCCESS; -} - -static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong opcode, target_ulong *args) -{ - CPUPPCState *env = &cpu->env; - target_ulong flags = args[0]; - target_ulong ptex = args[1]; - target_ulong avpn = args[2]; - RemoveResult ret; - - ret = remove_hpte(cpu, ptex, avpn, flags, - &args[0], &args[1]); - - switch (ret) { - case REMOVE_SUCCESS: - check_tlb_flush(env, true); - return H_SUCCESS; - - case REMOVE_NOT_FOUND: - return H_NOT_FOUND; - - case REMOVE_PARM: - return H_PARAMETER; - - case REMOVE_HW: - return H_HARDWARE; - } - - g_assert_not_reached(); -} - -#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL -#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL -#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL -#define H_BULK_REMOVE_END 0xc000000000000000ULL -#define H_BULK_REMOVE_CODE 0x3000000000000000ULL -#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL -#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL -#define H_BULK_REMOVE_PARM 0x2000000000000000ULL -#define H_BULK_REMOVE_HW 0x3000000000000000ULL -#define H_BULK_REMOVE_RC 0x0c00000000000000ULL -#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL -#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL -#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL -#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL -#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL - -#define H_BULK_REMOVE_MAX_BATCH 4 - -static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong opcode, target_ulong *args) -{ - CPUPPCState *env = &cpu->env; - int i; - target_ulong rc = H_SUCCESS; - - for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { - 
target_ulong *tsh = &args[i*2]; - target_ulong tsl = args[i*2 + 1]; - target_ulong v, r, ret; - - if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) { - break; - } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) { - return H_PARAMETER; - } - - *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS; - *tsh |= H_BULK_REMOVE_RESPONSE; - - if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) { - *tsh |= H_BULK_REMOVE_PARM; - return H_PARAMETER; - } - - ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl, - (*tsh & H_BULK_REMOVE_FLAGS) >> 26, - &v, &r); - - *tsh |= ret << 60; - - switch (ret) { - case REMOVE_SUCCESS: - *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43; - break; - - case REMOVE_PARM: - rc = H_PARAMETER; - goto exit; - - case REMOVE_HW: - rc = H_HARDWARE; - goto exit; - } - } - exit: - check_tlb_flush(env, true); - - return rc; -} - -static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong opcode, target_ulong *args) -{ - CPUPPCState *env = &cpu->env; - target_ulong flags = args[0]; - target_ulong ptex = args[1]; - target_ulong avpn = args[2]; - const ppc_hash_pte64_t *hptes; - target_ulong v, r; - - if (!valid_ptex(cpu, ptex)) { - return H_PARAMETER; - } - - hptes = ppc_hash64_map_hptes(cpu, ptex, 1); - v = ppc_hash64_hpte0(cpu, hptes, 0); - r = ppc_hash64_hpte1(cpu, hptes, 0); - ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); - - if ((v & HPTE64_V_VALID) == 0 || - ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { - return H_NOT_FOUND; - } - - r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N | - HPTE64_R_KEY_HI | HPTE64_R_KEY_LO); - r |= (flags << 55) & HPTE64_R_PP0; - r |= (flags << 48) & HPTE64_R_KEY_HI; - r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO); - spapr_store_hpte(cpu, ptex, - (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0); - ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); - /* Flush the tlb */ - check_tlb_flush(env, true); - /* Don't need a memory barrier, due to qemu's global lock */ - spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r); - return H_SUCCESS; -} - -static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong opcode, target_ulong *args) -{ - target_ulong flags = args[0]; - target_ulong ptex = args[1]; - int i, ridx, n_entries = 1; - const ppc_hash_pte64_t *hptes; - - if (!valid_ptex(cpu, ptex)) { - return H_PARAMETER; - } - - if (flags & H_READ_4) { - /* Clear the two low order bits */ - ptex &= ~(3ULL); - n_entries = 4; - } - - hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries); - for (i = 0, ridx = 0; i < n_entries; i++) { - args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i); - args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i); - } - ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries); - - return H_SUCCESS; -} - -struct SpaprPendingHpt { - /* These fields are read-only after initialization */ - int shift; - QemuThread thread; - - /* These fields are protected by the BQL */ - bool complete; - - /* These fields are private to the preparation thread if - * !complete, otherwise protected by the BQL */ - int ret; - void *hpt; -}; - -static void free_pending_hpt(SpaprPendingHpt *pending) -{ - if (pending->hpt) { - qemu_vfree(pending->hpt); - } - - g_free(pending); -} - -static void *hpt_prepare_thread(void *opaque) -{ - SpaprPendingHpt *pending = opaque; - size_t size = 1ULL << pending->shift; - - pending->hpt = qemu_try_memalign(size, size); - if (pending->hpt) { - memset(pending->hpt, 0, size); - pending->ret = H_SUCCESS; - } else { - pending->ret = H_NO_MEM; - } - - 
qemu_mutex_lock_iothread(); - - if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) { - /* Ready to go */ - pending->complete = true; - } else { - /* We've been cancelled, clean ourselves up */ - free_pending_hpt(pending); - } - - qemu_mutex_unlock_iothread(); - return NULL; -} - -/* Must be called with BQL held */ -static void cancel_hpt_prepare(SpaprMachineState *spapr) -{ - SpaprPendingHpt *pending = spapr->pending_hpt; - - /* Let the thread know it's cancelled */ - spapr->pending_hpt = NULL; - - if (!pending) { - /* Nothing to do */ - return; - } - - if (!pending->complete) { - /* thread will clean itself up */ - return; - } - - free_pending_hpt(pending); -} - /* Convert a return code from the KVM ioctl()s implementing resize HPT * into a PAPR hypercall return code */ static target_ulong resize_hpt_convert_rc(int ret) @@ -448,7 +82,6 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu, { target_ulong flags = args[0]; int shift = args[1]; - SpaprPendingHpt *pending = spapr->pending_hpt; uint64_t current_ram_size; int rc; @@ -485,182 +118,11 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu, return resize_hpt_convert_rc(rc); } - if (pending) { - /* something already in progress */ - if (pending->shift == shift) { - /* and it's suitable */ - if (pending->complete) { - return pending->ret; - } else { - return H_LONG_BUSY_ORDER_100_MSEC; - } - } - - /* not suitable, cancel and replace */ - cancel_hpt_prepare(spapr); - } - - if (!shift) { - /* nothing to do */ - return H_SUCCESS; - } - - /* start new prepare */ - - pending = g_new0(SpaprPendingHpt, 1); - pending->shift = shift; - pending->ret = H_HARDWARE; - - qemu_thread_create(&pending->thread, "sPAPR HPT prepare", - hpt_prepare_thread, pending, QEMU_THREAD_DETACHED); - - spapr->pending_hpt = pending; - - /* In theory we could estimate the time more accurately based on - * the new size, but there's not much point */ - return H_LONG_BUSY_ORDER_100_MSEC; -} - -static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot) -{ - uint8_t *addr = htab; - - addr += pteg * HASH_PTEG_SIZE_64; - addr += slot * HASH_PTE_SIZE_64; - return ldq_p(addr); -} - -static void new_hpte_store(void *htab, uint64_t pteg, int slot, - uint64_t pte0, uint64_t pte1) -{ - uint8_t *addr = htab; - - addr += pteg * HASH_PTEG_SIZE_64; - addr += slot * HASH_PTE_SIZE_64; - - stq_p(addr, pte0); - stq_p(addr + HASH_PTE_SIZE_64 / 2, pte1); -} - -static int rehash_hpte(PowerPCCPU *cpu, - const ppc_hash_pte64_t *hptes, - void *old_hpt, uint64_t oldsize, - void *new_hpt, uint64_t newsize, - uint64_t pteg, int slot) -{ - uint64_t old_hash_mask = (oldsize >> 7) - 1; - uint64_t new_hash_mask = (newsize >> 7) - 1; - target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot); - target_ulong pte1; - uint64_t avpn; - unsigned base_pg_shift; - uint64_t hash, new_pteg, replace_pte0; - - if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) { - return H_SUCCESS; - } - - pte1 = ppc_hash64_hpte1(cpu, hptes, slot); - - base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1); - assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */ - avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23); - - if (pte0 & HPTE64_V_SECONDARY) { - pteg = ~pteg; - } - - if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) { - uint64_t offset, vsid; - - /* We only have 28 - 23 bits of offset in avpn */ - offset = (avpn & 0x1f) << 23; - vsid = avpn >> 5; - /* We can find more bits from the pteg value */ - if (base_pg_shift < 23) { - offset |= 
((vsid ^ pteg) & old_hash_mask) << base_pg_shift; - } - - hash = vsid ^ (offset >> base_pg_shift); - } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) { - uint64_t offset, vsid; - - /* We only have 40 - 23 bits of seg_off in avpn */ - offset = (avpn & 0x1ffff) << 23; - vsid = avpn >> 17; - if (base_pg_shift < 23) { - offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) - << base_pg_shift; - } - - hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift); - } else { - error_report("rehash_pte: Bad segment size in HPTE"); + if (kvm_enabled()) { return H_HARDWARE; } - new_pteg = hash & new_hash_mask; - if (pte0 & HPTE64_V_SECONDARY) { - assert(~pteg == (hash & old_hash_mask)); - new_pteg = ~new_pteg; - } else { - assert(pteg == (hash & old_hash_mask)); - } - assert((oldsize != newsize) || (pteg == new_pteg)); - replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot); - /* - * Strictly speaking, we don't need all these tests, since we only - * ever rehash bolted HPTEs. We might in future handle non-bolted - * HPTEs, though so make the logic correct for those cases as - * well. - */ - if (replace_pte0 & HPTE64_V_VALID) { - assert(newsize < oldsize); - if (replace_pte0 & HPTE64_V_BOLTED) { - if (pte0 & HPTE64_V_BOLTED) { - /* Bolted collision, nothing we can do */ - return H_PTEG_FULL; - } else { - /* Discard this hpte */ - return H_SUCCESS; - } - } - } - - new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1); - return H_SUCCESS; -} - -static int rehash_hpt(PowerPCCPU *cpu, - void *old_hpt, uint64_t oldsize, - void *new_hpt, uint64_t newsize) -{ - uint64_t n_ptegs = oldsize >> 7; - uint64_t pteg; - int slot; - int rc; - - for (pteg = 0; pteg < n_ptegs; pteg++) { - hwaddr ptex = pteg * HPTES_PER_GROUP; - const ppc_hash_pte64_t *hptes - = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); - - if (!hptes) { - return H_HARDWARE; - } - - for (slot = 0; slot < HPTES_PER_GROUP; slot++) { - rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize, - pteg, slot); - if (rc != H_SUCCESS) { - ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); - return rc; - } - } - ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); - } - - return H_SUCCESS; + return softmmu_resize_hpt_prepare(cpu, spapr, shift); } static void do_push_sregs_to_kvm_pr(CPUState *cs, run_on_cpu_data data) @@ -676,7 +138,7 @@ static void do_push_sregs_to_kvm_pr(CPUState *cs, run_on_cpu_data data) } } -static void push_sregs_to_kvm_pr(SpaprMachineState *spapr) +void push_sregs_to_kvm_pr(SpaprMachineState *spapr) { CPUState *cs; @@ -701,9 +163,7 @@ static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu, { target_ulong flags = args[0]; target_ulong shift = args[1]; - SpaprPendingHpt *pending = spapr->pending_hpt; int rc; - size_t newsize; if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) { return H_AUTHORITY; @@ -726,42 +186,14 @@ static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu, return rc; } - if (flags != 0) { - return H_PARAMETER; - } - - if (!pending || (pending->shift != shift)) { - /* no matching prepare */ - return H_CLOSED; - } - - if (!pending->complete) { - /* prepare has not completed */ - return H_BUSY; + if (kvm_enabled()) { + return H_HARDWARE; } - /* Shouldn't have got past PREPARE without an HPT */ - g_assert(spapr->htab_shift); - - newsize = 1ULL << pending->shift; - rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr), - pending->hpt, newsize); - if (rc == H_SUCCESS) { - qemu_vfree(spapr->htab); - spapr->htab = pending->hpt; - spapr->htab_shift = pending->shift; - - push_sregs_to_kvm_pr(spapr); 
- - pending->hpt = NULL; /* so it's not free()d */ - } + return softmmu_resize_hpt_commit(cpu, spapr, flags, shift); +} - /* Clean up */ - spapr->pending_hpt = NULL; - free_pending_hpt(pending); - return rc; -} static target_ulong h_set_sprg0(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, target_ulong *args) @@ -775,12 +207,12 @@ static target_ulong h_set_sprg0(PowerPCCPU *cpu, SpaprMachineState *spapr, static target_ulong h_set_dabr(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, target_ulong *args) { - if (!has_spr(cpu, SPR_DABR)) { + if (!ppc_has_spr(cpu, SPR_DABR)) { return H_HARDWARE; /* DABR register not available */ } cpu_synchronize_state(CPU(cpu)); - if (has_spr(cpu, SPR_DABRX)) { + if (ppc_has_spr(cpu, SPR_DABRX)) { cpu->env.spr[SPR_DABRX] = 0x3; /* Use Problem and Privileged state */ } else if (!(args[0] & 0x4)) { /* Breakpoint Translation set? */ return H_RESERVED_DABR; @@ -795,7 +227,7 @@ static target_ulong h_set_xdabr(PowerPCCPU *cpu, SpaprMachineState *spapr, { target_ulong dabrx = args[1]; - if (!has_spr(cpu, SPR_DABR) || !has_spr(cpu, SPR_DABRX)) { + if (!ppc_has_spr(cpu, SPR_DABR) || !ppc_has_spr(cpu, SPR_DABRX)) { return H_HARDWARE; } @@ -1395,7 +827,13 @@ static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu, return H_P4; } - if (mflags == AIL_RESERVED) { + if (mflags == 1) { + /* AIL=1 is reserved in POWER8/POWER9/POWER10 */ + return H_UNSUPPORTED_FLAG; + } + + if (mflags == 2 && (pcc->insns_flags2 & PPC2_ISA310)) { + /* AIL=2 is reserved in POWER10 (ISA v3.1) */ return H_UNSUPPORTED_FLAG; } @@ -1755,21 +1193,17 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu, spapr_ovec_intersect(spapr->ov5_cas, spapr->ov5, ov5_guest); spapr_ovec_cleanup(ov5_guest); - if (guest_radix) { - if (kvm_enabled() && !kvmppc_has_cap_mmu_radix()) { - error_report("Guest requested unavailable MMU mode (radix)."); - exit(EXIT_FAILURE); - } - } else { - if (kvm_enabled() && kvmppc_has_cap_mmu_radix() - && !kvmppc_has_cap_mmu_hash_v3()) { - error_report("Guest requested unavailable MMU mode (hash)."); - exit(EXIT_FAILURE); - } - } + spapr_check_mmu_mode(guest_radix); + spapr->cas_pre_isa3_guest = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00); spapr_ovec_cleanup(ov1_guest); + /* + * Check for NUMA affinity conditions now that we know which NUMA + * affinity the guest will use. + */ + spapr_numa_associativity_check(spapr); + /* * Ensure the guest asks for an interrupt mode we support; * otherwise terminate the boot. @@ -1806,8 +1240,7 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu, spapr_setup_hpt(spapr); } - fdt = spapr_build_fdt(spapr, false, fdt_bufsize); - + fdt = spapr_build_fdt(spapr, spapr->vof != NULL, fdt_bufsize); g_free(spapr->fdt_blob); spapr->fdt_size = fdt_totalsize(fdt); spapr->fdt_initial_size = spapr->fdt_size; @@ -1850,6 +1283,25 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, return ret; } +target_ulong spapr_vof_client_architecture_support(MachineState *ms, + CPUState *cs, + target_ulong ovec_addr) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(ms); + + target_ulong ret = do_client_architecture_support(POWERPC_CPU(cs), spapr, + ovec_addr, FDT_MAX_SIZE); + + /* + * This adds stdout and generates phandles for boottime and CAS FDTs. + * It is alright to update the FDT here as do_client_architecture_support() + * does not pack it. 
+ */ + spapr_vof_client_dt_finalize(spapr, spapr->fdt_blob); + + return ret; +} + static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, @@ -1872,6 +1324,8 @@ static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu, behaviour |= H_CPU_BEHAV_L1D_FLUSH_PR; break; case SPAPR_CAP_FIXED: + behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY; + behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS; break; default: /* broken */ assert(safe_cache == SPAPR_CAP_BROKEN); @@ -2018,16 +1472,34 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, return H_FUNCTION; } -static void hypercall_register_types(void) +#ifndef CONFIG_TCG +static target_ulong h_softmmu(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + g_assert_not_reached(); +} + +static void hypercall_register_softmmu(void) { /* hcall-pft */ - spapr_register_hypercall(H_ENTER, h_enter); - spapr_register_hypercall(H_REMOVE, h_remove); - spapr_register_hypercall(H_PROTECT, h_protect); - spapr_register_hypercall(H_READ, h_read); + spapr_register_hypercall(H_ENTER, h_softmmu); + spapr_register_hypercall(H_REMOVE, h_softmmu); + spapr_register_hypercall(H_PROTECT, h_softmmu); + spapr_register_hypercall(H_READ, h_softmmu); /* hcall-bulk */ - spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove); + spapr_register_hypercall(H_BULK_REMOVE, h_softmmu); +} +#else +static void hypercall_register_softmmu(void) +{ + /* DO NOTHING */ +} +#endif + +static void hypercall_register_types(void) +{ + hypercall_register_softmmu(); /* hcall-hpt-resize */ spapr_register_hypercall(H_RESIZE_HPT_PREPARE, h_resize_hpt_prepare); diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 24537ffcbd3..db010718589 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -25,7 +25,6 @@ #include "kvm_ppc.h" #include "migration/vmstate.h" #include "sysemu/dma.h" -#include "exec/address-spaces.h" #include "trace.h" #include "hw/ppc/spapr.h" diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c index 779f18b9943..e9ef7e76469 100644 --- a/hw/ppc/spapr_numa.c +++ b/hw/ppc/spapr_numa.c @@ -19,25 +19,88 @@ /* Moved from hw/ppc/spapr_pci_nvlink2.c */ #define SPAPR_GPU_NUMA_ID (cpu_to_be32(1)) -static bool spapr_machine_using_legacy_numa(SpaprMachineState *spapr) +/* + * Retrieves max_dist_ref_points of the current NUMA affinity. + */ +static int get_max_dist_ref_points(SpaprMachineState *spapr) { - MachineState *machine = MACHINE(spapr); - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return FORM2_DIST_REF_POINTS; + } + + return FORM1_DIST_REF_POINTS; +} + +/* + * Retrieves numa_assoc_size of the current NUMA affinity. + */ +static int get_numa_assoc_size(SpaprMachineState *spapr) +{ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return FORM2_NUMA_ASSOC_SIZE; + } + + return FORM1_NUMA_ASSOC_SIZE; +} - return smc->pre_5_2_numa_associativity || - machine->numa_state->num_nodes <= 1; +/* + * Retrieves vcpu_assoc_size of the current NUMA affinity. + * + * vcpu_assoc_size is the size of ibm,associativity array + * for CPUs, which has an extra element (vcpu_id) in the end. + */ +static int get_vcpu_assoc_size(SpaprMachineState *spapr) +{ + return get_numa_assoc_size(spapr) + 1; +} + +/* + * Retrieves the ibm,associativity array of NUMA node 'node_id' + * for the current NUMA affinity. 
+ */ +static const uint32_t *get_associativity(SpaprMachineState *spapr, int node_id) +{ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return spapr->FORM2_assoc_array[node_id]; + } + return spapr->FORM1_assoc_array[node_id]; +} + +/* + * Wrapper that returns node distance from ms->numa_state->nodes + * after handling edge cases where the distance might be absent. + */ +static int get_numa_distance(MachineState *ms, int src, int dst) +{ + NodeInfo *numa_info = ms->numa_state->nodes; + int ret = numa_info[src].distance[dst]; + + if (ret != 0) { + return ret; + } + + /* + * In case QEMU adds a default NUMA single node when the user + * did not add any, or where the user did not supply distances, + * the distance will be absent (zero). Return local/remote + * distance in this case. + */ + if (src == dst) { + return NUMA_DISTANCE_MIN; + } + + return NUMA_DISTANCE_DEFAULT; } static bool spapr_numa_is_symmetrical(MachineState *ms) { - int src, dst; int nb_numa_nodes = ms->numa_state->num_nodes; - NodeInfo *numa_info = ms->numa_state->nodes; + int src, dst; for (src = 0; src < nb_numa_nodes; src++) { for (dst = src; dst < nb_numa_nodes; dst++) { - if (numa_info[src].distance[dst] != - numa_info[dst].distance[src]) { + if (get_numa_distance(ms, src, dst) != + get_numa_distance(ms, dst, src)) { return false; } } @@ -92,12 +155,22 @@ static uint8_t spapr_numa_get_numa_level(uint8_t distance) return 0; } -static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr) +static void spapr_numa_define_FORM1_domains(SpaprMachineState *spapr) { MachineState *ms = MACHINE(spapr); - NodeInfo *numa_info = ms->numa_state->nodes; int nb_numa_nodes = ms->numa_state->num_nodes; - int src, dst, i; + int src, dst, i, j; + + /* + * Fill all associativity domains of non-zero NUMA nodes with + * node_id. This is required because the default value (0) is + * considered a match with associativity domains of node 0. + */ + for (i = 1; i < nb_numa_nodes; i++) { + for (j = 1; j < FORM1_DIST_REF_POINTS; j++) { + spapr->FORM1_assoc_array[i][j] = cpu_to_be32(i); + } + } for (src = 0; src < nb_numa_nodes; src++) { for (dst = src; dst < nb_numa_nodes; dst++) { @@ -121,7 +194,7 @@ static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr) * The PPC kernel expects the associativity domains of node 0 to * be always 0, and this algorithm will grant that by default. */ - uint8_t distance = numa_info[src].distance[dst]; + uint8_t distance = get_numa_distance(ms, src, dst); uint8_t n_level = spapr_numa_get_numa_level(distance); uint32_t assoc_src; @@ -132,7 +205,7 @@ static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr) * * The Linux kernel will assume that the distance between src and * dst, in this case of no match, is 10 (local distance) doubled - * for each NUMA it didn't match. We have MAX_DISTANCE_REF_POINTS + * for each NUMA it didn't match. We have FORM1_DIST_REF_POINTS * levels (4), so this gives us 10*2*2*2*2 = 160. * * This logic can be seen in the Linux kernel source code, as of @@ -147,25 +220,69 @@ static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr) * and going up to 0x1. 
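* * A worked example (illustrative values): if the user configures a * distance of 40 between src and dst, spapr_numa_get_numa_level() picks an * intermediate n_level; the loop below then copies src's domains into dst at * positions n_level down to 0x1, and each level left unshared makes the * guest kernel double the local distance (10), as described above.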
*/ for (i = n_level; i > 0; i--) { - assoc_src = spapr->numa_assoc_array[src][i]; - spapr->numa_assoc_array[dst][i] = assoc_src; + assoc_src = spapr->FORM1_assoc_array[src][i]; + spapr->FORM1_assoc_array[dst][i] = assoc_src; } } } } -void spapr_numa_associativity_init(SpaprMachineState *spapr, - MachineState *machine) +static void spapr_numa_FORM1_affinity_check(MachineState *machine) +{ + int i; + + /* + * Check we don't have a memory-less/cpu-less NUMA node + * Firmware relies on the existing memory/cpu topology to provide the + * NUMA topology to the kernel. + * And the linux kernel needs to know the NUMA topology at start + * to be able to hotplug CPUs later. + */ + if (machine->numa_state->num_nodes) { + for (i = 0; i < machine->numa_state->num_nodes; ++i) { + /* check for memory-less node */ + if (machine->numa_state->nodes[i].node_mem == 0) { + CPUState *cs; + int found = 0; + /* check for cpu-less node */ + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + if (cpu->node_id == i) { + found = 1; + break; + } + } + /* memory-less and cpu-less node */ + if (!found) { + error_report( +"Memory-less/cpu-less nodes are not supported with FORM1 NUMA (node %d)", i); + exit(EXIT_FAILURE); + } + } + } + } + + if (!spapr_numa_is_symmetrical(machine)) { + error_report( +"Asymmetrical NUMA topologies aren't supported in the pSeries machine using FORM1 NUMA"); + exit(EXIT_FAILURE); + } +} + +/* + * Set NUMA machine state data based on FORM1 affinity semantics. + */ +static void spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr, + MachineState *machine) { SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); int nb_numa_nodes = machine->numa_state->num_nodes; int i, j, max_nodes_with_gpus; - bool using_legacy_numa = spapr_machine_using_legacy_numa(spapr); /* * For all associativity arrays: first position is the size, - * position MAX_DISTANCE_REF_POINTS is always the numa_id, + * position FORM1_DIST_REF_POINTS is always the numa_id, * represented by the index 'i'. * * This will break on sparse NUMA setups, when/if QEMU starts @@ -173,19 +290,8 @@ void spapr_numa_associativity_init(SpaprMachineState *spapr, * 'i' will be a valid node_id set by the user. */ for (i = 0; i < nb_numa_nodes; i++) { - spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); - spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i); - - /* - * Fill all associativity domains of non-zero NUMA nodes with - * node_id. This is required because the default value (0) is - * considered a match with associativity domains of node 0. - */ - if (!using_legacy_numa && i != 0) { - for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) { - spapr->numa_assoc_array[i][j] = cpu_to_be32(i); - } - } + spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS); + spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i); } /* @@ -199,47 +305,95 @@ void spapr_numa_associativity_init(SpaprMachineState *spapr, max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM; for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) { - spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); + spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS); - for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) { + for (j = 1; j < FORM1_DIST_REF_POINTS; j++) { uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ? 
SPAPR_GPU_NUMA_ID : cpu_to_be32(i); - spapr->numa_assoc_array[i][j] = gpu_assoc; + spapr->FORM1_assoc_array[i][j] = gpu_assoc; } - spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i); + spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i); } /* - * Legacy NUMA guests (pseries-5.1 and older, or guests with only - * 1 NUMA node) will not benefit from anything we're going to do - * after this point. + * Guests pseries-5.1 and older uses zeroed associativity domains, + * i.e. no domain definition based on NUMA distance input. + * + * Same thing with guests that have only one NUMA node. */ - if (using_legacy_numa) { + if (smc->pre_5_2_numa_associativity || + machine->numa_state->num_nodes <= 1) { return; } - if (!spapr_numa_is_symmetrical(machine)) { - error_report("Asymmetrical NUMA topologies aren't supported " - "in the pSeries machine"); - exit(EXIT_FAILURE); + spapr_numa_define_FORM1_domains(spapr); +} + +/* + * Init NUMA FORM2 machine state data + */ +static void spapr_numa_FORM2_affinity_init(SpaprMachineState *spapr) +{ + int i; + + /* + * For all resources but CPUs, FORM2 associativity arrays will + * be a size 2 array with the following format: + * + * ibm,associativity = {1, numa_id} + * + * CPUs will write an additional 'vcpu_id' on top of the arrays + * being initialized here. 'numa_id' is represented by the + * index 'i' of the loop. + * + * Given that this initialization is also valid for GPU associativity + * arrays, handle everything in one single step by populating the + * arrays up to NUMA_NODES_MAX_NUM. + */ + for (i = 0; i < NUMA_NODES_MAX_NUM; i++) { + spapr->FORM2_assoc_array[i][0] = cpu_to_be32(1); + spapr->FORM2_assoc_array[i][1] = cpu_to_be32(i); + } +} + +void spapr_numa_associativity_init(SpaprMachineState *spapr, + MachineState *machine) +{ + spapr_numa_FORM1_affinity_init(spapr, machine); + spapr_numa_FORM2_affinity_init(spapr); +} + +void spapr_numa_associativity_check(SpaprMachineState *spapr) +{ + /* + * FORM2 does not have any restrictions we need to handle + * at CAS time, for now. + */ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return; } - spapr_numa_define_associativity_domains(spapr); + spapr_numa_FORM1_affinity_check(MACHINE(spapr)); } void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt, int offset, int nodeid) { + const uint32_t *associativity = get_associativity(spapr, nodeid); + _FDT((fdt_setprop(fdt, offset, "ibm,associativity", - spapr->numa_assoc_array[nodeid], - sizeof(spapr->numa_assoc_array[nodeid])))); + associativity, + get_numa_assoc_size(spapr) * sizeof(uint32_t)))); } static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr, PowerPCCPU *cpu) { - uint32_t *vcpu_assoc = g_new(uint32_t, VCPU_ASSOC_SIZE); + const uint32_t *associativity = get_associativity(spapr, cpu->node_id); + int max_distance_ref_points = get_max_dist_ref_points(spapr); + int vcpu_assoc_size = get_vcpu_assoc_size(spapr); + uint32_t *vcpu_assoc = g_new(uint32_t, vcpu_assoc_size); int index = spapr_get_vcpu_id(cpu); /* @@ -248,10 +402,10 @@ static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr, * 0, put cpu_id last, then copy the remaining associativity * domains. 
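* * E.g. (illustrative values, d1..d3 being placeholder domain ids): with * FORM1's four reference points, a vcpu with id 8 on a node whose array is * { 4, d1, d2, d3, 1 } ends up with { 5, d1, d2, d3, 1, 8 }: the size grows * by one, the vcpu id goes last, and the remaining domains are copied as-is.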
*/ - vcpu_assoc[0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS + 1); - vcpu_assoc[VCPU_ASSOC_SIZE - 1] = cpu_to_be32(index); - memcpy(vcpu_assoc + 1, spapr->numa_assoc_array[cpu->node_id] + 1, - (VCPU_ASSOC_SIZE - 2) * sizeof(uint32_t)); + vcpu_assoc[0] = cpu_to_be32(max_distance_ref_points + 1); + vcpu_assoc[vcpu_assoc_size - 1] = cpu_to_be32(index); + memcpy(vcpu_assoc + 1, associativity + 1, + (vcpu_assoc_size - 2) * sizeof(uint32_t)); return vcpu_assoc; } @@ -260,12 +414,13 @@ int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt, int offset, PowerPCCPU *cpu) { g_autofree uint32_t *vcpu_assoc = NULL; + int vcpu_assoc_size = get_vcpu_assoc_size(spapr); vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, cpu); /* Advertise NUMA via ibm,associativity */ return fdt_setprop(fdt, offset, "ibm,associativity", vcpu_assoc, - VCPU_ASSOC_SIZE * sizeof(uint32_t)); + vcpu_assoc_size * sizeof(uint32_t)); } @@ -273,27 +428,28 @@ int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt, int offset) { MachineState *machine = MACHINE(spapr); + int max_distance_ref_points = get_max_dist_ref_points(spapr); int nb_numa_nodes = machine->numa_state->num_nodes; int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; uint32_t *int_buf, *cur_index, buf_len; int ret, i; /* ibm,associativity-lookup-arrays */ - buf_len = (nr_nodes * MAX_DISTANCE_REF_POINTS + 2) * sizeof(uint32_t); + buf_len = (nr_nodes * max_distance_ref_points + 2) * sizeof(uint32_t); cur_index = int_buf = g_malloc0(buf_len); int_buf[0] = cpu_to_be32(nr_nodes); /* Number of entries per associativity list */ - int_buf[1] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); + int_buf[1] = cpu_to_be32(max_distance_ref_points); cur_index += 2; for (i = 0; i < nr_nodes; i++) { /* - * For the lookup-array we use the ibm,associativity array, - * from numa_assoc_array. without the first element (size). + * For the lookup-array we use the ibm,associativity array of the + * current NUMA affinity, without the first element (size). */ - uint32_t *associativity = spapr->numa_assoc_array[i]; + const uint32_t *associativity = get_associativity(spapr, i); memcpy(cur_index, ++associativity, - sizeof(uint32_t) * MAX_DISTANCE_REF_POINTS); - cur_index += MAX_DISTANCE_REF_POINTS; + sizeof(uint32_t) * max_distance_ref_points); + cur_index += max_distance_ref_points; } ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf, (cur_index - int_buf) * sizeof(uint32_t)); @@ -302,12 +458,8 @@ int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt, return ret; } -/* - * Helper that writes ibm,associativity-reference-points and - * max-associativity-domains in the RTAS pointed by @rtas - * in the DT @fdt. 
- */ -void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) +static void spapr_numa_FORM1_write_rtas_dt(SpaprMachineState *spapr, + void *fdt, int rtas) { MachineState *ms = MACHINE(spapr); SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); @@ -329,7 +481,8 @@ void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) cpu_to_be32(maxdomain) }; - if (spapr_machine_using_legacy_numa(spapr)) { + if (smc->pre_5_2_numa_associativity || + ms->numa_state->num_nodes <= 1) { uint32_t legacy_refpoints[] = { cpu_to_be32(0x4), cpu_to_be32(0x4), @@ -365,6 +518,113 @@ void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) maxdomains, sizeof(maxdomains))); } +static void spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr, + void *fdt, int rtas) +{ + MachineState *ms = MACHINE(spapr); + int nb_numa_nodes = ms->numa_state->num_nodes; + int distance_table_entries = nb_numa_nodes * nb_numa_nodes; + g_autofree uint32_t *lookup_index_table = NULL; + g_autofree uint8_t *distance_table = NULL; + int src, dst, i, distance_table_size; + + /* + * ibm,numa-lookup-index-table: array with length and a + * list of NUMA ids present in the guest. + */ + lookup_index_table = g_new0(uint32_t, nb_numa_nodes + 1); + lookup_index_table[0] = cpu_to_be32(nb_numa_nodes); + + for (i = 0; i < nb_numa_nodes; i++) { + lookup_index_table[i + 1] = cpu_to_be32(i); + } + + _FDT(fdt_setprop(fdt, rtas, "ibm,numa-lookup-index-table", + lookup_index_table, + (nb_numa_nodes + 1) * sizeof(uint32_t))); + + /* + * ibm,numa-distance-table: contains all node distances. First + * element is the size of the table as uint32, followed up + * by all the uint8 distances from the first NUMA node, then all + * distances from the second NUMA node and so on. + * + * ibm,numa-lookup-index-table is used by guest to navigate this + * array because NUMA ids can be sparse (node 0 is the first, + * node 8 is the second ...). + */ + distance_table_size = distance_table_entries * sizeof(uint8_t) + + sizeof(uint32_t); + distance_table = g_new0(uint8_t, distance_table_size); + stl_be_p(distance_table, distance_table_entries); + + /* Skip the uint32_t array length at the start */ + i = sizeof(uint32_t); + + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = 0; dst < nb_numa_nodes; dst++) { + distance_table[i++] = get_numa_distance(ms, src, dst); + } + } + + _FDT(fdt_setprop(fdt, rtas, "ibm,numa-distance-table", + distance_table, distance_table_size)); +} + +/* + * This helper could be compressed in a single function with + * FORM1 logic since we're setting the same DT values, with the + * difference being a call to spapr_numa_FORM2_write_rtas_tables() + * in the end. The separation was made to avoid clogging FORM1 code + * which already has to deal with compat modes from previous + * QEMU machine types. + */ +static void spapr_numa_FORM2_write_rtas_dt(SpaprMachineState *spapr, + void *fdt, int rtas) +{ + MachineState *ms = MACHINE(spapr); + uint32_t number_nvgpus_nodes = spapr->gpu_numa_id - + spapr_numa_initial_nvgpu_numa_id(ms); + + /* + * In FORM2, ibm,associativity-reference-points will point to + * the element in the ibm,associativity array that contains the + * primary domain index (for FORM2, the first element). + * + * This value (in our case, the numa-id) is then used as an index + * to retrieve all other attributes of the node (distance, + * bandwidth, latency) via ibm,numa-lookup-index-table and other + * ibm,numa-*-table properties. 
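+ * + * E.g. (illustrative, two NUMA nodes with local/remote distances 10/20): + * spapr_numa_FORM2_write_rtas_tables() below would then emit + * ibm,numa-lookup-index-table = { 2, 0, 1 } and + * ibm,numa-distance-table = { 4, 10, 20, 20, 10 }, while each node's + * ibm,associativity = { 1, numa_id } provides the index used for the lookup.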
+ */ + uint32_t refpoints[] = { cpu_to_be32(1) }; + + uint32_t maxdomain = ms->numa_state->num_nodes + number_nvgpus_nodes; + uint32_t maxdomains[] = { cpu_to_be32(1), cpu_to_be32(maxdomain) }; + + _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points", + refpoints, sizeof(refpoints))); + + _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains", + maxdomains, sizeof(maxdomains))); + + spapr_numa_FORM2_write_rtas_tables(spapr, fdt, rtas); +} + +/* + * Helper that writes ibm,associativity-reference-points and + * max-associativity-domains in the RTAS pointed by @rtas + * in the DT @fdt. + */ +void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) +{ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + spapr_numa_FORM2_write_rtas_dt(spapr, fdt, rtas); + return; + } + + spapr_numa_FORM1_write_rtas_dt(spapr, fdt, rtas); +} + static target_ulong h_home_node_associativity(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, @@ -375,6 +635,7 @@ static target_ulong h_home_node_associativity(PowerPCCPU *cpu, target_ulong procno = args[1]; PowerPCCPU *tcpu; int idx, assoc_idx; + int vcpu_assoc_size = get_vcpu_assoc_size(spapr); /* only support procno from H_REGISTER_VPA */ if (flags != 0x1) { @@ -393,7 +654,7 @@ static target_ulong h_home_node_associativity(PowerPCCPU *cpu, * 12 associativity domains for vcpus. Assert and bail if that's * not the case. */ - G_STATIC_ASSERT((VCPU_ASSOC_SIZE - 1) <= 12); + g_assert((vcpu_assoc_size - 1) <= 12); vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, tcpu); /* assoc_idx starts at 1 to skip associativity size */ @@ -414,9 +675,9 @@ static target_ulong h_home_node_associativity(PowerPCCPU *cpu, * macro. The ternary will fill the remaining registers with -1 * after we went through vcpu_assoc[]. */ - a = assoc_idx < VCPU_ASSOC_SIZE ? + a = assoc_idx < vcpu_assoc_size ? be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1; - b = assoc_idx < VCPU_ASSOC_SIZE ? + b = assoc_idx < vcpu_assoc_size ? be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1; args[idx] = ASSOCIATIVITY(a, b); diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c index b46c36917c9..91de1052f23 100644 --- a/hw/ppc/spapr_nvdimm.c +++ b/hw/ppc/spapr_nvdimm.c @@ -31,6 +31,22 @@ #include "qemu/range.h" #include "hw/ppc/spapr_numa.h" +/* DIMM health bitmap indicators. Taken from kernel's papr_scm.c */ +/* SCM device is unable to persist memory contents */ +#define PAPR_PMEM_UNARMED PPC_BIT(0) + +/* + * The nvdimm size should be aligned to the SCM block size. + * The SCM block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE + * so that SCM regions do not overlap with dimm memory regions. + * The SCM devices can have variable block sizes. For now, fixing the + * block size to the minimum value.
+ */ +#define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE + +/* Have an explicit check for alignment */ +QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE); + bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm, uint64_t size, Error **errp) { @@ -159,11 +175,11 @@ int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt) { - int offset = fdt_subnode_offset(fdt, 0, "persistent-memory"); + int offset = fdt_subnode_offset(fdt, 0, "ibm,persistent-memory"); GSList *iter, *nvdimms = nvdimm_get_device_list(); if (offset < 0) { - offset = fdt_add_subnode(fdt, 0, "persistent-memory"); + offset = fdt_add_subnode(fdt, 0, "ibm,persistent-memory"); _FDT(offset); _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1))); _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0))); @@ -467,6 +483,37 @@ static target_ulong h_scm_unbind_all(PowerPCCPU *cpu, SpaprMachineState *spapr, return H_SUCCESS; } +static target_ulong h_scm_health(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + + NVDIMMDevice *nvdimm; + uint64_t hbitmap = 0; + uint32_t drc_index = args[0]; + SpaprDrc *drc = spapr_drc_by_index(drc_index); + const uint64_t hbitmap_mask = PAPR_PMEM_UNARMED; + + + /* Ensure that the drc is valid & is valid PMEM dimm and is plugged in */ + if (!drc || !drc->dev || + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { + return H_PARAMETER; + } + + nvdimm = NVDIMM(drc->dev); + + /* Update if the nvdimm is unarmed and send its status via health bitmaps */ + if (object_property_get_bool(OBJECT(nvdimm), NVDIMM_UNARMED_PROP, NULL)) { + hbitmap |= PAPR_PMEM_UNARMED; + } + + /* Update the out args with health bitmap/mask */ + args[0] = hbitmap; + args[1] = hbitmap_mask; + + return H_SUCCESS; +} + static void spapr_scm_register_types(void) { /* qemu/scm specific hcalls */ @@ -475,6 +522,7 @@ static void spapr_scm_register_types(void) spapr_register_hypercall(H_SCM_BIND_MEM, h_scm_bind_mem); spapr_register_hypercall(H_SCM_UNBIND_MEM, h_scm_unbind_mem); spapr_register_hypercall(H_SCM_UNBIND_ALL, h_scm_unbind_all); + spapr_register_hypercall(H_SCM_HEALTH, h_scm_health); } type_init(spapr_scm_register_types) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index feba18cb126..5bfd4aa9e5a 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -25,7 +25,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/irq.h" #include "hw/sysbus.h" #include "migration/vmstate.h" @@ -35,7 +34,6 @@ #include "hw/pci/pci_host.h" #include "hw/ppc/spapr.h" #include "hw/pci-host/spapr.h" -#include "exec/address-spaces.h" #include "exec/ram_addr.h" #include #include "trace.h" @@ -784,33 +782,29 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) static char *spapr_phb_vfio_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev) { - char *path = NULL, *buf = NULL, *host = NULL; + g_autofree char *path = NULL; + g_autofree char *host = NULL; + g_autofree char *devspec = NULL; + char *buf = NULL; /* Get the PCI VFIO host id */ host = object_property_get_str(OBJECT(pdev), "host", NULL); if (!host) { - goto err_out; + return NULL; } /* Construct the path of the file that will give us the DT location */ path = g_strdup_printf("/sys/bus/pci/devices/%s/devspec", host); - g_free(host); - if (!g_file_get_contents(path, &buf, NULL, NULL)) { - goto err_out; + if (!g_file_get_contents(path, &devspec, NULL, NULL)) 
{ + return NULL; } - g_free(path); /* Construct and read from host device tree the loc-code */ - path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", buf); - g_free(buf); + path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", devspec); if (!g_file_get_contents(path, &buf, NULL, NULL)) { - goto err_out; + return NULL; } return buf; - -err_out: - g_free(path); - return NULL; } static char *spapr_phb_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev) @@ -1323,8 +1317,7 @@ static int spapr_dt_pci_bus(SpaprPhbState *sphb, PCIBus *bus, RESOURCE_CELLS_SIZE)); assert(bus); - pci_for_each_device_reverse(bus, pci_bus_num(bus), - spapr_dt_pci_device_cb, &cbinfo); + pci_for_each_device_under_bus_reverse(bus, spapr_dt_pci_device_cb, &cbinfo); if (cbinfo.err) { return cbinfo.err; } @@ -2312,8 +2305,8 @@ static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, return; } - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - spapr_phb_pci_enumerate_bridge, bus_no); + pci_for_each_device_under_bus(sec_bus, spapr_phb_pci_enumerate_bridge, + bus_no); pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1); } @@ -2322,9 +2315,8 @@ static void spapr_phb_pci_enumerate(SpaprPhbState *phb) PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; unsigned int bus_no = 0; - pci_for_each_device(bus, pci_bus_num(bus), - spapr_phb_pci_enumerate_bridge, - &bus_no); + pci_for_each_device_under_bus(bus, spapr_phb_pci_enumerate_bridge, + &bus_no); } diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c index 8ef9b40a18d..7fb0cf4d044 100644 --- a/hw/ppc/spapr_pci_nvlink2.c +++ b/hw/ppc/spapr_pci_nvlink2.c @@ -164,8 +164,7 @@ static void spapr_phb_pci_collect_nvgpu(PCIBus *bus, PCIDevice *pdev, return; } - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - spapr_phb_pci_collect_nvgpu, opaque); + pci_for_each_device_under_bus(sec_bus, spapr_phb_pci_collect_nvgpu, opaque); } void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp) @@ -183,8 +182,8 @@ void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp) sphb->nvgpus->nv2_atsd_current = sphb->nv2_atsd_win_addr; bus = PCI_HOST_BRIDGE(sphb)->bus; - pci_for_each_device(bus, pci_bus_num(bus), - spapr_phb_pci_collect_nvgpu, sphb->nvgpus); + pci_for_each_device_under_bus(bus, spapr_phb_pci_collect_nvgpu, + sphb->nvgpus); if (sphb->nvgpus->err) { error_propagate(errp, sphb->nvgpus->err); diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index e0547b17408..2a76b4e0b51 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include -#include "cpu.h" #include "hw/ppc/spapr.h" #include "hw/pci-host/spapr.h" #include "hw/pci/msix.h" @@ -47,6 +46,16 @@ void spapr_phb_vfio_reset(DeviceState *qdev) spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev)); } +static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + bool *found = opaque; + + if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + *found = true; + } +} + int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, unsigned int addr, int option) { @@ -59,17 +68,33 @@ int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, break; case RTAS_EEH_ENABLE: { PCIHostState *phb; - PCIDevice *pdev; + bool found = false; /* - * The EEH functionality is enabled on basis of PCI device, - * instead of PE. We need check the validity of the PCI - * device address. + * The EEH functionality is enabled per sphb level instead of + * per PCI device. 
We have already identified this specific sphb + * based on buid passed as argument to ibm,set-eeh-option rtas + * call. Now we just need to check the validity of the PCI + * pass-through devices (vfio-pci) under this sphb bus. + * We have already validated that all the devices under this sphb + * are from same iommu group (within same PE) before comming here. + * + * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh: + * Rework device EEH PE determination") kernel would call + * eeh-set-option for each device in the PE using the device's + * config_address as the argument rather than the PE address. + * Hence if we check validity of supplied config_addr whether + * it matches to this PHB will cause issues with older kernel + * versions v5.9 and older. If we return an error from + * eeh-set-option when the argument isn't a valid PE address + * then older kernels (v5.9 and older) will interpret that as + * EEH not being supported. */ phb = PCI_HOST_BRIDGE(sphb); - pdev = pci_find_device(phb->bus, - (addr >> 16) & 0xFF, (addr >> 8) & 0xFF); - if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + pci_for_each_device(phb->bus, (addr >> 16) & 0xFF, + spapr_eeh_pci_find_device, &found); + + if (!found) { return RTAS_OUT_PARAM_ERROR; } @@ -139,8 +164,8 @@ static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus, static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque) { - pci_for_each_device(bus, pci_bus_num(bus), - spapr_phb_vfio_eeh_clear_dev_msix, NULL); + pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix, + NULL); } static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb) diff --git a/hw/ppc/spapr_rng.c b/hw/ppc/spapr_rng.c index d14800e9def..df5c4b96873 100644 --- a/hw/ppc/spapr_rng.c +++ b/hw/ppc/spapr_rng.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "qemu/module.h" diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 8a79f9c6289..b476382ae6f 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -26,7 +26,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "qemu/log.h" #include "qemu/error-report.h" #include "sysemu/sysemu.h" @@ -41,7 +40,6 @@ #include "hw/ppc/spapr_rtas.h" #include "hw/ppc/spapr_cpu_core.h" #include "hw/ppc/ppc.h" -#include "hw/boards.h" #include #include "hw/ppc/spapr_drc.h" @@ -51,6 +49,7 @@ #include "target/ppc/mmu-hash64.h" #include "target/ppc/mmu-book3s-v3.h" #include "migration/blocker.h" +#include "helper_regs.h" static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr, uint32_t token, uint32_t nargs, @@ -133,8 +132,8 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, target_ulong id, start, r3; PowerPCCPU *newcpu; CPUPPCState *env; - PowerPCCPUClass *pcc; target_ulong lpcr; + target_ulong caller_lpcr; if (nargs != 3 || nret != 1) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); @@ -153,7 +152,6 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, } env = &newcpu->env; - pcc = POWERPC_CPU_GET_CLASS(newcpu); if (!CPU(newcpu)->halted) { rtas_st(rets, 0, RTAS_OUT_HW_ERROR); @@ -163,12 +161,17 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, cpu_synchronize_state(CPU(newcpu)); env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME); + hreg_compute_hflags(env); - /* Enable Power-saving mode Exit Cause exceptions for the new CPU */ + caller_lpcr = callcpu->env.spr[SPR_LPCR]; lpcr = env->spr[SPR_LPCR]; - 
if (!pcc->interrupts_big_endian(callcpu)) { - lpcr |= LPCR_ILE; - } + + /* Set ILE the same way */ + lpcr = (lpcr & ~LPCR_ILE) | (caller_lpcr & LPCR_ILE); + + /* Set AIL the same way */ + lpcr = (lpcr & ~LPCR_AIL) | (caller_lpcr & LPCR_AIL); + if (env->mmu_model == POWERPC_MMU_3_00) { /* * New cpus are expected to start in the same radix/hash mode diff --git a/hw/ppc/spapr_rtas_ddw.c b/hw/ppc/spapr_rtas_ddw.c index 3501b058199..3e826e1308c 100644 --- a/hw/ppc/spapr_rtas_ddw.c +++ b/hw/ppc/spapr_rtas_ddw.c @@ -18,7 +18,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "qemu/error-report.h" #include "qemu/module.h" #include "hw/ppc/spapr.h" diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c index 68cfc578a3a..fba4dfca358 100644 --- a/hw/ppc/spapr_rtc.c +++ b/hw/ppc/spapr_rtc.c @@ -27,7 +27,6 @@ #include "qemu/osdep.h" #include "qemu-common.h" -#include "cpu.h" #include "qemu/timer.h" #include "sysemu/sysemu.h" #include "hw/ppc/spapr.h" diff --git a/hw/ppc/spapr_softmmu.c b/hw/ppc/spapr_softmmu.c new file mode 100644 index 00000000000..4ee03c83e48 --- /dev/null +++ b/hw/ppc/spapr_softmmu.c @@ -0,0 +1,612 @@ +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "cpu.h" +#include "helper_regs.h" +#include "hw/ppc/spapr.h" +#include "mmu-hash64.h" +#include "mmu-book3s-v3.h" + +static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex) +{ + /* + * hash value/pteg group index is normalized by HPT mask + */ + if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) { + return false; + } + return true; +} + +static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + target_ulong pteh = args[2]; + target_ulong ptel = args[3]; + unsigned apshift; + target_ulong raddr; + target_ulong slot; + const ppc_hash_pte64_t *hptes; + + apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel); + if (!apshift) { + /* Bad page size encoding */ + return H_PARAMETER; + } + + raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1); + + if (is_ram_address(spapr, raddr)) { + /* Regular RAM - should have WIMG=0010 */ + if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) { + return H_PARAMETER; + } + } else { + target_ulong wimg_flags; + /* Looks like an IO address */ + /* FIXME: What WIMG combinations could be sensible for IO? + * For now we allow WIMG=010x, but are there others? */ + /* FIXME: Should we check against registered IO addresses? 
*/ + wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M)); + + if (wimg_flags != HPTE64_R_I && + wimg_flags != (HPTE64_R_I | HPTE64_R_M)) { + return H_PARAMETER; + } + } + + pteh &= ~0x60ULL; + + if (!valid_ptex(cpu, ptex)) { + return H_PARAMETER; + } + + slot = ptex & 7ULL; + ptex = ptex & ~7ULL; + + if (likely((flags & H_EXACT) == 0)) { + hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + for (slot = 0; slot < 8; slot++) { + if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) { + break; + } + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + if (slot == 8) { + return H_PTEG_FULL; + } + } else { + hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1); + if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1); + return H_PTEG_FULL; + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); + } + + spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel); + + args[0] = ptex + slot; + return H_SUCCESS; +} + +typedef enum { + REMOVE_SUCCESS = 0, + REMOVE_NOT_FOUND = 1, + REMOVE_PARM = 2, + REMOVE_HW = 3, +} RemoveResult; + +static RemoveResult remove_hpte(PowerPCCPU *cpu + , target_ulong ptex, + target_ulong avpn, + target_ulong flags, + target_ulong *vp, target_ulong *rp) +{ + const ppc_hash_pte64_t *hptes; + target_ulong v, r; + + if (!valid_ptex(cpu, ptex)) { + return REMOVE_PARM; + } + + hptes = ppc_hash64_map_hptes(cpu, ptex, 1); + v = ppc_hash64_hpte0(cpu, hptes, 0); + r = ppc_hash64_hpte1(cpu, hptes, 0); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); + + if ((v & HPTE64_V_VALID) == 0 || + ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || + ((flags & H_ANDCOND) && (v & avpn) != 0)) { + return REMOVE_NOT_FOUND; + } + *vp = v; + *rp = r; + spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0); + ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); + return REMOVE_SUCCESS; +} + +static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + target_ulong avpn = args[2]; + RemoveResult ret; + + ret = remove_hpte(cpu, ptex, avpn, flags, + &args[0], &args[1]); + + switch (ret) { + case REMOVE_SUCCESS: + check_tlb_flush(env, true); + return H_SUCCESS; + + case REMOVE_NOT_FOUND: + return H_NOT_FOUND; + + case REMOVE_PARM: + return H_PARAMETER; + + case REMOVE_HW: + return H_HARDWARE; + } + + g_assert_not_reached(); +} + +#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL +#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL +#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL +#define H_BULK_REMOVE_END 0xc000000000000000ULL +#define H_BULK_REMOVE_CODE 0x3000000000000000ULL +#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL +#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL +#define H_BULK_REMOVE_PARM 0x2000000000000000ULL +#define H_BULK_REMOVE_HW 0x3000000000000000ULL +#define H_BULK_REMOVE_RC 0x0c00000000000000ULL +#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL +#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL +#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL +#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL +#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL + +#define H_BULK_REMOVE_MAX_BATCH 4 + +static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + int i; + target_ulong rc = H_SUCCESS; + + for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { + 
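/*
 * Illustrative example, not part of the patch: a guest batches up to
 * H_BULK_REMOVE_MAX_BATCH (request, value) doubleword pairs in args[].
 * For instance, a request to remove the HPTE at ptex 0x123 only if its
 * AVPN matches could look like (names hypothetical):
 *
 *   args[0] = H_BULK_REMOVE_REQUEST | H_BULK_REMOVE_AVPN | 0x123;
 *   args[1] = avpn_to_match;
 *
 * The list is terminated by a doubleword whose type field is
 * H_BULK_REMOVE_END; on return each processed request word is rewritten
 * as an H_BULK_REMOVE_RESPONSE carrying a completion code such as
 * H_BULK_REMOVE_SUCCESS or H_BULK_REMOVE_NOT_FOUND.
 */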
target_ulong *tsh = &args[i*2]; + target_ulong tsl = args[i*2 + 1]; + target_ulong v, r, ret; + + if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) { + break; + } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) { + return H_PARAMETER; + } + + *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS; + *tsh |= H_BULK_REMOVE_RESPONSE; + + if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) { + *tsh |= H_BULK_REMOVE_PARM; + return H_PARAMETER; + } + + ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl, + (*tsh & H_BULK_REMOVE_FLAGS) >> 26, + &v, &r); + + *tsh |= ret << 60; + + switch (ret) { + case REMOVE_SUCCESS: + *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43; + break; + + case REMOVE_PARM: + rc = H_PARAMETER; + goto exit; + + case REMOVE_HW: + rc = H_HARDWARE; + goto exit; + } + } + exit: + check_tlb_flush(env, true); + + return rc; +} + +static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + target_ulong avpn = args[2]; + const ppc_hash_pte64_t *hptes; + target_ulong v, r; + + if (!valid_ptex(cpu, ptex)) { + return H_PARAMETER; + } + + hptes = ppc_hash64_map_hptes(cpu, ptex, 1); + v = ppc_hash64_hpte0(cpu, hptes, 0); + r = ppc_hash64_hpte1(cpu, hptes, 0); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); + + if ((v & HPTE64_V_VALID) == 0 || + ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { + return H_NOT_FOUND; + } + + r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N | + HPTE64_R_KEY_HI | HPTE64_R_KEY_LO); + r |= (flags << 55) & HPTE64_R_PP0; + r |= (flags << 48) & HPTE64_R_KEY_HI; + r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO); + spapr_store_hpte(cpu, ptex, + (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0); + ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); + /* Flush the tlb */ + check_tlb_flush(env, true); + /* Don't need a memory barrier, due to qemu's global lock */ + spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r); + return H_SUCCESS; +} + +static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + int i, ridx, n_entries = 1; + const ppc_hash_pte64_t *hptes; + + if (!valid_ptex(cpu, ptex)) { + return H_PARAMETER; + } + + if (flags & H_READ_4) { + /* Clear the two low order bits */ + ptex &= ~(3ULL); + n_entries = 4; + } + + hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries); + for (i = 0, ridx = 0; i < n_entries; i++) { + args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i); + args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i); + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries); + + return H_SUCCESS; +} + +struct SpaprPendingHpt { + /* These fields are read-only after initialization */ + int shift; + QemuThread thread; + + /* These fields are protected by the BQL */ + bool complete; + + /* These fields are private to the preparation thread if + * !complete, otherwise protected by the BQL */ + int ret; + void *hpt; +}; + +static void free_pending_hpt(SpaprPendingHpt *pending) +{ + if (pending->hpt) { + qemu_vfree(pending->hpt); + } + + g_free(pending); +} + +static void *hpt_prepare_thread(void *opaque) +{ + SpaprPendingHpt *pending = opaque; + size_t size = 1ULL << pending->shift; + + pending->hpt = qemu_try_memalign(size, size); + if (pending->hpt) { + memset(pending->hpt, 0, size); + pending->ret = H_SUCCESS; + } else { + pending->ret = H_NO_MEM; + } + + 
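/*
 * A note on the allocation above: qemu_try_memalign(size, size) requests
 * natural alignment, on the assumption that a hashed page table of
 * 2^shift bytes must start on a 2^shift boundary; aligning to the table
 * size itself satisfies that for any shift.
 */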
qemu_mutex_lock_iothread(); + + if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) { + /* Ready to go */ + pending->complete = true; + } else { + /* We've been cancelled, clean ourselves up */ + free_pending_hpt(pending); + } + + qemu_mutex_unlock_iothread(); + return NULL; +} + +/* Must be called with BQL held */ +static void cancel_hpt_prepare(SpaprMachineState *spapr) +{ + SpaprPendingHpt *pending = spapr->pending_hpt; + + /* Let the thread know it's cancelled */ + spapr->pending_hpt = NULL; + + if (!pending) { + /* Nothing to do */ + return; + } + + if (!pending->complete) { + /* thread will clean itself up */ + return; + } + + free_pending_hpt(pending); +} + +target_ulong softmmu_resize_hpt_prepare(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong shift) +{ + SpaprPendingHpt *pending = spapr->pending_hpt; + + if (pending) { + /* something already in progress */ + if (pending->shift == shift) { + /* and it's suitable */ + if (pending->complete) { + return pending->ret; + } else { + return H_LONG_BUSY_ORDER_100_MSEC; + } + } + + /* not suitable, cancel and replace */ + cancel_hpt_prepare(spapr); + } + + if (!shift) { + /* nothing to do */ + return H_SUCCESS; + } + + /* start new prepare */ + + pending = g_new0(SpaprPendingHpt, 1); + pending->shift = shift; + pending->ret = H_HARDWARE; + + qemu_thread_create(&pending->thread, "sPAPR HPT prepare", + hpt_prepare_thread, pending, QEMU_THREAD_DETACHED); + + spapr->pending_hpt = pending; + + /* In theory we could estimate the time more accurately based on + * the new size, but there's not much point */ + return H_LONG_BUSY_ORDER_100_MSEC; +} + +static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + return ldq_p(addr); +} + +static void new_hpte_store(void *htab, uint64_t pteg, int slot, + uint64_t pte0, uint64_t pte1) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + + stq_p(addr, pte0); + stq_p(addr + HPTE64_DW1, pte1); +} + +static int rehash_hpte(PowerPCCPU *cpu, + const ppc_hash_pte64_t *hptes, + void *old_hpt, uint64_t oldsize, + void *new_hpt, uint64_t newsize, + uint64_t pteg, int slot) +{ + uint64_t old_hash_mask = (oldsize >> 7) - 1; + uint64_t new_hash_mask = (newsize >> 7) - 1; + target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot); + target_ulong pte1; + uint64_t avpn; + unsigned base_pg_shift; + uint64_t hash, new_pteg, replace_pte0; + + if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) { + return H_SUCCESS; + } + + pte1 = ppc_hash64_hpte1(cpu, hptes, slot); + + base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1); + assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */ + avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23); + + if (pte0 & HPTE64_V_SECONDARY) { + pteg = ~pteg; + } + + if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) { + uint64_t offset, vsid; + + /* We only have 28 - 23 bits of offset in avpn */ + offset = (avpn & 0x1f) << 23; + vsid = avpn >> 5; + /* We can find more bits from the pteg value */ + if (base_pg_shift < 23) { + offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift; + } + + hash = vsid ^ (offset >> base_pg_shift); + } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) { + uint64_t offset, vsid; + + /* We only have 40 - 23 bits of seg_off in avpn */ + offset = (avpn & 0x1ffff) << 23; + vsid = avpn >> 17; + if (base_pg_shift < 23) { + offset |= 
((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) + << base_pg_shift; + } + + hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift); + } else { + error_report("rehash_pte: Bad segment size in HPTE"); + return H_HARDWARE; + } + + new_pteg = hash & new_hash_mask; + if (pte0 & HPTE64_V_SECONDARY) { + assert(~pteg == (hash & old_hash_mask)); + new_pteg = ~new_pteg; + } else { + assert(pteg == (hash & old_hash_mask)); + } + assert((oldsize != newsize) || (pteg == new_pteg)); + replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot); + /* + * Strictly speaking, we don't need all these tests, since we only + * ever rehash bolted HPTEs. We might in future handle non-bolted + * HPTEs, though so make the logic correct for those cases as + * well. + */ + if (replace_pte0 & HPTE64_V_VALID) { + assert(newsize < oldsize); + if (replace_pte0 & HPTE64_V_BOLTED) { + if (pte0 & HPTE64_V_BOLTED) { + /* Bolted collision, nothing we can do */ + return H_PTEG_FULL; + } else { + /* Discard this hpte */ + return H_SUCCESS; + } + } + } + + new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1); + return H_SUCCESS; +} + +static int rehash_hpt(PowerPCCPU *cpu, + void *old_hpt, uint64_t oldsize, + void *new_hpt, uint64_t newsize) +{ + uint64_t n_ptegs = oldsize >> 7; + uint64_t pteg; + int slot; + int rc; + + for (pteg = 0; pteg < n_ptegs; pteg++) { + hwaddr ptex = pteg * HPTES_PER_GROUP; + const ppc_hash_pte64_t *hptes + = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + + if (!hptes) { + return H_HARDWARE; + } + + for (slot = 0; slot < HPTES_PER_GROUP; slot++) { + rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize, + pteg, slot); + if (rc != H_SUCCESS) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + return rc; + } + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + } + + return H_SUCCESS; +} + +target_ulong softmmu_resize_hpt_commit(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong flags, + target_ulong shift) +{ + SpaprPendingHpt *pending = spapr->pending_hpt; + int rc; + size_t newsize; + + if (flags != 0) { + return H_PARAMETER; + } + + if (!pending || (pending->shift != shift)) { + /* no matching prepare */ + return H_CLOSED; + } + + if (!pending->complete) { + /* prepare has not completed */ + return H_BUSY; + } + + /* Shouldn't have got past PREPARE without an HPT */ + g_assert(spapr->htab_shift); + + newsize = 1ULL << pending->shift; + rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr), + pending->hpt, newsize); + if (rc == H_SUCCESS) { + qemu_vfree(spapr->htab); + spapr->htab = pending->hpt; + spapr->htab_shift = pending->shift; + + push_sregs_to_kvm_pr(spapr); + + pending->hpt = NULL; /* so it's not free()d */ + } + + /* Clean up */ + spapr->pending_hpt = NULL; + free_pending_hpt(pending); + + return rc; +} + +static void hypercall_register_types(void) +{ + /* hcall-pft */ + spapr_register_hypercall(H_ENTER, h_enter); + spapr_register_hypercall(H_REMOVE, h_remove); + spapr_register_hypercall(H_PROTECT, h_protect); + spapr_register_hypercall(H_READ, h_read); + + /* hcall-bulk */ + spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove); + +} + +type_init(hypercall_register_types) diff --git a/hw/ppc/spapr_tpm_proxy.c b/hw/ppc/spapr_tpm_proxy.c index a01f81f9e04..2454086744b 100644 --- a/hw/ppc/spapr_tpm_proxy.c +++ b/hw/ppc/spapr_tpm_proxy.c @@ -15,7 +15,6 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/reset.h" -#include "cpu.h" #include "hw/ppc/spapr.h" #include "hw/qdev-properties.h" #include "trace.h" diff --git 
a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index ef06e0362c8..b975ed29cad 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -310,7 +310,7 @@ int spapr_vio_send_crq(SpaprVioDevice *dev, uint8_t *crq) static void spapr_vio_quiesce_one(SpaprVioDevice *dev) { if (dev->tcet) { - device_legacy_reset(DEVICE(dev->tcet)); + device_cold_reset(DEVICE(dev->tcet)); } free_crq(dev); } @@ -577,7 +577,7 @@ SpaprVioBus *spapr_vio_bus_init(void) sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); /* Create bus on bridge device */ - qbus = qbus_create(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio"); + qbus = qbus_new(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio"); bus = SPAPR_VIO_BUS(qbus); bus->next_reg = SPAPR_VIO_REG_BASE; diff --git a/hw/ppc/spapr_vof.c b/hw/ppc/spapr_vof.c new file mode 100644 index 00000000000..40ce8fe0037 --- /dev/null +++ b/hw/ppc/spapr_vof.c @@ -0,0 +1,167 @@ +/* + * SPAPR machine hooks to Virtual Open Firmware, + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "hw/ppc/fdt.h" +#include "hw/ppc/vof.h" +#include "sysemu/sysemu.h" +#include "qom/qom-qobject.h" +#include "trace.h" + +target_ulong spapr_h_vof_client(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *_args) +{ + int ret = vof_client_call(MACHINE(spapr), spapr->vof, spapr->fdt_blob, + ppc64_phys_to_real(_args[0])); + + if (ret) { + return H_PARAMETER; + } + return H_SUCCESS; +} + +void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt) +{ + char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); + + vof_build_dt(fdt, spapr->vof); + + if (spapr->vof->bootargs) { + int chosen; + + _FDT(chosen = fdt_path_offset(fdt, "/chosen")); + /* + * If the client did not change "bootargs", spapr_dt_chosen() must have + * stored machine->kernel_cmdline in it before getting here. + */ + _FDT(fdt_setprop_string(fdt, chosen, "bootargs", spapr->vof->bootargs)); + } + + /* + * SLOF-less setup requires an open instance of stdout for early + * kernel printk. By now all phandles are settled so we can open + * the default serial console. 
+ */ + if (stdout_path) { + _FDT(vof_client_open_store(fdt, spapr->vof, "/chosen", "stdout", + stdout_path)); + } +} + +void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, Error **errp) +{ + target_ulong stack_ptr; + Vof *vof = spapr->vof; + PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu); + + vof_init(vof, spapr->rma_size, errp); + + stack_ptr = vof_claim(vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE); + if (stack_ptr == -1) { + error_setg(errp, "Memory allocation for stack failed"); + return; + } + /* Stack grows downwards plus reserve space for the minimum stack frame */ + stack_ptr += VOF_STACK_SIZE - 0x20; + + if (spapr->kernel_size && + vof_claim(vof, spapr->kernel_addr, spapr->kernel_size, 0) == -1) { + error_setg(errp, "Memory for kernel is in use"); + return; + } + + if (spapr->initrd_size && + vof_claim(vof, spapr->initrd_base, spapr->initrd_size, 0) == -1) { + error_setg(errp, "Memory for initramdisk is in use"); + return; + } + + spapr_vof_client_dt_finalize(spapr, fdt); + + spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, + stack_ptr, spapr->initrd_base, + spapr->initrd_size); + /* VOF is 32bit BE so enforce MSR here */ + first_ppc_cpu->env.msr &= ~((1ULL << MSR_SF) | (1ULL << MSR_LE)); + + /* + * At this point the expected allocation map is: + * + * 0..c38 - the initial firmware + * 8000..10000 - stack + * 400000.. - kernel + * 3ea0000.. - initramdisk + * + * We skip writing FDT as nothing expects it; OF client interface is + * going to be used for reading the device tree. + */ +} + +void spapr_vof_quiesce(MachineState *ms) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(ms); + + spapr->fdt_size = fdt_totalsize(spapr->fdt_blob); + spapr->fdt_initial_size = spapr->fdt_size; +} + +bool spapr_vof_setprop(MachineState *ms, const char *path, const char *propname, + void *val, int vallen) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(ms); + + /* + * We only allow changing properties which we know how to update in QEMU + * OR + * the ones which we know that they need to survive during "quiesce". + */ + + if (strcmp(path, "/rtas") == 0) { + if (strcmp(propname, "linux,rtas-base") == 0 || + strcmp(propname, "linux,rtas-entry") == 0) { + /* These need to survive quiesce so let them store in the FDT */ + return true; + } + } + + if (strcmp(path, "/chosen") == 0) { + if (strcmp(propname, "bootargs") == 0) { + Vof *vof = spapr->vof; + + g_free(vof->bootargs); + vof->bootargs = g_strndup(val, vallen); + return true; + } + if (strcmp(propname, "linux,initrd-start") == 0) { + if (vallen == sizeof(uint32_t)) { + spapr->initrd_base = ldl_be_p(val); + return true; + } + if (vallen == sizeof(uint64_t)) { + spapr->initrd_base = ldq_be_p(val); + return true; + } + return false; + } + if (strcmp(propname, "linux,initrd-end") == 0) { + if (vallen == sizeof(uint32_t)) { + spapr->initrd_size = ldl_be_p(val) - spapr->initrd_base; + return true; + } + if (vallen == sizeof(uint64_t)) { + spapr->initrd_size = ldq_be_p(val) - spapr->initrd_base; + return true; + } + return false; + } + } + + return true; +} diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events index b4bbfbb0134..3bf43fa340f 100644 --- a/hw/ppc/trace-events +++ b/hw/ppc/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# spapr_pci.c spapr_pci_msi(const char *msg, uint32_t ca) "%s (cfg=0x%x)" @@ -71,9 +71,53 @@ spapr_rtas_ibm_configure_connector_invalid(uint32_t index) "DRC index: 0x%"PRIx3 spapr_vio_h_reg_crq(uint64_t reg, uint64_t queue_addr, uint64_t queue_len) "CRQ for dev 0x%" PRIx64 " registered at 0x%" PRIx64 "/0x%" PRIx64 spapr_vio_free_crq(uint32_t reg) "CRQ for dev 0x%" PRIx32 " freed" +# vof.c +vof_error_str_truncated(const char *s, int len) "%s truncated to %d" +vof_error_param(const char *method, int nargscheck, int nretcheck, int nargs, int nret) "%s takes/returns %d/%d, not %d/%d" +vof_error_unknown_service(const char *service, int nargs, int nret) "\"%s\" args=%d rets=%d" +vof_error_unknown_method(const char *method) "\"%s\"" +vof_error_unknown_ihandle_close(uint32_t ih) "ih=0x%x" +vof_error_unknown_path(const char *path) "\"%s\"" +vof_error_write(uint32_t ih) "ih=0x%x" +vof_finddevice(const char *path, uint32_t ph) "\"%s\" => ph=0x%x" +vof_claim(uint32_t virt, uint32_t size, uint32_t align, uint32_t ret) "virt=0x%x size=0x%x align=0x%x => 0x%x" +vof_release(uint32_t virt, uint32_t size, uint32_t ret) "virt=0x%x size=0x%x => 0x%x" +vof_method(uint32_t ihandle, const char *method, uint32_t param, uint32_t ret, uint32_t ret2) "ih=0x%x \"%s\"(0x%x) => 0x%x 0x%x" +vof_getprop(uint32_t ph, const char *prop, uint32_t ret, const char *val) "ph=0x%x \"%s\" => len=%d [%s]" +vof_getproplen(uint32_t ph, const char *prop, uint32_t ret) "ph=0x%x \"%s\" => len=%d" +vof_setprop(uint32_t ph, const char *prop, const char *val, uint32_t vallen, uint32_t ret) "ph=0x%x \"%s\" [%s] len=%d => ret=%d" +vof_open(const char *path, uint32_t ph, uint32_t ih) "%s ph=0x%x => ih=0x%x" +vof_interpret(const char *cmd, uint32_t param1, uint32_t param2, uint32_t ret, uint32_t ret2) "[%s] 0x%x 0x%x => 0x%x 0x%x" +vof_package_to_path(uint32_t ph, const char *tmp, int ret) "ph=0x%x => %s len=%d" +vof_instance_to_path(uint32_t ih, uint32_t ph, const char *tmp, int ret) "ih=0x%x ph=0x%x => %s len=%d" +vof_instance_to_package(uint32_t ih, uint32_t ph) "ih=0x%x => ph=0x%x" +vof_write(uint32_t ih, unsigned cb, const char *msg) "ih=0x%x [%u] \"%s\"" +vof_avail(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64 +vof_claimed(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64 + # ppc.c ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)" +ppc_tb_load(uint64_t tb) "tb 0x%016" PRIx64 +ppc_tb_store(uint64_t tb, uint64_t offset) "tb 0x%016" PRIx64 " offset 0x%08" PRIx64 + +ppc_decr_load(uint64_t tb) "decr 0x%016" PRIx64 +ppc_decr_excp(const char *action) "%s decrementer" +ppc_decr_store(uint32_t nr_bits, uint64_t decr, uint64_t value) "%d-bit 0x%016" PRIx64 " => 0x%016" PRIx64 + +ppc4xx_fit(uint32_t ir, uint64_t tcr, uint64_t tsr) "ir %d TCR 0x%" PRIx64 " TSR 0x%" PRIx64 +ppc4xx_pit_stop(void) "" +ppc4xx_pit_start(uint64_t reload) "PIT 0x%016" PRIx64 +ppc4xx_pit(uint32_t ar, uint32_t ir, uint64_t tcr, uint64_t tsr, uint64_t reload) "ar %d ir %d TCR 0x%" PRIx64 " TSR 0x%" PRIx64 " PIT 0x%016" PRIx64 +ppc4xx_wdt(uint64_t tcr, uint64_t tsr) "TCR 0x%" PRIx64 " TSR 0x%" PRIx64 +ppc40x_store_pit(uint64_t value) "val 0x%" PRIx64 +ppc40x_set_tb_clk(uint32_t value) "new frequency %" PRIu32 +ppc40x_timers_init(uint32_t value) "frequency %" PRIu32 +ppc_irq_set(void *env, uint32_t pin, uint32_t level) "env [%p] pin %d level %d" +ppc_irq_set_exit(void *env, uint32_t n_IRQ, uint32_t 
level, uint32_t pending, uint32_t request) "env [%p] n_IRQ %d level %d => pending 0x%08" PRIx32 " req 0x%08" PRIx32 +ppc_irq_set_state(const char *name, uint32_t level) "\"%s\" level %d" +ppc_irq_reset(const char *name) "%s" +ppc_irq_cpu(const char *action) "%s" # prep_systemio.c prep_systemio_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x" diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c index cb421570dab..9c575403b85 100644 --- a/hw/ppc/virtex_ml507.c +++ b/hw/ppc/virtex_ml507.c @@ -38,9 +38,7 @@ #include "elf.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "qemu/log.h" #include "qemu/option.h" -#include "exec/address-spaces.h" #include "hw/intc/ppc-uic.h" #include "hw/ppc/ppc.h" diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c new file mode 100644 index 00000000000..73adc44ec21 --- /dev/null +++ b/hw/ppc/vof.c @@ -0,0 +1,1062 @@ +/* + * QEMU PowerPC Virtual Open Firmware. + * + * This implements client interface from OpenFirmware IEEE1275 on the QEMU + * side to leave only a very basic firmware in the VM. + * + * Copyright (c) 2021 IBM Corporation. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/timer.h" +#include "qemu/range.h" +#include "qemu/units.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "exec/ram_addr.h" +#include "exec/address-spaces.h" +#include "hw/ppc/vof.h" +#include "hw/ppc/fdt.h" +#include "sysemu/runstate.h" +#include "qom/qom-qobject.h" +#include "trace.h" + +#include + +/* + * OF 1275 "nextprop" description suggests is it 32 bytes max but + * LoPAPR defines "ibm,query-interrupt-source-number" which is 33 chars long. + */ +#define OF_PROPNAME_LEN_MAX 64 + +#define VOF_MAX_PATH 256 +#define VOF_MAX_SETPROPLEN 2048 +#define VOF_MAX_METHODLEN 256 +#define VOF_MAX_FORTHCODE 256 +#define VOF_VTY_BUF_SIZE 256 + +typedef struct { + uint64_t start; + uint64_t size; +} OfClaimed; + +typedef struct { + char *path; /* the path used to open the instance */ + uint32_t phandle; +} OfInstance; + +static int readstr(hwaddr pa, char *buf, int size) +{ + if (VOF_MEM_READ(pa, buf, size) != MEMTX_OK) { + return -1; + } + if (strnlen(buf, size) == size) { + buf[size - 1] = '\0'; + trace_vof_error_str_truncated(buf, size); + return -1; + } + return 0; +} + +static bool cmpservice(const char *s, unsigned nargs, unsigned nret, + const char *s1, unsigned nargscheck, unsigned nretcheck) +{ + if (strcmp(s, s1)) { + return false; + } + if ((nargscheck && (nargs != nargscheck)) || + (nretcheck && (nret != nretcheck))) { + trace_vof_error_param(s, nargscheck, nretcheck, nargs, nret); + return false; + } + + return true; +} + +static void prop_format(char *tval, int tlen, const void *prop, int len) +{ + int i; + const unsigned char *c; + char *t; + const char bin[] = "..."; + + for (i = 0, c = prop; i < len; ++i, ++c) { + if (*c == '\0' && i == len - 1) { + strncpy(tval, prop, tlen - 1); + return; + } + if (*c < 0x20 || *c >= 0x80) { + break; + } + } + + for (i = 0, c = prop, t = tval; i < len; ++i, ++c) { + if (t >= tval + tlen - sizeof(bin) - 1 - 2 - 1) { + strcpy(t, bin); + return; + } + if (i && i % 4 == 0 && i != len - 1) { + strcat(t, " "); + ++t; + } + t += sprintf(t, "%02X", *c & 0xFF); + } +} + +static int get_path(const void *fdt, int offset, char *buf, int len) +{ + int ret; + + ret = fdt_get_path(fdt, offset, buf, len - 1); + if (ret < 0) { + return ret; + } + + buf[len - 1] = '\0'; + + return strlen(buf) + 1; +} + +static int phandle_to_path(const void *fdt, 
uint32_t ph, char *buf, int len) +{ + int ret; + + ret = fdt_node_offset_by_phandle(fdt, ph); + if (ret < 0) { + return ret; + } + + return get_path(fdt, ret, buf, len); +} + +static int path_offset(const void *fdt, const char *path) +{ + g_autofree char *p = NULL; + char *at; + + /* + * https://www.devicetree.org/open-firmware/bindings/ppc/release/ppc-2_1.html#HDR16 + * + * "Conversion from numeric representation to text representation shall use + * the lower case forms of the hexadecimal digits in the range a..f, + * suppressing leading zeros". + */ + p = g_strdup(path); + for (at = strchr(p, '@'); at && *at; ) { + if (*at == '/') { + at = strchr(at, '@'); + } else { + *at = tolower(*at); + ++at; + } + } + + return fdt_path_offset(fdt, p); +} + +static uint32_t vof_finddevice(const void *fdt, uint32_t nodeaddr) +{ + char fullnode[VOF_MAX_PATH]; + uint32_t ret = PROM_ERROR; + int offset; + + if (readstr(nodeaddr, fullnode, sizeof(fullnode))) { + return (uint32_t) ret; + } + + offset = path_offset(fdt, fullnode); + if (offset >= 0) { + ret = fdt_get_phandle(fdt, offset); + } + trace_vof_finddevice(fullnode, ret); + return ret; +} + +static const void *getprop(const void *fdt, int nodeoff, const char *propname, + int *proplen, bool *write0) +{ + const char *unit, *prop; + const void *ret = fdt_getprop(fdt, nodeoff, propname, proplen); + + if (ret) { + if (write0) { + *write0 = false; + } + return ret; + } + + if (strcmp(propname, "name")) { + return NULL; + } + /* + * We return a value for "name" from path if queried but property does not + * exist. @proplen does not include the unit part in this case. + */ + prop = fdt_get_name(fdt, nodeoff, proplen); + if (!prop) { + *proplen = 0; + return NULL; + } + + unit = memchr(prop, '@', *proplen); + if (unit) { + *proplen = unit - prop; + } + *proplen += 1; + + /* + * Since it might be cut at "@" and there will be no trailing zero + * in the prop buffer, tell the caller to write zero at the end. + */ + if (write0) { + *write0 = true; + } + return prop; +} + +static uint32_t vof_getprop(const void *fdt, uint32_t nodeph, uint32_t pname, + uint32_t valaddr, uint32_t vallen) +{ + char propname[OF_PROPNAME_LEN_MAX + 1]; + uint32_t ret = 0; + int proplen = 0; + const void *prop; + char trval[64] = ""; + int nodeoff = fdt_node_offset_by_phandle(fdt, nodeph); + bool write0; + + if (nodeoff < 0) { + return PROM_ERROR; + } + if (readstr(pname, propname, sizeof(propname))) { + return PROM_ERROR; + } + prop = getprop(fdt, nodeoff, propname, &proplen, &write0); + if (prop) { + const char zero = 0; + int cb = MIN(proplen, vallen); + + if (VOF_MEM_WRITE(valaddr, prop, cb) != MEMTX_OK || + /* if that was "name" with a unit address, overwrite '@' with '0' */ + (write0 && + cb == proplen && + VOF_MEM_WRITE(valaddr + cb - 1, &zero, 1) != MEMTX_OK)) { + ret = PROM_ERROR; + } else { + /* + * OF1275 says: + * "Size is either the actual size of the property, or -1 if name + * does not exist", hence returning proplen instead of cb. 
+ */ + ret = proplen; + /* Do not format a value if tracepoint is silent, for performance */ + if (trace_event_get_state(TRACE_VOF_GETPROP) && + qemu_loglevel_mask(LOG_TRACE)) { + prop_format(trval, sizeof(trval), prop, ret); + } + } + } else { + ret = PROM_ERROR; + } + trace_vof_getprop(nodeph, propname, ret, trval); + + return ret; +} + +static uint32_t vof_getproplen(const void *fdt, uint32_t nodeph, uint32_t pname) +{ + char propname[OF_PROPNAME_LEN_MAX + 1]; + uint32_t ret = 0; + int proplen = 0; + const void *prop; + int nodeoff = fdt_node_offset_by_phandle(fdt, nodeph); + + if (nodeoff < 0) { + return PROM_ERROR; + } + if (readstr(pname, propname, sizeof(propname))) { + return PROM_ERROR; + } + prop = getprop(fdt, nodeoff, propname, &proplen, NULL); + if (prop) { + ret = proplen; + } else { + ret = PROM_ERROR; + } + trace_vof_getproplen(nodeph, propname, ret); + + return ret; +} + +static uint32_t vof_setprop(MachineState *ms, void *fdt, Vof *vof, + uint32_t nodeph, uint32_t pname, + uint32_t valaddr, uint32_t vallen) +{ + char propname[OF_PROPNAME_LEN_MAX + 1]; + uint32_t ret = PROM_ERROR; + int offset, rc; + char trval[64] = ""; + char nodepath[VOF_MAX_PATH] = ""; + Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + VofMachineIfClass *vmc; + g_autofree char *val = NULL; + + if (vallen > VOF_MAX_SETPROPLEN) { + goto trace_exit; + } + if (readstr(pname, propname, sizeof(propname))) { + goto trace_exit; + } + offset = fdt_node_offset_by_phandle(fdt, nodeph); + if (offset < 0) { + goto trace_exit; + } + rc = get_path(fdt, offset, nodepath, sizeof(nodepath)); + if (rc <= 0) { + goto trace_exit; + } + + val = g_malloc0(vallen); + if (VOF_MEM_READ(valaddr, val, vallen) != MEMTX_OK) { + goto trace_exit; + } + + if (!vmo) { + goto trace_exit; + } + + vmc = VOF_MACHINE_GET_CLASS(vmo); + if (!vmc->setprop || !vmc->setprop(ms, nodepath, propname, val, vallen)) { + goto trace_exit; + } + + rc = fdt_setprop(fdt, offset, propname, val, vallen); + if (rc) { + goto trace_exit; + } + + if (trace_event_get_state(TRACE_VOF_SETPROP) && + qemu_loglevel_mask(LOG_TRACE)) { + prop_format(trval, sizeof(trval), val, vallen); + } + ret = vallen; + +trace_exit: + trace_vof_setprop(nodeph, propname, trval, vallen, ret); + + return ret; +} + +static uint32_t vof_nextprop(const void *fdt, uint32_t phandle, + uint32_t prevaddr, uint32_t nameaddr) +{ + int offset, nodeoff = fdt_node_offset_by_phandle(fdt, phandle); + char prev[OF_PROPNAME_LEN_MAX + 1]; + const char *tmp; + + if (readstr(prevaddr, prev, sizeof(prev))) { + return PROM_ERROR; + } + + fdt_for_each_property_offset(offset, fdt, nodeoff) { + if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) { + return 0; + } + if (prev[0] == '\0' || strcmp(prev, tmp) == 0) { + if (prev[0] != '\0') { + offset = fdt_next_property_offset(fdt, offset); + if (offset < 0) { + return 0; + } + } + if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) { + return 0; + } + + if (VOF_MEM_WRITE(nameaddr, tmp, strlen(tmp) + 1) != MEMTX_OK) { + return PROM_ERROR; + } + return 1; + } + } + + return 0; +} + +static uint32_t vof_peer(const void *fdt, uint32_t phandle) +{ + uint32_t ret = 0; + int rc; + + if (phandle == 0) { + rc = fdt_path_offset(fdt, "/"); + } else { + rc = fdt_next_subnode(fdt, fdt_node_offset_by_phandle(fdt, phandle)); + } + + if (rc >= 0) { + ret = fdt_get_phandle(fdt, rc); + } + + return ret; +} + +static uint32_t vof_child(const void *fdt, uint32_t phandle) +{ + uint32_t ret = 0; + int rc = fdt_first_subnode(fdt, fdt_node_offset_by_phandle(fdt, 
phandle)); + + if (rc >= 0) { + ret = fdt_get_phandle(fdt, rc); + } + + return ret; +} + +static uint32_t vof_parent(const void *fdt, uint32_t phandle) +{ + uint32_t ret = 0; + int rc = fdt_parent_offset(fdt, fdt_node_offset_by_phandle(fdt, phandle)); + + if (rc >= 0) { + ret = fdt_get_phandle(fdt, rc); + } + + return ret; +} + +static uint32_t vof_do_open(void *fdt, Vof *vof, int offset, const char *path) +{ + uint32_t ret = PROM_ERROR; + OfInstance *inst = NULL; + + if (vof->of_instance_last == 0xFFFFFFFF) { + /* We do not recycle ihandles yet */ + goto trace_exit; + } + + inst = g_new0(OfInstance, 1); + inst->phandle = fdt_get_phandle(fdt, offset); + g_assert(inst->phandle); + ++vof->of_instance_last; + + inst->path = g_strdup(path); + g_hash_table_insert(vof->of_instances, + GINT_TO_POINTER(vof->of_instance_last), + inst); + ret = vof->of_instance_last; + +trace_exit: + trace_vof_open(path, inst ? inst->phandle : 0, ret); + + return ret; +} + +uint32_t vof_client_open_store(void *fdt, Vof *vof, const char *nodename, + const char *prop, const char *path) +{ + int offset, node = fdt_path_offset(fdt, nodename); + uint32_t inst; + + offset = fdt_path_offset(fdt, path); + if (offset < 0) { + trace_vof_error_unknown_path(path); + return PROM_ERROR; + } + + inst = vof_do_open(fdt, vof, offset, path); + + return fdt_setprop_cell(fdt, node, prop, inst) >= 0 ? 0 : PROM_ERROR; +} + +static uint32_t vof_open(void *fdt, Vof *vof, uint32_t pathaddr) +{ + char path[VOF_MAX_PATH]; + int offset; + + if (readstr(pathaddr, path, sizeof(path))) { + return PROM_ERROR; + } + + offset = path_offset(fdt, path); + if (offset < 0) { + trace_vof_error_unknown_path(path); + return PROM_ERROR; + } + + return vof_do_open(fdt, vof, offset, path); +} + +static void vof_close(Vof *vof, uint32_t ihandle) +{ + if (!g_hash_table_remove(vof->of_instances, GINT_TO_POINTER(ihandle))) { + trace_vof_error_unknown_ihandle_close(ihandle); + } +} + +static uint32_t vof_instance_to_package(Vof *vof, uint32_t ihandle) +{ + gpointer instp = g_hash_table_lookup(vof->of_instances, + GINT_TO_POINTER(ihandle)); + uint32_t ret = PROM_ERROR; + + if (instp) { + ret = ((OfInstance *)instp)->phandle; + } + trace_vof_instance_to_package(ihandle, ret); + + return ret; +} + +static uint32_t vof_package_to_path(const void *fdt, uint32_t phandle, + uint32_t buf, uint32_t len) +{ + int rc; + char tmp[VOF_MAX_PATH] = ""; + + rc = phandle_to_path(fdt, phandle, tmp, sizeof(tmp)); + if (rc > 0) { + if (VOF_MEM_WRITE(buf, tmp, rc) != MEMTX_OK) { + rc = -1; + } + } + + trace_vof_package_to_path(phandle, tmp, rc); + + return rc > 0 ? (uint32_t)rc : PROM_ERROR; +} + +static uint32_t vof_instance_to_path(void *fdt, Vof *vof, uint32_t ihandle, + uint32_t buf, uint32_t len) +{ + int rc = -1; + uint32_t phandle = vof_instance_to_package(vof, ihandle); + char tmp[VOF_MAX_PATH] = ""; + + if (phandle != -1) { + rc = phandle_to_path(fdt, phandle, tmp, sizeof(tmp)); + if (rc > 0) { + if (VOF_MEM_WRITE(buf, tmp, rc) != MEMTX_OK) { + rc = -1; + } + } + } + trace_vof_instance_to_path(ihandle, phandle, tmp, rc); + + return rc > 0 ? 
(uint32_t)rc : PROM_ERROR; +} + +static uint32_t vof_write(Vof *vof, uint32_t ihandle, uint32_t buf, + uint32_t len) +{ + char tmp[VOF_VTY_BUF_SIZE]; + unsigned cb; + OfInstance *inst = (OfInstance *) + g_hash_table_lookup(vof->of_instances, GINT_TO_POINTER(ihandle)); + + if (!inst) { + trace_vof_error_write(ihandle); + return PROM_ERROR; + } + + for ( ; len > 0; len -= cb) { + cb = MIN(len, sizeof(tmp) - 1); + if (VOF_MEM_READ(buf, tmp, cb) != MEMTX_OK) { + return PROM_ERROR; + } + + /* FIXME: there is no backend(s) yet so just call a trace */ + if (trace_event_get_state(TRACE_VOF_WRITE) && + qemu_loglevel_mask(LOG_TRACE)) { + tmp[cb] = '\0'; + trace_vof_write(ihandle, cb, tmp); + } + } + + return len; +} + +static void vof_claimed_dump(GArray *claimed) +{ + int i; + OfClaimed c; + + if (trace_event_get_state(TRACE_VOF_CLAIMED) && + qemu_loglevel_mask(LOG_TRACE)) { + + for (i = 0; i < claimed->len; ++i) { + c = g_array_index(claimed, OfClaimed, i); + trace_vof_claimed(c.start, c.start + c.size, c.size); + } + } +} + +static bool vof_claim_avail(GArray *claimed, uint64_t virt, uint64_t size) +{ + int i; + OfClaimed c; + + for (i = 0; i < claimed->len; ++i) { + c = g_array_index(claimed, OfClaimed, i); + if (ranges_overlap(c.start, c.size, virt, size)) { + return false; + } + } + + return true; +} + +static void vof_claim_add(GArray *claimed, uint64_t virt, uint64_t size) +{ + OfClaimed newclaim; + + newclaim.start = virt; + newclaim.size = size; + g_array_append_val(claimed, newclaim); +} + +static gint of_claimed_compare_func(gconstpointer a, gconstpointer b) +{ + return ((OfClaimed *)a)->start - ((OfClaimed *)b)->start; +} + +static void vof_dt_memory_available(void *fdt, GArray *claimed, uint64_t base) +{ + int i, n, offset, proplen = 0, sc, ac; + target_ulong mem0_end; + const uint8_t *mem0_reg; + g_autofree uint8_t *avail = NULL; + uint8_t *availcur; + + if (!fdt || !claimed) { + return; + } + + offset = fdt_path_offset(fdt, "/"); + _FDT(offset); + ac = fdt_address_cells(fdt, offset); + g_assert(ac == 1 || ac == 2); + sc = fdt_size_cells(fdt, offset); + g_assert(sc == 1 || sc == 2); + + offset = fdt_path_offset(fdt, "/memory@0"); + _FDT(offset); + + mem0_reg = fdt_getprop(fdt, offset, "reg", &proplen); + g_assert(mem0_reg && proplen == sizeof(uint32_t) * (ac + sc)); + if (sc == 2) { + mem0_end = be64_to_cpu(*(uint64_t *)(mem0_reg + sizeof(uint32_t) * ac)); + } else { + mem0_end = be32_to_cpu(*(uint32_t *)(mem0_reg + sizeof(uint32_t) * ac)); + } + + g_array_sort(claimed, of_claimed_compare_func); + vof_claimed_dump(claimed); + + /* + * VOF resides in the first page so we do not need to check if there is + * available memory before the first claimed block + */ + g_assert(claimed->len && (g_array_index(claimed, OfClaimed, 0).start == 0)); + + avail = g_malloc0(sizeof(uint32_t) * (ac + sc) * claimed->len); + for (i = 0, n = 0, availcur = avail; i < claimed->len; ++i) { + OfClaimed c = g_array_index(claimed, OfClaimed, i); + uint64_t start, size; + + start = c.start + c.size; + if (i < claimed->len - 1) { + OfClaimed cn = g_array_index(claimed, OfClaimed, i + 1); + + size = cn.start - start; + } else { + size = mem0_end - start; + } + + if (ac == 2) { + *(uint64_t *) availcur = cpu_to_be64(start); + } else { + *(uint32_t *) availcur = cpu_to_be32(start); + } + availcur += sizeof(uint32_t) * ac; + if (sc == 2) { + *(uint64_t *) availcur = cpu_to_be64(size); + } else { + *(uint32_t *) availcur = cpu_to_be32(size); + } + availcur += sizeof(uint32_t) * sc; + + if (size) { + 
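/*
 * Worked example (illustrative only): with two claimed ranges,
 * 0x0..0x20000 and 0x400000..0x800000, and mem0_end = 0x10000000, the
 * gaps computed by this loop become the "available" property
 *   <0x20000 0x3e0000  0x800000 0xf800000>
 * i.e. (start, size) pairs describing the unclaimed parts of the first
 * memory node, encoded with the root node's #address-cells/#size-cells.
 */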
trace_vof_avail(c.start + c.size, c.start + c.size + size, size); + ++n; + } + } + _FDT((fdt_setprop(fdt, offset, "available", avail, availcur - avail))); +} + +/* + * OF1275: + * "Allocates size bytes of memory. If align is zero, the allocated range + * begins at the virtual address virt. Otherwise, an aligned address is + * automatically chosen and the input argument virt is ignored". + * + * In other words, exactly one of @virt and @align is non-zero. + */ +uint64_t vof_claim(Vof *vof, uint64_t virt, uint64_t size, + uint64_t align) +{ + uint64_t ret; + + if (size == 0) { + ret = -1; + } else if (align == 0) { + if (!vof_claim_avail(vof->claimed, virt, size)) { + ret = -1; + } else { + ret = virt; + } + } else { + vof->claimed_base = QEMU_ALIGN_UP(vof->claimed_base, align); + while (1) { + if (vof->claimed_base >= vof->top_addr) { + error_report("Out of RMA memory for the OF client"); + return -1; + } + if (vof_claim_avail(vof->claimed, vof->claimed_base, size)) { + break; + } + vof->claimed_base += size; + } + ret = vof->claimed_base; + } + + if (ret != -1) { + vof->claimed_base = MAX(vof->claimed_base, ret + size); + vof_claim_add(vof->claimed, ret, size); + } + trace_vof_claim(virt, size, align, ret); + + return ret; +} + +static uint32_t vof_release(Vof *vof, uint64_t virt, uint64_t size) +{ + uint32_t ret = PROM_ERROR; + int i; + GArray *claimed = vof->claimed; + OfClaimed c; + + for (i = 0; i < claimed->len; ++i) { + c = g_array_index(claimed, OfClaimed, i); + if (c.start == virt && c.size == size) { + g_array_remove_index(claimed, i); + ret = 0; + break; + } + } + + trace_vof_release(virt, size, ret); + + return ret; +} + +static void vof_instantiate_rtas(Error **errp) +{ + error_setg(errp, "The firmware should have instantiated RTAS"); +} + +static uint32_t vof_call_method(MachineState *ms, Vof *vof, uint32_t methodaddr, + uint32_t ihandle, uint32_t param1, + uint32_t param2, uint32_t param3, + uint32_t param4, uint32_t *ret2) +{ + uint32_t ret = PROM_ERROR; + char method[VOF_MAX_METHODLEN] = ""; + OfInstance *inst; + + if (!ihandle) { + goto trace_exit; + } + + inst = (OfInstance *)g_hash_table_lookup(vof->of_instances, + GINT_TO_POINTER(ihandle)); + if (!inst) { + goto trace_exit; + } + + if (readstr(methodaddr, method, sizeof(method))) { + goto trace_exit; + } + + if (strcmp(inst->path, "/") == 0) { + if (strcmp(method, "ibm,client-architecture-support") == 0) { + Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + + if (vmo) { + VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo); + + g_assert(vmc->client_architecture_support); + ret = (uint32_t)vmc->client_architecture_support(ms, first_cpu, + param1); + } + + *ret2 = 0; + } + } else if (strcmp(inst->path, "/rtas") == 0) { + if (strcmp(method, "instantiate-rtas") == 0) { + vof_instantiate_rtas(&error_fatal); + ret = 0; + *ret2 = param1; /* rtas-base */ + } + } else { + trace_vof_error_unknown_method(method); + } + +trace_exit: + trace_vof_method(ihandle, method, param1, ret, *ret2); + + return ret; +} + +static uint32_t vof_call_interpret(uint32_t cmdaddr, uint32_t param1, + uint32_t param2, uint32_t *ret2) +{ + uint32_t ret = PROM_ERROR; + char cmd[VOF_MAX_FORTHCODE] = ""; + + /* No interpret implemented so just call a trace */ + readstr(cmdaddr, cmd, sizeof(cmd)); + trace_vof_interpret(cmd, param1, param2, ret, *ret2); + + return ret; +} + +static void vof_quiesce(MachineState *ms, void *fdt, Vof *vof) +{ + Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + /* After "quiesce", no 
change is expected to the FDT, pack FDT to ensure */ + int rc = fdt_pack(fdt); + + assert(rc == 0); + + if (vmo) { + VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo); + + if (vmc->quiesce) { + vmc->quiesce(ms); + } + } + + vof_claimed_dump(vof->claimed); +} + +static uint32_t vof_client_handle(MachineState *ms, void *fdt, Vof *vof, + const char *service, + uint32_t *args, unsigned nargs, + uint32_t *rets, unsigned nrets) +{ + uint32_t ret = 0; + + /* @nrets includes the value which this function returns */ +#define cmpserv(s, a, r) \ + cmpservice(service, nargs, nrets, (s), (a), (r)) + + if (cmpserv("finddevice", 1, 1)) { + ret = vof_finddevice(fdt, args[0]); + } else if (cmpserv("getprop", 4, 1)) { + ret = vof_getprop(fdt, args[0], args[1], args[2], args[3]); + } else if (cmpserv("getproplen", 2, 1)) { + ret = vof_getproplen(fdt, args[0], args[1]); + } else if (cmpserv("setprop", 4, 1)) { + ret = vof_setprop(ms, fdt, vof, args[0], args[1], args[2], args[3]); + } else if (cmpserv("nextprop", 3, 1)) { + ret = vof_nextprop(fdt, args[0], args[1], args[2]); + } else if (cmpserv("peer", 1, 1)) { + ret = vof_peer(fdt, args[0]); + } else if (cmpserv("child", 1, 1)) { + ret = vof_child(fdt, args[0]); + } else if (cmpserv("parent", 1, 1)) { + ret = vof_parent(fdt, args[0]); + } else if (cmpserv("open", 1, 1)) { + ret = vof_open(fdt, vof, args[0]); + } else if (cmpserv("close", 1, 0)) { + vof_close(vof, args[0]); + } else if (cmpserv("instance-to-package", 1, 1)) { + ret = vof_instance_to_package(vof, args[0]); + } else if (cmpserv("package-to-path", 3, 1)) { + ret = vof_package_to_path(fdt, args[0], args[1], args[2]); + } else if (cmpserv("instance-to-path", 3, 1)) { + ret = vof_instance_to_path(fdt, vof, args[0], args[1], args[2]); + } else if (cmpserv("write", 3, 1)) { + ret = vof_write(vof, args[0], args[1], args[2]); + } else if (cmpserv("claim", 3, 1)) { + uint64_t ret64 = vof_claim(vof, args[0], args[1], args[2]); + + if (ret64 < 0x100000000UL) { + vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base); + ret = (uint32_t)ret64; + } else { + if (ret64 != -1) { + vof_release(vof, ret, args[1]); + } + ret = PROM_ERROR; + } + } else if (cmpserv("release", 2, 0)) { + ret = vof_release(vof, args[0], args[1]); + if (ret != PROM_ERROR) { + vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base); + } + } else if (cmpserv("call-method", 0, 0)) { + ret = vof_call_method(ms, vof, args[0], args[1], args[2], args[3], + args[4], args[5], rets); + } else if (cmpserv("interpret", 0, 0)) { + ret = vof_call_interpret(args[0], args[1], args[2], rets); + } else if (cmpserv("milliseconds", 0, 1)) { + ret = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + } else if (cmpserv("quiesce", 0, 0)) { + vof_quiesce(ms, fdt, vof); + } else if (cmpserv("exit", 0, 0)) { + error_report("Stopped as the VM requested \"exit\""); + vm_stop(RUN_STATE_PAUSED); + } else { + trace_vof_error_unknown_service(service, nargs, nrets); + ret = -1; + } + +#undef cmpserv + + return ret; +} + +/* Defined as Big Endian */ +struct prom_args { + uint32_t service; + uint32_t nargs; + uint32_t nret; + uint32_t args[10]; +} QEMU_PACKED; + +int vof_client_call(MachineState *ms, Vof *vof, void *fdt, + target_ulong args_real) +{ + struct prom_args args_be; + uint32_t args[ARRAY_SIZE(args_be.args)]; + uint32_t rets[ARRAY_SIZE(args_be.args)] = { 0 }, ret; + char service[64]; + unsigned nargs, nret, i; + + if (VOF_MEM_READ(args_real, &args_be, sizeof(args_be)) != MEMTX_OK) { + return -EINVAL; + } + nargs = be32_to_cpu(args_be.nargs); + if (nargs 
>= ARRAY_SIZE(args_be.args)) { + return -EINVAL; + } + + if (VOF_MEM_READ(be32_to_cpu(args_be.service), service, sizeof(service)) != + MEMTX_OK) { + return -EINVAL; + } + if (strnlen(service, sizeof(service)) == sizeof(service)) { + /* Too long service name */ + return -EINVAL; + } + + for (i = 0; i < nargs; ++i) { + args[i] = be32_to_cpu(args_be.args[i]); + } + + nret = be32_to_cpu(args_be.nret); + if (nret > ARRAY_SIZE(args_be.args) - nargs) { + return -EINVAL; + } + ret = vof_client_handle(ms, fdt, vof, service, args, nargs, rets, nret); + if (!nret) { + return 0; + } + + /* @nrets includes the value which this function returns */ + args_be.args[nargs] = cpu_to_be32(ret); + for (i = 1; i < nret; ++i) { + args_be.args[nargs + i] = cpu_to_be32(rets[i - 1]); + } + + if (VOF_MEM_WRITE(args_real + offsetof(struct prom_args, args[nargs]), + args_be.args + nargs, sizeof(args_be.args[0]) * nret) != + MEMTX_OK) { + return -EINVAL; + } + + return 0; +} + +static void vof_instance_free(gpointer data) +{ + OfInstance *inst = (OfInstance *)data; + + g_free(inst->path); + g_free(inst); +} + +void vof_init(Vof *vof, uint64_t top_addr, Error **errp) +{ + vof_cleanup(vof); + + vof->of_instances = g_hash_table_new_full(g_direct_hash, g_direct_equal, + NULL, vof_instance_free); + vof->claimed = g_array_new(false, false, sizeof(OfClaimed)); + + /* Keep allocations in 32bit as CLI ABI can only return cells==32bit */ + vof->top_addr = MIN(top_addr, 4 * GiB); + if (vof_claim(vof, 0, vof->fw_size, 0) == -1) { + error_setg(errp, "Memory for firmware is in use"); + } +} + +void vof_cleanup(Vof *vof) +{ + if (vof->claimed) { + g_array_unref(vof->claimed); + } + if (vof->of_instances) { + g_hash_table_unref(vof->of_instances); + } + vof->claimed = NULL; + vof->of_instances = NULL; +} + +void vof_build_dt(void *fdt, Vof *vof) +{ + uint32_t phandle = fdt_get_max_phandle(fdt); + int offset, proplen = 0; + const void *prop; + + /* Assign phandles to nodes without predefined phandles (like XICS/XIVE) */ + for (offset = fdt_next_node(fdt, -1, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + prop = fdt_getprop(fdt, offset, "phandle", &proplen); + if (prop) { + continue; + } + ++phandle; + _FDT(fdt_setprop_cell(fdt, offset, "phandle", phandle)); + } + + vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base); +} + +static const TypeInfo vof_machine_if_info = { + .name = TYPE_VOF_MACHINE_IF, + .parent = TYPE_INTERFACE, + .class_size = sizeof(VofMachineIfClass), +}; + +static void vof_machine_if_register_types(void) +{ + type_register_static(&vof_machine_if_info); +} +type_init(vof_machine_if_register_types) diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index 49141d4074b..cfd85de3e66 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -27,58 +27,58 @@ #define PG_DIR_SZ { TARGET_PAGE_SIZE / sizeof(__u64) } #define PG_TBL_SZ { TARGET_PAGE_SIZE / sizeof(__u64) } -void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res) -{ - monitor_printf(mon, "\ttx : %" PRId64 "\n", - dev_res->stats.tx); - monitor_printf(mon, "\ttx_len : %" PRId64 "\n", - dev_res->stats.tx_len); - monitor_printf(mon, "\ttx_err : %" PRId64 "\n", - dev_res->stats.tx_err); - monitor_printf(mon, "\trx_bufs : %" PRId64 "\n", - dev_res->stats.rx_bufs); - monitor_printf(mon, "\trx_srq : %" PRId64 "\n", - dev_res->stats.rx_srq); - monitor_printf(mon, "\trx_bufs_len : %" PRId64 "\n", - dev_res->stats.rx_bufs_len); - monitor_printf(mon, "\trx_bufs_err : %" PRId64 "\n", - dev_res->stats.rx_bufs_err); - 
monitor_printf(mon, "\tcomps : %" PRId64 "\n", - dev_res->stats.completions); - monitor_printf(mon, "\tmissing_comps : %" PRId32 "\n", - dev_res->stats.missing_cqe); - monitor_printf(mon, "\tpoll_cq (bk) : %" PRId64 "\n", - dev_res->stats.poll_cq_from_bk); - monitor_printf(mon, "\tpoll_cq_ppoll_to : %" PRId64 "\n", - dev_res->stats.poll_cq_ppoll_to); - monitor_printf(mon, "\tpoll_cq (fe) : %" PRId64 "\n", - dev_res->stats.poll_cq_from_guest); - monitor_printf(mon, "\tpoll_cq_empty : %" PRId64 "\n", - dev_res->stats.poll_cq_from_guest_empty); - monitor_printf(mon, "\tmad_tx : %" PRId64 "\n", - dev_res->stats.mad_tx); - monitor_printf(mon, "\tmad_tx_err : %" PRId64 "\n", - dev_res->stats.mad_tx_err); - monitor_printf(mon, "\tmad_rx : %" PRId64 "\n", - dev_res->stats.mad_rx); - monitor_printf(mon, "\tmad_rx_err : %" PRId64 "\n", - dev_res->stats.mad_rx_err); - monitor_printf(mon, "\tmad_rx_bufs : %" PRId64 "\n", - dev_res->stats.mad_rx_bufs); - monitor_printf(mon, "\tmad_rx_bufs_err : %" PRId64 "\n", - dev_res->stats.mad_rx_bufs_err); - monitor_printf(mon, "\tPDs : %" PRId32 "\n", - dev_res->pd_tbl.used); - monitor_printf(mon, "\tMRs : %" PRId32 "\n", - dev_res->mr_tbl.used); - monitor_printf(mon, "\tUCs : %" PRId32 "\n", - dev_res->uc_tbl.used); - monitor_printf(mon, "\tQPs : %" PRId32 "\n", - dev_res->qp_tbl.used); - monitor_printf(mon, "\tCQs : %" PRId32 "\n", - dev_res->cq_tbl.used); - monitor_printf(mon, "\tCEQ_CTXs : %" PRId32 "\n", - dev_res->cqe_ctx_tbl.used); +void rdma_format_device_counters(RdmaDeviceResources *dev_res, GString *buf) +{ + g_string_append_printf(buf, "\ttx : %" PRId64 "\n", + dev_res->stats.tx); + g_string_append_printf(buf, "\ttx_len : %" PRId64 "\n", + dev_res->stats.tx_len); + g_string_append_printf(buf, "\ttx_err : %" PRId64 "\n", + dev_res->stats.tx_err); + g_string_append_printf(buf, "\trx_bufs : %" PRId64 "\n", + dev_res->stats.rx_bufs); + g_string_append_printf(buf, "\trx_srq : %" PRId64 "\n", + dev_res->stats.rx_srq); + g_string_append_printf(buf, "\trx_bufs_len : %" PRId64 "\n", + dev_res->stats.rx_bufs_len); + g_string_append_printf(buf, "\trx_bufs_err : %" PRId64 "\n", + dev_res->stats.rx_bufs_err); + g_string_append_printf(buf, "\tcomps : %" PRId64 "\n", + dev_res->stats.completions); + g_string_append_printf(buf, "\tmissing_comps : %" PRId32 "\n", + dev_res->stats.missing_cqe); + g_string_append_printf(buf, "\tpoll_cq (bk) : %" PRId64 "\n", + dev_res->stats.poll_cq_from_bk); + g_string_append_printf(buf, "\tpoll_cq_ppoll_to : %" PRId64 "\n", + dev_res->stats.poll_cq_ppoll_to); + g_string_append_printf(buf, "\tpoll_cq (fe) : %" PRId64 "\n", + dev_res->stats.poll_cq_from_guest); + g_string_append_printf(buf, "\tpoll_cq_empty : %" PRId64 "\n", + dev_res->stats.poll_cq_from_guest_empty); + g_string_append_printf(buf, "\tmad_tx : %" PRId64 "\n", + dev_res->stats.mad_tx); + g_string_append_printf(buf, "\tmad_tx_err : %" PRId64 "\n", + dev_res->stats.mad_tx_err); + g_string_append_printf(buf, "\tmad_rx : %" PRId64 "\n", + dev_res->stats.mad_rx); + g_string_append_printf(buf, "\tmad_rx_err : %" PRId64 "\n", + dev_res->stats.mad_rx_err); + g_string_append_printf(buf, "\tmad_rx_bufs : %" PRId64 "\n", + dev_res->stats.mad_rx_bufs); + g_string_append_printf(buf, "\tmad_rx_bufs_err : %" PRId64 "\n", + dev_res->stats.mad_rx_bufs_err); + g_string_append_printf(buf, "\tPDs : %" PRId32 "\n", + dev_res->pd_tbl.used); + g_string_append_printf(buf, "\tMRs : %" PRId32 "\n", + dev_res->mr_tbl.used); + g_string_append_printf(buf, "\tUCs : %" PRId32 "\n", + 
dev_res->uc_tbl.used); + g_string_append_printf(buf, "\tQPs : %" PRId32 "\n", + dev_res->qp_tbl.used); + g_string_append_printf(buf, "\tCQs : %" PRId32 "\n", + dev_res->cq_tbl.used); + g_string_append_printf(buf, "\tCEQ_CTXs : %" PRId32 "\n", + dev_res->cqe_ctx_tbl.used); } static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl, diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h index e8639909cd3..d69a917795d 100644 --- a/hw/rdma/rdma_rm.h +++ b/hw/rdma/rdma_rm.h @@ -92,6 +92,6 @@ static inline union ibv_gid *rdma_rm_get_gid(RdmaDeviceResources *dev_res, { return &dev_res->port.gid_tbl[sgid_idx].gid; } -void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res); +void rdma_format_device_counters(RdmaDeviceResources *dev_res, GString *buf); #endif diff --git a/hw/rdma/trace-events b/hw/rdma/trace-events index 2022a820cbe..9accb149734 100644 --- a/hw/rdma/trace-events +++ b/hw/rdma/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # rdma_backend.c rdma_check_dev_attr(const char *name, int max_bk, int max_fe) "%s: be=%d, fe=%d" diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c index f59879e2574..da7ddfa548f 100644 --- a/hw/rdma/vmw/pvrdma_cmd.c +++ b/hw/rdma/vmw/pvrdma_cmd.c @@ -38,6 +38,13 @@ static void *pvrdma_map_to_pdir(PCIDevice *pdev, uint64_t pdir_dma, return NULL; } + length = ROUND_UP(length, TARGET_PAGE_SIZE); + if (nchunks * TARGET_PAGE_SIZE != length) { + rdma_error_report("Invalid nchunks/length (%u, %lu)", nchunks, + (unsigned long)length); + return NULL; + } + dir = rdma_pci_dma_map(pdev, pdir_dma, TARGET_PAGE_SIZE); if (!dir) { rdma_error_report("Failed to map to page directory"); diff --git a/hw/rdma/vmw/pvrdma_dev_ring.c b/hw/rdma/vmw/pvrdma_dev_ring.c index 074ac59b84d..42130667a7d 100644 --- a/hw/rdma/vmw/pvrdma_dev_ring.c +++ b/hw/rdma/vmw/pvrdma_dev_ring.c @@ -41,7 +41,7 @@ int pvrdma_ring_init(PvrdmaRing *ring, const char *name, PCIDevice *dev, qatomic_set(&ring->ring_state->cons_head, 0); */ ring->npages = npages; - ring->pages = g_malloc(npages * sizeof(void *)); + ring->pages = g_malloc0(npages * sizeof(void *)); for (i = 0; i < npages; i++) { if (!tbl[i]) { diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index 84ae8024fcf..91206dbb8eb 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -58,24 +58,25 @@ static Property pvrdma_dev_properties[] = { DEFINE_PROP_END_OF_LIST(), }; -static void pvrdma_print_statistics(Monitor *mon, RdmaProvider *obj) +static void pvrdma_format_statistics(RdmaProvider *obj, GString *buf) { PVRDMADev *dev = PVRDMA_DEV(obj); PCIDevice *pdev = PCI_DEVICE(dev); - monitor_printf(mon, "%s, %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn)); - monitor_printf(mon, "\tcommands : %" PRId64 "\n", - dev->stats.commands); - monitor_printf(mon, "\tregs_reads : %" PRId64 "\n", - dev->stats.regs_reads); - monitor_printf(mon, "\tregs_writes : %" PRId64 "\n", - dev->stats.regs_writes); - monitor_printf(mon, "\tuar_writes : %" PRId64 "\n", - dev->stats.uar_writes); - monitor_printf(mon, "\tinterrupts : %" PRId64 "\n", - dev->stats.interrupts); - rdma_dump_device_counters(mon, &dev->rdma_dev_res); + g_string_append_printf(buf, "%s, %x.%x\n", + pdev->name, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn)); + g_string_append_printf(buf, "\tcommands : %" PRId64 "\n", + dev->stats.commands); + g_string_append_printf(buf, "\tregs_reads : %" PRId64 "\n", + 
dev->stats.regs_reads); + g_string_append_printf(buf, "\tregs_writes : %" PRId64 "\n", + dev->stats.regs_writes); + g_string_append_printf(buf, "\tuar_writes : %" PRId64 "\n", + dev->stats.uar_writes); + g_string_append_printf(buf, "\tinterrupts : %" PRId64 "\n", + dev->stats.interrupts); + rdma_format_device_counters(&dev->rdma_dev_res, buf); } static void free_dev_ring(PCIDevice *pci_dev, PvrdmaRing *ring, @@ -92,6 +93,11 @@ static int init_dev_ring(PvrdmaRing *ring, PvrdmaRingState **ring_state, uint64_t *dir, *tbl; int rc = 0; + if (!num_pages) { + rdma_error_report("Ring pages count must be strictly positive"); + return -EINVAL; + } + dir = rdma_pci_dma_map(pci_dev, dir_addr, TARGET_PAGE_SIZE); if (!dir) { rdma_error_report("Failed to map to page directory (ring %s)", name); @@ -694,7 +700,7 @@ static void pvrdma_class_init(ObjectClass *klass, void *data) device_class_set_props(dc, pvrdma_dev_properties); set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); - ir->print_statistics = pvrdma_print_statistics; + ir->format_statistics = pvrdma_format_statistics; } static const TypeInfo pvrdma_info = { diff --git a/hw/rdma/vmw/trace-events b/hw/rdma/vmw/trace-events index 323fca8456d..a6c77e1e10a 100644 --- a/hw/rdma/vmw/trace-events +++ b/hw/rdma/vmw/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # pvrdma_main.c pvrdma_regs_read(uint64_t addr, uint64_t val) "pvrdma.regs[0x%"PRIx64"]=0x%"PRIx64 diff --git a/hw/remote/iohub.c b/hw/remote/iohub.c index e4ff131a6b8..547d597f0fe 100644 --- a/hw/remote/iohub.c +++ b/hw/remote/iohub.c @@ -15,7 +15,6 @@ #include "hw/pci/pci_ids.h" #include "hw/pci/pci_bus.h" #include "qemu/thread.h" -#include "hw/boards.h" #include "hw/remote/machine.h" #include "hw/remote/iohub.h" #include "qemu/main-loop.h" diff --git a/hw/remote/machine.c b/hw/remote/machine.c index c0ab4f528aa..952105eab5a 100644 --- a/hw/remote/machine.c +++ b/hw/remote/machine.c @@ -17,7 +17,6 @@ #include "qemu-common.h" #include "hw/remote/machine.h" -#include "exec/address-spaces.h" #include "exec/memory.h" #include "qapi/error.h" #include "hw/pci/pci_host.h" diff --git a/hw/remote/memory.c b/hw/remote/memory.c index 32085b1e05e..6e21ab1a45c 100644 --- a/hw/remote/memory.c +++ b/hw/remote/memory.c @@ -12,7 +12,6 @@ #include "qemu-common.h" #include "hw/remote/memory.h" -#include "exec/address-spaces.h" #include "exec/ram_addr.h" #include "qapi/error.h" @@ -42,13 +41,12 @@ void remote_sysmem_reconfig(MPQemuMsg *msg, Error **errp) remote_sysmem_reset(); - for (region = 0; region < msg->num_fds; region++) { - g_autofree char *name; + for (region = 0; region < msg->num_fds; region++, suffix++) { + g_autofree char *name = g_strdup_printf("remote-mem-%u", suffix); subregion = g_new(MemoryRegion, 1); - name = g_strdup_printf("remote-mem-%u", suffix++); memory_region_init_ram_from_fd(subregion, NULL, name, sysmem_info->sizes[region], - true, msg->fds[region], + RAM_SHARED, msg->fds[region], sysmem_info->offsets[region], errp); diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c index 9ce31526e8f..7e841820e52 100644 --- a/hw/remote/mpqemu-link.c +++ b/hw/remote/mpqemu-link.c @@ -34,7 +34,6 @@ */ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) { - ERRP_GUARD(); bool iolock = qemu_mutex_iothread_locked(); bool iothread = qemu_in_iothread(); struct iovec send[2] = {}; @@ -97,7 +96,6 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) static ssize_t 
mpqemu_read(QIOChannel *ioc, void *buf, size_t len, int **fds, size_t *nfds, Error **errp) { - ERRP_GUARD(); struct iovec iov = { .iov_base = buf, .iov_len = len }; bool iolock = qemu_mutex_iothread_locked(); bool iothread = qemu_in_iothread(); @@ -192,7 +190,6 @@ bool mpqemu_msg_recv(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) uint64_t mpqemu_msg_send_and_await_reply(MPQemuMsg *msg, PCIProxyDev *pdev, Error **errp) { - ERRP_GUARD(); MPQemuMsg msg_reply = {0}; uint64_t ret = UINT64_MAX; @@ -218,7 +215,7 @@ uint64_t mpqemu_msg_send_and_await_reply(MPQemuMsg *msg, PCIProxyDev *pdev, bool mpqemu_msg_valid(MPQemuMsg *msg) { - if (msg->cmd >= MPQEMU_CMD_MAX && msg->cmd < 0) { + if (msg->cmd >= MPQEMU_CMD_MAX || msg->cmd < 0) { return false; } diff --git a/hw/remote/proxy-memory-listener.c b/hw/remote/proxy-memory-listener.c index af1fa6f5aaa..882c9b4854d 100644 --- a/hw/remote/proxy-memory-listener.c +++ b/hw/remote/proxy-memory-listener.c @@ -14,9 +14,7 @@ #include "qemu/range.h" #include "exec/memory.h" #include "exec/cpu-common.h" -#include "cpu.h" #include "exec/ram_addr.h" -#include "exec/address-spaces.h" #include "qapi/error.h" #include "hw/remote/mpqemu-link.h" #include "hw/remote/proxy-memory-listener.h" @@ -221,6 +219,7 @@ void proxy_memory_listener_configure(ProxyMemoryListener *proxy_listener, proxy_listener->listener.region_add = proxy_memory_listener_region_addnop; proxy_listener->listener.region_nop = proxy_memory_listener_region_addnop; proxy_listener->listener.priority = 10; + proxy_listener->listener.name = "proxy"; memory_listener_register(&proxy_listener->listener, &address_space_memory); diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c index 4fa4be079d7..bad164299dd 100644 --- a/hw/remote/proxy.c +++ b/hw/remote/proxy.c @@ -102,10 +102,18 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp) } dev->ioc = qio_channel_new_fd(fd, errp); + if (!dev->ioc) { + close(fd); + return; + } error_setg(&dev->migration_blocker, "%s does not support migration", TYPE_PCI_PROXY_DEV); - migrate_add_blocker(dev->migration_blocker, errp); + if (migrate_add_blocker(dev->migration_blocker, errp) < 0) { + error_free(dev->migration_blocker); + object_unref(dev->ioc); + return; + } qemu_mutex_init(&dev->io_mutex); qio_channel_set_blocking(dev->ioc, true, NULL); @@ -316,6 +324,7 @@ static void probe_pci_info(PCIDevice *dev, Error **errp) set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); break; case PCI_BASE_CLASS_NETWORK: + case PCI_BASE_CLASS_WIRELESS: set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); break; case PCI_BASE_CLASS_INPUT: @@ -347,13 +356,12 @@ static void probe_pci_info(PCIDevice *dev, Error **errp) PCI_BASE_ADDRESS_SPACE_IO : PCI_BASE_ADDRESS_SPACE_MEMORY; if (size) { - g_autofree char *name; + g_autofree char *name = g_strdup_printf("bar-region-%d", i); pdev->region[i].dev = pdev; pdev->region[i].present = true; if (type == PCI_BASE_ADDRESS_SPACE_MEMORY) { pdev->region[i].memory = true; } - name = g_strdup_printf("bar-region-%d", i); memory_region_init_io(&pdev->region[i].mr, OBJECT(pdev), &proxy_mr_ops, &pdev->region[i], name, size); diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig index 1de18cdcf1f..d2d869aaade 100644 --- a/hw/riscv/Kconfig +++ b/hw/riscv/Kconfig @@ -1,3 +1,6 @@ +config RISCV_NUMA + bool + config IBEX bool @@ -9,7 +12,7 @@ config MICROCHIP_PFSOC select MCHP_PFSOC_MMUART select MCHP_PFSOC_SYSREG select MSI_NONBROKEN - select SIFIVE_CLINT + select RISCV_ACLINT select SIFIVE_PDMA select SIFIVE_PLIC select UNIMP @@ -19,17 +22,26 @@ config OPENTITAN 
select IBEX select UNIMP +config SHAKTI_C + bool + select UNIMP + select SHAKTI_UART + select RISCV_ACLINT + select SIFIVE_PLIC + config RISCV_VIRT bool imply PCI_DEVICES + imply VIRTIO_VGA imply TEST_DEVICES + select RISCV_NUMA select GOLDFISH_RTC select MSI_NONBROKEN select PCI select PCI_EXPRESS_GENERIC_BRIDGE select PFLASH_CFI01 select SERIAL - select SIFIVE_CLINT + select RISCV_ACLINT select SIFIVE_PLIC select SIFIVE_TEST select VIRTIO_MMIO @@ -38,7 +50,7 @@ config RISCV_VIRT config SIFIVE_E bool select MSI_NONBROKEN - select SIFIVE_CLINT + select RISCV_ACLINT select SIFIVE_GPIO select SIFIVE_PLIC select SIFIVE_UART @@ -49,7 +61,7 @@ config SIFIVE_U bool select CADENCE select MSI_NONBROKEN - select SIFIVE_CLINT + select RISCV_ACLINT select SIFIVE_GPIO select SIFIVE_PDMA select SIFIVE_PLIC @@ -57,13 +69,15 @@ config SIFIVE_U select SIFIVE_UART select SIFIVE_U_OTP select SIFIVE_U_PRCI + select SIFIVE_PWM select SSI_M25P80 select SSI_SD select UNIMP config SPIKE bool + select RISCV_NUMA select HTIF select MSI_NONBROKEN - select SIFIVE_CLINT + select RISCV_ACLINT select SIFIVE_PLIC diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c index 0d38bb7426b..519fa455a15 100644 --- a/hw/riscv/boot.c +++ b/hw/riscv/boot.c @@ -35,7 +35,32 @@ bool riscv_is_32bit(RISCVHartArrayState *harts) { - return riscv_cpu_is_32bit(&harts->harts[0].env); + return harts->harts[0].env.misa_mxl_max == MXL_RV32; +} + +/* + * Return the per-socket PLIC hart topology configuration string + * (caller must free with g_free()) + */ +char *riscv_plic_hart_config_string(int hart_count) +{ + g_autofree const char **vals = g_new(const char *, hart_count + 1); + int i; + + for (i = 0; i < hart_count; i++) { + CPUState *cs = qemu_get_cpu(i); + CPURISCVState *env = &RISCV_CPU(cs)->env; + + if (riscv_has_ext(env, RVS)) { + vals[i] = "MS"; + } else { + vals[i] = "M"; + } + } + vals[i] = NULL; + + /* g_strjoinv() obliges us to cast away const here */ + return g_strjoinv(",", (char **)vals); } target_ulong riscv_calc_kernel_start_addr(RISCVHartArrayState *harts, @@ -182,7 +207,7 @@ uint32_t riscv_load_fdt(hwaddr dram_base, uint64_t mem_size, void *fdt) { uint32_t temp, fdt_addr; hwaddr dram_end = dram_base + mem_size; - int fdtsize = fdt_totalsize(fdt); + int ret, fdtsize = fdt_totalsize(fdt); if (fdtsize <= 0) { error_report("invalid device-tree"); @@ -198,7 +223,9 @@ uint32_t riscv_load_fdt(hwaddr dram_base, uint64_t mem_size, void *fdt) temp = MIN(dram_end, 3072 * MiB); fdt_addr = QEMU_ALIGN_DOWN(temp - fdtsize, 16 * MiB); - fdt_pack(fdt); + ret = fdt_pack(fdt); + /* Should only fail if we've built a corrupted tree */ + g_assert(ret == 0); /* copy in the device tree */ qemu_fdt_dumpdtb(fdt, fdtsize); diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build index 275c0f7eb7c..ab6cae57eae 100644 --- a/hw/riscv/meson.build +++ b/hw/riscv/meson.build @@ -1,9 +1,10 @@ riscv_ss = ss.source_set() riscv_ss.add(files('boot.c'), fdt) -riscv_ss.add(files('numa.c')) +riscv_ss.add(when: 'CONFIG_RISCV_NUMA', if_true: files('numa.c')) riscv_ss.add(files('riscv_hart.c')) riscv_ss.add(when: 'CONFIG_OPENTITAN', if_true: files('opentitan.c')) riscv_ss.add(when: 'CONFIG_RISCV_VIRT', if_true: files('virt.c')) +riscv_ss.add(when: 'CONFIG_SHAKTI_C', if_true: files('shakti_c.c')) riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c')) riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c')) riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c')) diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c index 
4704337fb00..57d779fb555 100644 --- a/hw/riscv/microchip_pfsoc.c +++ b/hw/riscv/microchip_pfsoc.c @@ -36,12 +36,10 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" -#include "qemu/log.h" #include "qemu/units.h" #include "qemu/cutils.h" #include "qapi/error.h" #include "hw/boards.h" -#include "hw/irq.h" #include "hw/loader.h" #include "hw/sysbus.h" #include "chardev/char.h" @@ -51,8 +49,9 @@ #include "hw/riscv/boot.h" #include "hw/riscv/riscv_hart.h" #include "hw/riscv/microchip_pfsoc.h" -#include "hw/intc/sifive_clint.h" +#include "hw/intc/riscv_aclint.h" #include "hw/intc/sifive_plic.h" +#include "sysemu/device_tree.h" #include "sysemu/sysemu.h" /* @@ -188,7 +187,6 @@ static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp) MemoryRegion *envm_data = g_new(MemoryRegion, 1); MemoryRegion *qspi_xip_mem = g_new(MemoryRegion, 1); char *plic_hart_config; - size_t plic_hart_config_len; NICInfo *nd; int i; @@ -235,9 +233,12 @@ static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp) memmap[MICROCHIP_PFSOC_BUSERR_UNIT4].size); /* CLINT */ - sifive_clint_create(memmap[MICROCHIP_PFSOC_CLINT].base, - memmap[MICROCHIP_PFSOC_CLINT].size, 0, ms->smp.cpus, - SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, + riscv_aclint_swi_create(memmap[MICROCHIP_PFSOC_CLINT].base, + 0, ms->smp.cpus, false); + riscv_aclint_mtimer_create( + memmap[MICROCHIP_PFSOC_CLINT].base + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, 0, ms->smp.cpus, + RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, CLINT_TIMEBASE_FREQ, false); /* L2 cache controller */ @@ -260,22 +261,11 @@ static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp) l2lim_mem); /* create PLIC hart topology configuration string */ - plic_hart_config_len = (strlen(MICROCHIP_PFSOC_PLIC_HART_CONFIG) + 1) * - ms->smp.cpus; - plic_hart_config = g_malloc0(plic_hart_config_len); - for (i = 0; i < ms->smp.cpus; i++) { - if (i != 0) { - strncat(plic_hart_config, "," MICROCHIP_PFSOC_PLIC_HART_CONFIG, - plic_hart_config_len); - } else { - strncat(plic_hart_config, "M", plic_hart_config_len); - } - plic_hart_config_len -= (strlen(MICROCHIP_PFSOC_PLIC_HART_CONFIG) + 1); - } + plic_hart_config = riscv_plic_hart_config_string(ms->smp.cpus); /* PLIC */ s->plic = sifive_plic_create(memmap[MICROCHIP_PFSOC_PLIC].base, - plic_hart_config, 0, + plic_hart_config, ms->smp.cpus, 0, MICROCHIP_PFSOC_PLIC_NUM_SOURCES, MICROCHIP_PFSOC_PLIC_NUM_PRIORITIES, MICROCHIP_PFSOC_PLIC_PRIORITY_BASE, @@ -461,7 +451,13 @@ static void microchip_icicle_kit_machine_init(MachineState *machine) MemoryRegion *mem_low_alias = g_new(MemoryRegion, 1); MemoryRegion *mem_high = g_new(MemoryRegion, 1); MemoryRegion *mem_high_alias = g_new(MemoryRegion, 1); - uint64_t mem_high_size; + uint64_t mem_low_size, mem_high_size; + hwaddr firmware_load_addr; + const char *firmware_name; + bool kernel_as_payload = false; + target_ulong firmware_end_addr, kernel_start_addr; + uint64_t kernel_entry; + uint32_t fdt_load_addr; DriveInfo *dinfo = drive_get_next(IF_SD); /* Sanity check on RAM size */ @@ -477,38 +473,38 @@ static void microchip_icicle_kit_machine_init(MachineState *machine) TYPE_MICROCHIP_PFSOC); qdev_realize(DEVICE(&s->soc), NULL, &error_abort); + /* Split RAM into low and high regions using aliases to machine->ram */ + mem_low_size = memmap[MICROCHIP_PFSOC_DRAM_LO].size; + mem_high_size = machine->ram_size - mem_low_size; + memory_region_init_alias(mem_low, NULL, + "microchip.icicle.kit.ram_low", machine->ram, + 0, mem_low_size); + 
memory_region_init_alias(mem_high, NULL, + "microchip.icicle.kit.ram_high", machine->ram, + mem_low_size, mem_high_size); + /* Register RAM */ - memory_region_init_ram(mem_low, NULL, "microchip.icicle.kit.ram_low", - memmap[MICROCHIP_PFSOC_DRAM_LO].size, - &error_fatal); - memory_region_init_alias(mem_low_alias, NULL, - "microchip.icicle.kit.ram_low.alias", - mem_low, 0, - memmap[MICROCHIP_PFSOC_DRAM_LO_ALIAS].size); memory_region_add_subregion(system_memory, memmap[MICROCHIP_PFSOC_DRAM_LO].base, mem_low); + memory_region_add_subregion(system_memory, + memmap[MICROCHIP_PFSOC_DRAM_HI].base, + mem_high); + + /* Create aliases for the low and high RAM regions */ + memory_region_init_alias(mem_low_alias, NULL, + "microchip.icicle.kit.ram_low.alias", + mem_low, 0, mem_low_size); memory_region_add_subregion(system_memory, memmap[MICROCHIP_PFSOC_DRAM_LO_ALIAS].base, mem_low_alias); - - mem_high_size = machine->ram_size - 1 * GiB; - - memory_region_init_ram(mem_high, NULL, "microchip.icicle.kit.ram_high", - mem_high_size, &error_fatal); memory_region_init_alias(mem_high_alias, NULL, "microchip.icicle.kit.ram_high.alias", mem_high, 0, mem_high_size); - memory_region_add_subregion(system_memory, - memmap[MICROCHIP_PFSOC_DRAM_HI].base, - mem_high); memory_region_add_subregion(system_memory, memmap[MICROCHIP_PFSOC_DRAM_HI_ALIAS].base, mem_high_alias); - /* Load the firmware */ - (void)riscv_find_and_load_firmware(machine, BIOS_FILENAME, RESET_VECTOR, NULL); - /* Attach an SD card */ if (dinfo) { CadenceSDHCIState *sdhci = &(s->soc.sdhci); @@ -518,6 +514,77 @@ static void microchip_icicle_kit_machine_init(MachineState *machine) &error_fatal); qdev_realize_and_unref(card, sdhci->bus, &error_fatal); } + + /* + * We use the following table to select which payload we execute. + * + * -bios | -kernel | payload + * -------+------------+-------- + * N | N | HSS + * Y | don't care | HSS + * N | Y | kernel + * + * This ensures backwards compatibility with how we used to expose -bios + * to users but allows them to run through direct kernel booting as well. + * + * When -kernel is used for direct boot, -dtb must be present to provide + * a valid device tree for the board, as we don't generate a device tree. 
+ */ + + if (machine->kernel_filename && machine->dtb) { + int fdt_size; + machine->fdt = load_device_tree(machine->dtb, &fdt_size); + if (!machine->fdt) { + error_report("load_device_tree() failed"); + exit(1); + } + + firmware_name = RISCV64_BIOS_BIN; + firmware_load_addr = memmap[MICROCHIP_PFSOC_DRAM_LO].base; + kernel_as_payload = true; + } + + if (!kernel_as_payload) { + firmware_name = BIOS_FILENAME; + firmware_load_addr = RESET_VECTOR; + } + + /* Load the firmware */ + firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name, + firmware_load_addr, NULL); + + if (kernel_as_payload) { + kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus, + firmware_end_addr); + + kernel_entry = riscv_load_kernel(machine->kernel_filename, + kernel_start_addr, NULL); + + if (machine->initrd_filename) { + hwaddr start; + hwaddr end = riscv_load_initrd(machine->initrd_filename, + machine->ram_size, kernel_entry, + &start); + qemu_fdt_setprop_cell(machine->fdt, "/chosen", + "linux,initrd-start", start); + qemu_fdt_setprop_cell(machine->fdt, "/chosen", + "linux,initrd-end", end); + } + + if (machine->kernel_cmdline) { + qemu_fdt_setprop_string(machine->fdt, "/chosen", + "bootargs", machine->kernel_cmdline); + } + + /* Compute the fdt load address in dram */ + fdt_load_addr = riscv_load_fdt(memmap[MICROCHIP_PFSOC_DRAM_LO].base, + machine->ram_size, machine->fdt); + /* Load the reset vector */ + riscv_setup_rom_reset_vec(machine, &s->soc.u_cpus, firmware_load_addr, + memmap[MICROCHIP_PFSOC_ENVM_DATA].base, + memmap[MICROCHIP_PFSOC_ENVM_DATA].size, + kernel_entry, fdt_load_addr, machine->fdt); + } } static void microchip_icicle_kit_machine_class_init(ObjectClass *oc, void *data) @@ -530,6 +597,7 @@ static void microchip_icicle_kit_machine_class_init(ObjectClass *oc, void *data) MICROCHIP_PFSOC_COMPUTE_CPU_COUNT; mc->min_cpus = MICROCHIP_PFSOC_MANAGEMENT_CPU_COUNT + 1; mc->default_cpus = mc->min_cpus; + mc->default_ram_id = "microchip.icicle.kit.ram"; /* * Map 513 MiB high memory, the minimum required high memory size, because diff --git a/hw/riscv/numa.c b/hw/riscv/numa.c index 4f92307102f..7fe92d402f6 100644 --- a/hw/riscv/numa.c +++ b/hw/riscv/numa.c @@ -18,7 +18,6 @@ #include "qemu/osdep.h" #include "qemu/units.h" -#include "qemu/log.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "hw/boards.h" diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c index e168bffe692..c531450b9fd 100644 --- a/hw/riscv/opentitan.c +++ b/hw/riscv/opentitan.c @@ -19,12 +19,12 @@ */ #include "qemu/osdep.h" +#include "qemu/cutils.h" #include "hw/riscv/opentitan.h" #include "qapi/error.h" #include "hw/boards.h" #include "hw/misc/unimp.h" #include "hw/riscv/boot.h" -#include "exec/address-spaces.h" #include "qemu/units.h" #include "sysemu/sysemu.h" @@ -37,46 +37,53 @@ static const MemMapEntry ibex_memmap[] = { [IBEX_DEV_SPI] = { 0x40050000, 0x1000 }, [IBEX_DEV_I2C] = { 0x40080000, 0x1000 }, [IBEX_DEV_PATTGEN] = { 0x400e0000, 0x1000 }, - [IBEX_DEV_RV_TIMER] = { 0x40100000, 0x1000 }, + [IBEX_DEV_TIMER] = { 0x40100000, 0x1000 }, [IBEX_DEV_SENSOR_CTRL] = { 0x40110000, 0x1000 }, [IBEX_DEV_OTP_CTRL] = { 0x40130000, 0x4000 }, + [IBEX_DEV_USBDEV] = { 0x40150000, 0x1000 }, [IBEX_DEV_PWRMGR] = { 0x40400000, 0x1000 }, [IBEX_DEV_RSTMGR] = { 0x40410000, 0x1000 }, [IBEX_DEV_CLKMGR] = { 0x40420000, 0x1000 }, [IBEX_DEV_PINMUX] = { 0x40460000, 0x1000 }, [IBEX_DEV_PADCTRL] = { 0x40470000, 0x1000 }, - [IBEX_DEV_USBDEV] = { 0x40500000, 0x1000 }, [IBEX_DEV_FLASH_CTRL] = { 0x41000000, 0x1000 }, - 
[IBEX_DEV_PLIC] = { 0x41010000, 0x1000 }, [IBEX_DEV_AES] = { 0x41100000, 0x1000 }, [IBEX_DEV_HMAC] = { 0x41110000, 0x1000 }, [IBEX_DEV_KMAC] = { 0x41120000, 0x1000 }, - [IBEX_DEV_KEYMGR] = { 0x41130000, 0x1000 }, + [IBEX_DEV_OTBN] = { 0x41130000, 0x10000 }, + [IBEX_DEV_KEYMGR] = { 0x41140000, 0x1000 }, [IBEX_DEV_CSRNG] = { 0x41150000, 0x1000 }, [IBEX_DEV_ENTROPY] = { 0x41160000, 0x1000 }, [IBEX_DEV_EDNO] = { 0x41170000, 0x1000 }, [IBEX_DEV_EDN1] = { 0x41180000, 0x1000 }, [IBEX_DEV_ALERT_HANDLER] = { 0x411b0000, 0x1000 }, [IBEX_DEV_NMI_GEN] = { 0x411c0000, 0x1000 }, - [IBEX_DEV_OTBN] = { 0x411d0000, 0x10000 }, + [IBEX_DEV_PERI] = { 0x411f0000, 0x10000 }, + [IBEX_DEV_PLIC] = { 0x48000000, 0x4005000 }, + [IBEX_DEV_FLASH_VIRTUAL] = { 0x80000000, 0x80000 }, }; static void opentitan_board_init(MachineState *machine) { + MachineClass *mc = MACHINE_GET_CLASS(machine); const MemMapEntry *memmap = ibex_memmap; OpenTitanState *s = g_new0(OpenTitanState, 1); MemoryRegion *sys_mem = get_system_memory(); - MemoryRegion *main_mem = g_new(MemoryRegion, 1); + + if (machine->ram_size != mc->default_ram_size) { + char *sz = size_to_str(mc->default_ram_size); + error_report("Invalid RAM size, should be %s", sz); + g_free(sz); + exit(EXIT_FAILURE); + } /* Initialize SoC */ object_initialize_child(OBJECT(machine), "soc", &s->soc, TYPE_RISCV_IBEX_SOC); qdev_realize(DEVICE(&s->soc), NULL, &error_abort); - memory_region_init_ram(main_mem, NULL, "riscv.lowrisc.ibex.ram", - memmap[IBEX_DEV_RAM].size, &error_fatal); memory_region_add_subregion(sys_mem, - memmap[IBEX_DEV_RAM].base, main_mem); + memmap[IBEX_DEV_RAM].base, machine->ram); if (machine->firmware) { riscv_load_firmware(machine->firmware, memmap[IBEX_DEV_RAM].base, NULL); @@ -94,6 +101,8 @@ static void opentitan_machine_init(MachineClass *mc) mc->init = opentitan_board_init; mc->max_cpus = 1; mc->default_cpu_type = TYPE_RISCV_CPU_IBEX; + mc->default_ram_id = "riscv.lowrisc.ibex.ram"; + mc->default_ram_size = ibex_memmap[IBEX_DEV_RAM].size; } DEFINE_MACHINE("opentitan", opentitan_machine_init) @@ -104,9 +113,11 @@ static void lowrisc_ibex_soc_init(Object *obj) object_initialize_child(obj, "cpus", &s->cpus, TYPE_RISCV_HART_ARRAY); - object_initialize_child(obj, "plic", &s->plic, TYPE_IBEX_PLIC); + object_initialize_child(obj, "plic", &s->plic, TYPE_SIFIVE_PLIC); object_initialize_child(obj, "uart", &s->uart, TYPE_IBEX_UART); + + object_initialize_child(obj, "timer", &s->timer, TYPE_IBEX_TIMER); } static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) @@ -115,12 +126,13 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) MachineState *ms = MACHINE(qdev_get_machine()); LowRISCIbexSoCState *s = RISCV_IBEX_SOC(dev_soc); MemoryRegion *sys_mem = get_system_memory(); + int i; object_property_set_str(OBJECT(&s->cpus), "cpu-type", ms->cpu_type, &error_abort); object_property_set_int(OBJECT(&s->cpus), "num-harts", ms->smp.cpus, &error_abort); - object_property_set_int(OBJECT(&s->cpus), "resetvec", 0x8090, &error_abort); + object_property_set_int(OBJECT(&s->cpus), "resetvec", 0x8080, &error_abort); sysbus_realize(SYS_BUS_DEVICE(&s->cpus), &error_abort); /* Boot ROM */ @@ -132,15 +144,39 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) /* Flash memory */ memory_region_init_rom(&s->flash_mem, OBJECT(dev_soc), "riscv.lowrisc.ibex.flash", memmap[IBEX_DEV_FLASH].size, &error_fatal); + memory_region_init_alias(&s->flash_alias, OBJECT(dev_soc), + "riscv.lowrisc.ibex.flash_virtual", &s->flash_mem, 0, + 
memmap[IBEX_DEV_FLASH_VIRTUAL].size); memory_region_add_subregion(sys_mem, memmap[IBEX_DEV_FLASH].base, &s->flash_mem); + memory_region_add_subregion(sys_mem, memmap[IBEX_DEV_FLASH_VIRTUAL].base, + &s->flash_alias); /* PLIC */ + qdev_prop_set_string(DEVICE(&s->plic), "hart-config", "M"); + qdev_prop_set_uint32(DEVICE(&s->plic), "hartid-base", 0); + qdev_prop_set_uint32(DEVICE(&s->plic), "num-sources", 180); + qdev_prop_set_uint32(DEVICE(&s->plic), "num-priorities", 3); + qdev_prop_set_uint32(DEVICE(&s->plic), "priority-base", 0x00); + qdev_prop_set_uint32(DEVICE(&s->plic), "pending-base", 0x1000); + qdev_prop_set_uint32(DEVICE(&s->plic), "enable-base", 0x2000); + qdev_prop_set_uint32(DEVICE(&s->plic), "enable-stride", 0x18); + qdev_prop_set_uint32(DEVICE(&s->plic), "context-base", 0x200000); + qdev_prop_set_uint32(DEVICE(&s->plic), "context-stride", 8); + qdev_prop_set_uint32(DEVICE(&s->plic), "aperture-size", memmap[IBEX_DEV_PLIC].size); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->plic), errp)) { return; } sysbus_mmio_map(SYS_BUS_DEVICE(&s->plic), 0, memmap[IBEX_DEV_PLIC].base); + for (i = 0; i < ms->smp.cpus; i++) { + CPUState *cpu = qemu_get_cpu(i); + + qdev_connect_gpio_out(DEVICE(&s->plic), ms->smp.cpus + i, + qdev_get_gpio_in(DEVICE(cpu), IRQ_M_EXT)); + } + /* UART */ qdev_prop_set_chr(DEVICE(&(s->uart)), "chardev", serial_hd(0)); if (!sysbus_realize(SYS_BUS_DEVICE(&s->uart), errp)) { @@ -149,16 +185,27 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) sysbus_mmio_map(SYS_BUS_DEVICE(&s->uart), 0, memmap[IBEX_DEV_UART].base); sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart), 0, qdev_get_gpio_in(DEVICE(&s->plic), - IBEX_UART_TX_WATERMARK_IRQ)); + IBEX_UART0_TX_WATERMARK_IRQ)); sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart), 1, qdev_get_gpio_in(DEVICE(&s->plic), - IBEX_UART_RX_WATERMARK_IRQ)); + IBEX_UART0_RX_WATERMARK_IRQ)); sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart), 2, qdev_get_gpio_in(DEVICE(&s->plic), - IBEX_UART_TX_EMPTY_IRQ)); + IBEX_UART0_TX_EMPTY_IRQ)); sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart), 3, qdev_get_gpio_in(DEVICE(&s->plic), - IBEX_UART_RX_OVERFLOW_IRQ)); + IBEX_UART0_RX_OVERFLOW_IRQ)); + + if (!sysbus_realize(SYS_BUS_DEVICE(&s->timer), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->timer), 0, memmap[IBEX_DEV_TIMER].base); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->timer), + 0, qdev_get_gpio_in(DEVICE(&s->plic), + IBEX_TIMER_TIMEREXPIRED0_0)); + qdev_connect_gpio_out(DEVICE(&s->timer), 0, + qdev_get_gpio_in(DEVICE(qemu_get_cpu(0)), + IRQ_M_TIMER)); create_unimplemented_device("riscv.lowrisc.ibex.gpio", memmap[IBEX_DEV_GPIO].base, memmap[IBEX_DEV_GPIO].size); @@ -168,8 +215,6 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) memmap[IBEX_DEV_I2C].base, memmap[IBEX_DEV_I2C].size); create_unimplemented_device("riscv.lowrisc.ibex.pattgen", memmap[IBEX_DEV_PATTGEN].base, memmap[IBEX_DEV_PATTGEN].size); - create_unimplemented_device("riscv.lowrisc.ibex.rv_timer", - memmap[IBEX_DEV_RV_TIMER].base, memmap[IBEX_DEV_RV_TIMER].size); create_unimplemented_device("riscv.lowrisc.ibex.sensor_ctrl", memmap[IBEX_DEV_SENSOR_CTRL].base, memmap[IBEX_DEV_SENSOR_CTRL].size); create_unimplemented_device("riscv.lowrisc.ibex.otp_ctrl", @@ -210,6 +255,8 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) memmap[IBEX_DEV_NMI_GEN].base, memmap[IBEX_DEV_NMI_GEN].size); create_unimplemented_device("riscv.lowrisc.ibex.otbn", memmap[IBEX_DEV_OTBN].base, memmap[IBEX_DEV_OTBN].size); + 
create_unimplemented_device("riscv.lowrisc.ibex.peri", + memmap[IBEX_DEV_PERI].base, memmap[IBEX_DEV_PERI].size); } static void lowrisc_ibex_soc_class_init(ObjectClass *oc, void *data) diff --git a/hw/riscv/shakti_c.c b/hw/riscv/shakti_c.c new file mode 100644 index 00000000000..90e2cf609f3 --- /dev/null +++ b/hw/riscv/shakti_c.c @@ -0,0 +1,190 @@ +/* + * Shakti C-class SoC emulation + * + * Copyright (c) 2021 Vijai Kumar K + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "qemu/osdep.h" +#include "hw/boards.h" +#include "hw/riscv/shakti_c.h" +#include "qapi/error.h" +#include "hw/intc/sifive_plic.h" +#include "hw/intc/riscv_aclint.h" +#include "sysemu/sysemu.h" +#include "hw/qdev-properties.h" +#include "exec/address-spaces.h" +#include "hw/riscv/boot.h" + + +static const struct MemmapEntry { + hwaddr base; + hwaddr size; +} shakti_c_memmap[] = { + [SHAKTI_C_ROM] = { 0x00001000, 0x2000 }, + [SHAKTI_C_RAM] = { 0x80000000, 0x0 }, + [SHAKTI_C_UART] = { 0x00011300, 0x00040 }, + [SHAKTI_C_GPIO] = { 0x020d0000, 0x00100 }, + [SHAKTI_C_PLIC] = { 0x0c000000, 0x20000 }, + [SHAKTI_C_CLINT] = { 0x02000000, 0xc0000 }, + [SHAKTI_C_I2C] = { 0x20c00000, 0x00100 }, +}; + +static void shakti_c_machine_state_init(MachineState *mstate) +{ + ShaktiCMachineState *sms = RISCV_SHAKTI_MACHINE(mstate); + MemoryRegion *system_memory = get_system_memory(); + + /* Allow only Shakti C CPU for this platform */ + if (strcmp(mstate->cpu_type, TYPE_RISCV_CPU_SHAKTI_C) != 0) { + error_report("This board can only be used with Shakti C CPU"); + exit(1); + } + + /* Initialize SoC */ + object_initialize_child(OBJECT(mstate), "soc", &sms->soc, + TYPE_RISCV_SHAKTI_SOC); + qdev_realize(DEVICE(&sms->soc), NULL, &error_abort); + + /* register RAM */ + memory_region_add_subregion(system_memory, + shakti_c_memmap[SHAKTI_C_RAM].base, + mstate->ram); + + /* ROM reset vector */ + riscv_setup_rom_reset_vec(mstate, &sms->soc.cpus, + shakti_c_memmap[SHAKTI_C_RAM].base, + shakti_c_memmap[SHAKTI_C_ROM].base, + shakti_c_memmap[SHAKTI_C_ROM].size, 0, 0, + NULL); + if (mstate->firmware) { + riscv_load_firmware(mstate->firmware, + shakti_c_memmap[SHAKTI_C_RAM].base, + NULL); + } +} + +static void shakti_c_machine_instance_init(Object *obj) +{ +} + +static void shakti_c_machine_class_init(ObjectClass *klass, void *data) +{ + MachineClass *mc = MACHINE_CLASS(klass); + mc->desc = "RISC-V Board compatible with Shakti SDK"; + mc->init = shakti_c_machine_state_init; + mc->default_cpu_type = TYPE_RISCV_CPU_SHAKTI_C; + mc->default_ram_id = "riscv.shakti.c.ram"; +} + +static const TypeInfo shakti_c_machine_type_info = { + .name = TYPE_RISCV_SHAKTI_MACHINE, + .parent = TYPE_MACHINE, + .class_init = shakti_c_machine_class_init, + .instance_init = shakti_c_machine_instance_init, + .instance_size = sizeof(ShaktiCMachineState), +}; + +static void shakti_c_machine_type_info_register(void) +{ + type_register_static(&shakti_c_machine_type_info); +} +type_init(shakti_c_machine_type_info_register) + +static void 
shakti_c_soc_state_realize(DeviceState *dev, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + ShaktiCSoCState *sss = RISCV_SHAKTI_SOC(dev); + MemoryRegion *system_memory = get_system_memory(); + + sysbus_realize(SYS_BUS_DEVICE(&sss->cpus), &error_abort); + + sss->plic = sifive_plic_create(shakti_c_memmap[SHAKTI_C_PLIC].base, + (char *)SHAKTI_C_PLIC_HART_CONFIG, ms->smp.cpus, 0, + SHAKTI_C_PLIC_NUM_SOURCES, + SHAKTI_C_PLIC_NUM_PRIORITIES, + SHAKTI_C_PLIC_PRIORITY_BASE, + SHAKTI_C_PLIC_PENDING_BASE, + SHAKTI_C_PLIC_ENABLE_BASE, + SHAKTI_C_PLIC_ENABLE_STRIDE, + SHAKTI_C_PLIC_CONTEXT_BASE, + SHAKTI_C_PLIC_CONTEXT_STRIDE, + shakti_c_memmap[SHAKTI_C_PLIC].size); + + riscv_aclint_swi_create(shakti_c_memmap[SHAKTI_C_CLINT].base, + 0, 1, false); + riscv_aclint_mtimer_create(shakti_c_memmap[SHAKTI_C_CLINT].base + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, 0, 1, + RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, false); + + qdev_prop_set_chr(DEVICE(&(sss->uart)), "chardev", serial_hd(0)); + if (!sysbus_realize(SYS_BUS_DEVICE(&sss->uart), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&sss->uart), 0, + shakti_c_memmap[SHAKTI_C_UART].base); + + /* ROM */ + memory_region_init_rom(&sss->rom, OBJECT(dev), "riscv.shakti.c.rom", + shakti_c_memmap[SHAKTI_C_ROM].size, &error_fatal); + memory_region_add_subregion(system_memory, + shakti_c_memmap[SHAKTI_C_ROM].base, &sss->rom); +} + +static void shakti_c_soc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = shakti_c_soc_state_realize; + /* + * Reasons: + * - Creates CPUS in riscv_hart_realize(), and can create unintended + * CPUs + * - Uses serial_hds in realize function, thus can't be used twice + */ + dc->user_creatable = false; +} + +static void shakti_c_soc_instance_init(Object *obj) +{ + ShaktiCSoCState *sss = RISCV_SHAKTI_SOC(obj); + + object_initialize_child(obj, "cpus", &sss->cpus, TYPE_RISCV_HART_ARRAY); + object_initialize_child(obj, "uart", &sss->uart, TYPE_SHAKTI_UART); + + /* + * CPU type is fixed and we are not supporting passing from commandline yet. + * So let it be in instance_init. 
When supported, this should use ms->cpu_type + * instead of TYPE_RISCV_CPU_SHAKTI_C. + */ + object_property_set_str(OBJECT(&sss->cpus), "cpu-type", + TYPE_RISCV_CPU_SHAKTI_C, &error_abort); + object_property_set_int(OBJECT(&sss->cpus), "num-harts", 1, + &error_abort); +} + +static const TypeInfo shakti_c_type_info = { + .name = TYPE_RISCV_SHAKTI_SOC, + .parent = TYPE_DEVICE, + .class_init = shakti_c_soc_class_init, + .instance_init = shakti_c_soc_instance_init, + .instance_size = sizeof(ShaktiCSoCState), +}; + +static void shakti_c_type_info_register(void) +{ + type_register_static(&shakti_c_type_info); +} +type_init(shakti_c_type_info_register) diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c index f939bcf9ea6..9b206407a65 100644 --- a/hw/riscv/sifive_e.c +++ b/hw/riscv/sifive_e.c @@ -29,7 +29,7 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" +#include "qemu/cutils.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "hw/boards.h" @@ -42,15 +42,13 @@ #include "hw/riscv/sifive_e.h" #include "hw/riscv/boot.h" #include "hw/char/sifive_uart.h" -#include "hw/intc/sifive_clint.h" +#include "hw/intc/riscv_aclint.h" #include "hw/intc/sifive_plic.h" #include "hw/misc/sifive_e_prci.h" #include "chardev/char.h" -#include "sysemu/arch_init.h" #include "sysemu/sysemu.h" -#include "exec/address-spaces.h" -static MemMapEntry sifive_e_memmap[] = { +static const MemMapEntry sifive_e_memmap[] = { [SIFIVE_E_DEV_DEBUG] = { 0x0, 0x1000 }, [SIFIVE_E_DEV_MROM] = { 0x1000, 0x2000 }, [SIFIVE_E_DEV_OTP] = { 0x20000, 0x2000 }, @@ -74,22 +72,27 @@ static MemMapEntry sifive_e_memmap[] = { static void sifive_e_machine_init(MachineState *machine) { + MachineClass *mc = MACHINE_GET_CLASS(machine); const MemMapEntry *memmap = sifive_e_memmap; SiFiveEState *s = RISCV_E_MACHINE(machine); MemoryRegion *sys_mem = get_system_memory(); - MemoryRegion *main_mem = g_new(MemoryRegion, 1); int i; + if (machine->ram_size != mc->default_ram_size) { + char *sz = size_to_str(mc->default_ram_size); + error_report("Invalid RAM size, should be %s", sz); + g_free(sz); + exit(EXIT_FAILURE); + } + /* Initialize SoC */ object_initialize_child(OBJECT(machine), "soc", &s->soc, TYPE_RISCV_E_SOC); qdev_realize(DEVICE(&s->soc), NULL, &error_abort); /* Data Tightly Integrated Memory */ - memory_region_init_ram(main_mem, NULL, "riscv.sifive.e.ram", - memmap[SIFIVE_E_DEV_DTIM].size, &error_fatal); memory_region_add_subregion(sys_mem, - memmap[SIFIVE_E_DEV_DTIM].base, main_mem); + memmap[SIFIVE_E_DEV_DTIM].base, machine->ram); /* Mask ROM reset vector */ uint32_t reset_vec[4]; @@ -145,6 +148,8 @@ static void sifive_e_machine_class_init(ObjectClass *oc, void *data) mc->init = sifive_e_machine_init; mc->max_cpus = 1; mc->default_cpu_type = SIFIVE_E_CPU; + mc->default_ram_id = "riscv.sifive.e.ram"; + mc->default_ram_size = sifive_e_memmap[SIFIVE_E_DEV_DTIM].size; object_class_property_add_bool(oc, "revb", sifive_e_machine_get_revb, sifive_e_machine_set_revb); @@ -200,7 +205,7 @@ static void sifive_e_soc_realize(DeviceState *dev, Error **errp) /* MMIO */ s->plic = sifive_plic_create(memmap[SIFIVE_E_DEV_PLIC].base, - (char *)SIFIVE_E_PLIC_HART_CONFIG, 0, + (char *)SIFIVE_E_PLIC_HART_CONFIG, ms->smp.cpus, 0, SIFIVE_E_PLIC_NUM_SOURCES, SIFIVE_E_PLIC_NUM_PRIORITIES, SIFIVE_E_PLIC_PRIORITY_BASE, @@ -210,10 +215,13 @@ static void sifive_e_soc_realize(DeviceState *dev, Error **errp) SIFIVE_E_PLIC_CONTEXT_BASE, SIFIVE_E_PLIC_CONTEXT_STRIDE, memmap[SIFIVE_E_DEV_PLIC].size); - sifive_clint_create(memmap[SIFIVE_E_DEV_CLINT].base, - 
memmap[SIFIVE_E_DEV_CLINT].size, 0, ms->smp.cpus, - SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, - SIFIVE_CLINT_TIMEBASE_FREQ, false); + riscv_aclint_swi_create(memmap[SIFIVE_E_DEV_CLINT].base, + 0, ms->smp.cpus, false); + riscv_aclint_mtimer_create(memmap[SIFIVE_E_DEV_CLINT].base + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, 0, ms->smp.cpus, + RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, false); create_unimplemented_device("riscv.sifive.e.aon", memmap[SIFIVE_E_DEV_AON].base, memmap[SIFIVE_E_DEV_AON].size); sifive_e_prci_create(memmap[SIFIVE_E_DEV_PRCI].base); diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c index 6fa730b6674..8514c99e51d 100644 --- a/hw/riscv/sifive_u.c +++ b/hw/riscv/sifive_u.c @@ -17,6 +17,7 @@ * 7) DMA (Direct Memory Access Controller) * 8) SPI0 connected to an SPI flash * 9) SPI2 connected to an SD card + * 10) PWM0 and PWM1 * * This board currently generates devicetree dynamically that indicates at least * two harts and up to five harts. @@ -35,7 +36,6 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "qapi/visitor.h" @@ -52,17 +52,19 @@ #include "hw/riscv/sifive_u.h" #include "hw/riscv/boot.h" #include "hw/char/sifive_uart.h" -#include "hw/intc/sifive_clint.h" +#include "hw/intc/riscv_aclint.h" #include "hw/intc/sifive_plic.h" #include "chardev/char.h" #include "net/eth.h" -#include "sysemu/arch_init.h" #include "sysemu/device_tree.h" #include "sysemu/runstate.h" #include "sysemu/sysemu.h" #include +/* CLINT timebase frequency */ +#define CLINT_TIMEBASE_FREQ 1000000 + static const MemMapEntry sifive_u_memmap[] = { [SIFIVE_U_DEV_DEBUG] = { 0x0, 0x100 }, [SIFIVE_U_DEV_MROM] = { 0x1000, 0xf000 }, @@ -74,6 +76,8 @@ static const MemMapEntry sifive_u_memmap[] = { [SIFIVE_U_DEV_PRCI] = { 0x10000000, 0x1000 }, [SIFIVE_U_DEV_UART0] = { 0x10010000, 0x1000 }, [SIFIVE_U_DEV_UART1] = { 0x10011000, 0x1000 }, + [SIFIVE_U_DEV_PWM0] = { 0x10020000, 0x1000 }, + [SIFIVE_U_DEV_PWM1] = { 0x10021000, 0x1000 }, [SIFIVE_U_DEV_QSPI0] = { 0x10040000, 0x1000 }, [SIFIVE_U_DEV_QSPI2] = { 0x10050000, 0x1000 }, [SIFIVE_U_DEV_GPIO] = { 0x10060000, 0x1000 }, @@ -96,9 +100,15 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap, int cpu; uint32_t *cells; char *nodename; - char ethclk_names[] = "pclk\0hclk"; uint32_t plic_phandle, prci_phandle, gpio_phandle, phandle = 1; uint32_t hfclk_phandle, rtcclk_phandle, phy_phandle; + static const char * const ethclk_names[2] = { "pclk", "hclk" }; + static const char * const clint_compat[2] = { + "sifive,clint0", "riscv,clint0" + }; + static const char * const plic_compat[2] = { + "sifive,plic-1.0.0", "riscv,plic0" + }; if (ms->dtb) { fdt = s->fdt = load_device_tree(ms->dtb, &s->fdt_size); @@ -160,7 +170,7 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap, qemu_fdt_add_subnode(fdt, "/cpus"); qemu_fdt_setprop_cell(fdt, "/cpus", "timebase-frequency", - SIFIVE_CLINT_TIMEBASE_FREQ); + CLINT_TIMEBASE_FREQ); qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0x0); qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 0x1); @@ -210,7 +220,8 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap, nodename = g_strdup_printf("/soc/clint@%lx", (long)memmap[SIFIVE_U_DEV_CLINT].base); qemu_fdt_add_subnode(fdt, nodename); - qemu_fdt_setprop_string(fdt, nodename, "compatible", "riscv,clint0"); + qemu_fdt_setprop_string_array(fdt, nodename, "compatible", + (char 
**)&clint_compat, ARRAY_SIZE(clint_compat)); qemu_fdt_setprop_cells(fdt, nodename, "reg", 0x0, memmap[SIFIVE_U_DEV_CLINT].base, 0x0, memmap[SIFIVE_U_DEV_CLINT].size); @@ -267,7 +278,8 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap, (long)memmap[SIFIVE_U_DEV_PLIC].base); qemu_fdt_add_subnode(fdt, nodename); qemu_fdt_setprop_cell(fdt, nodename, "#interrupt-cells", 1); - qemu_fdt_setprop_string(fdt, nodename, "compatible", "riscv,plic0"); + qemu_fdt_setprop_string_array(fdt, nodename, "compatible", + (char **)&plic_compat, ARRAY_SIZE(plic_compat)); qemu_fdt_setprop(fdt, nodename, "interrupt-controller", NULL, 0); qemu_fdt_setprop(fdt, nodename, "interrupts-extended", cells, (ms->smp.cpus * 4 - 2) * sizeof(uint32_t)); @@ -413,8 +425,8 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap, qemu_fdt_setprop_cell(fdt, nodename, "interrupts", SIFIVE_U_GEM_IRQ); qemu_fdt_setprop_cells(fdt, nodename, "clocks", prci_phandle, PRCI_CLK_GEMGXLPLL, prci_phandle, PRCI_CLK_GEMGXLPLL); - qemu_fdt_setprop(fdt, nodename, "clock-names", ethclk_names, - sizeof(ethclk_names)); + qemu_fdt_setprop_string_array(fdt, nodename, "clock-names", + (char **)ðclk_names, ARRAY_SIZE(ethclk_names)); qemu_fdt_setprop(fdt, nodename, "local-mac-address", s->soc.gem.conf.macaddr.a, ETH_ALEN); qemu_fdt_setprop_cell(fdt, nodename, "#address-cells", 1); @@ -432,6 +444,38 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap, qemu_fdt_setprop_cell(fdt, nodename, "reg", 0x0); g_free(nodename); + nodename = g_strdup_printf("/soc/pwm@%lx", + (long)memmap[SIFIVE_U_DEV_PWM0].base); + qemu_fdt_add_subnode(fdt, nodename); + qemu_fdt_setprop_string(fdt, nodename, "compatible", "sifive,pwm0"); + qemu_fdt_setprop_cells(fdt, nodename, "reg", + 0x0, memmap[SIFIVE_U_DEV_PWM0].base, + 0x0, memmap[SIFIVE_U_DEV_PWM0].size); + qemu_fdt_setprop_cell(fdt, nodename, "interrupt-parent", plic_phandle); + qemu_fdt_setprop_cells(fdt, nodename, "interrupts", + SIFIVE_U_PWM0_IRQ0, SIFIVE_U_PWM0_IRQ1, + SIFIVE_U_PWM0_IRQ2, SIFIVE_U_PWM0_IRQ3); + qemu_fdt_setprop_cells(fdt, nodename, "clocks", + prci_phandle, PRCI_CLK_TLCLK); + qemu_fdt_setprop_cell(fdt, nodename, "#pwm-cells", 0); + g_free(nodename); + + nodename = g_strdup_printf("/soc/pwm@%lx", + (long)memmap[SIFIVE_U_DEV_PWM1].base); + qemu_fdt_add_subnode(fdt, nodename); + qemu_fdt_setprop_string(fdt, nodename, "compatible", "sifive,pwm0"); + qemu_fdt_setprop_cells(fdt, nodename, "reg", + 0x0, memmap[SIFIVE_U_DEV_PWM1].base, + 0x0, memmap[SIFIVE_U_DEV_PWM1].size); + qemu_fdt_setprop_cell(fdt, nodename, "interrupt-parent", plic_phandle); + qemu_fdt_setprop_cells(fdt, nodename, "interrupts", + SIFIVE_U_PWM1_IRQ0, SIFIVE_U_PWM1_IRQ1, + SIFIVE_U_PWM1_IRQ2, SIFIVE_U_PWM1_IRQ3); + qemu_fdt_setprop_cells(fdt, nodename, "clocks", + prci_phandle, PRCI_CLK_TLCLK); + qemu_fdt_setprop_cell(fdt, nodename, "#pwm-cells", 0); + g_free(nodename); + nodename = g_strdup_printf("/soc/serial@%lx", (long)memmap[SIFIVE_U_DEV_UART1].base); qemu_fdt_add_subnode(fdt, nodename); @@ -484,7 +528,6 @@ static void sifive_u_machine_init(MachineState *machine) const MemMapEntry *memmap = sifive_u_memmap; SiFiveUState *s = RISCV_U_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); - MemoryRegion *main_mem = g_new(MemoryRegion, 1); MemoryRegion *flash0 = g_new(MemoryRegion, 1); target_ulong start_addr = memmap[SIFIVE_U_DEV_DRAM].base; target_ulong firmware_end_addr, kernel_start_addr; @@ -505,10 +548,8 @@ static void sifive_u_machine_init(MachineState *machine) 
qdev_realize(DEVICE(&s->soc), NULL, &error_abort); /* register RAM */ - memory_region_init_ram(main_mem, NULL, "riscv.sifive.u.ram", - machine->ram_size, &error_fatal); memory_region_add_subregion(system_memory, memmap[SIFIVE_U_DEV_DRAM].base, - main_mem); + machine->ram); /* register QSPI0 Flash */ memory_region_init_ram(flash0, NULL, "riscv.sifive.u.flash0", @@ -558,7 +599,7 @@ static void sifive_u_machine_init(MachineState *machine) /* Use a purecap BBL as the BIOS for CHERI */ "bbl-riscv32cheri-generic-fw_jump.bin", #else - "opensbi-riscv32-generic-fw_dynamic.bin", + RISCV32_BIOS_BIN, #endif start_addr, NULL); } else { @@ -568,7 +609,7 @@ static void sifive_u_machine_init(MachineState *machine) /* Use a purecap BBL as the BIOS for CHERI */ "bbl-riscv64cheri-generic-fw_jump.bin", #else - "opensbi-riscv64-generic-fw_dynamic.bin", + RISCV64_BIOS_BIN, #endif start_addr, NULL); } @@ -606,10 +647,10 @@ static void sifive_u_machine_init(MachineState *machine) } /* reset vector */ - uint32_t reset_vec[11] = { + uint32_t reset_vec[12] = { s->msel, /* MSEL pin state */ 0x00000297, /* 1: auipc t0, %pcrel_hi(fw_dyn) */ - 0x02828613, /* addi a2, t0, %pcrel_lo(1b) */ + 0x02c28613, /* addi a2, t0, %pcrel_lo(1b) */ 0xf1402573, /* csrr a0, mhartid */ 0, 0, @@ -617,6 +658,7 @@ static void sifive_u_machine_init(MachineState *machine) start_addr, /* start: .dword */ start_addr_hi32, fdt_load_addr, /* fdt_laddr: .dword */ + 0x00000000, 0x00000000, /* fw_dyn: */ }; @@ -717,6 +759,7 @@ static void sifive_u_machine_class_init(ObjectClass *oc, void *data) mc->min_cpus = SIFIVE_U_MANAGEMENT_CPU_COUNT + 1; mc->default_cpu_type = SIFIVE_U_CPU; mc->default_cpus = mc->min_cpus; + mc->default_ram_id = "riscv.sifive.u.ram"; object_class_property_add_bool(oc, "start-in-flash", sifive_u_machine_get_start_in_flash, @@ -769,6 +812,8 @@ static void sifive_u_soc_instance_init(Object *obj) object_initialize_child(obj, "pdma", &s->dma, TYPE_SIFIVE_PDMA); object_initialize_child(obj, "spi0", &s->spi0, TYPE_SIFIVE_SPI); object_initialize_child(obj, "spi2", &s->spi2, TYPE_SIFIVE_SPI); + object_initialize_child(obj, "pwm0", &s->pwm[0], TYPE_SIFIVE_PWM); + object_initialize_child(obj, "pwm1", &s->pwm[1], TYPE_SIFIVE_PWM); } static void sifive_u_soc_realize(DeviceState *dev, Error **errp) @@ -780,8 +825,7 @@ static void sifive_u_soc_realize(DeviceState *dev, Error **errp) MemoryRegion *mask_rom = g_new(MemoryRegion, 1); MemoryRegion *l2lim_mem = g_new(MemoryRegion, 1); char *plic_hart_config; - size_t plic_hart_config_len; - int i; + int i, j; NICInfo *nd = &nd_table[0]; qdev_prop_set_uint32(DEVICE(&s->u_cpus), "num-harts", ms->smp.cpus - 1); @@ -821,22 +865,11 @@ static void sifive_u_soc_realize(DeviceState *dev, Error **errp) l2lim_mem); /* create PLIC hart topology configuration string */ - plic_hart_config_len = (strlen(SIFIVE_U_PLIC_HART_CONFIG) + 1) * - ms->smp.cpus; - plic_hart_config = g_malloc0(plic_hart_config_len); - for (i = 0; i < ms->smp.cpus; i++) { - if (i != 0) { - strncat(plic_hart_config, "," SIFIVE_U_PLIC_HART_CONFIG, - plic_hart_config_len); - } else { - strncat(plic_hart_config, "M", plic_hart_config_len); - } - plic_hart_config_len -= (strlen(SIFIVE_U_PLIC_HART_CONFIG) + 1); - } + plic_hart_config = riscv_plic_hart_config_string(ms->smp.cpus); /* MMIO */ s->plic = sifive_plic_create(memmap[SIFIVE_U_DEV_PLIC].base, - plic_hart_config, 0, + plic_hart_config, ms->smp.cpus, 0, SIFIVE_U_PLIC_NUM_SOURCES, SIFIVE_U_PLIC_NUM_PRIORITIES, SIFIVE_U_PLIC_PRIORITY_BASE, @@ -851,10 +884,13 @@ static void 
sifive_u_soc_realize(DeviceState *dev, Error **errp) serial_hd(0), qdev_get_gpio_in(DEVICE(s->plic), SIFIVE_U_UART0_IRQ)); sifive_uart_create(system_memory, memmap[SIFIVE_U_DEV_UART1].base, serial_hd(1), qdev_get_gpio_in(DEVICE(s->plic), SIFIVE_U_UART1_IRQ)); - sifive_clint_create(memmap[SIFIVE_U_DEV_CLINT].base, - memmap[SIFIVE_U_DEV_CLINT].size, 0, ms->smp.cpus, - SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, - SIFIVE_CLINT_TIMEBASE_FREQ, false); + riscv_aclint_swi_create(memmap[SIFIVE_U_DEV_CLINT].base, 0, + ms->smp.cpus, false); + riscv_aclint_mtimer_create(memmap[SIFIVE_U_DEV_CLINT].base + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, 0, ms->smp.cpus, + RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, + CLINT_TIMEBASE_FREQ, false); if (!sysbus_realize(SYS_BUS_DEVICE(&s->prci), errp)) { return; @@ -908,6 +944,22 @@ static void sifive_u_soc_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->gem), 0, qdev_get_gpio_in(DEVICE(s->plic), SIFIVE_U_GEM_IRQ)); + /* PWM */ + for (i = 0; i < 2; i++) { + if (!sysbus_realize(SYS_BUS_DEVICE(&s->pwm[i]), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->pwm[i]), 0, + memmap[SIFIVE_U_DEV_PWM0].base + (0x1000 * i)); + + /* Connect PWM interrupts to the PLIC */ + for (j = 0; j < SIFIVE_PWM_IRQS; j++) { + sysbus_connect_irq(SYS_BUS_DEVICE(&s->pwm[i]), j, + qdev_get_gpio_in(DEVICE(s->plic), + SIFIVE_U_PWM0_IRQ0 + (i * 4) + j)); + } + } + create_unimplemented_device("riscv.sifive.u.gem-mgmt", memmap[SIFIVE_U_DEV_GEM_MGMT].base, memmap[SIFIVE_U_DEV_GEM_MGMT].size); diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c index a789b216d89..0541d75fac6 100644 --- a/hw/riscv/spike.c +++ b/hw/riscv/spike.c @@ -24,7 +24,6 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "hw/boards.h" @@ -36,9 +35,8 @@ #include "hw/riscv/boot.h" #include "hw/riscv/numa.h" #include "hw/char/riscv_htif.h" -#include "hw/intc/sifive_clint.h" +#include "hw/intc/riscv_aclint.h" #include "chardev/char.h" -#include "sysemu/arch_init.h" #include "sysemu/device_tree.h" #include "sysemu/sysemu.h" #ifdef TARGET_CHERI @@ -63,6 +61,9 @@ static void create_fdt(SpikeState *s, const MemMapEntry *memmap, uint32_t cpu_phandle, intc_phandle, phandle = 1; char *name, *mem_name, *clint_name, *clust_name; char *core_name, *cpu_name, *intc_name; + static const char * const clint_compat[2] = { + "sifive,clint0", "riscv,clint0" + }; fdt = s->fdt = create_device_tree(&s->fdt_size); if (!fdt) { @@ -86,7 +87,7 @@ static void create_fdt(SpikeState *s, const MemMapEntry *memmap, qemu_fdt_add_subnode(fdt, "/cpus"); qemu_fdt_setprop_cell(fdt, "/cpus", "timebase-frequency", - SIFIVE_CLINT_TIMEBASE_FREQ); + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ); qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0x0); qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 0x1); qemu_fdt_add_subnode(fdt, "/cpus/cpu-map"); @@ -156,7 +157,8 @@ static void create_fdt(SpikeState *s, const MemMapEntry *memmap, (memmap[SPIKE_CLINT].size * socket); clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr); qemu_fdt_add_subnode(fdt, clint_name); - qemu_fdt_setprop_string(fdt, clint_name, "compatible", "riscv,clint0"); + qemu_fdt_setprop_string_array(fdt, clint_name, "compatible", + (char **)&clint_compat, ARRAY_SIZE(clint_compat)); qemu_fdt_setprop_cells(fdt, clint_name, "reg", 0x0, clint_addr, 0x0, memmap[SPIKE_CLINT].size); qemu_fdt_setprop(fdt, clint_name, "interrupts-extended", @@ -181,7 +183,6 @@ static 
void spike_board_init(MachineState *machine) const MemMapEntry *memmap = spike_memmap; SpikeState *s = SPIKE_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); - MemoryRegion *main_mem = g_new(MemoryRegion, 1); MemoryRegion *mask_rom = g_new(MemoryRegion, 1); target_ulong firmware_end_addr, kernel_start_addr; uint32_t fdt_load_addr; @@ -228,21 +229,20 @@ static void spike_board_init(MachineState *machine) sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_abort); /* Core Local Interruptor (timer and IPI) for each socket */ - sifive_clint_create( + riscv_aclint_swi_create( memmap[SPIKE_CLINT].base + i * memmap[SPIKE_CLINT].size, - memmap[SPIKE_CLINT].size, base_hartid, hart_count, - SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, - SIFIVE_CLINT_TIMEBASE_FREQ, false); + base_hartid, hart_count, false); + riscv_aclint_mtimer_create( + memmap[SPIKE_CLINT].base + i * memmap[SPIKE_CLINT].size + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count, + RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, false); } /* register system main memory (actual RAM) */ - memory_region_init_ram(main_mem, NULL, "riscv.spike.ram", - machine->ram_size, &error_fatal); memory_region_add_subregion(system_memory, memmap[SPIKE_DRAM].base, - main_mem); -#ifdef TARGET_CHERI - cheri_tag_init(main_mem, machine->ram_size); -#endif + machine->ram); /* create device tree */ create_fdt(s, memmap, machine->ram_size, machine->kernel_cmdline, @@ -261,13 +261,11 @@ static void spike_board_init(MachineState *machine) */ if (riscv_is_32bit(&s->soc[0])) { firmware_end_addr = riscv_find_and_load_firmware(machine, - "opensbi-riscv32-generic-fw_dynamic.elf", - memmap[SPIKE_DRAM].base, + RISCV32_BIOS_ELF, memmap[SPIKE_DRAM].base, htif_symbol_callback); } else { firmware_end_addr = riscv_find_and_load_firmware(machine, - "opensbi-riscv64-generic-fw_dynamic.elf", - memmap[SPIKE_DRAM].base, + RISCV64_BIOS_ELF, memmap[SPIKE_DRAM].base, htif_symbol_callback); } @@ -328,6 +326,7 @@ static void spike_machine_class_init(ObjectClass *oc, void *data) mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props; mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id; mc->numa_mem_supported = true; + mc->default_ram_id = "riscv.spike.ram"; } static const TypeInfo spike_machine_typeinfo = { diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 29db70633f7..6ab2074e596 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "qemu/units.h" -#include "qemu/log.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "hw/boards.h" @@ -33,11 +32,10 @@ #include "hw/riscv/virt.h" #include "hw/riscv/boot.h" #include "hw/riscv/numa.h" -#include "hw/intc/sifive_clint.h" +#include "hw/intc/riscv_aclint.h" #include "hw/intc/sifive_plic.h" #include "hw/misc/sifive_test.h" #include "chardev/char.h" -#include "sysemu/arch_init.h" #include "sysemu/device_tree.h" #include "sysemu/sysemu.h" #include "hw/pci/pci.h" @@ -53,6 +51,7 @@ static const MemMapEntry virt_memmap[] = { [VIRT_TEST] = { 0x100000, 0x1000 }, [VIRT_RTC] = { 0x101000, 0x1000 }, [VIRT_CLINT] = { 0x2000000, 0x10000 }, + [VIRT_ACLINT_SSWI] = { 0x2F00000, 0x4000 }, [VIRT_PCIE_PIO] = { 0x3000000, 0x10000 }, [VIRT_PLIC] = { 0xc000000, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) }, [VIRT_UART0] = { 0x10000000, 0x100 }, @@ -181,206 +180,342 @@ static void create_pcie_irq_map(void *fdt, char *nodename, 0x1800, 0, 0, 0x7); } -static void 
create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, - uint64_t mem_size, const char *cmdline, bool is_32_bit) +static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, + char *clust_name, uint32_t *phandle, + bool is_32_bit, uint32_t *intc_phandles) { - void *fdt; - int i, cpu, socket; + int cpu; + uint32_t cpu_phandle; MachineState *mc = MACHINE(s); + char *name, *cpu_name, *core_name, *intc_name; + + for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) { + cpu_phandle = (*phandle)++; + + cpu_name = g_strdup_printf("/cpus/cpu@%d", + s->soc[socket].hartid_base + cpu); + qemu_fdt_add_subnode(mc->fdt, cpu_name); + qemu_fdt_setprop_string(mc->fdt, cpu_name, "mmu-type", + (is_32_bit) ? "riscv,sv32" : "riscv,sv48"); + name = riscv_isa_string(&s->soc[socket].harts[cpu]); + qemu_fdt_setprop_string(mc->fdt, cpu_name, "riscv,isa", name); + g_free(name); + qemu_fdt_setprop_string(mc->fdt, cpu_name, "compatible", "riscv"); + qemu_fdt_setprop_string(mc->fdt, cpu_name, "status", "okay"); + qemu_fdt_setprop_cell(mc->fdt, cpu_name, "reg", + s->soc[socket].hartid_base + cpu); + qemu_fdt_setprop_string(mc->fdt, cpu_name, "device_type", "cpu"); + riscv_socket_fdt_write_id(mc, mc->fdt, cpu_name, socket); + qemu_fdt_setprop_cell(mc->fdt, cpu_name, "phandle", cpu_phandle); + + intc_phandles[cpu] = (*phandle)++; + + intc_name = g_strdup_printf("%s/interrupt-controller", cpu_name); + qemu_fdt_add_subnode(mc->fdt, intc_name); + qemu_fdt_setprop_cell(mc->fdt, intc_name, "phandle", + intc_phandles[cpu]); + qemu_fdt_setprop_string(mc->fdt, intc_name, "compatible", + "riscv,cpu-intc"); + qemu_fdt_setprop(mc->fdt, intc_name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(mc->fdt, intc_name, "#interrupt-cells", 1); + + core_name = g_strdup_printf("%s/core%d", clust_name, cpu); + qemu_fdt_add_subnode(mc->fdt, core_name); + qemu_fdt_setprop_cell(mc->fdt, core_name, "cpu", cpu_phandle); + + g_free(core_name); + g_free(intc_name); + g_free(cpu_name); + } +} + +static void create_fdt_socket_memory(RISCVVirtState *s, + const MemMapEntry *memmap, int socket) +{ + char *mem_name; uint64_t addr, size; - uint32_t *clint_cells, *plic_cells; - unsigned long clint_addr, plic_addr; - uint32_t plic_phandle[MAX_NODES]; - uint32_t cpu_phandle, intc_phandle, test_phandle; - uint32_t phandle = 1, plic_mmio_phandle = 1; - uint32_t plic_pcie_phandle = 1, plic_virtio_phandle = 1; - char *mem_name, *cpu_name, *core_name, *intc_name; - char *name, *clint_name, *plic_name, *clust_name; - hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2; - hwaddr flashbase = virt_memmap[VIRT_FLASH].base; + MachineState *mc = MACHINE(s); - if (mc->dtb) { - fdt = mc->fdt = load_device_tree(mc->dtb, &s->fdt_size); - if (!fdt) { - error_report("load_device_tree() failed"); - exit(1); - } - goto update_bootargs; - } else { - fdt = mc->fdt = create_device_tree(&s->fdt_size); - if (!fdt) { - error_report("create_device_tree() failed"); - exit(1); - } + addr = memmap[VIRT_DRAM].base + riscv_socket_mem_offset(mc, socket); + size = riscv_socket_mem_size(mc, socket); + mem_name = g_strdup_printf("/memory@%lx", (long)addr); + qemu_fdt_add_subnode(mc->fdt, mem_name); + qemu_fdt_setprop_cells(mc->fdt, mem_name, "reg", + addr >> 32, addr, size >> 32, size); + qemu_fdt_setprop_string(mc->fdt, mem_name, "device_type", "memory"); + riscv_socket_fdt_write_id(mc, mc->fdt, mem_name, socket); + g_free(mem_name); +} + +static void create_fdt_socket_clint(RISCVVirtState *s, + const MemMapEntry *memmap, int socket, + uint32_t *intc_phandles) +{ + int 
cpu; + char *clint_name; + uint32_t *clint_cells; + unsigned long clint_addr; + MachineState *mc = MACHINE(s); + static const char * const clint_compat[2] = { + "sifive,clint0", "riscv,clint0" + }; + + clint_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4); + + for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { + clint_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandles[cpu]); + clint_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_SOFT); + clint_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandles[cpu]); + clint_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_M_TIMER); } - qemu_fdt_setprop_string(fdt, "/", "model", "riscv-virtio,qemu"); - qemu_fdt_setprop_string(fdt, "/", "compatible", "riscv-virtio"); - qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 0x2); - qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 0x2); + clint_addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket); + clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr); + qemu_fdt_add_subnode(mc->fdt, clint_name); + qemu_fdt_setprop_string_array(mc->fdt, clint_name, "compatible", + (char **)&clint_compat, + ARRAY_SIZE(clint_compat)); + qemu_fdt_setprop_cells(mc->fdt, clint_name, "reg", + 0x0, clint_addr, 0x0, memmap[VIRT_CLINT].size); + qemu_fdt_setprop(mc->fdt, clint_name, "interrupts-extended", + clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); + riscv_socket_fdt_write_id(mc, mc->fdt, clint_name, socket); + g_free(clint_name); + + g_free(clint_cells); +} + +static void create_fdt_socket_aclint(RISCVVirtState *s, + const MemMapEntry *memmap, int socket, + uint32_t *intc_phandles) +{ + int cpu; + char *name; + unsigned long addr; + uint32_t aclint_cells_size; + uint32_t *aclint_mswi_cells; + uint32_t *aclint_sswi_cells; + uint32_t *aclint_mtimer_cells; + MachineState *mc = MACHINE(s); - qemu_fdt_add_subnode(fdt, "/soc"); - qemu_fdt_setprop(fdt, "/soc", "ranges", NULL, 0); - qemu_fdt_setprop_string(fdt, "/soc", "compatible", "simple-bus"); - qemu_fdt_setprop_cell(fdt, "/soc", "#size-cells", 0x2); - qemu_fdt_setprop_cell(fdt, "/soc", "#address-cells", 0x2); + aclint_mswi_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); + aclint_mtimer_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); + aclint_sswi_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); + + for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { + aclint_mswi_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); + aclint_mswi_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_SOFT); + aclint_mtimer_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); + aclint_mtimer_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_TIMER); + aclint_sswi_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); + aclint_sswi_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_SOFT); + } + aclint_cells_size = s->soc[socket].num_harts * sizeof(uint32_t) * 2; - qemu_fdt_add_subnode(fdt, "/cpus"); - qemu_fdt_setprop_cell(fdt, "/cpus", "timebase-frequency", - SIFIVE_CLINT_TIMEBASE_FREQ); - qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0x0); - qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 0x1); - qemu_fdt_add_subnode(fdt, "/cpus/cpu-map"); + addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket); + name = g_strdup_printf("/soc/mswi@%lx", addr); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", "riscv,aclint-mswi"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", + 0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE); + qemu_fdt_setprop(mc->fdt, name, "interrupts-extended", + aclint_mswi_cells, aclint_cells_size); + qemu_fdt_setprop(mc->fdt, name, 
"interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0); + riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); + g_free(name); + + addr = memmap[VIRT_CLINT].base + RISCV_ACLINT_SWI_SIZE + + (memmap[VIRT_CLINT].size * socket); + name = g_strdup_printf("/soc/mtimer@%lx", addr); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", + "riscv,aclint-mtimer"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", + 0x0, addr + RISCV_ACLINT_DEFAULT_MTIME, + 0x0, memmap[VIRT_CLINT].size - RISCV_ACLINT_SWI_SIZE - + RISCV_ACLINT_DEFAULT_MTIME, + 0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP, + 0x0, RISCV_ACLINT_DEFAULT_MTIME); + qemu_fdt_setprop(mc->fdt, name, "interrupts-extended", + aclint_mtimer_cells, aclint_cells_size); + riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); + g_free(name); + + addr = memmap[VIRT_ACLINT_SSWI].base + + (memmap[VIRT_ACLINT_SSWI].size * socket); + name = g_strdup_printf("/soc/sswi@%lx", addr); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", "riscv,aclint-sswi"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", + 0x0, addr, 0x0, memmap[VIRT_ACLINT_SSWI].size); + qemu_fdt_setprop(mc->fdt, name, "interrupts-extended", + aclint_sswi_cells, aclint_cells_size); + qemu_fdt_setprop(mc->fdt, name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0); + riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); + g_free(name); + + g_free(aclint_mswi_cells); + g_free(aclint_mtimer_cells); + g_free(aclint_sswi_cells); +} + +static void create_fdt_socket_plic(RISCVVirtState *s, + const MemMapEntry *memmap, int socket, + uint32_t *phandle, uint32_t *intc_phandles, + uint32_t *plic_phandles) +{ + int cpu; + char *plic_name; + uint32_t *plic_cells; + unsigned long plic_addr; + MachineState *mc = MACHINE(s); + static const char * const plic_compat[2] = { + "sifive,plic-1.0.0", "riscv,plic0" + }; + + plic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4); + + for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { + plic_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandles[cpu]); + plic_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_EXT); + plic_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandles[cpu]); + plic_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_S_EXT); + } + + plic_phandles[socket] = (*phandle)++; + plic_addr = memmap[VIRT_PLIC].base + (memmap[VIRT_PLIC].size * socket); + plic_name = g_strdup_printf("/soc/plic@%lx", plic_addr); + qemu_fdt_add_subnode(mc->fdt, plic_name); + qemu_fdt_setprop_cell(mc->fdt, plic_name, + "#address-cells", FDT_PLIC_ADDR_CELLS); + qemu_fdt_setprop_cell(mc->fdt, plic_name, + "#interrupt-cells", FDT_PLIC_INT_CELLS); + qemu_fdt_setprop_string_array(mc->fdt, plic_name, "compatible", + (char **)&plic_compat, + ARRAY_SIZE(plic_compat)); + qemu_fdt_setprop(mc->fdt, plic_name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop(mc->fdt, plic_name, "interrupts-extended", + plic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); + qemu_fdt_setprop_cells(mc->fdt, plic_name, "reg", + 0x0, plic_addr, 0x0, memmap[VIRT_PLIC].size); + qemu_fdt_setprop_cell(mc->fdt, plic_name, "riscv,ndev", VIRTIO_NDEV); + riscv_socket_fdt_write_id(mc, mc->fdt, plic_name, socket); + qemu_fdt_setprop_cell(mc->fdt, plic_name, "phandle", + plic_phandles[socket]); + g_free(plic_name); + + g_free(plic_cells); +} + +static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, + bool is_32_bit, uint32_t *phandle, + uint32_t 
*irq_mmio_phandle, + uint32_t *irq_pcie_phandle, + uint32_t *irq_virtio_phandle) +{ + int socket; + char *clust_name; + uint32_t *intc_phandles; + MachineState *mc = MACHINE(s); + uint32_t xplic_phandles[MAX_NODES]; + + qemu_fdt_add_subnode(mc->fdt, "/cpus"); + qemu_fdt_setprop_cell(mc->fdt, "/cpus", "timebase-frequency", + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ); + qemu_fdt_setprop_cell(mc->fdt, "/cpus", "#size-cells", 0x0); + qemu_fdt_setprop_cell(mc->fdt, "/cpus", "#address-cells", 0x1); + qemu_fdt_add_subnode(mc->fdt, "/cpus/cpu-map"); for (socket = (riscv_socket_count(mc) - 1); socket >= 0; socket--) { clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket); - qemu_fdt_add_subnode(fdt, clust_name); - - plic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4); - clint_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4); - - for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) { - cpu_phandle = phandle++; - - cpu_name = g_strdup_printf("/cpus/cpu@%d", - s->soc[socket].hartid_base + cpu); - qemu_fdt_add_subnode(fdt, cpu_name); - if (is_32_bit) { - qemu_fdt_setprop_string(fdt, cpu_name, "mmu-type", "riscv,sv32"); - } else { - qemu_fdt_setprop_string(fdt, cpu_name, "mmu-type", "riscv,sv48"); - } - name = riscv_isa_string(&s->soc[socket].harts[cpu]); - qemu_fdt_setprop_string(fdt, cpu_name, "riscv,isa", name); - g_free(name); - qemu_fdt_setprop_string(fdt, cpu_name, "compatible", "riscv"); - qemu_fdt_setprop_string(fdt, cpu_name, "status", "okay"); - qemu_fdt_setprop_cell(fdt, cpu_name, "reg", - s->soc[socket].hartid_base + cpu); - qemu_fdt_setprop_string(fdt, cpu_name, "device_type", "cpu"); - riscv_socket_fdt_write_id(mc, fdt, cpu_name, socket); - qemu_fdt_setprop_cell(fdt, cpu_name, "phandle", cpu_phandle); - - intc_name = g_strdup_printf("%s/interrupt-controller", cpu_name); - qemu_fdt_add_subnode(fdt, intc_name); - intc_phandle = phandle++; - qemu_fdt_setprop_cell(fdt, intc_name, "phandle", intc_phandle); - qemu_fdt_setprop_string(fdt, intc_name, "compatible", - "riscv,cpu-intc"); - qemu_fdt_setprop(fdt, intc_name, "interrupt-controller", NULL, 0); - qemu_fdt_setprop_cell(fdt, intc_name, "#interrupt-cells", 1); - - clint_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandle); - clint_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_SOFT); - clint_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandle); - clint_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_M_TIMER); - - plic_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandle); - plic_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_EXT); - plic_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandle); - plic_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_S_EXT); - - core_name = g_strdup_printf("%s/core%d", clust_name, cpu); - qemu_fdt_add_subnode(fdt, core_name); - qemu_fdt_setprop_cell(fdt, core_name, "cpu", cpu_phandle); - - g_free(core_name); - g_free(intc_name); - g_free(cpu_name); + qemu_fdt_add_subnode(mc->fdt, clust_name); + + intc_phandles = g_new0(uint32_t, s->soc[socket].num_harts); + + create_fdt_socket_cpus(s, socket, clust_name, phandle, + is_32_bit, intc_phandles); + + create_fdt_socket_memory(s, memmap, socket); + + if (s->have_aclint) { + create_fdt_socket_aclint(s, memmap, socket, intc_phandles); + } else { + create_fdt_socket_clint(s, memmap, socket, intc_phandles); } - addr = memmap[VIRT_DRAM].base + riscv_socket_mem_offset(mc, socket); - size = riscv_socket_mem_size(mc, socket); - mem_name = g_strdup_printf("/memory@%lx", (long)addr); - qemu_fdt_add_subnode(fdt, mem_name); - qemu_fdt_setprop_cells(fdt, mem_name, "reg", - addr >> 32, addr, size >> 32, size); - 
qemu_fdt_setprop_string(fdt, mem_name, "device_type", "memory"); - riscv_socket_fdt_write_id(mc, fdt, mem_name, socket); - g_free(mem_name); - - clint_addr = memmap[VIRT_CLINT].base + - (memmap[VIRT_CLINT].size * socket); - clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr); - qemu_fdt_add_subnode(fdt, clint_name); - qemu_fdt_setprop_string(fdt, clint_name, "compatible", "riscv,clint0"); - qemu_fdt_setprop_cells(fdt, clint_name, "reg", - 0x0, clint_addr, 0x0, memmap[VIRT_CLINT].size); - qemu_fdt_setprop(fdt, clint_name, "interrupts-extended", - clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); - riscv_socket_fdt_write_id(mc, fdt, clint_name, socket); - g_free(clint_name); - - plic_phandle[socket] = phandle++; - plic_addr = memmap[VIRT_PLIC].base + (memmap[VIRT_PLIC].size * socket); - plic_name = g_strdup_printf("/soc/plic@%lx", plic_addr); - qemu_fdt_add_subnode(fdt, plic_name); - qemu_fdt_setprop_cell(fdt, plic_name, - "#address-cells", FDT_PLIC_ADDR_CELLS); - qemu_fdt_setprop_cell(fdt, plic_name, - "#interrupt-cells", FDT_PLIC_INT_CELLS); - qemu_fdt_setprop_string(fdt, plic_name, "compatible", "riscv,plic0"); - qemu_fdt_setprop(fdt, plic_name, "interrupt-controller", NULL, 0); - qemu_fdt_setprop(fdt, plic_name, "interrupts-extended", - plic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); - qemu_fdt_setprop_cells(fdt, plic_name, "reg", - 0x0, plic_addr, 0x0, memmap[VIRT_PLIC].size); - qemu_fdt_setprop_cell(fdt, plic_name, "riscv,ndev", VIRTIO_NDEV); - riscv_socket_fdt_write_id(mc, fdt, plic_name, socket); - qemu_fdt_setprop_cell(fdt, plic_name, "phandle", plic_phandle[socket]); - g_free(plic_name); - - g_free(clint_cells); - g_free(plic_cells); + create_fdt_socket_plic(s, memmap, socket, phandle, + intc_phandles, xplic_phandles); + + g_free(intc_phandles); g_free(clust_name); } for (socket = 0; socket < riscv_socket_count(mc); socket++) { if (socket == 0) { - plic_mmio_phandle = plic_phandle[socket]; - plic_virtio_phandle = plic_phandle[socket]; - plic_pcie_phandle = plic_phandle[socket]; + *irq_mmio_phandle = xplic_phandles[socket]; + *irq_virtio_phandle = xplic_phandles[socket]; + *irq_pcie_phandle = xplic_phandles[socket]; } if (socket == 1) { - plic_virtio_phandle = plic_phandle[socket]; - plic_pcie_phandle = plic_phandle[socket]; + *irq_virtio_phandle = xplic_phandles[socket]; + *irq_pcie_phandle = xplic_phandles[socket]; } if (socket == 2) { - plic_pcie_phandle = plic_phandle[socket]; + *irq_pcie_phandle = xplic_phandles[socket]; } } - riscv_socket_fdt_write_distance_matrix(mc, fdt); + riscv_socket_fdt_write_distance_matrix(mc, mc->fdt); +} + +static void create_fdt_virtio(RISCVVirtState *s, const MemMapEntry *memmap, + uint32_t irq_virtio_phandle) +{ + int i; + char *name; + MachineState *mc = MACHINE(s); for (i = 0; i < VIRTIO_COUNT; i++) { name = g_strdup_printf("/soc/virtio_mmio@%lx", (long)(memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size)); - qemu_fdt_add_subnode(fdt, name); - qemu_fdt_setprop_string(fdt, name, "compatible", "virtio,mmio"); - qemu_fdt_setprop_cells(fdt, name, "reg", + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", "virtio,mmio"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", 0x0, memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size, 0x0, memmap[VIRT_VIRTIO].size); - qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", - plic_virtio_phandle); - qemu_fdt_setprop_cell(fdt, name, "interrupts", VIRTIO_IRQ + i); + qemu_fdt_setprop_cell(mc->fdt, name, "interrupt-parent", + 
irq_virtio_phandle); + qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", VIRTIO_IRQ + i); g_free(name); } +} + +static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap, + uint32_t irq_pcie_phandle) +{ + char *name; + MachineState *mc = MACHINE(s); name = g_strdup_printf("/soc/pci@%lx", (long) memmap[VIRT_PCIE_ECAM].base); - qemu_fdt_add_subnode(fdt, name); - qemu_fdt_setprop_cell(fdt, name, "#address-cells", FDT_PCI_ADDR_CELLS); - qemu_fdt_setprop_cell(fdt, name, "#interrupt-cells", FDT_PCI_INT_CELLS); - qemu_fdt_setprop_cell(fdt, name, "#size-cells", 0x2); - qemu_fdt_setprop_string(fdt, name, "compatible", "pci-host-ecam-generic"); - qemu_fdt_setprop_string(fdt, name, "device_type", "pci"); - qemu_fdt_setprop_cell(fdt, name, "linux,pci-domain", 0); - qemu_fdt_setprop_cells(fdt, name, "bus-range", 0, + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_cell(mc->fdt, name, "#address-cells", + FDT_PCI_ADDR_CELLS); + qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", + FDT_PCI_INT_CELLS); + qemu_fdt_setprop_cell(mc->fdt, name, "#size-cells", 0x2); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", + "pci-host-ecam-generic"); + qemu_fdt_setprop_string(mc->fdt, name, "device_type", "pci"); + qemu_fdt_setprop_cell(mc->fdt, name, "linux,pci-domain", 0); + qemu_fdt_setprop_cells(mc->fdt, name, "bus-range", 0, memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1); - qemu_fdt_setprop(fdt, name, "dma-coherent", NULL, 0); - qemu_fdt_setprop_cells(fdt, name, "reg", 0, + qemu_fdt_setprop(mc->fdt, name, "dma-coherent", NULL, 0); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", 0, memmap[VIRT_PCIE_ECAM].base, 0, memmap[VIRT_PCIE_ECAM].size); - qemu_fdt_setprop_sized_cells(fdt, name, "ranges", + qemu_fdt_setprop_sized_cells(mc->fdt, name, "ranges", 1, FDT_PCI_RANGE_IOPORT, 2, 0, 2, memmap[VIRT_PCIE_PIO].base, 2, memmap[VIRT_PCIE_PIO].size, 1, FDT_PCI_RANGE_MMIO, @@ -390,65 +525,98 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, 2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.size); - create_pcie_irq_map(fdt, name, plic_pcie_phandle); + create_pcie_irq_map(mc->fdt, name, irq_pcie_phandle); g_free(name); +} + +static void create_fdt_reset(RISCVVirtState *s, const MemMapEntry *memmap, + uint32_t *phandle) +{ + char *name; + uint32_t test_phandle; + MachineState *mc = MACHINE(s); - test_phandle = phandle++; + test_phandle = (*phandle)++; name = g_strdup_printf("/soc/test@%lx", (long)memmap[VIRT_TEST].base); - qemu_fdt_add_subnode(fdt, name); + qemu_fdt_add_subnode(mc->fdt, name); { - const char compat[] = "sifive,test1\0sifive,test0\0syscon"; - qemu_fdt_setprop(fdt, name, "compatible", compat, sizeof(compat)); + static const char * const compat[3] = { + "sifive,test1", "sifive,test0", "syscon" + }; + qemu_fdt_setprop_string_array(mc->fdt, name, "compatible", + (char **)&compat, ARRAY_SIZE(compat)); } - qemu_fdt_setprop_cells(fdt, name, "reg", - 0x0, memmap[VIRT_TEST].base, - 0x0, memmap[VIRT_TEST].size); - qemu_fdt_setprop_cell(fdt, name, "phandle", test_phandle); - test_phandle = qemu_fdt_get_phandle(fdt, name); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", + 0x0, memmap[VIRT_TEST].base, 0x0, memmap[VIRT_TEST].size); + qemu_fdt_setprop_cell(mc->fdt, name, "phandle", test_phandle); + test_phandle = qemu_fdt_get_phandle(mc->fdt, name); g_free(name); name = g_strdup_printf("/soc/reboot"); - qemu_fdt_add_subnode(fdt, name); - qemu_fdt_setprop_string(fdt, name, "compatible", "syscon-reboot"); - 
qemu_fdt_setprop_cell(fdt, name, "regmap", test_phandle); - qemu_fdt_setprop_cell(fdt, name, "offset", 0x0); - qemu_fdt_setprop_cell(fdt, name, "value", FINISHER_RESET); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", "syscon-reboot"); + qemu_fdt_setprop_cell(mc->fdt, name, "regmap", test_phandle); + qemu_fdt_setprop_cell(mc->fdt, name, "offset", 0x0); + qemu_fdt_setprop_cell(mc->fdt, name, "value", FINISHER_RESET); g_free(name); name = g_strdup_printf("/soc/poweroff"); - qemu_fdt_add_subnode(fdt, name); - qemu_fdt_setprop_string(fdt, name, "compatible", "syscon-poweroff"); - qemu_fdt_setprop_cell(fdt, name, "regmap", test_phandle); - qemu_fdt_setprop_cell(fdt, name, "offset", 0x0); - qemu_fdt_setprop_cell(fdt, name, "value", FINISHER_PASS); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", "syscon-poweroff"); + qemu_fdt_setprop_cell(mc->fdt, name, "regmap", test_phandle); + qemu_fdt_setprop_cell(mc->fdt, name, "offset", 0x0); + qemu_fdt_setprop_cell(mc->fdt, name, "value", FINISHER_PASS); g_free(name); +} + +static void create_fdt_uart(RISCVVirtState *s, const MemMapEntry *memmap, + uint32_t irq_mmio_phandle) +{ + char *name; + MachineState *mc = MACHINE(s); name = g_strdup_printf("/soc/uart@%lx", (long)memmap[VIRT_UART0].base); - qemu_fdt_add_subnode(fdt, name); - qemu_fdt_setprop_string(fdt, name, "compatible", "ns16550a"); - qemu_fdt_setprop_cells(fdt, name, "reg", + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", "ns16550a"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", 0x0, memmap[VIRT_UART0].base, 0x0, memmap[VIRT_UART0].size); - qemu_fdt_setprop_cell(fdt, name, "clock-frequency", 3686400); - qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", plic_mmio_phandle); - qemu_fdt_setprop_cell(fdt, name, "interrupts", UART0_IRQ); + qemu_fdt_setprop_cell(mc->fdt, name, "clock-frequency", 3686400); + qemu_fdt_setprop_cell(mc->fdt, name, "interrupt-parent", irq_mmio_phandle); + qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", UART0_IRQ); - qemu_fdt_add_subnode(fdt, "/chosen"); - qemu_fdt_setprop_string(fdt, "/chosen", "stdout-path", name); + qemu_fdt_add_subnode(mc->fdt, "/chosen"); + qemu_fdt_setprop_string(mc->fdt, "/chosen", "stdout-path", name); g_free(name); +} + +static void create_fdt_rtc(RISCVVirtState *s, const MemMapEntry *memmap, + uint32_t irq_mmio_phandle) +{ + char *name; + MachineState *mc = MACHINE(s); name = g_strdup_printf("/soc/rtc@%lx", (long)memmap[VIRT_RTC].base); - qemu_fdt_add_subnode(fdt, name); - qemu_fdt_setprop_string(fdt, name, "compatible", "google,goldfish-rtc"); - qemu_fdt_setprop_cells(fdt, name, "reg", - 0x0, memmap[VIRT_RTC].base, - 0x0, memmap[VIRT_RTC].size); - qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", plic_mmio_phandle); - qemu_fdt_setprop_cell(fdt, name, "interrupts", RTC_IRQ); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", + "google,goldfish-rtc"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", + 0x0, memmap[VIRT_RTC].base, 0x0, memmap[VIRT_RTC].size); + qemu_fdt_setprop_cell(mc->fdt, name, "interrupt-parent", + irq_mmio_phandle); + qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", RTC_IRQ); g_free(name); +} + +static void create_fdt_flash(RISCVVirtState *s, const MemMapEntry *memmap) +{ + char *name; + MachineState *mc = MACHINE(s); + hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2; + hwaddr flashbase = virt_memmap[VIRT_FLASH].base; - name = 
g_strdup_printf("/soc/flash@%" PRIx64, flashbase); + name = g_strdup_printf("/flash@%" PRIx64, flashbase); qemu_fdt_add_subnode(mc->fdt, name); qemu_fdt_setprop_string(mc->fdt, name, "compatible", "cfi-flash"); qemu_fdt_setprop_sized_cells(mc->fdt, name, "reg", @@ -456,10 +624,59 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, 2, flashbase + flashsize, 2, flashsize); qemu_fdt_setprop_cell(mc->fdt, name, "bank-width", 4); g_free(name); +} + +static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, + uint64_t mem_size, const char *cmdline, bool is_32_bit) +{ + MachineState *mc = MACHINE(s); + uint32_t phandle = 1, irq_mmio_phandle = 1; + uint32_t irq_pcie_phandle = 1, irq_virtio_phandle = 1; + + if (mc->dtb) { + mc->fdt = load_device_tree(mc->dtb, &s->fdt_size); + if (!mc->fdt) { + error_report("load_device_tree() failed"); + exit(1); + } + goto update_bootargs; + } else { + mc->fdt = create_device_tree(&s->fdt_size); + if (!mc->fdt) { + error_report("create_device_tree() failed"); + exit(1); + } + } + + qemu_fdt_setprop_string(mc->fdt, "/", "model", "riscv-virtio,qemu"); + qemu_fdt_setprop_string(mc->fdt, "/", "compatible", "riscv-virtio"); + qemu_fdt_setprop_cell(mc->fdt, "/", "#size-cells", 0x2); + qemu_fdt_setprop_cell(mc->fdt, "/", "#address-cells", 0x2); + + qemu_fdt_add_subnode(mc->fdt, "/soc"); + qemu_fdt_setprop(mc->fdt, "/soc", "ranges", NULL, 0); + qemu_fdt_setprop_string(mc->fdt, "/soc", "compatible", "simple-bus"); + qemu_fdt_setprop_cell(mc->fdt, "/soc", "#size-cells", 0x2); + qemu_fdt_setprop_cell(mc->fdt, "/soc", "#address-cells", 0x2); + + create_fdt_sockets(s, memmap, is_32_bit, &phandle, + &irq_mmio_phandle, &irq_pcie_phandle, &irq_virtio_phandle); + + create_fdt_virtio(s, memmap, irq_virtio_phandle); + + create_fdt_pcie(s, memmap, irq_pcie_phandle); + + create_fdt_reset(s, memmap, &phandle); + + create_fdt_uart(s, memmap, irq_mmio_phandle); + + create_fdt_rtc(s, memmap, irq_mmio_phandle); + + create_fdt_flash(s, memmap); update_bootargs: if (cmdline) { - qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline); + qemu_fdt_setprop_string(mc->fdt, "/chosen", "bootargs", cmdline); } } @@ -539,16 +756,14 @@ static void virt_machine_init(MachineState *machine) const MemMapEntry *memmap = virt_memmap; RISCVVirtState *s = RISCV_VIRT_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); - MemoryRegion *main_mem = g_new(MemoryRegion, 1); MemoryRegion *mask_rom = g_new(MemoryRegion, 1); char *plic_hart_config, *soc_name; - size_t plic_hart_config_len; target_ulong start_addr = memmap[VIRT_DRAM].base; target_ulong firmware_end_addr, kernel_start_addr; uint32_t fdt_load_addr; uint64_t kernel_entry; DeviceState *mmio_plic, *virtio_plic, *pcie_plic; - int i, j, base_hartid, hart_count; + int i, base_hartid, hart_count; /* Check socket count limit */ if (VIRT_SOCKETS_MAX < riscv_socket_count(machine)) { @@ -590,29 +805,31 @@ static void virt_machine_init(MachineState *machine) sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_abort); /* Per-socket CLINT */ - sifive_clint_create( + riscv_aclint_swi_create( memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size, - memmap[VIRT_CLINT].size, base_hartid, hart_count, - SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, - SIFIVE_CLINT_TIMEBASE_FREQ, true); + base_hartid, hart_count, false); + riscv_aclint_mtimer_create( + memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count, + 
RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); + + /* Per-socket ACLINT SSWI */ + if (s->have_aclint) { + riscv_aclint_swi_create( + memmap[VIRT_ACLINT_SSWI].base + + i * memmap[VIRT_ACLINT_SSWI].size, + base_hartid, hart_count, true); + } /* Per-socket PLIC hart topology configuration string */ - plic_hart_config_len = - (strlen(VIRT_PLIC_HART_CONFIG) + 1) * hart_count; - plic_hart_config = g_malloc0(plic_hart_config_len); - for (j = 0; j < hart_count; j++) { - if (j != 0) { - strncat(plic_hart_config, ",", plic_hart_config_len); - } - strncat(plic_hart_config, VIRT_PLIC_HART_CONFIG, - plic_hart_config_len); - plic_hart_config_len -= (strlen(VIRT_PLIC_HART_CONFIG) + 1); - } + plic_hart_config = riscv_plic_hart_config_string(hart_count); /* Per-socket PLIC */ s->plic[i] = sifive_plic_create( memmap[VIRT_PLIC].base + i * memmap[VIRT_PLIC].size, - plic_hart_config, base_hartid, + plic_hart_config, hart_count, base_hartid, VIRT_PLIC_NUM_SOURCES, VIRT_PLIC_NUM_PRIORITIES, VIRT_PLIC_PRIORITY_BASE, @@ -657,13 +874,8 @@ static void virt_machine_init(MachineState *machine) } /* register system main memory (actual RAM) */ - memory_region_init_ram(main_mem, NULL, "riscv_virt_board.ram", - machine->ram_size, &error_fatal); memory_region_add_subregion(system_memory, memmap[VIRT_DRAM].base, - main_mem); -#ifdef TARGET_CHERI - cheri_tag_init(main_mem, machine->ram_size); -#endif + machine->ram); /* create device tree */ create_fdt(s, memmap, machine->ram_size, machine->kernel_cmdline, @@ -682,7 +894,7 @@ static void virt_machine_init(MachineState *machine) /* Use a purecap BBL as the BIOS for CHERI. */ "bbl-riscv32cheri-virt-fw_jump.bin", #else - "opensbi-riscv32-generic-fw_dynamic.bin", + RISCV32_BIOS_BIN, #endif start_addr, NULL); } else { @@ -692,7 +904,7 @@ static void virt_machine_init(MachineState *machine) /* Use a purecap BBL as the BIOS for CHERI. 
*/ "bbl-riscv64cheri-virt-fw_jump.bin", #else - "opensbi-riscv64-generic-fw_dynamic.bin", + RISCV64_BIOS_BIN, #endif start_addr, NULL); } @@ -787,6 +999,22 @@ static void virt_machine_instance_init(Object *obj) { } +static bool virt_get_aclint(Object *obj, Error **errp) +{ + MachineState *ms = MACHINE(obj); + RISCVVirtState *s = RISCV_VIRT_MACHINE(ms); + + return s->have_aclint; +} + +static void virt_set_aclint(Object *obj, bool value, Error **errp) +{ + MachineState *ms = MACHINE(obj); + RISCVVirtState *s = RISCV_VIRT_MACHINE(ms); + + s->have_aclint = value; +} + static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -800,8 +1028,15 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props; mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id; mc->numa_mem_supported = true; + mc->default_ram_id = "riscv_virt_board.ram"; machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); + + object_class_property_add_bool(oc, "aclint", virt_get_aclint, + virt_set_aclint); + object_class_property_set_description(oc, "aclint", + "Set on/off to enable/disable " + "emulating ACLINT devices"); } static const TypeInfo virt_machine_typeinfo = { diff --git a/hw/rtc/m48t59.c b/hw/rtc/m48t59.c index d54929e8612..690f4e071a1 100644 --- a/hw/rtc/m48t59.c +++ b/hw/rtc/m48t59.c @@ -32,7 +32,6 @@ #include "sysemu/runstate.h" #include "sysemu/sysemu.h" #include "hw/sysbus.h" -#include "exec/address-spaces.h" #include "qapi/error.h" #include "qemu/bcd.h" #include "qemu/module.h" diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c index 5d0fcacd0c0..4fbafddb226 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c @@ -42,7 +42,6 @@ #include "qapi/error.h" #include "qapi/qapi-events-misc-target.h" #include "qapi/visitor.h" -#include "exec/address-spaces.h" #include "hw/rtc/mc146818rtc_regs.h" #ifdef TARGET_I386 @@ -872,22 +871,6 @@ static void rtc_notify_suspend(Notifier *notifier, void *data) rtc_set_memory(ISA_DEVICE(s), 0xF, 0xFE); } -static void rtc_reset(void *opaque) -{ - RTCState *s = opaque; - - s->cmos_data[RTC_REG_B] &= ~(REG_B_PIE | REG_B_AIE | REG_B_SQWE); - s->cmos_data[RTC_REG_C] &= ~(REG_C_UF | REG_C_IRQF | REG_C_PF | REG_C_AF); - check_update_timer(s); - - qemu_irq_lower(s->irq); - - if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) { - s->irq_coalesced = 0; - s->irq_reinject_on_ack_count = 0; - } -} - static const MemoryRegionOps cmos_ops = { .read = cmos_ioport_read, .write = cmos_ioport_write, @@ -962,7 +945,6 @@ static void rtc_realizefn(DeviceState *dev, Error **errp) memory_region_add_coalescing(&s->coalesced_io, 0, 1); qdev_set_legacy_instance_id(dev, RTC_ISA_BASE, 3); - qemu_register_reset(rtc_reset, s); object_property_add_tm(OBJECT(s), "date", rtc_get_date); @@ -998,15 +980,32 @@ static Property mc146818rtc_properties[] = { DEFINE_PROP_END_OF_LIST(), }; -static void rtc_resetdev(DeviceState *d) +static void rtc_reset_enter(Object *obj, ResetType type) { - RTCState *s = MC146818_RTC(d); + RTCState *s = MC146818_RTC(obj); /* Reason: VM do suspend self will set 0xfe * Reset any values other than 0xfe(Guest suspend case) */ if (s->cmos_data[0x0f] != 0xfe) { s->cmos_data[0x0f] = 0x00; } + + s->cmos_data[RTC_REG_B] &= ~(REG_B_PIE | REG_B_AIE | REG_B_SQWE); + s->cmos_data[RTC_REG_C] &= ~(REG_C_UF | REG_C_IRQF | REG_C_PF | REG_C_AF); + check_update_timer(s); + + + if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) { + s->irq_coalesced = 0; + 
s->irq_reinject_on_ack_count = 0;
+    }
+}
+
+static void rtc_reset_hold(Object *obj)
+{
+    RTCState *s = MC146818_RTC(obj);
+
+    qemu_irq_lower(s->irq);
 }
 static void rtc_build_aml(ISADevice *isadev, Aml *scope)
@@ -1033,13 +1032,16 @@ static void rtc_build_aml(ISADevice *isadev, Aml *scope)
 static void rtc_class_initfn(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
     ISADeviceClass *isa = ISA_DEVICE_CLASS(klass);
     dc->realize = rtc_realizefn;
-    dc->reset = rtc_resetdev;
     dc->vmsd = &vmstate_rtc;
+    rc->phases.enter = rtc_reset_enter;
+    rc->phases.hold = rtc_reset_hold;
     isa->build_aml = rtc_build_aml;
     device_class_set_props(dc, mc146818rtc_properties);
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 }
 static const TypeInfo mc146818rtc_info = {
diff --git a/hw/rtc/meson.build b/hw/rtc/meson.build
index 7cecdee5ddb..8fd8d8f9a71 100644
--- a/hw/rtc/meson.build
+++ b/hw/rtc/meson.build
@@ -2,7 +2,7 @@
 softmmu_ss.add(when: 'CONFIG_DS1338', if_true: files('ds1338.c'))
 softmmu_ss.add(when: 'CONFIG_M41T80', if_true: files('m41t80.c'))
 softmmu_ss.add(when: 'CONFIG_M48T59', if_true: files('m48t59.c'))
-softmmu_ss.add(when: 'CONFIG_PL031', if_true: files('pl031.c'))
+specific_ss.add(when: 'CONFIG_PL031', if_true: files('pl031.c'))
 softmmu_ss.add(when: 'CONFIG_TWL92230', if_true: files('twl92230.c'))
 softmmu_ss.add(when: ['CONFIG_ISA_BUS', 'CONFIG_M48T59'], if_true: files('m48t59-isa.c'))
 softmmu_ss.add(when: 'CONFIG_XLNX_ZYNQMP', if_true: files('xlnx-zynqmp-rtc.c'))
diff --git a/hw/rtc/pl031.c b/hw/rtc/pl031.c
index 2bbb2062ac8..e7ced90b025 100644
--- a/hw/rtc/pl031.c
+++ b/hw/rtc/pl031.c
@@ -24,6 +24,7 @@
 #include "qemu/log.h"
 #include "qemu/module.h"
 #include "trace.h"
+#include "qapi/qapi-events-misc-target.h"
 #define RTC_DR 0x00 /* Data read register */
 #define RTC_MR 0x04 /* Match register */
@@ -136,10 +137,17 @@ static void pl031_write(void * opaque, hwaddr offset,
     trace_pl031_write(offset, value);
     switch (offset) {
-    case RTC_LR:
+    case RTC_LR: {
+        struct tm tm;
+
         s->tick_offset += value - pl031_get_count(s);
+
+        qemu_get_timedate(&tm, s->tick_offset);
+        qapi_event_send_rtc_change(qemu_timedate_diff(&tm));
+
         pl031_set_alarm(s);
         break;
+    }
     case RTC_MR:
         s->mr = value;
         pl031_set_alarm(s);
diff --git a/hw/rtc/trace-events b/hw/rtc/trace-events
index 8bdcf742640..ebb311a5b0e 100644
--- a/hw/rtc/trace-events
+++ b/hw/rtc/trace-events
@@ -1,4 +1,4 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
# allwinner-rtc.c
allwinner_rtc_read(uint64_t addr, uint64_t value) "addr 0x%" PRIx64 " value 0x%" PRIx64
diff --git a/hw/rx/rx-gdbsim.c b/hw/rx/rx-gdbsim.c
index b1d7c2488ff..75d1fec6ca4 100644
--- a/hw/rx/rx-gdbsim.c
+++ b/hw/rx/rx-gdbsim.c
@@ -21,12 +21,8 @@
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
-#include "cpu.h"
-#include "hw/hw.h"
-#include "hw/sysbus.h"
 #include "hw/loader.h"
 #include "hw/rx/rx62n.h"
-#include "sysemu/sysemu.h"
 #include "sysemu/qtest.h"
 #include "sysemu/device_tree.h"
 #include "hw/boards.h"
@@ -93,6 +89,7 @@ static void rx_gdbsim_init(MachineState *machine)
         char *sz = size_to_str(mc->default_ram_size);
         error_report("Invalid RAM size, should be more than %s", sz);
         g_free(sz);
+        exit(1);
     }
     /* Allocate memory space */
diff --git a/hw/rx/rx62n.c b/hw/rx/rx62n.c
index 9c34ce14de6..fa5add9f9db 100644
--- a/hw/rx/rx62n.c
+++ b/hw/rx/rx62n.c
@@ -23,13 +23,11 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
-#include "hw/hw.h"
 #include "hw/rx/rx62n.h"
 #include "hw/loader.h"
 #include "hw/sysbus.h"
 #include "hw/qdev-properties.h"
 #include "sysemu/sysemu.h"
-#include "cpu.h"
 #include "qom/object.h"
 /*
diff --git a/hw/s390x/3270-ccw.c b/hw/s390x/3270-ccw.c
index f3e7342b1e8..69e6783ade5 100644
--- a/hw/s390x/3270-ccw.c
+++ b/hw/s390x/3270-ccw.c
@@ -13,7 +13,6 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/module.h"
-#include "cpu.h"
 #include "hw/s390x/css.h"
 #include "hw/s390x/css-bridge.h"
 #include "hw/qdev-properties.h"
@@ -130,6 +129,7 @@ static void emulated_ccw_3270_realize(DeviceState *ds, Error **errp)
                            EMULATED_CCW_3270_CHPID_TYPE);
     sch->do_subchannel_work = do_subchannel_work_virtual;
     sch->ccw_cb = emulated_ccw_3270_cb;
+    sch->irb_cb = build_irb_virtual;
     ck->init(dev, &err);
     if (err) {
@@ -159,7 +159,6 @@ static void emulated_ccw_3270_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     device_class_set_props(dc, emulated_ccw_3270_properties);
-    dc->bus_type = TYPE_VIRTUAL_CSS_BUS;
     dc->realize = emulated_ccw_3270_realize;
     dc->hotpluggable = false;
     set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
diff --git a/hw/s390x/ap-bridge.c b/hw/s390x/ap-bridge.c
index 8bcf8ece9dd..ef8fa2b15be 100644
--- a/hw/s390x/ap-bridge.c
+++ b/hw/s390x/ap-bridge.c
@@ -55,7 +55,7 @@ void s390_init_ap(void)
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
     /* Create bus on bridge device */
-    bus = qbus_create(TYPE_AP_BUS, dev, TYPE_AP_BUS);
+    bus = qbus_new(TYPE_AP_BUS, dev, TYPE_AP_BUS);
     /* Enable hotplugging */
     qbus_set_hotplug_handler(bus, OBJECT(dev));
diff --git a/hw/s390x/ccw-device.c b/hw/s390x/ccw-device.c
index c9707110e9c..95f269ab441 100644
--- a/hw/s390x/ccw-device.c
+++ b/hw/s390x/ccw-device.c
@@ -59,6 +59,7 @@ static void ccw_device_class_init(ObjectClass *klass, void *data)
     k->refill_ids = ccw_device_refill_ids;
     device_class_set_props(dc, ccw_device_properties);
     dc->reset = ccw_device_reset;
+    dc->bus_type = TYPE_VIRTUAL_CSS_BUS;
 }
 const VMStateDescription vmstate_ccw_dev = {
diff --git a/hw/s390x/ccw-device.h b/hw/s390x/ccw-device.h
index 832c78cd421..6dff95225df 100644
--- a/hw/s390x/ccw-device.h
+++ b/hw/s390x/ccw-device.h
@@ -14,6 +14,7 @@
 #include "qom/object.h"
 #include "hw/qdev-core.h"
 #include "hw/s390x/css.h"
+#include "hw/s390x/css-bridge.h"
 struct CcwDevice {
     DeviceState parent_obj;
diff --git a/hw/s390x/css-bridge.c b/hw/s390x/css-bridge.c
index 9d793d671e1..4017081d49c 100644
--- a/hw/s390x/css-bridge.c
+++ b/hw/s390x/css-bridge.c
@@
-20,7 +20,6 @@ #include "hw/s390x/css.h" #include "ccw-device.h" #include "hw/s390x/css-bridge.h" -#include "cpu.h" /* * Invoke device-specific unplug handler, disable the subchannel @@ -107,7 +106,7 @@ VirtualCssBus *virtual_css_bus_init(void) sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); /* Create bus on bridge device */ - bus = qbus_create(TYPE_VIRTUAL_CSS_BUS, dev, "virtual-css"); + bus = qbus_new(TYPE_VIRTUAL_CSS_BUS, dev, "virtual-css"); cbus = VIRTUAL_CSS_BUS(bus); /* Enable hotplugging */ diff --git a/hw/s390x/css.c b/hw/s390x/css.c index 4149b8e5a79..7d9523f8113 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -15,7 +15,6 @@ #include "qemu/bitops.h" #include "qemu/error-report.h" #include "exec/address-spaces.h" -#include "cpu.h" #include "hw/s390x/ioinst.h" #include "hw/qdev-properties.h" #include "hw/s390x/css.h" @@ -1207,23 +1206,53 @@ static void sch_handle_start_func_virtual(SubchDev *sch) } -static void sch_handle_halt_func_passthrough(SubchDev *sch) +static IOInstEnding sch_handle_halt_func_passthrough(SubchDev *sch) { int ret; ret = s390_ccw_halt(sch); if (ret == -ENOSYS) { sch_handle_halt_func(sch); + return IOINST_CC_EXPECTED; + } + /* + * Some conditions may have been detected prior to starting the halt + * function; map them to the correct cc. + * Note that we map both -ENODEV and -EACCES to cc 3 (there's not really + * anything else we can do.) + */ + switch (ret) { + case -EBUSY: + return IOINST_CC_BUSY; + case -ENODEV: + case -EACCES: + return IOINST_CC_NOT_OPERATIONAL; + default: + return IOINST_CC_EXPECTED; } } -static void sch_handle_clear_func_passthrough(SubchDev *sch) +static IOInstEnding sch_handle_clear_func_passthrough(SubchDev *sch) { int ret; ret = s390_ccw_clear(sch); if (ret == -ENOSYS) { sch_handle_clear_func(sch); + return IOINST_CC_EXPECTED; + } + /* + * Some conditions may have been detected prior to starting the clear + * function; map them to the correct cc. + * Note that we map both -ENODEV and -EACCES to cc 3 (there's not really + * anything else we can do.) 
+ */ + switch (ret) { + case -ENODEV: + case -EACCES: + return IOINST_CC_NOT_OPERATIONAL; + default: + return IOINST_CC_EXPECTED; } } @@ -1266,9 +1295,9 @@ IOInstEnding do_subchannel_work_passthrough(SubchDev *sch) SCHIB *schib = &sch->curr_status; if (schib->scsw.ctrl & SCSW_FCTL_CLEAR_FUNC) { - sch_handle_clear_func_passthrough(sch); + return sch_handle_clear_func_passthrough(sch); } else if (schib->scsw.ctrl & SCSW_FCTL_HALT_FUNC) { - sch_handle_halt_func_passthrough(sch); + return sch_handle_halt_func_passthrough(sch); } else if (schib->scsw.ctrl & SCSW_FCTL_START_FUNC) { return sch_handle_start_func_passthrough(sch); } @@ -1336,6 +1365,14 @@ static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src) } } +void copy_esw_to_guest(ESW *dest, const ESW *src) +{ + dest->word0 = cpu_to_be32(src->word0); + dest->erw = cpu_to_be32(src->erw); + dest->word2 = cpu_to_be64(src->word2); + dest->word4 = cpu_to_be32(src->word4); +} + IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib) { int ret; @@ -1605,9 +1642,8 @@ static void copy_irb_to_guest(IRB *dest, const IRB *src, const PMCW *pmcw, copy_scsw_to_guest(&dest->scsw, &src->scsw); - for (i = 0; i < ARRAY_SIZE(dest->esw); i++) { - dest->esw[i] = cpu_to_be32(src->esw[i]); - } + copy_esw_to_guest(&dest->esw, &src->esw); + for (i = 0; i < ARRAY_SIZE(dest->ecw); i++) { dest->ecw[i] = cpu_to_be32(src->ecw[i]); } @@ -1633,6 +1669,55 @@ static void copy_irb_to_guest(IRB *dest, const IRB *src, const PMCW *pmcw, *irb_len = sizeof(*dest); } +static void build_irb_sense_data(SubchDev *sch, IRB *irb) +{ + int i; + + /* Attention: sense_data is already BE! */ + memcpy(irb->ecw, sch->sense_data, sizeof(sch->sense_data)); + for (i = 0; i < ARRAY_SIZE(irb->ecw); i++) { + irb->ecw[i] = be32_to_cpu(irb->ecw[i]); + } +} + +void build_irb_passthrough(SubchDev *sch, IRB *irb) +{ + /* Copy ESW from hardware */ + irb->esw = sch->esw; + + /* + * If (irb->esw.erw & ESW_ERW_SENSE) is true, then the contents + * of the ECW is sense data. If false, then it is model-dependent + * information. Either way, copy it into the IRB for the guest to + * read/decide what to do with. + */ + build_irb_sense_data(sch, irb); +} + +void build_irb_virtual(SubchDev *sch, IRB *irb) +{ + SCHIB *schib = &sch->curr_status; + uint16_t stctl = schib->scsw.ctrl & SCSW_CTRL_MASK_STCTL; + + if (stctl & SCSW_STCTL_STATUS_PEND) { + if (schib->scsw.cstat & (SCSW_CSTAT_DATA_CHECK | + SCSW_CSTAT_CHN_CTRL_CHK | + SCSW_CSTAT_INTF_CTRL_CHK)) { + irb->scsw.flags |= SCSW_FLAGS_MASK_ESWF; + irb->esw.word0 = 0x04804000; + } else { + irb->esw.word0 = 0x00800000; + } + /* If a unit check is pending, copy sense data. */ + if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && + (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { + irb->scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL; + build_irb_sense_data(sch, irb); + irb->esw.erw = ESW_ERW_SENSE | (sizeof(sch->sense_data) << 8); + } + } +} + int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) { SCHIB *schib = &sch->curr_status; @@ -1651,29 +1736,12 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) /* Copy scsw from current status. */ irb.scsw = schib->scsw; - if (stctl & SCSW_STCTL_STATUS_PEND) { - if (schib->scsw.cstat & (SCSW_CSTAT_DATA_CHECK | - SCSW_CSTAT_CHN_CTRL_CHK | - SCSW_CSTAT_INTF_CTRL_CHK)) { - irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF; - irb.esw[0] = 0x04804000; - } else { - irb.esw[0] = 0x00800000; - } - /* If a unit check is pending, copy sense data. 
*/ - if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && - (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { - int i; - irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL; - /* Attention: sense_data is already BE! */ - memcpy(irb.ecw, sch->sense_data, sizeof(sch->sense_data)); - for (i = 0; i < ARRAY_SIZE(irb.ecw); i++) { - irb.ecw[i] = be32_to_cpu(irb.ecw[i]); - } - irb.esw[1] = 0x01000000 | (sizeof(sch->sense_data) << 8); - } + /* Build other IRB data, if necessary */ + if (sch->irb_cb) { + sch->irb_cb(sch, &irb); } + /* Store the irb to the guest. */ p = schib->pmcw; copy_irb_to_guest(target_irb, &irb, &p, irb_len); diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c index ed92ce510d9..6fa47b889ca 100644 --- a/hw/s390x/event-facility.c +++ b/hw/s390x/event-facility.c @@ -427,8 +427,8 @@ static void init_event_facility(Object *obj) sclp_event_set_allow_all_mask_sizes); /* Spawn a new bus for SCLP events */ - qbus_create_inplace(&event_facility->sbus, sizeof(event_facility->sbus), - TYPE_SCLP_EVENTS_BUS, sdev, NULL); + qbus_init(&event_facility->sbus, sizeof(event_facility->sbus), + TYPE_SCLP_EVENTS_BUS, sdev, NULL); object_initialize_child(obj, TYPE_SCLP_QUIESCE, &event_facility->quiesce, diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index ff6b55e8167..7ddca0127fc 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -18,9 +18,7 @@ #include "qapi/error.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" -#include "sysemu/sysemu.h" #include "sysemu/tcg.h" -#include "cpu.h" #include "elf.h" #include "hw/loader.h" #include "hw/qdev-properties.h" @@ -40,6 +38,7 @@ #define KERN_IMAGE_START 0x010000UL #define LINUX_MAGIC_ADDR 0x010008UL #define KERN_PARM_AREA 0x010480UL +#define KERN_PARM_AREA_SIZE 0x000380UL #define INITRD_START 0x800000UL #define INITRD_PARM_START 0x010408UL #define PARMFILE_START 0x001000UL @@ -192,10 +191,19 @@ static void s390_ipl_realize(DeviceState *dev, Error **errp) * loader) and it won't work. For this case we force it to 0x10000, too. 
*/ if (pentry == KERN_IMAGE_START || pentry == 0x800) { - char *parm_area = rom_ptr(KERN_PARM_AREA, strlen(ipl->cmdline) + 1); + size_t cmdline_size = strlen(ipl->cmdline) + 1; + char *parm_area = rom_ptr(KERN_PARM_AREA, cmdline_size); + ipl->start_addr = KERN_IMAGE_START; /* Overwrite parameters in the kernel image, which are "rom" */ if (parm_area) { + if (cmdline_size > KERN_PARM_AREA_SIZE) { + error_setg(errp, + "kernel command line exceeds maximum size: %zu > %lu", + cmdline_size, KERN_PARM_AREA_SIZE); + return; + } + strcpy(parm_area, ipl->cmdline); } } else { @@ -713,7 +721,6 @@ int s390_ipl_pv_unpack(void) void s390_ipl_prepare_cpu(S390CPU *cpu) { S390IPLState *ipl = get_ipl_device(); - Error *err = NULL; cpu->env.psw.addr = ipl->start_addr; cpu->env.psw.mask = IPL_PSW_MASK; @@ -725,10 +732,7 @@ void s390_ipl_prepare_cpu(S390CPU *cpu) } } if (ipl->netboot) { - if (load_netboot_image(&err) < 0) { - error_report_err(err); - exit(1); - } + load_netboot_image(&error_fatal); ipl->qipl.netboot_start_addr = cpu_to_be64(ipl->start_addr); } s390_ipl_set_boot_menu(ipl); diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build index 327e9c93afa..28484256ec0 100644 --- a/hw/s390x/meson.build +++ b/hw/s390x/meson.build @@ -16,7 +16,6 @@ s390x_ss.add(files( 'sclp.c', 'sclpcpu.c', 'sclpquiesce.c', - 'tod-qemu.c', 'tod.c', )) s390x_ss.add(when: 'CONFIG_KVM', if_true: files( @@ -25,6 +24,9 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files( 's390-stattrib-kvm.c', 'pv.c', )) +s390x_ss.add(when: 'CONFIG_TCG', if_true: files( + 'tod-tcg.c', +)) s390x_ss.add(when: 'CONFIG_S390_CCW_VIRTIO', if_true: files('s390-virtio-ccw.c')) s390x_ss.add(when: 'CONFIG_TERMINAL3270', if_true: files('3270-ccw.c')) s390x_ss.add(when: 'CONFIG_VFIO', if_true: files('s390-pci-vfio.c')) diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c index 93eccfc05d5..401b63d6cb6 100644 --- a/hw/s390x/pv.c +++ b/hw/s390x/pv.c @@ -13,7 +13,6 @@ #include -#include "cpu.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/kvm.h" diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c index b497571863f..2fc8bb9c232 100644 --- a/hw/s390x/s390-ccw.c +++ b/hw/s390x/s390-ccw.c @@ -15,7 +15,6 @@ #include #include "qapi/error.h" #include "qemu/module.h" -#include "hw/sysbus.h" #include "hw/s390x/css.h" #include "hw/s390x/css-bridge.h" #include "hw/s390x/s390-ccw.h" @@ -125,6 +124,7 @@ static void s390_ccw_realize(S390CCWDevice *cdev, char *sysfsdev, Error **errp) } sch->driver_data = cdev; sch->do_subchannel_work = do_subchannel_work_passthrough; + sch->irb_cb = build_irb_passthrough; ccw_dev->sch = sch; ret = css_sch_build_schib(sch, &cdev->hostid); @@ -177,10 +177,8 @@ static void s390_ccw_instance_init(Object *obj) static void s390_ccw_class_init(ObjectClass *klass, void *data) { - DeviceClass *dc = DEVICE_CLASS(klass); S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); - dc->bus_type = TYPE_VIRTUAL_CSS_BUS; cdc->realize = s390_ccw_realize; cdc->unrealize = s390_ccw_unrealize; } diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index dd138dae94a..1b51a728385 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -14,7 +14,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qapi/visitor.h" -#include "cpu.h" #include "hw/s390x/s390-pci-bus.h" #include "hw/s390x/s390-pci-inst.h" #include "hw/s390x/s390-pci-vfio.h" @@ -331,7 +330,7 @@ static unsigned int calc_sx(dma_addr_t ptr) static unsigned int calc_px(dma_addr_t ptr) { - return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK; + return 
((unsigned long) ptr >> TARGET_PAGE_BITS) & ZPCI_PT_MASK; } static uint64_t get_rt_sto(uint64_t entry) @@ -507,7 +506,7 @@ uint16_t s390_guest_io_table_walk(uint64_t g_iota, hwaddr addr, int8_t ett = 1; uint16_t error = 0; - entry->iova = addr & PAGE_MASK; + entry->iova = addr & TARGET_PAGE_MASK; entry->translated_addr = 0; entry->perm = IOMMU_RW; @@ -527,7 +526,7 @@ static IOMMUTLBEntry s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr, { S390PCIIOMMU *iommu = container_of(mr, S390PCIIOMMU, iommu_mr); S390IOTLBEntry *entry; - uint64_t iova = addr & PAGE_MASK; + uint64_t iova = addr & TARGET_PAGE_MASK; uint16_t error = 0; IOMMUTLBEntry ret = { .target_as = &address_space_memory, @@ -563,7 +562,7 @@ static IOMMUTLBEntry s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr, ret.perm = entry->perm; } else { ret.iova = iova; - ret.addr_mask = ~PAGE_MASK; + ret.addr_mask = ~TARGET_PAGE_MASK; ret.perm = IOMMU_NONE; } @@ -814,7 +813,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp) qbus_set_hotplug_handler(bus, OBJECT(dev)); phb->bus = b; - s->bus = S390_PCI_BUS(qbus_create(TYPE_S390_PCI_BUS, dev, NULL)); + s->bus = S390_PCI_BUS(qbus_new(TYPE_S390_PCI_BUS, dev, NULL)); qbus_set_hotplug_handler(BUS(s->bus), OBJECT(dev)); s->iommu_table = g_hash_table_new_full(g_int64_hash, g_int64_equal, @@ -869,7 +868,7 @@ static int s390_pci_msix_init(S390PCIBusDevice *pbdev) name = g_strdup_printf("msix-s390-%04x", pbdev->uid); memory_region_init_io(&pbdev->msix_notify_mr, OBJECT(pbdev), - &s390_msi_ctrl_ops, pbdev, name, PAGE_SIZE); + &s390_msi_ctrl_ops, pbdev, name, TARGET_PAGE_SIZE); memory_region_add_subregion(&pbdev->iommu->mr, pbdev->pci_group->zpci_group.msia, &pbdev->msix_notify_mr); @@ -1164,8 +1163,7 @@ static void s390_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, } /* Assign numbers to all child bridges. The last is the highest number. */ - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - s390_pci_enumerate_bridge, s); + pci_for_each_device_under_bus(sec_bus, s390_pci_enumerate_bridge, s); pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, s->bus_no, 1); } @@ -1194,7 +1192,7 @@ static void s390_pcihost_reset(DeviceState *dev) * on every system reset, we also have to reassign numbers. 
*/ s->bus_no = 0; - pci_for_each_device(bus, pci_bus_num(bus), s390_pci_enumerate_bridge, s); + pci_for_each_device_under_bus(bus, s390_pci_enumerate_bridge, s); } static void s390_pcihost_class_init(ObjectClass *klass, void *data) diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c index 4b8326afa4f..1c8ad91175b 100644 --- a/hw/s390x/s390-pci-inst.c +++ b/hw/s390x/s390-pci-inst.c @@ -12,7 +12,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "exec/memop.h" #include "exec/memory-internal.h" #include "qemu/error-report.h" @@ -614,7 +613,7 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, .iova = entry->iova, .translated_addr = entry->translated_addr, .perm = entry->perm, - .addr_mask = ~PAGE_MASK, + .addr_mask = ~TARGET_PAGE_MASK, }, }; @@ -641,7 +640,7 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, cache = g_new(S390IOTLBEntry, 1); cache->iova = entry->iova; cache->translated_addr = entry->translated_addr; - cache->len = PAGE_SIZE; + cache->len = TARGET_PAGE_SIZE; cache->perm = entry->perm; g_hash_table_replace(iommu->iotlb, &cache->iova, cache); dec_dma_avail(iommu); @@ -726,8 +725,8 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) while (entry.iova < start && entry.iova < end && (dma_avail > 0 || entry.perm == IOMMU_NONE)) { dma_avail = s390_pci_update_iotlb(iommu, &entry); - entry.iova += PAGE_SIZE; - entry.translated_addr += PAGE_SIZE; + entry.iova += TARGET_PAGE_SIZE; + entry.translated_addr += TARGET_PAGE_SIZE; } } err: diff --git a/hw/s390x/s390-skeys-kvm.c b/hw/s390x/s390-skeys-kvm.c index 1c4d805ad8f..3ff9d94b802 100644 --- a/hw/s390x/s390-skeys-kvm.c +++ b/hw/s390x/s390-skeys-kvm.c @@ -15,7 +15,7 @@ #include "qemu/error-report.h" #include "qemu/module.h" -static int kvm_s390_skeys_enabled(S390SKeysState *ss) +static bool kvm_s390_skeys_are_enabled(S390SKeysState *ss) { S390SKeysClass *skeyclass = S390_SKEYS_GET_CLASS(ss); uint8_t single_key; @@ -57,7 +57,7 @@ static void kvm_s390_skeys_class_init(ObjectClass *oc, void *data) S390SKeysClass *skeyclass = S390_SKEYS_CLASS(oc); DeviceClass *dc = DEVICE_CLASS(oc); - skeyclass->skeys_enabled = kvm_s390_skeys_enabled; + skeyclass->skeys_are_enabled = kvm_s390_skeys_are_enabled; skeyclass->get_skeys = kvm_s390_skeys_get; skeyclass->set_skeys = kvm_s390_skeys_set; diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c index 9a8d60d1d9a..5024faf4113 100644 --- a/hw/s390x/s390-skeys.c +++ b/hw/s390x/s390-skeys.c @@ -17,6 +17,8 @@ #include "qapi/qapi-commands-misc-target.h" #include "qapi/qmp/qdict.h" #include "qemu/error-report.h" +#include "sysemu/memory_mapping.h" +#include "exec/address-spaces.h" #include "sysemu/kvm.h" #include "migration/qemu-file-types.h" #include "migration/register.h" @@ -80,11 +82,18 @@ void hmp_info_skeys(Monitor *mon, const QDict *qdict) int r; /* Quick check to see if guest is using storage keys*/ - if (!skeyclass->skeys_enabled(ss)) { + if (!skeyclass->skeys_are_enabled(ss)) { monitor_printf(mon, "Error: This guest is not using storage keys\n"); return; } + if (!address_space_access_valid(&address_space_memory, + addr & TARGET_PAGE_MASK, TARGET_PAGE_SIZE, + false, MEMTXATTRS_UNSPECIFIED)) { + monitor_printf(mon, "Error: The given address is not valid\n"); + return; + } + r = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key); if (r < 0) { monitor_printf(mon, "Error: %s\n", strerror(-r)); @@ -109,18 +118,17 @@ void qmp_dump_skeys(const char *filename, Error **errp) { S390SKeysState *ss = s390_get_skeys_device(); 
S390SKeysClass *skeyclass = S390_SKEYS_GET_CLASS(ss); - MachineState *ms = MACHINE(qdev_get_machine()); - const uint64_t total_count = ms->ram_size / TARGET_PAGE_SIZE; - uint64_t handled_count = 0, cur_count; + GuestPhysBlockList guest_phys_blocks; + GuestPhysBlock *block; + uint64_t pages, gfn; Error *lerr = NULL; - vaddr cur_gfn = 0; uint8_t *buf; int ret; int fd; FILE *f; /* Quick check to see if guest is using storage keys*/ - if (!skeyclass->skeys_enabled(ss)) { + if (!skeyclass->skeys_are_enabled(ss)) { error_setg(errp, "This guest is not using storage keys - " "nothing to dump"); return; @@ -144,53 +152,86 @@ void qmp_dump_skeys(const char *filename, Error **errp) goto out; } - /* we'll only dump initial memory for now */ - while (handled_count < total_count) { - /* Calculate how many keys to ask for & handle overflow case */ - cur_count = MIN(total_count - handled_count, S390_SKEYS_BUFFER_SIZE); + assert(qemu_mutex_iothread_locked()); + guest_phys_blocks_init(&guest_phys_blocks); + guest_phys_blocks_append(&guest_phys_blocks); - ret = skeyclass->get_skeys(ss, cur_gfn, cur_count, buf); - if (ret < 0) { - error_setg(errp, "get_keys error %d", ret); - goto out_free; - } + QTAILQ_FOREACH(block, &guest_phys_blocks.head, next) { + assert(QEMU_IS_ALIGNED(block->target_start, TARGET_PAGE_SIZE)); + assert(QEMU_IS_ALIGNED(block->target_end, TARGET_PAGE_SIZE)); - /* write keys to stream */ - write_keys(f, buf, cur_gfn, cur_count, &lerr); - if (lerr) { - goto out_free; - } + gfn = block->target_start / TARGET_PAGE_SIZE; + pages = (block->target_end - block->target_start) / TARGET_PAGE_SIZE; + + while (pages) { + const uint64_t cur_pages = MIN(pages, S390_SKEYS_BUFFER_SIZE); - cur_gfn += cur_count; - handled_count += cur_count; + ret = skeyclass->get_skeys(ss, gfn, cur_pages, buf); + if (ret < 0) { + error_setg_errno(errp, -ret, "get_keys error"); + goto out_free; + } + + /* write keys to stream */ + write_keys(f, buf, gfn, cur_pages, &lerr); + if (lerr) { + goto out_free; + } + + gfn += cur_pages; + pages -= cur_pages; + } } out_free: + guest_phys_blocks_free(&guest_phys_blocks); error_propagate(errp, lerr); g_free(buf); out: fclose(f); } -static void qemu_s390_skeys_init(Object *obj) +static bool qemu_s390_skeys_are_enabled(S390SKeysState *ss) { - QEMUS390SKeysState *skeys = QEMU_S390_SKEYS(obj); - MachineState *machine = MACHINE(qdev_get_machine()); + QEMUS390SKeysState *skeys = QEMU_S390_SKEYS(ss); - skeys->key_count = machine->ram_size / TARGET_PAGE_SIZE; - skeys->keydata = g_malloc0(skeys->key_count); + /* Lockless check is sufficient. */ + return !!skeys->keydata; } -static int qemu_s390_skeys_enabled(S390SKeysState *ss) +static bool qemu_s390_enable_skeys(S390SKeysState *ss) { - return 1; + QEMUS390SKeysState *skeys = QEMU_S390_SKEYS(ss); + static gsize initialized; + + if (likely(skeys->keydata)) { + return true; + } + + /* + * TODO: Modern Linux doesn't use storage keys unless running KVM guests + * that use storage keys. Therefore, we keep it simple for now. + * + * 1) We should initialize to "referenced+changed" for an initial + * over-indication. Let's avoid touching megabytes of data for now and + * assume that any sane user will issue a storage key instruction before + * actually relying on this data. + * 2) Relying on ram_size and allocating a big array is ugly. We should + * allocate and manage storage key data per RAMBlock or optimally using + * some sparse data structure. + * 3) We only ever have a single S390SKeysState, so relying on + * g_once_init_enter() is good enough. 
+ */ + if (g_once_init_enter(&initialized)) { + MachineState *machine = MACHINE(qdev_get_machine()); + + skeys->key_count = machine->ram_size / TARGET_PAGE_SIZE; + skeys->keydata = g_malloc0(skeys->key_count); + g_once_init_leave(&initialized, 1); + } + return false; } -/* - * TODO: for memory hotplug support qemu_s390_skeys_set and qemu_s390_skeys_get - * will have to make sure that the given gfn belongs to a memory region and not - * a memory hole. - */ static int qemu_s390_skeys_set(S390SKeysState *ss, uint64_t start_gfn, uint64_t count, uint8_t *keys) { @@ -198,9 +239,10 @@ static int qemu_s390_skeys_set(S390SKeysState *ss, uint64_t start_gfn, int i; /* Check for uint64 overflow and access beyond end of key data */ - if (start_gfn + count > skeydev->key_count || start_gfn + count < count) { - error_report("Error: Setting storage keys for page beyond the end " - "of memory: gfn=%" PRIx64 " count=%" PRId64, + if (unlikely(!skeydev->keydata || start_gfn + count > skeydev->key_count || + start_gfn + count < count)) { + error_report("Error: Setting storage keys for pages with unallocated " + "storage key memory: gfn=%" PRIx64 " count=%" PRId64, start_gfn, count); return -EINVAL; } @@ -218,9 +260,10 @@ static int qemu_s390_skeys_get(S390SKeysState *ss, uint64_t start_gfn, int i; /* Check for uint64 overflow and access beyond end of key data */ - if (start_gfn + count > skeydev->key_count || start_gfn + count < count) { - error_report("Error: Getting storage keys for page beyond the end " - "of memory: gfn=%" PRIx64 " count=%" PRId64, + if (unlikely(!skeydev->keydata || start_gfn + count > skeydev->key_count || + start_gfn + count < count)) { + error_report("Error: Getting storage keys for pages with unallocated " + "storage key memory: gfn=%" PRIx64 " count=%" PRId64, start_gfn, count); return -EINVAL; } @@ -236,7 +279,8 @@ static void qemu_s390_skeys_class_init(ObjectClass *oc, void *data) S390SKeysClass *skeyclass = S390_SKEYS_CLASS(oc); DeviceClass *dc = DEVICE_CLASS(oc); - skeyclass->skeys_enabled = qemu_s390_skeys_enabled; + skeyclass->skeys_are_enabled = qemu_s390_skeys_are_enabled; + skeyclass->enable_skeys = qemu_s390_enable_skeys; skeyclass->get_skeys = qemu_s390_skeys_get; skeyclass->set_skeys = qemu_s390_skeys_set; @@ -247,7 +291,6 @@ static void qemu_s390_skeys_class_init(ObjectClass *oc, void *data) static const TypeInfo qemu_s390_skeys_info = { .name = TYPE_QEMU_S390_SKEYS, .parent = TYPE_S390_SKEYS, - .instance_init = qemu_s390_skeys_init, .instance_size = sizeof(QEMUS390SKeysState), .class_init = qemu_s390_skeys_class_init, .class_size = sizeof(S390SKeysClass), @@ -257,14 +300,13 @@ static void s390_storage_keys_save(QEMUFile *f, void *opaque) { S390SKeysState *ss = S390_SKEYS(opaque); S390SKeysClass *skeyclass = S390_SKEYS_GET_CLASS(ss); - MachineState *ms = MACHINE(qdev_get_machine()); - uint64_t pages_left = ms->ram_size / TARGET_PAGE_SIZE; - uint64_t read_count, eos = S390_SKEYS_SAVE_FLAG_EOS; - vaddr cur_gfn = 0; + GuestPhysBlockList guest_phys_blocks; + GuestPhysBlock *block; + uint64_t pages, gfn; int error = 0; uint8_t *buf; - if (!skeyclass->skeys_enabled(ss)) { + if (!skeyclass->skeys_are_enabled(ss)) { goto end_stream; } @@ -274,36 +316,52 @@ static void s390_storage_keys_save(QEMUFile *f, void *opaque) goto end_stream; } - /* We only support initial memory. Standby memory is not handled yet. 
*/ - qemu_put_be64(f, (cur_gfn * TARGET_PAGE_SIZE) | S390_SKEYS_SAVE_FLAG_SKEYS); - qemu_put_be64(f, pages_left); - - while (pages_left) { - read_count = MIN(pages_left, S390_SKEYS_BUFFER_SIZE); - - if (!error) { - error = skeyclass->get_skeys(ss, cur_gfn, read_count, buf); - if (error) { - /* - * If error: we want to fill the stream with valid data instead - * of stopping early so we pad the stream with 0x00 values and - * use S390_SKEYS_SAVE_FLAG_ERROR to indicate failure to the - * reading side. - */ - error_report("S390_GET_KEYS error %d", error); - memset(buf, 0, S390_SKEYS_BUFFER_SIZE); - eos = S390_SKEYS_SAVE_FLAG_ERROR; + guest_phys_blocks_init(&guest_phys_blocks); + guest_phys_blocks_append(&guest_phys_blocks); + + /* Send each contiguous physical memory range separately. */ + QTAILQ_FOREACH(block, &guest_phys_blocks.head, next) { + assert(QEMU_IS_ALIGNED(block->target_start, TARGET_PAGE_SIZE)); + assert(QEMU_IS_ALIGNED(block->target_end, TARGET_PAGE_SIZE)); + + gfn = block->target_start / TARGET_PAGE_SIZE; + pages = (block->target_end - block->target_start) / TARGET_PAGE_SIZE; + qemu_put_be64(f, block->target_start | S390_SKEYS_SAVE_FLAG_SKEYS); + qemu_put_be64(f, pages); + + while (pages) { + const uint64_t cur_pages = MIN(pages, S390_SKEYS_BUFFER_SIZE); + + if (!error) { + error = skeyclass->get_skeys(ss, gfn, cur_pages, buf); + if (error) { + /* + * Create a valid stream with all 0x00 and indicate + * S390_SKEYS_SAVE_FLAG_ERROR to the destination. + */ + error_report("S390_GET_KEYS error %d", error); + memset(buf, 0, S390_SKEYS_BUFFER_SIZE); + } } + + qemu_put_buffer(f, buf, cur_pages); + gfn += cur_pages; + pages -= cur_pages; } - qemu_put_buffer(f, buf, read_count); - cur_gfn += read_count; - pages_left -= read_count; + if (error) { + break; + } } + guest_phys_blocks_free(&guest_phys_blocks); g_free(buf); end_stream: - qemu_put_be64(f, eos); + if (error) { + qemu_put_be64(f, S390_SKEYS_SAVE_FLAG_ERROR); + } else { + qemu_put_be64(f, S390_SKEYS_SAVE_FLAG_EOS); + } } static int s390_storage_keys_load(QEMUFile *f, void *opaque, int version_id) @@ -312,6 +370,14 @@ static int s390_storage_keys_load(QEMUFile *f, void *opaque, int version_id) S390SKeysClass *skeyclass = S390_SKEYS_GET_CLASS(ss); int ret = 0; + /* + * Make sure to lazy-enable if required to be done explicitly. No need to + * flush any TLB as the VM is not running yet. 
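The reworked s390_storage_keys_save() above emits, per physical block, a header (the start address tagged with S390_SKEYS_SAVE_FLAG_SKEYS followed by the page count) and then the key bytes in buffer-sized chunks, and it closes the whole stream with a single EOS or ERROR marker. A generic sketch of that framing, collapsed to one range and using plain stdio in place of QEMUFile; all names and flag values here are hypothetical:

#include <stdint.h>
#include <stdio.h>

#define CHUNK_SIZE  512u
#define FLAG_DATA   0x1u    /* stand-in for the per-range "keys follow" tag */
#define FLAG_EOS    0x2u    /* stand-in for the end-of-stream marker        */

static void put_u64(FILE *f, uint64_t v)
{
    fwrite(&v, sizeof(v), 1, f);
}

/* Write one range as: tagged start, length, payload in bounded chunks,
 * then terminate the stream. A full implementation would loop over all
 * ranges before emitting the terminator. */
static void save_range(FILE *f, uint64_t start, const uint8_t *data,
                       uint64_t len)
{
    put_u64(f, start | FLAG_DATA);
    put_u64(f, len);

    while (len) {
        uint64_t cur = len < CHUNK_SIZE ? len : CHUNK_SIZE;

        fwrite(data, 1, cur, f);
        data += cur;
        len -= cur;
    }
    put_u64(f, FLAG_EOS);
}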
+ */ + if (skeyclass->enable_skeys) { + skeyclass->enable_skeys(ss); + } + while (!ret) { ram_addr_t addr; int flags; diff --git a/hw/s390x/s390-stattrib-kvm.c b/hw/s390x/s390-stattrib-kvm.c index f89d8d9d169..24cd01382e2 100644 --- a/hw/s390x/s390-stattrib-kvm.c +++ b/hw/s390x/s390-stattrib-kvm.c @@ -16,8 +16,7 @@ #include "qemu/error-report.h" #include "sysemu/kvm.h" #include "exec/ram_addr.h" -#include "cpu.h" -#include "kvm_s390x.h" +#include "kvm/kvm_s390x.h" Object *kvm_s390_stattrib_create(void) { diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c index 4441e1d331c..9eda1c3b2a2 100644 --- a/hw/s390x/s390-stattrib.c +++ b/hw/s390x/s390-stattrib.c @@ -11,7 +11,6 @@ #include "qemu/osdep.h" #include "qemu/units.h" -#include "cpu.h" #include "migration/qemu-file.h" #include "migration/register.h" #include "hw/s390x/storage-attributes.h" diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 2972b607f36..653587ea62f 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -13,11 +13,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" -#include "hw/boards.h" -#include "exec/address-spaces.h" #include "exec/ram_addr.h" -#include "hw/boards.h" #include "hw/s390x/s390-virtio-hcall.h" #include "hw/s390x/sclp.h" #include "hw/s390x/s390_flic.h" @@ -795,14 +791,47 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +static void ccw_machine_6_2_instance_options(MachineState *machine) +{ +} + +static void ccw_machine_6_2_class_options(MachineClass *mc) +{ +} +DEFINE_CCW_MACHINE(6_2, "6.2", true); + +static void ccw_machine_6_1_instance_options(MachineState *machine) +{ + ccw_machine_6_2_instance_options(machine); + s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); + s390_cpudef_featoff_greater(16, 1, S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2); + s390_cpudef_featoff_greater(16, 1, S390_FEAT_BEAR_ENH); + s390_cpudef_featoff_greater(16, 1, S390_FEAT_RDP); + s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAI); +} + +static void ccw_machine_6_1_class_options(MachineClass *mc) +{ + ccw_machine_6_2_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len); + mc->smp_props.prefer_sockets = true; +} +DEFINE_CCW_MACHINE(6_1, "6.1", false); + static void ccw_machine_6_0_instance_options(MachineState *machine) { + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; + + ccw_machine_6_1_instance_options(machine); + s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); } static void ccw_machine_6_0_class_options(MachineClass *mc) { + ccw_machine_6_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len); } -DEFINE_CCW_MACHINE(6_0, "6.0", true); +DEFINE_CCW_MACHINE(6_0, "6.0", false); static void ccw_machine_5_2_instance_options(MachineState *machine) { diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c index 0cf22908267..89c30a8a91a 100644 --- a/hw/s390x/sclp.c +++ b/hw/s390x/sclp.c @@ -15,8 +15,6 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "qapi/error.h" -#include "cpu.h" -#include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/s390x/sclp.h" #include "hw/s390x/event-facility.h" @@ -53,7 +51,7 @@ static bool sccb_verify_boundary(uint64_t sccb_addr, uint16_t sccb_len, uint32_t code) { uint64_t sccb_max_addr = sccb_addr + sccb_len - 1; - uint64_t sccb_boundary = (sccb_addr & PAGE_MASK) + PAGE_SIZE; + uint64_t sccb_boundary = (sccb_addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; switch (code & SCLP_CMD_CODE_MASK) { case 
SCLP_CMDW_READ_SCP_INFO: diff --git a/hw/s390x/sclpcpu.c b/hw/s390x/sclpcpu.c index 62806d32737..f2b1a4b0371 100644 --- a/hw/s390x/sclpcpu.c +++ b/hw/s390x/sclpcpu.c @@ -17,7 +17,6 @@ #include "hw/s390x/sclp.h" #include "qemu/module.h" #include "hw/s390x/event-facility.h" -#include "cpu.h" #include "sysemu/cpus.h" typedef struct ConfigMgtData { diff --git a/hw/s390x/tod-kvm.c b/hw/s390x/tod-kvm.c index 0b944774861..ec855811aeb 100644 --- a/hw/s390x/tod-kvm.c +++ b/hw/s390x/tod-kvm.c @@ -13,7 +13,7 @@ #include "qemu/module.h" #include "sysemu/runstate.h" #include "hw/s390x/tod.h" -#include "kvm_s390x.h" +#include "kvm/kvm_s390x.h" static void kvm_s390_get_tod_raw(S390TOD *tod, Error **errp) { diff --git a/hw/s390x/tod-qemu.c b/hw/s390x/tod-qemu.c deleted file mode 100644 index e91b9590f58..00000000000 --- a/hw/s390x/tod-qemu.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * TOD (Time Of Day) clock - QEMU implementation - * - * Copyright 2018 Red Hat, Inc. - * Author(s): David Hildenbrand - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "qapi/error.h" -#include "hw/s390x/tod.h" -#include "qemu/timer.h" -#include "qemu/cutils.h" -#include "qemu/module.h" -#include "cpu.h" -#include "tcg_s390x.h" - -static void qemu_s390_tod_get(const S390TODState *td, S390TOD *tod, - Error **errp) -{ - *tod = td->base; - - tod->low += time2tod(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); - if (tod->low < td->base.low) { - tod->high++; - } -} - -static void qemu_s390_tod_set(S390TODState *td, const S390TOD *tod, - Error **errp) -{ - CPUState *cpu; - - td->base = *tod; - - td->base.low -= time2tod(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); - if (td->base.low > tod->low) { - td->base.high--; - } - - /* - * The TOD has been changed and we have to recalculate the CKC values - * for all CPUs. We do this asynchronously, as "SET CLOCK should be - * issued only while all other activity on all CPUs .. has been - * suspended". - */ - CPU_FOREACH(cpu) { - async_run_on_cpu(cpu, tcg_s390_tod_updated, RUN_ON_CPU_NULL); - } -} - -static void qemu_s390_tod_class_init(ObjectClass *oc, void *data) -{ - S390TODClass *tdc = S390_TOD_CLASS(oc); - - tdc->get = qemu_s390_tod_get; - tdc->set = qemu_s390_tod_set; -} - -static void qemu_s390_tod_init(Object *obj) -{ - S390TODState *td = S390_TOD(obj); - struct tm tm; - - qemu_get_timedate(&tm, 0); - td->base.high = 0; - td->base.low = TOD_UNIX_EPOCH + (time2tod(mktimegm(&tm)) * 1000000000ULL); - if (td->base.low < TOD_UNIX_EPOCH) { - td->base.high += 1; - } -} - -static TypeInfo qemu_s390_tod_info = { - .name = TYPE_QEMU_S390_TOD, - .parent = TYPE_S390_TOD, - .instance_size = sizeof(S390TODState), - .instance_init = qemu_s390_tod_init, - .class_init = qemu_s390_tod_class_init, - .class_size = sizeof(S390TODClass), -}; - -static void register_types(void) -{ - type_register_static(&qemu_s390_tod_info); -} -type_init(register_types); diff --git a/hw/s390x/tod-tcg.c b/hw/s390x/tod-tcg.c new file mode 100644 index 00000000000..9bb94ff72bc --- /dev/null +++ b/hw/s390x/tod-tcg.c @@ -0,0 +1,89 @@ +/* + * TOD (Time Of Day) clock - TCG implementation + * + * Copyright 2018 Red Hat, Inc. + * Author(s): David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "hw/s390x/tod.h" +#include "qemu/timer.h" +#include "qemu/cutils.h" +#include "qemu/module.h" +#include "cpu.h" +#include "tcg/tcg_s390x.h" + +static void qemu_s390_tod_get(const S390TODState *td, S390TOD *tod, + Error **errp) +{ + *tod = td->base; + + tod->low += time2tod(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + if (tod->low < td->base.low) { + tod->high++; + } +} + +static void qemu_s390_tod_set(S390TODState *td, const S390TOD *tod, + Error **errp) +{ + CPUState *cpu; + + td->base = *tod; + + td->base.low -= time2tod(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + if (td->base.low > tod->low) { + td->base.high--; + } + + /* + * The TOD has been changed and we have to recalculate the CKC values + * for all CPUs. We do this asynchronously, as "SET CLOCK should be + * issued only while all other activity on all CPUs .. has been + * suspended". + */ + CPU_FOREACH(cpu) { + async_run_on_cpu(cpu, tcg_s390_tod_updated, RUN_ON_CPU_NULL); + } +} + +static void qemu_s390_tod_class_init(ObjectClass *oc, void *data) +{ + S390TODClass *tdc = S390_TOD_CLASS(oc); + + tdc->get = qemu_s390_tod_get; + tdc->set = qemu_s390_tod_set; +} + +static void qemu_s390_tod_init(Object *obj) +{ + S390TODState *td = S390_TOD(obj); + struct tm tm; + + qemu_get_timedate(&tm, 0); + td->base.high = 0; + td->base.low = TOD_UNIX_EPOCH + (time2tod(mktimegm(&tm)) * 1000000000ULL); + if (td->base.low < TOD_UNIX_EPOCH) { + td->base.high += 1; + } +} + +static TypeInfo qemu_s390_tod_info = { + .name = TYPE_QEMU_S390_TOD, + .parent = TYPE_S390_TOD, + .instance_size = sizeof(S390TODState), + .instance_init = qemu_s390_tod_init, + .class_init = qemu_s390_tod_class_init, + .class_size = sizeof(S390TODClass), +}; + +static void register_types(void) +{ + type_register_static(&qemu_s390_tod_info); +} +type_init(register_types); diff --git a/hw/s390x/tod.c b/hw/s390x/tod.c index 3c2979175ef..fd5a36bf24e 100644 --- a/hw/s390x/tod.c +++ b/hw/s390x/tod.c @@ -14,6 +14,8 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "sysemu/kvm.h" +#include "sysemu/tcg.h" +#include "sysemu/qtest.h" #include "migration/qemu-file-types.h" #include "migration/register.h" @@ -23,8 +25,13 @@ void s390_init_tod(void) if (kvm_enabled()) { obj = object_new(TYPE_KVM_S390_TOD); - } else { + } else if (tcg_enabled()) { obj = object_new(TYPE_QEMU_S390_TOD); + } else if (qtest_enabled()) { + return; + } else { + error_report("current accelerator not handled in s390_init_tod!"); + abort(); } object_property_add_child(qdev_get_machine(), TYPE_S390_TOD, obj); object_unref(obj); diff --git a/hw/s390x/trace-events b/hw/s390x/trace-events index 8156693749d..8b9213eab90 100644 --- a/hw/s390x/trace-events +++ b/hw/s390x/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
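In the TOD code moved to tod-tcg.c above, qemu_s390_tod_get() adds the elapsed virtual-clock time to the 128-bit TOD value and detects a carry out of the low word by checking whether the addition wrapped (the set path does the mirror-image borrow). The carry handling in isolation, with a hypothetical struct in place of S390TOD:

#include <stdint.h>

typedef struct U128 {
    uint64_t high;
    uint64_t low;
} U128;

/* Add a 64-bit delta to a 128-bit value. If the low word wrapped around,
 * the new value is smaller than the old one, so propagate a carry. */
static void u128_add_u64(U128 *v, uint64_t delta)
{
    uint64_t old_low = v->low;

    v->low += delta;
    if (v->low < old_low) {
        v->high++;
    }
}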
# css.c css_enable_facility(const char *facility) "CSS: enable %s" diff --git a/hw/s390x/virtio-ccw-gpu.c b/hw/s390x/virtio-ccw-gpu.c index 75a9e4bb390..5868a2a0709 100644 --- a/hw/s390x/virtio-ccw-gpu.c +++ b/hw/s390x/virtio-ccw-gpu.c @@ -59,6 +59,7 @@ static const TypeInfo virtio_ccw_gpu = { .instance_init = virtio_ccw_gpu_instance_init, .class_init = virtio_ccw_gpu_class_init, }; +module_obj(TYPE_VIRTIO_GPU_CCW); static void virtio_ccw_gpu_register(void) { @@ -68,3 +69,5 @@ static void virtio_ccw_gpu_register(void) } type_init(virtio_ccw_gpu_register) + +module_arch("s390x"); diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index 8195f3546e4..c845a92c3a8 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -17,7 +17,6 @@ #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" #include "hw/virtio/virtio-net.h" -#include "hw/sysbus.h" #include "qemu/bitops.h" #include "qemu/error-report.h" #include "qemu/module.h" @@ -32,6 +31,7 @@ #include "trace.h" #include "hw/s390x/css-bridge.h" #include "hw/s390x/s390-virtio-ccw.h" +#include "sysemu/replay.h" #define NR_CLASSIC_INDICATOR_BITS 64 @@ -754,6 +754,7 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) sch->id.reserved = 0xff; sch->id.cu_type = VIRTIO_CCW_CU_TYPE; sch->do_subchannel_work = do_subchannel_work_virtual; + sch->irb_cb = build_irb_virtual; ccw_dev->sch = sch; dev->indicators = NULL; dev->revision = -1; @@ -770,6 +771,11 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD; } + /* fd-based ioevents can't be synchronized in record/replay */ + if (replay_mode != REPLAY_MODE_NONE) { + dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD; + } + if (k->realize) { k->realize(dev, &err); if (err) { @@ -1235,7 +1241,6 @@ static void virtio_ccw_device_class_init(ObjectClass *klass, void *data) k->unplug = virtio_ccw_busdev_unplug; dc->realize = virtio_ccw_busdev_realize; dc->unrealize = virtio_ccw_busdev_unrealize; - dc->bus_type = TYPE_VIRTUAL_CSS_BUS; device_class_set_parent_reset(dc, virtio_ccw_reset, &vdc->parent_reset); } @@ -1256,8 +1261,7 @@ static void virtio_ccw_bus_new(VirtioBusState *bus, size_t bus_size, DeviceState *qdev = DEVICE(dev); char virtio_bus_name[] = "virtio-bus"; - qbus_create_inplace(bus, bus_size, TYPE_VIRTIO_CCW_BUS, - qdev, virtio_bus_name); + qbus_init(bus, bus_size, TYPE_VIRTIO_CCW_BUS, qdev, virtio_bus_name); } static void virtio_ccw_bus_class_init(ObjectClass *klass, void *data) diff --git a/hw/scsi/esp-pci.c b/hw/scsi/esp-pci.c index 9db10b1a487..dac054aeed4 100644 --- a/hw/scsi/esp-pci.c +++ b/hw/scsi/esp-pci.c @@ -388,7 +388,7 @@ static void esp_pci_scsi_realize(PCIDevice *dev, Error **errp) pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &pci->io); s->irq = pci_allocate_irq(dev); - scsi_bus_new(&s->bus, sizeof(s->bus), d, &esp_pci_scsi_info, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), d, &esp_pci_scsi_info); } static void esp_pci_scsi_exit(PCIDevice *d) diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c index b668acef82d..58d0edbd56d 100644 --- a/hw/scsi/esp.c +++ b/hw/scsi/esp.c @@ -204,16 +204,11 @@ static int esp_select(ESPState *s) s->ti_size = 0; fifo8_reset(&s->fifo); - if (s->current_req) { - /* Started a new command before the old one finished. Cancel it. 
*/ - scsi_req_cancel(s->current_req); - } - s->current_dev = scsi_device_find(&s->bus, 0, target, 0); if (!s->current_dev) { /* No such drive */ s->rregs[ESP_RSTAT] = 0; - s->rregs[ESP_RINTR] |= INTR_DC; + s->rregs[ESP_RINTR] = INTR_DC; s->rregs[ESP_RSEQ] = SEQ_0; esp_raise_irq(s); return -1; @@ -221,7 +216,7 @@ static int esp_select(ESPState *s) /* * Note that we deliberately don't raise the IRQ here: this will be done - * either in do_busid_cmd() for DATA OUT transfers or by the deferred + * either in do_command_phase() for DATA OUT transfers or by the deferred * IRQ mechanism in esp_transfer_data() for DATA IN transfers */ s->rregs[ESP_RINTR] |= INTR_FC; @@ -235,6 +230,11 @@ static uint32_t get_cmd(ESPState *s, uint32_t maxlen) uint32_t dmalen, n; int target; + if (s->current_req) { + /* Started a new command before the old one finished. Cancel it. */ + scsi_req_cancel(s->current_req); + } + target = s->wregs[ESP_WBUSID] & BUSID_DID; if (s->dma) { dmalen = MIN(esp_get_tc(s), maxlen); @@ -260,9 +260,6 @@ static uint32_t get_cmd(ESPState *s, uint32_t maxlen) return 0; } n = esp_fifo_pop_buf(&s->fifo, buf, dmalen); - if (n >= 3) { - buf[0] = buf[2] >> 5; - } n = MIN(fifo8_num_free(&s->cmdfifo), n); fifo8_push_all(&s->cmdfifo, buf, n); } @@ -275,24 +272,22 @@ static uint32_t get_cmd(ESPState *s, uint32_t maxlen) return dmalen; } -static void do_busid_cmd(ESPState *s, uint8_t busid) +static void do_command_phase(ESPState *s) { uint32_t cmdlen; int32_t datalen; - int lun; SCSIDevice *current_lun; uint8_t buf[ESP_CMDFIFO_SZ]; - trace_esp_do_busid_cmd(busid); - lun = busid & 7; + trace_esp_do_command_phase(s->lun); cmdlen = fifo8_num_used(&s->cmdfifo); if (!cmdlen || !s->current_dev) { return; } esp_fifo_pop_buf(&s->cmdfifo, buf, cmdlen); - current_lun = scsi_device_find(&s->bus, 0, s->current_dev->id, lun); - s->current_req = scsi_req_new(current_lun, 0, lun, buf, s); + current_lun = scsi_device_find(&s->bus, 0, s->current_dev->id, s->lun); + s->current_req = scsi_req_new(current_lun, 0, s->lun, buf, s); datalen = scsi_req_enqueue(s->current_req); s->ti_size = datalen; fifo8_reset(&s->cmdfifo); @@ -319,28 +314,36 @@ static void do_busid_cmd(ESPState *s, uint8_t busid) } } -static void do_cmd(ESPState *s) +static void do_message_phase(ESPState *s) { - uint8_t busid = esp_fifo_pop(&s->cmdfifo); - int len; + if (s->cmdfifo_cdb_offset) { + uint8_t message = esp_fifo_pop(&s->cmdfifo); - s->cmdfifo_cdb_offset--; + trace_esp_do_identify(message); + s->lun = message & 7; + s->cmdfifo_cdb_offset--; + } /* Ignore extended messages for now */ if (s->cmdfifo_cdb_offset) { - len = MIN(s->cmdfifo_cdb_offset, fifo8_num_used(&s->cmdfifo)); + int len = MIN(s->cmdfifo_cdb_offset, fifo8_num_used(&s->cmdfifo)); esp_fifo_pop_buf(&s->cmdfifo, NULL, len); s->cmdfifo_cdb_offset = 0; } +} - do_busid_cmd(s, busid); +static void do_cmd(ESPState *s) +{ + do_message_phase(s); + assert(s->cmdfifo_cdb_offset == 0); + do_command_phase(s); } static void satn_pdma_cb(ESPState *s) { - s->do_cmd = 0; - if (!fifo8_is_empty(&s->cmdfifo)) { + if (!esp_get_tc(s) && !fifo8_is_empty(&s->cmdfifo)) { s->cmdfifo_cdb_offset = 1; + s->do_cmd = 0; do_cmd(s); } } @@ -369,13 +372,10 @@ static void handle_satn(ESPState *s) static void s_without_satn_pdma_cb(ESPState *s) { - uint32_t len; - - s->do_cmd = 0; - len = fifo8_num_used(&s->cmdfifo); - if (len) { + if (!esp_get_tc(s) && !fifo8_is_empty(&s->cmdfifo)) { s->cmdfifo_cdb_offset = 0; - do_busid_cmd(s, 0); + s->do_cmd = 0; + do_cmd(s); } } @@ -392,7 +392,7 @@ static void 
handle_s_without_atn(ESPState *s) if (cmdlen > 0) { s->cmdfifo_cdb_offset = 0; s->do_cmd = 0; - do_busid_cmd(s, 0); + do_cmd(s); } else if (cmdlen == 0) { s->do_cmd = 1; /* Target present, but no cmd yet - switch to command phase */ @@ -403,8 +403,7 @@ static void handle_s_without_atn(ESPState *s) static void satn_stop_pdma_cb(ESPState *s) { - s->do_cmd = 0; - if (!fifo8_is_empty(&s->cmdfifo)) { + if (!esp_get_tc(s) && !fifo8_is_empty(&s->cmdfifo)) { trace_esp_handle_satn_stop(fifo8_num_used(&s->cmdfifo)); s->do_cmd = 1; s->cmdfifo_cdb_offset = 1; @@ -481,7 +480,6 @@ static void esp_dma_done(ESPState *s) { s->rregs[ESP_RSTAT] |= STAT_TC; s->rregs[ESP_RINTR] |= INTR_BS; - s->rregs[ESP_RSEQ] = 0; s->rregs[ESP_RFLAGS] = 0; esp_set_tc(s, 0); esp_raise_irq(s); @@ -494,10 +492,32 @@ static void do_dma_pdma_cb(ESPState *s) uint32_t n; if (s->do_cmd) { + /* Ensure we have received complete command after SATN and stop */ + if (esp_get_tc(s) || fifo8_is_empty(&s->cmdfifo)) { + return; + } + s->ti_size = 0; - s->do_cmd = 0; - do_cmd(s); - esp_lower_drq(s); + if ((s->rregs[ESP_RSTAT] & 7) == STAT_CD) { + /* No command received */ + if (s->cmdfifo_cdb_offset == fifo8_num_used(&s->cmdfifo)) { + return; + } + + /* Command has been received */ + s->do_cmd = 0; + do_cmd(s); + } else { + /* + * Extra message out bytes received: update cmdfifo_cdb_offset + * and then switch to commmand phase + */ + s->cmdfifo_cdb_offset = fifo8_num_used(&s->cmdfifo); + s->rregs[ESP_RSTAT] = STAT_TC | STAT_CD; + s->rregs[ESP_RSEQ] = SEQ_CD; + s->rregs[ESP_RINTR] |= INTR_BS; + esp_raise_irq(s); + } return; } @@ -740,20 +760,17 @@ static void esp_do_nodma(ESPState *s) s->async_len -= len; s->ti_size += len; } else { - len = MIN(s->ti_size, s->async_len); - len = MIN(len, fifo8_num_free(&s->fifo)); - fifo8_push_all(&s->fifo, s->async_buf, len); - s->async_buf += len; - s->async_len -= len; - s->ti_size -= len; + if (fifo8_is_empty(&s->fifo)) { + fifo8_push(&s->fifo, s->async_buf[0]); + s->async_buf++; + s->async_len--; + s->ti_size--; + } } if (s->async_len == 0) { scsi_req_continue(s->current_req); - - if (to_device || s->ti_size == 0) { - return; - } + return; } s->rregs[ESP_RINTR] |= INTR_BS; @@ -763,20 +780,37 @@ static void esp_do_nodma(ESPState *s) void esp_command_complete(SCSIRequest *req, size_t resid) { ESPState *s = req->hba_private; + int to_device = ((s->rregs[ESP_RSTAT] & 7) == STAT_DO); trace_esp_command_complete(); - if (s->ti_size != 0) { - trace_esp_command_complete_unexpected(); + + /* + * Non-DMA transfers from the target will leave the last byte in + * the FIFO so don't reset ti_size in this case + */ + if (s->dma || to_device) { + if (s->ti_size != 0) { + trace_esp_command_complete_unexpected(); + } + s->ti_size = 0; } - s->ti_size = 0; + s->async_len = 0; if (req->status) { trace_esp_command_complete_fail(); } s->status = req->status; - s->rregs[ESP_RSTAT] = STAT_ST; - esp_dma_done(s); - esp_lower_drq(s); + + /* + * If the transfer is finished, switch to status phase. For non-DMA + * transfers from the target the last byte is still in the FIFO + */ + if (s->ti_size == 0) { + s->rregs[ESP_RSTAT] = STAT_TC | STAT_ST; + esp_dma_done(s); + esp_lower_drq(s); + } + if (s->current_req) { scsi_req_unref(s->current_req); s->current_req = NULL; @@ -804,16 +838,6 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len) s->rregs[ESP_RSTAT] |= STAT_TC; s->rregs[ESP_RINTR] |= INTR_BS; esp_raise_irq(s); - - /* - * If data is ready to transfer and the TI command has already - * been executed, start DMA immediately. 
Otherwise DMA will start - * when host sends the TI command - */ - if (s->ti_size && (s->rregs[ESP_CMD] == (CMD_TI | CMD_DMA))) { - esp_do_dma(s); - } - return; } if (s->ti_cmd == 0) { @@ -827,7 +851,7 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len) return; } - if (s->ti_cmd & CMD_DMA) { + if (s->ti_cmd == (CMD_TI | CMD_DMA)) { if (dmalen) { esp_do_dma(s); } else if (s->ti_size <= 0) { @@ -838,7 +862,7 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len) esp_dma_done(s); esp_lower_drq(s); } - } else { + } else if (s->ti_cmd == CMD_TI) { esp_do_nodma(s); } } @@ -870,6 +894,7 @@ void esp_hard_reset(ESPState *s) memset(s->wregs, 0, ESP_REGS); s->tchi_written = 0; s->ti_size = 0; + s->async_len = 0; fifo8_reset(&s->fifo); fifo8_reset(&s->cmdfifo); s->dma = 0; @@ -905,6 +930,17 @@ uint64_t esp_reg_read(ESPState *s, uint32_t saddr) qemu_log_mask(LOG_UNIMP, "esp: PIO data read not implemented\n"); s->rregs[ESP_FIFO] = 0; } else { + if ((s->rregs[ESP_RSTAT] & 0x7) == STAT_DI) { + if (s->ti_size) { + esp_do_nodma(s); + } else { + /* + * The last byte of a non-DMA transfer has been read out + * of the FIFO so switch to status phase + */ + s->rregs[ESP_RSTAT] = STAT_TC | STAT_ST; + } + } s->rregs[ESP_FIFO] = esp_fifo_pop(&s->fifo); } val = s->rregs[ESP_FIFO]; @@ -917,7 +953,15 @@ uint64_t esp_reg_read(ESPState *s, uint32_t saddr) val = s->rregs[ESP_RINTR]; s->rregs[ESP_RINTR] = 0; s->rregs[ESP_RSTAT] &= ~STAT_TC; - s->rregs[ESP_RSEQ] = SEQ_0; + /* + * According to the datasheet ESP_RSEQ should be cleared, but as the + * emulation currently defers information transfers to the next TI + * command leave it for now so that pedantic guests such as the old + * Linux 2.6 driver see the correct flags before the next SCSI phase + * transition. + * + * s->rregs[ESP_RSEQ] = SEQ_0; + */ esp_lower_irq(s); break; case ESP_TCHI: @@ -955,15 +999,18 @@ void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val) case ESP_FIFO: if (s->do_cmd) { esp_fifo_push(&s->cmdfifo, val); + + /* + * If any unexpected message out/command phase data is + * transferred using non-DMA, raise the interrupt + */ + if (s->rregs[ESP_CMD] == CMD_TI) { + s->rregs[ESP_RINTR] |= INTR_BS; + esp_raise_irq(s); + } } else { esp_fifo_push(&s->fifo, val); } - - /* Non-DMA transfers raise an interrupt after every byte */ - if (s->rregs[ESP_CMD] == CMD_TI) { - s->rregs[ESP_RINTR] |= INTR_FC | INTR_BS; - esp_raise_irq(s); - } break; case ESP_CMD: s->rregs[saddr] = val; @@ -1088,7 +1135,15 @@ static bool esp_is_version_5(void *opaque, int version_id) ESPState *s = ESP(opaque); version_id = MIN(version_id, s->mig_version_id); - return version_id == 5; + return version_id >= 5; +} + +static bool esp_is_version_6(void *opaque, int version_id) +{ + ESPState *s = ESP(opaque); + + version_id = MIN(version_id, s->mig_version_id); + return version_id >= 6; } int esp_pre_save(void *opaque) @@ -1128,7 +1183,7 @@ static int esp_post_load(void *opaque, int version_id) const VMStateDescription vmstate_esp = { .name = "esp", - .version_id = 5, + .version_id = 6, .minimum_version_id = 3, .post_load = esp_post_load, .fields = (VMStateField[]) { @@ -1157,6 +1212,7 @@ const VMStateDescription vmstate_esp = { VMSTATE_FIFO8_TEST(fifo, ESPState, esp_is_version_5), VMSTATE_FIFO8_TEST(cmdfifo, ESPState, esp_is_version_5), VMSTATE_UINT8_TEST(ti_cmd, ESPState, esp_is_version_5), + VMSTATE_UINT8_TEST(lun, ESPState, esp_is_version_6), VMSTATE_END_OF_LIST() }, }; @@ -1195,7 +1251,6 @@ static void sysbus_esp_pdma_write(void *opaque, hwaddr addr, { SysBusESPState 
*sysbus = opaque; ESPState *s = ESP(&sysbus->esp); - uint32_t dmalen; trace_esp_pdma_write(size); @@ -1208,10 +1263,7 @@ static void sysbus_esp_pdma_write(void *opaque, hwaddr addr, esp_pdma_write(s, val); break; } - dmalen = esp_get_tc(s); - if (dmalen == 0 || fifo8_num_free(&s->fifo) < 2) { - s->pdma_cb(s); - } + s->pdma_cb(s); } static uint64_t sysbus_esp_pdma_read(void *opaque, hwaddr addr, @@ -1297,7 +1349,7 @@ static void sysbus_esp_realize(DeviceState *dev, Error **errp) qdev_init_gpio_in(dev, sysbus_esp_gpio_demux, 2); - scsi_bus_new(&s->bus, sizeof(s->bus), dev, &esp_scsi_info, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), dev, &esp_scsi_info); } static void sysbus_esp_hard_reset(DeviceState *dev) diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index e2c19180a0d..85e907a7854 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -2309,7 +2309,7 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) pci_register_bar(dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->ram_io); QTAILQ_INIT(&s->queue); - scsi_bus_new(&s->bus, sizeof(s->bus), d, &lsi_scsi_info, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), d, &lsi_scsi_info); } static void lsi_scsi_exit(PCIDevice *dev) diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index 8f2389d2c6a..4ff51221d4c 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -2416,8 +2416,7 @@ static void megasas_scsi_realize(PCIDevice *dev, Error **errp) s->frames[i].state = s; } - scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(dev), - &megasas_scsi_info, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(dev), &megasas_scsi_info); } static Property megasas_properties_gen1[] = { diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c index db3219e7d20..f6c77655443 100644 --- a/hw/scsi/mptsas.c +++ b/hw/scsi/mptsas.c @@ -1315,7 +1315,7 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp) s->request_bh = qemu_bh_new(mptsas_fetch_requests, s); - scsi_bus_new(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info); } static void mptsas_scsi_uninit(PCIDevice *dev) diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 2a0a98cac91..77325d8cc7a 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -134,10 +134,10 @@ void scsi_device_unit_attention_reported(SCSIDevice *s) } /* Create a scsi bus, and attach devices to it. 
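Further up, vmstate_esp is bumped to version 6 and the newly migrated lun byte is guarded by a field-test callback, so streams produced by older QEMU binaries (which never sent the field) still load. A minimal sketch of that idiom for a hypothetical FooState device, omitting the mig_version_id clamping the real ESP device also applies; VMSTATE_UINT8_TEST and the callback signature are the regular migration API:

/* Sketch only: depends on QEMU's migration/vmstate.h. */
typedef struct FooState {
    uint8_t lun;
    /* ... */
} FooState;

static bool foo_is_version_6(void *opaque, int version_id)
{
    /* Send/accept the field only when the section version is >= 6. */
    return version_id >= 6;
}

static const VMStateDescription vmstate_foo = {
    .name = "foo",
    .version_id = 6,
    .minimum_version_id = 3,
    .fields = (VMStateField[]) {
        /* fields that existed before version 6 ... */
        VMSTATE_UINT8_TEST(lun, FooState, foo_is_version_6),
        VMSTATE_END_OF_LIST()
    },
};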
*/ -void scsi_bus_new(SCSIBus *bus, size_t bus_size, DeviceState *host, - const SCSIBusInfo *info, const char *bus_name) +void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host, + const SCSIBusInfo *info, const char *bus_name) { - qbus_create_inplace(bus, bus_size, TYPE_SCSI_BUS, host, bus_name); + qbus_init(bus, bus_size, TYPE_SCSI_BUS, host, bus_name); bus->busnr = next_scsi_bus++; bus->info = info; qbus_set_bus_hotplug_handler(BUS(bus)); diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 3580e7ee619..d4914178ea0 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -1087,6 +1087,7 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf, uint8_t *p = *p_outbuf + 2; int length; + assert(page < ARRAY_SIZE(mode_sense_valid)); if ((mode_sense_valid[page] & (1 << s->qdev.type)) == 0) { return -1; } @@ -1428,6 +1429,11 @@ static int scsi_disk_check_mode_select(SCSIDiskState *s, int page, return -1; } + /* MODE_PAGE_ALLS is only valid for MODE SENSE commands */ + if (page == MODE_PAGE_ALLS) { + return -1; + } + p = mode_current; memset(mode_current, 0, inlen + 2); len = mode_sense_page(s, page, &p, 0); @@ -1582,6 +1588,7 @@ static void scsi_disk_emulate_mode_select(SCSIDiskReq *r, uint8_t *inbuf) scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); } +/* sector_num and nb_sectors expected to be in qdev blocksize */ static inline bool check_lba_range(SCSIDiskState *s, uint64_t sector_num, uint32_t nb_sectors) { @@ -1614,11 +1621,12 @@ static void scsi_unmap_complete_noio(UnmapCBData *data, int ret) assert(r->req.aiocb == NULL); if (data->count > 0) { - r->sector = ldq_be_p(&data->inbuf[0]) - * (s->qdev.blocksize / BDRV_SECTOR_SIZE); - r->sector_count = (ldl_be_p(&data->inbuf[8]) & 0xffffffffULL) - * (s->qdev.blocksize / BDRV_SECTOR_SIZE); - if (!check_lba_range(s, r->sector, r->sector_count)) { + uint64_t sector_num = ldq_be_p(&data->inbuf[0]); + uint32_t nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL; + r->sector = sector_num * (s->qdev.blocksize / BDRV_SECTOR_SIZE); + r->sector_count = nb_sectors * (s->qdev.blocksize / BDRV_SECTOR_SIZE); + + if (!check_lba_range(s, sector_num, nb_sectors)) { block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP); scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE)); diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c index 98c30c5d5c3..0306ccc7b1e 100644 --- a/hw/scsi/scsi-generic.c +++ b/hw/scsi/scsi-generic.c @@ -147,7 +147,7 @@ static int execute_command(BlockBackend *blk, return 0; } -static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) +static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len) { uint8_t page, page_idx; @@ -179,10 +179,12 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) (r->req.cmd.buf[1] & 0x01)) { page = r->req.cmd.buf[2]; if (page == 0xb0) { - uint32_t max_transfer = - blk_get_max_transfer(s->conf.blk) / s->blocksize; + uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk); + uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk); assert(max_transfer); + max_transfer = MIN_NON_ZERO(max_transfer, max_iov * qemu_real_host_page_size) + / s->blocksize; stl_be_p(&r->buf[8], max_transfer); /* Also take care of the opt xfer len. 
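The scsi-disk.c UNMAP change above makes the point explicit with a new comment on check_lba_range(): the bounds check runs on values in the device's logical-block units, and only afterwards are they scaled to 512-byte BDRV sectors for the actual request. A generic sketch of keeping those two unit systems apart; every name here is hypothetical:

#include <stdbool.h>
#include <stdint.h>

#define BDRV_SECTOR_SIZE 512u

/* Range check in logical-block units (blocksize may be 512, 4096, ...). */
static bool lba_range_ok(uint64_t lba, uint32_t nb_blocks, uint64_t max_lba)
{
    return lba <= max_lba && nb_blocks <= max_lba - lba + 1;
}

/* Validate first, convert to 512-byte sector units only on success. */
static bool prepare_discard(uint64_t lba, uint32_t nb_blocks, uint64_t max_lba,
                            uint32_t blocksize, uint64_t *sector,
                            uint64_t *nb_sectors)
{
    if (!lba_range_ok(lba, nb_blocks, max_lba)) {
        return false;
    }
    *sector = lba * (blocksize / BDRV_SECTOR_SIZE);
    *nb_sectors = (uint64_t)nb_blocks * (blocksize / BDRV_SECTOR_SIZE);
    return true;
}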
*/ stl_be_p(&r->buf[12], @@ -213,8 +215,13 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) r->buf[page_idx] = 0xb0; } stw_be_p(r->buf + 2, lduw_be_p(r->buf + 2) + 1); + + if (len < r->buflen) { + len++; + } } } + return len; } static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice *s) @@ -332,7 +339,7 @@ static void scsi_read_complete(void * opaque, int ret) } } if (r->req.cmd.buf[0] == INQUIRY) { - scsi_handle_inquiry_reply(r, s); + len = scsi_handle_inquiry_reply(r, s, len); } req_complete: diff --git a/hw/scsi/spapr_vscsi.c b/hw/scsi/spapr_vscsi.c index ca5c13c4a8e..a07a8e1523f 100644 --- a/hw/scsi/spapr_vscsi.c +++ b/hw/scsi/spapr_vscsi.c @@ -34,7 +34,6 @@ #include "qemu/osdep.h" #include "qemu/module.h" -#include "cpu.h" #include "hw/scsi/scsi.h" #include "migration/vmstate.h" #include "scsi/constants.h" @@ -1217,8 +1216,7 @@ static void spapr_vscsi_realize(SpaprVioDevice *dev, Error **errp) dev->crq.SendFunc = vscsi_do_crq; - scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(dev), - &vscsi_scsi_info, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(dev), &vscsi_scsi_info); /* ibmvscsi SCSI bus does not allow hotplug. */ qbus_set_hotplug_handler(BUS(&s->bus), NULL); diff --git a/hw/scsi/trace-events b/hw/scsi/trace-events index 1c331fb1896..92d5b40f892 100644 --- a/hw/scsi/trace-events +++ b/hw/scsi/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # scsi-bus.c scsi_req_alloc(int target, int lun, int tag) "target %d lun %d tag %d" @@ -166,7 +166,8 @@ esp_dma_disable(void) "Lower enable" esp_pdma_read(int size) "pDMA read %u bytes" esp_pdma_write(int size) "pDMA write %u bytes" esp_get_cmd(uint32_t dmalen, int target) "len %d target %d" -esp_do_busid_cmd(uint8_t busid) "busid 0x%x" +esp_do_command_phase(uint8_t busid) "busid 0x%x" +esp_do_identify(uint8_t byte) "0x%x" esp_handle_satn_stop(uint32_t cmdlen) "cmdlen %d" esp_write_response(uint32_t status) "Transfer status (status=%d)" esp_do_dma(uint32_t cmdlen, uint32_t len) "command len %d + %d" diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 4d70fa036bb..039caf2614e 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -208,7 +208,6 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) "target SCSI device state or use shared storage over network), " "set 'migratable' property to true to enable migration."); if (migrate_add_blocker(vsc->migration_blocker, errp) < 0) { - error_free(vsc->migration_blocker); goto free_virtio; } } @@ -219,10 +218,8 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) vsc->dev.backend_features = 0; ret = vhost_dev_init(&vsc->dev, (void *)(uintptr_t)vhostfd, - VHOST_BACKEND_TYPE_KERNEL, 0); + VHOST_BACKEND_TYPE_KERNEL, 0, errp); if (ret < 0) { - error_setg(errp, "vhost-scsi: vhost initialization failed: %s", - strerror(-ret)); goto free_vqs; } @@ -235,11 +232,12 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) return; free_vqs: + g_free(vsc->dev.vqs); if (!vsc->migratable) { migrate_del_blocker(vsc->migration_blocker); } - g_free(vsc->dev.vqs); free_virtio: + error_free(vsc->migration_blocker); virtio_scsi_common_unrealize(dev); close_fd: close(vhostfd); diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 46660194428..1b2f7eed988 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -122,10 +122,8 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) 
vqs = vsc->dev.vqs; ret = vhost_dev_init(&vsc->dev, &s->vhost_user, - VHOST_BACKEND_TYPE_USER, 0); + VHOST_BACKEND_TYPE_USER, 0, errp); if (ret < 0) { - error_setg(errp, "vhost-user-scsi: vhost initialization failed: %s", - strerror(-ret)); goto free_vhost; } diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 4ad87934064..18eb824c97f 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -94,8 +94,7 @@ static bool virtio_scsi_data_plane_handle_event(VirtIODevice *vdev, return progress; } -static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, - VirtIOHandleAIOOutput fn) +static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); int rc; @@ -109,7 +108,6 @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, return rc; } - virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, fn); return 0; } @@ -154,40 +152,63 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) goto fail_guest_notifiers; } - aio_context_acquire(s->ctx); - rc = virtio_scsi_vring_init(s, vs->ctrl_vq, 0, - virtio_scsi_data_plane_handle_ctrl); - if (rc) { - goto fail_vrings; + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ + memory_region_transaction_begin(); + + rc = virtio_scsi_set_host_notifier(s, vs->ctrl_vq, 0); + if (rc != 0) { + goto fail_host_notifiers; } vq_init_count++; - rc = virtio_scsi_vring_init(s, vs->event_vq, 1, - virtio_scsi_data_plane_handle_event); - if (rc) { - goto fail_vrings; + rc = virtio_scsi_set_host_notifier(s, vs->event_vq, 1); + if (rc != 0) { + goto fail_host_notifiers; } vq_init_count++; + for (i = 0; i < vs->conf.num_queues; i++) { - rc = virtio_scsi_vring_init(s, vs->cmd_vqs[i], i + 2, - virtio_scsi_data_plane_handle_cmd); + rc = virtio_scsi_set_host_notifier(s, vs->cmd_vqs[i], i + 2); if (rc) { - goto fail_vrings; + goto fail_host_notifiers; } vq_init_count++; } + memory_region_transaction_commit(); + + aio_context_acquire(s->ctx); + virtio_queue_aio_set_host_notifier_handler(vs->ctrl_vq, s->ctx, + virtio_scsi_data_plane_handle_ctrl); + virtio_queue_aio_set_host_notifier_handler(vs->event_vq, s->ctx, + virtio_scsi_data_plane_handle_event); + + for (i = 0; i < vs->conf.num_queues; i++) { + virtio_queue_aio_set_host_notifier_handler(vs->cmd_vqs[i], s->ctx, + virtio_scsi_data_plane_handle_cmd); + } + s->dataplane_starting = false; s->dataplane_started = true; aio_context_release(s->ctx); return 0; -fail_vrings: - aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); - aio_context_release(s->ctx); +fail_host_notifiers: for (i = 0; i < vq_init_count; i++) { virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + } + + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ + memory_region_transaction_commit(); + + for (i = 0; i < vq_init_count; i++) { virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); } k->set_guest_notifiers(qbus->parent, vs->conf.num_queues + 2, false); @@ -225,8 +246,23 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev) blk_drain_all(); /* ensure there are no in-flight requests */ + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). 
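As the new comments in virtio-scsi-dataplane.c spell out, both the start and the stop path now wrap the per-queue virtio_bus_set_host_notifier() calls in one memory-region transaction, so address_space_update_ioeventfds() runs once for the whole batch instead of once per queue, and the ioeventfds are cleaned up only after the transaction has committed. A sketch of the stop-side shape; the function and queue count are hypothetical, while the calls themselves are the QEMU-internal APIs used above:

/* Sketch only: relies on QEMU's memory and virtio-bus internals. */
static void stop_host_notifiers(VirtioBusState *bus, int nvqs)
{
    int i;

    /* Batch all ioeventfd deassignments into a single commit. */
    memory_region_transaction_begin();
    for (i = 0; i < nvqs; i++) {
        virtio_bus_set_host_notifier(bus, i, false);
    }
    /* The transaction expects the ioeventfds to still be open here. */
    memory_region_transaction_commit();

    /* Only now close and clean up the notifiers. */
    for (i = 0; i < nvqs; i++) {
        virtio_bus_cleanup_host_notifier(bus, i);
    }
}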
+ */ + memory_region_transaction_begin(); + for (i = 0; i < vs->conf.num_queues + 2; i++) { virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + } + + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ + memory_region_transaction_commit(); + + for (i = 0; i < vs->conf.num_queues + 2; i++) { virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); } diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 6d807302870..51fd09522ac 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -1019,8 +1019,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) return; } - scsi_bus_new(&s->bus, sizeof(s->bus), dev, - &virtio_scsi_scsi_info, vdev->bus_name); + scsi_bus_init_named(&s->bus, sizeof(s->bus), dev, + &virtio_scsi_scsi_info, vdev->bus_name); /* override default SCSI bus hotplug-handler, with virtio-scsi's one */ qbus_set_hotplug_handler(BUS(&s->bus), OBJECT(dev)); diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c index 1f30cb020a1..cd76bd67ab7 100644 --- a/hw/scsi/vmw_pvscsi.c +++ b/hw/scsi/vmw_pvscsi.c @@ -1180,8 +1180,7 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s); - scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(pci_dev), - &pvscsi_scsi_info, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info); /* override default SCSI bus hotplug-handler, with pvscsi's one */ qbus_set_hotplug_handler(BUS(&s->bus), OBJECT(s)); pvscsi_reset_state(s); diff --git a/hw/sd/allwinner-sdhost.c b/hw/sd/allwinner-sdhost.c index bea6d97ef87..9166d6638de 100644 --- a/hw/sd/allwinner-sdhost.c +++ b/hw/sd/allwinner-sdhost.c @@ -738,8 +738,8 @@ static void allwinner_sdhost_init(Object *obj) { AwSdHostState *s = AW_SDHOST(obj); - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), - TYPE_AW_SDHOST_BUS, DEVICE(s), "sd-bus"); + qbus_init(&s->sdbus, sizeof(s->sdbus), + TYPE_AW_SDHOST_BUS, DEVICE(s), "sd-bus"); memory_region_init_io(&s->iomem, obj, &allwinner_sdhost_ops, s, TYPE_AW_SDHOST, 4 * KiB); diff --git a/hw/sd/aspeed_sdhci.c b/hw/sd/aspeed_sdhci.c index 3299844de6d..df1bdf1fa4e 100644 --- a/hw/sd/aspeed_sdhci.c +++ b/hw/sd/aspeed_sdhci.c @@ -14,6 +14,7 @@ #include "hw/irq.h" #include "migration/vmstate.h" #include "hw/qdev-properties.h" +#include "trace.h" #define ASPEED_SDHCI_INFO 0x00 #define ASPEED_SDHCI_INFO_SLOT1 (1 << 17) @@ -60,6 +61,8 @@ static uint64_t aspeed_sdhci_read(void *opaque, hwaddr addr, unsigned int size) } } + trace_aspeed_sdhci_read(addr, size, (uint64_t) val); + return (uint64_t)val; } @@ -68,6 +71,8 @@ static void aspeed_sdhci_write(void *opaque, hwaddr addr, uint64_t val, { AspeedSDHCIState *sdhci = opaque; + trace_aspeed_sdhci_write(addr, size, val); + switch (addr) { case ASPEED_SDHCI_INFO: /* The RESET bit automatically clears. 
*/ diff --git a/hw/sd/bcm2835_sdhost.c b/hw/sd/bcm2835_sdhost.c index 50f5fdb88bc..088a7ac6ed4 100644 --- a/hw/sd/bcm2835_sdhost.c +++ b/hw/sd/bcm2835_sdhost.c @@ -403,8 +403,8 @@ static void bcm2835_sdhost_init(Object *obj) { BCM2835SDHostState *s = BCM2835_SDHOST(obj); - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), - TYPE_BCM2835_SDHOST_BUS, DEVICE(s), "sd-bus"); + qbus_init(&s->sdbus, sizeof(s->sdbus), + TYPE_BCM2835_SDHOST_BUS, DEVICE(s), "sd-bus"); memory_region_init_io(&s->iomem, obj, &bcm2835_sdhost_ops, s, TYPE_BCM2835_SDHOST, 0x1000); diff --git a/hw/sd/cadence_sdhci.c b/hw/sd/cadence_sdhci.c index 0b371c843d8..56b8bae1c3f 100644 --- a/hw/sd/cadence_sdhci.c +++ b/hw/sd/cadence_sdhci.c @@ -23,10 +23,8 @@ #include "qemu/osdep.h" #include "qemu/bitops.h" #include "qemu/error-report.h" -#include "qemu/log.h" #include "qapi/error.h" #include "migration/vmstate.h" -#include "hw/irq.h" #include "hw/sd/cadence_sdhci.h" #include "sdhci-internal.h" diff --git a/hw/sd/meson.build b/hw/sd/meson.build index 9c29691e13e..807ca07b7cc 100644 --- a/hw/sd/meson.build +++ b/hw/sd/meson.build @@ -4,10 +4,10 @@ softmmu_ss.add(when: 'CONFIG_SDHCI', if_true: files('sdhci.c')) softmmu_ss.add(when: 'CONFIG_SDHCI_PCI', if_true: files('sdhci-pci.c')) softmmu_ss.add(when: 'CONFIG_SSI_SD', if_true: files('ssi-sd.c')) -softmmu_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-memcard.c')) softmmu_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_mmc.c')) softmmu_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx_mmci.c')) softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_sdhost.c')) softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('aspeed_sdhci.c')) softmmu_ss.add(when: 'CONFIG_ALLWINNER_H3', if_true: files('allwinner-sdhost.c')) +softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_sdhci.c')) softmmu_ss.add(when: 'CONFIG_CADENCE_SDHCI', if_true: files('cadence_sdhci.c')) diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c deleted file mode 100644 index a1235aa46c1..00000000000 --- a/hw/sd/milkymist-memcard.c +++ /dev/null @@ -1,335 +0,0 @@ -/* - * QEMU model of the Milkymist SD Card Controller. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/memcard.pdf - */ - -#include "qemu/osdep.h" -#include "qemu/log.h" -#include "qemu/module.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "qapi/error.h" -#include "sysemu/block-backend.h" -#include "sysemu/blockdev.h" -#include "hw/qdev-properties.h" -#include "hw/sd/sd.h" -#include "qom/object.h" - -enum { - ENABLE_CMD_TX = (1<<0), - ENABLE_CMD_RX = (1<<1), - ENABLE_DAT_TX = (1<<2), - ENABLE_DAT_RX = (1<<3), -}; - -enum { - PENDING_CMD_TX = (1<<0), - PENDING_CMD_RX = (1<<1), - PENDING_DAT_TX = (1<<2), - PENDING_DAT_RX = (1<<3), -}; - -enum { - START_CMD_TX = (1<<0), - START_DAT_RX = (1<<1), -}; - -enum { - R_CLK2XDIV = 0, - R_ENABLE, - R_PENDING, - R_START, - R_CMD, - R_DAT, - R_MAX -}; - -#define TYPE_MILKYMIST_MEMCARD "milkymist-memcard" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistMemcardState, MILKYMIST_MEMCARD) - -#define TYPE_MILKYMIST_SDBUS "milkymist-sdbus" - -struct MilkymistMemcardState { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - SDBus sdbus; - - int command_write_ptr; - int response_read_ptr; - int response_len; - int ignore_next_cmd; - int enabled; - uint8_t command[6]; - uint8_t response[17]; - uint32_t regs[R_MAX]; -}; - -static void update_pending_bits(MilkymistMemcardState *s) -{ - /* transmits are instantaneous, thus tx pending bits are never set */ - s->regs[R_PENDING] = 0; - /* if rx is enabled the corresponding pending bits are always set */ - if (s->regs[R_ENABLE] & ENABLE_CMD_RX) { - s->regs[R_PENDING] |= PENDING_CMD_RX; - } - if (s->regs[R_ENABLE] & ENABLE_DAT_RX) { - s->regs[R_PENDING] |= PENDING_DAT_RX; - } -} - -static void memcard_sd_command(MilkymistMemcardState *s) -{ - SDRequest req; - - req.cmd = s->command[0] & 0x3f; - req.arg = ldl_be_p(s->command + 1); - req.crc = s->command[5]; - - s->response[0] = req.cmd; - s->response_len = sdbus_do_command(&s->sdbus, &req, s->response + 1); - s->response_read_ptr = 0; - - if (s->response_len == 16) { - /* R2 response */ - s->response[0] = 0x3f; - s->response_len += 1; - } else if (s->response_len == 4) { - /* no crc calculation, insert dummy byte */ - s->response[5] = 0; - s->response_len += 2; - } - - if (req.cmd == 0) { - /* next write is a dummy byte to clock the initialization of the sd - * card */ - s->ignore_next_cmd = 1; - } -} - -static uint64_t memcard_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistMemcardState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_CMD: - if (!s->enabled) { - r = 0xff; - } else { - r = s->response[s->response_read_ptr++]; - if (s->response_read_ptr > s->response_len) { - qemu_log_mask(LOG_GUEST_ERROR, "milkymist_memcard: " - "read more cmd bytes than available: clipping\n"); - s->response_read_ptr = 0; - } - } - break; - case R_DAT: - if (!s->enabled) { - r = 0xffffffff; - } else { - sdbus_read_data(&s->sdbus, &r, sizeof(r)); - be32_to_cpus(&r); - } - break; - case R_CLK2XDIV: - case R_ENABLE: - case R_PENDING: - case R_START: - r = s->regs[addr]; - break; - - default: - qemu_log_mask(LOG_UNIMP, "milkymist_memcard: " - "read access to unknown register 0x%" HWADDR_PRIx "\n", - addr << 2); - break; - } - - trace_milkymist_memcard_memory_read(addr << 2, r); - - return r; -} - -static void memcard_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistMemcardState *s = opaque; - uint32_t val32; - - trace_milkymist_memcard_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case 
R_PENDING: - /* clear rx pending bits */ - s->regs[R_PENDING] &= ~(value & (PENDING_CMD_RX | PENDING_DAT_RX)); - update_pending_bits(s); - break; - case R_CMD: - if (!s->enabled) { - break; - } - if (s->ignore_next_cmd) { - s->ignore_next_cmd = 0; - break; - } - s->command[s->command_write_ptr] = value & 0xff; - s->command_write_ptr = (s->command_write_ptr + 1) % 6; - if (s->command_write_ptr == 0) { - memcard_sd_command(s); - } - break; - case R_DAT: - if (!s->enabled) { - break; - } - val32 = cpu_to_be32(value); - sdbus_write_data(&s->sdbus, &val32, sizeof(val32)); - break; - case R_ENABLE: - s->regs[addr] = value; - update_pending_bits(s); - break; - case R_CLK2XDIV: - case R_START: - s->regs[addr] = value; - break; - - default: - qemu_log_mask(LOG_UNIMP, "milkymist_memcard: " - "write access to unknown register 0x%" HWADDR_PRIx " " - "(value 0x%" PRIx64 ")\n", addr << 2, value); - break; - } -} - -static const MemoryRegionOps memcard_mmio_ops = { - .read = memcard_read, - .write = memcard_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void milkymist_memcard_reset(DeviceState *d) -{ - MilkymistMemcardState *s = MILKYMIST_MEMCARD(d); - int i; - - s->command_write_ptr = 0; - s->response_read_ptr = 0; - s->response_len = 0; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } -} - -static void milkymist_memcard_set_readonly(DeviceState *dev, bool level) -{ - qemu_log_mask(LOG_UNIMP, - "milkymist_memcard: read-only mode not supported\n"); -} - -static void milkymist_memcard_set_inserted(DeviceState *dev, bool level) -{ - MilkymistMemcardState *s = MILKYMIST_MEMCARD(dev); - - s->enabled = !!level; -} - -static void milkymist_memcard_init(Object *obj) -{ - MilkymistMemcardState *s = MILKYMIST_MEMCARD(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); - - memory_region_init_io(&s->regs_region, OBJECT(s), &memcard_mmio_ops, s, - "milkymist-memcard", R_MAX * 4); - sysbus_init_mmio(dev, &s->regs_region); - - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), TYPE_SD_BUS, - DEVICE(obj), "sd-bus"); -} - -static const VMStateDescription vmstate_milkymist_memcard = { - .name = "milkymist-memcard", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_INT32(command_write_ptr, MilkymistMemcardState), - VMSTATE_INT32(response_read_ptr, MilkymistMemcardState), - VMSTATE_INT32(response_len, MilkymistMemcardState), - VMSTATE_INT32(ignore_next_cmd, MilkymistMemcardState), - VMSTATE_INT32(enabled, MilkymistMemcardState), - VMSTATE_UINT8_ARRAY(command, MilkymistMemcardState, 6), - VMSTATE_UINT8_ARRAY(response, MilkymistMemcardState, 17), - VMSTATE_UINT32_ARRAY(regs, MilkymistMemcardState, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static void milkymist_memcard_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->reset = milkymist_memcard_reset; - dc->vmsd = &vmstate_milkymist_memcard; - /* Reason: output IRQs should be wired up */ - dc->user_creatable = false; -} - -static const TypeInfo milkymist_memcard_info = { - .name = TYPE_MILKYMIST_MEMCARD, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistMemcardState), - .instance_init = milkymist_memcard_init, - .class_init = milkymist_memcard_class_init, -}; - -static void milkymist_sdbus_class_init(ObjectClass *klass, void *data) -{ - SDBusClass *sbc = SD_BUS_CLASS(klass); - - sbc->set_inserted = milkymist_memcard_set_inserted; - sbc->set_readonly = milkymist_memcard_set_readonly; -} - -static 
const TypeInfo milkymist_sdbus_info = { - .name = TYPE_MILKYMIST_SDBUS, - .parent = TYPE_SD_BUS, - .instance_size = sizeof(SDBus), - .class_init = milkymist_sdbus_class_init, -}; - -static void milkymist_memcard_register_types(void) -{ - type_register_static(&milkymist_memcard_info); - type_register_static(&milkymist_sdbus_info); -} - -type_init(milkymist_memcard_register_types) diff --git a/hw/sd/npcm7xx_sdhci.c b/hw/sd/npcm7xx_sdhci.c new file mode 100644 index 00000000000..ef503365dfb --- /dev/null +++ b/hw/sd/npcm7xx_sdhci.c @@ -0,0 +1,182 @@ +/* + * NPCM7xx SD-3.0 / eMMC-4.51 Host Controller + * + * Copyright (c) 2021 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include "qemu/osdep.h" + +#include "hw/sd/npcm7xx_sdhci.h" +#include "migration/vmstate.h" +#include "sdhci-internal.h" +#include "qemu/log.h" + +static uint64_t npcm7xx_sdhci_read(void *opaque, hwaddr addr, unsigned int size) +{ + NPCM7xxSDHCIState *s = opaque; + uint64_t val = 0; + + switch (addr) { + case NPCM7XX_PRSTVALS_0: + case NPCM7XX_PRSTVALS_1: + case NPCM7XX_PRSTVALS_2: + case NPCM7XX_PRSTVALS_3: + case NPCM7XX_PRSTVALS_4: + case NPCM7XX_PRSTVALS_5: + val = s->regs.prstvals[(addr - NPCM7XX_PRSTVALS_0) / 2]; + break; + case NPCM7XX_BOOTTOCTRL: + val = s->regs.boottoctrl; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "SDHCI read of nonexistent reg: 0x%02" + HWADDR_PRIx, addr); + break; + } + + return val; +} + +static void npcm7xx_sdhci_write(void *opaque, hwaddr addr, uint64_t val, + unsigned int size) +{ + NPCM7xxSDHCIState *s = opaque; + + switch (addr) { + case NPCM7XX_BOOTTOCTRL: + s->regs.boottoctrl = val; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "SDHCI write of nonexistent reg: 0x%02" + HWADDR_PRIx, addr); + break; + } +} + +static bool npcm7xx_sdhci_check_mem_op(void *opaque, hwaddr addr, + unsigned size, bool is_write, + MemTxAttrs attrs) +{ + switch (addr) { + case NPCM7XX_PRSTVALS_0: + case NPCM7XX_PRSTVALS_1: + case NPCM7XX_PRSTVALS_2: + case NPCM7XX_PRSTVALS_3: + case NPCM7XX_PRSTVALS_4: + case NPCM7XX_PRSTVALS_5: + /* RO Word */ + return !is_write && size == 2; + case NPCM7XX_BOOTTOCTRL: + /* R/W Dword */ + return size == 4; + default: + return false; + } +} + +static const MemoryRegionOps npcm7xx_sdhci_ops = { + .read = npcm7xx_sdhci_read, + .write = npcm7xx_sdhci_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 4, + .unaligned = false, + .accepts = npcm7xx_sdhci_check_mem_op, + }, +}; + +static void npcm7xx_sdhci_realize(DeviceState *dev, Error **errp) +{ + NPCM7xxSDHCIState *s = NPCM7XX_SDHCI(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + SysBusDevice *sbd_sdhci = SYS_BUS_DEVICE(&s->sdhci); + + memory_region_init(&s->container, OBJECT(s), + "npcm7xx.sdhci-container", 0x1000); + sysbus_init_mmio(sbd, &s->container); + + memory_region_init_io(&s->iomem, OBJECT(s), &npcm7xx_sdhci_ops, s, + TYPE_NPCM7XX_SDHCI, NPCM7XX_SDHCI_REGSIZE); + memory_region_add_subregion_overlap(&s->container, NPCM7XX_PRSTVALS, + &s->iomem, 1); + + sysbus_realize(sbd_sdhci, 
errp); + memory_region_add_subregion(&s->container, 0, + sysbus_mmio_get_region(sbd_sdhci, 0)); + + /* propagate irq and "sd-bus" from generic-sdhci */ + sysbus_pass_irq(sbd, sbd_sdhci); + s->bus = qdev_get_child_bus(DEVICE(sbd_sdhci), "sd-bus"); + + /* Set the read only preset values. */ + memset(s->regs.prstvals, 0, sizeof(s->regs.prstvals)); + s->regs.prstvals[0] = NPCM7XX_PRSTVALS_0_RESET; + s->regs.prstvals[1] = NPCM7XX_PRSTVALS_1_RESET; + s->regs.prstvals[3] = NPCM7XX_PRSTVALS_3_RESET; +} + +static void npcm7xx_sdhci_reset(DeviceState *dev) +{ + NPCM7xxSDHCIState *s = NPCM7XX_SDHCI(dev); + device_cold_reset(DEVICE(&s->sdhci)); + s->regs.boottoctrl = 0; + + s->sdhci.prnsts = NPCM7XX_PRSNTS_RESET; + s->sdhci.blkgap = NPCM7XX_BLKGAP_RESET; + s->sdhci.capareg = NPCM7XX_CAPAB_RESET; + s->sdhci.maxcurr = NPCM7XX_MAXCURR_RESET; + s->sdhci.version = NPCM7XX_HCVER_RESET; +} + +static const VMStateDescription vmstate_npcm7xx_sdhci = { + .name = TYPE_NPCM7XX_SDHCI, + .version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT32(regs.boottoctrl, NPCM7xxSDHCIState), + VMSTATE_END_OF_LIST(), + }, +}; + +static void npcm7xx_sdhci_class_init(ObjectClass *classp, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(classp); + + dc->desc = "NPCM7xx SD/eMMC Host Controller"; + dc->realize = npcm7xx_sdhci_realize; + dc->reset = npcm7xx_sdhci_reset; + dc->vmsd = &vmstate_npcm7xx_sdhci; +} + +static void npcm7xx_sdhci_instance_init(Object *obj) +{ + NPCM7xxSDHCIState *s = NPCM7XX_SDHCI(obj); + + object_initialize_child(OBJECT(s), "generic-sdhci", &s->sdhci, + TYPE_SYSBUS_SDHCI); +} + +static TypeInfo npcm7xx_sdhci_info = { + .name = TYPE_NPCM7XX_SDHCI, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(NPCM7xxSDHCIState), + .instance_init = npcm7xx_sdhci_instance_init, + .class_init = npcm7xx_sdhci_class_init, +}; + +static void npcm7xx_sdhci_register_types(void) +{ + type_register_static(&npcm7xx_sdhci_info); +} + +type_init(npcm7xx_sdhci_register_types) diff --git a/hw/sd/omap_mmc.c b/hw/sd/omap_mmc.c index 1f946908fe1..b67def63813 100644 --- a/hw/sd/omap_mmc.c +++ b/hw/sd/omap_mmc.c @@ -318,7 +318,7 @@ void omap_mmc_reset(struct omap_mmc_s *host) * into any bus, and we must reset it manually. When omap_mmc is * QOMified this must move into the QOM reset function. 
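The new NPCM7xx SDHCI wrapper above filters guest accesses through the MemoryRegionOps .valid.accepts hook: npcm7xx_sdhci_check_mem_op() rejects writes to the read-only preset-value words and any access of the wrong width before the read/write handlers are ever called. A trimmed sketch of such a hook; the register offsets are hypothetical, the callback signature is the one used above:

/* Sketch only: hwaddr and MemTxAttrs come from QEMU's memory API.
 * Installed via  .valid.accepts = regs_accepts  in the MemoryRegionOps. */
#define REG_RO_WORD    0x04   /* hypothetical read-only 16-bit register  */
#define REG_RW_DWORD   0x60   /* hypothetical read/write 32-bit register */

static bool regs_accepts(void *opaque, hwaddr addr, unsigned size,
                         bool is_write, MemTxAttrs attrs)
{
    switch (addr) {
    case REG_RO_WORD:
        return !is_write && size == 2;   /* word-sized reads only   */
    case REG_RW_DWORD:
        return size == 4;                /* dword reads and writes  */
    default:
        return false;                    /* reject everything else  */
    }
}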
*/ - device_legacy_reset(DEVICE(host->card)); + device_cold_reset(DEVICE(host->card)); } static uint64_t omap_mmc_read(void *opaque, hwaddr offset, diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c index 960f1550981..5e554bd4676 100644 --- a/hw/sd/pl181.c +++ b/hw/sd/pl181.c @@ -506,8 +506,7 @@ static void pl181_init(Object *obj) qdev_init_gpio_out_named(dev, &s->card_readonly, "card-read-only", 1); qdev_init_gpio_out_named(dev, &s->card_inserted, "card-inserted", 1); - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), - TYPE_PL181_BUS, dev, "sd-bus"); + qbus_init(&s->sdbus, sizeof(s->sdbus), TYPE_PL181_BUS, dev, "sd-bus"); } static void pl181_class_init(ObjectClass *klass, void *data) diff --git a/hw/sd/pxa2xx_mmci.c b/hw/sd/pxa2xx_mmci.c index 3dd2fc7a83f..124fbf8bbd4 100644 --- a/hw/sd/pxa2xx_mmci.c +++ b/hw/sd/pxa2xx_mmci.c @@ -560,8 +560,8 @@ static void pxa2xx_mmci_instance_init(Object *obj) qdev_init_gpio_out_named(dev, &s->rx_dma, "rx-dma", 1); qdev_init_gpio_out_named(dev, &s->tx_dma, "tx-dma", 1); - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), - TYPE_PXA2XX_MMCI_BUS, DEVICE(obj), "sd-bus"); + qbus_init(&s->sdbus, sizeof(s->sdbus), + TYPE_PXA2XX_MMCI_BUS, DEVICE(obj), "sd-bus"); } static void pxa2xx_mmci_class_init(ObjectClass *klass, void *data) diff --git a/hw/sd/sd.c b/hw/sd/sd.c index 282d39a7042..bb5dbff68c0 100644 --- a/hw/sd/sd.c +++ b/hw/sd/sd.c @@ -821,8 +821,15 @@ static uint32_t sd_wpbits(SDState *sd, uint64_t addr) wpnum = sd_addr_to_wpnum(addr); for (i = 0; i < 32; i++, wpnum++, addr += WPGROUP_SIZE) { + if (addr >= sd->size) { + /* + * If the addresses of the last groups are outside the valid range, + * then the corresponding write protection bits shall be set to 0. + */ + continue; + } assert(wpnum < sd->wpgrps_size); - if (addr < sd->size && test_bit(wpnum, sd->wp_groups)) { + if (test_bit(wpnum, sd->wp_groups)) { ret |= (1 << i); } } @@ -937,6 +944,19 @@ static void sd_lock_command(SDState *sd) sd->card_status &= ~CARD_IS_LOCKED; } +static bool address_in_range(SDState *sd, const char *desc, + uint64_t addr, uint32_t length) +{ + if (addr + length > sd->size) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s offset %"PRIu64" > card %"PRIu64" [%%%u]\n", + desc, addr, sd->size, length); + sd->card_status |= ADDRESS_ERROR; + return false; + } + return true; +} + static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) { uint32_t rca = 0x0000; @@ -1218,8 +1238,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) switch (sd->state) { case sd_transfer_state: - if (addr + sd->blk_len > sd->size) { - sd->card_status |= ADDRESS_ERROR; + if (!address_in_range(sd, "READ_BLOCK", addr, sd->blk_len)) { return sd_r1; } @@ -1264,8 +1283,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) switch (sd->state) { case sd_transfer_state: - if (addr + sd->blk_len > sd->size) { - sd->card_status |= ADDRESS_ERROR; + if (!address_in_range(sd, "WRITE_BLOCK", addr, sd->blk_len)) { return sd_r1; } @@ -1325,8 +1343,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) switch (sd->state) { case sd_transfer_state: - if (addr >= sd->size) { - sd->card_status |= ADDRESS_ERROR; + if (!address_in_range(sd, "SET_WRITE_PROT", addr, 1)) { return sd_r1b; } @@ -1348,8 +1365,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) switch (sd->state) { case sd_transfer_state: - if (addr >= sd->size) { - sd->card_status |= ADDRESS_ERROR; + if (!address_in_range(sd, "CLR_WRITE_PROT", addr, 1)) { return sd_r1b; } @@ -1371,6 +1387,11 @@ 
static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) switch (sd->state) { case sd_transfer_state: + if (!address_in_range(sd, "SEND_WRITE_PROT", + req.arg, sd->blk_len)) { + return sd_r1; + } + sd->state = sd_sendingdata_state; *(uint32_t *) sd->data = sd_wpbits(sd, req.arg); sd->data_start = addr; @@ -1504,7 +1525,8 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) return sd_illegal; } - qemu_log_mask(LOG_GUEST_ERROR, "SD: CMD%i in a wrong state\n", req.cmd); + qemu_log_mask(LOG_GUEST_ERROR, "SD: CMD%i in a wrong state: %s\n", + req.cmd, sd_state_name(sd->state)); return sd_illegal; } @@ -1825,8 +1847,8 @@ void sd_write_byte(SDState *sd, uint8_t value) case 25: /* CMD25: WRITE_MULTIPLE_BLOCK */ if (sd->data_offset == 0) { /* Start of the block - let's check the address is valid */ - if (sd->data_start + sd->blk_len > sd->size) { - sd->card_status |= ADDRESS_ERROR; + if (!address_in_range(sd, "WRITE_MULTIPLE_BLOCK", + sd->data_start, sd->blk_len)) { break; } if (sd->size <= SDSC_MAX_CAPACITY) { @@ -1998,8 +2020,8 @@ uint8_t sd_read_byte(SDState *sd) case 18: /* CMD18: READ_MULTIPLE_BLOCK */ if (sd->data_offset == 0) { - if (sd->data_start + io_len > sd->size) { - sd->card_status |= ADDRESS_ERROR; + if (!address_in_range(sd, "READ_MULTIPLE_BLOCK", + sd->data_start, io_len)) { return 0x00; } BLK_READ_BLOCK(sd->data_start, io_len); diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index 5b8678110b0..c9dc065cc52 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -1337,8 +1337,7 @@ static void sdhci_init_readonly_registers(SDHCIState *s, Error **errp) void sdhci_initfn(SDHCIState *s) { - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), - TYPE_SDHCI_BUS, DEVICE(s), "sd-bus"); + qbus_init(&s->sdbus, sizeof(s->sdbus), TYPE_SDHCI_BUS, DEVICE(s), "sd-bus"); s->insert_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, sdhci_raise_insertion_irq, s); s->transfer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, sdhci_data_transfer, s); diff --git a/hw/sd/ssi-sd.c b/hw/sd/ssi-sd.c index 97ee58e20cf..e60854eeefc 100644 --- a/hw/sd/ssi-sd.c +++ b/hw/sd/ssi-sd.c @@ -373,8 +373,7 @@ static void ssi_sd_realize(SSIPeripheral *d, Error **errp) DeviceState *carddev; DriveInfo *dinfo; - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), TYPE_SD_BUS, - DEVICE(d), "sd-bus"); + qbus_init(&s->sdbus, sizeof(s->sdbus), TYPE_SD_BUS, DEVICE(d), "sd-bus"); /* Create and plug in the sd card */ /* FIXME use a qdev drive property instead of drive_get_next() */ diff --git a/hw/sd/trace-events b/hw/sd/trace-events index 4140e485403..94a00557b26 100644 --- a/hw/sd/trace-events +++ b/hw/sd/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
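The hw/sd/sd.c hunks above replace the open-coded "addr + blk_len > size" / "addr >= size" checks with a single address_in_range() helper that both sets ADDRESS_ERROR and reports the offending command via LOG_GUEST_ERROR; SEND_WRITE_PROT also gains a bounds check it previously lacked. Below is a minimal, self-contained sketch of the same pattern; CardState, the status-bit value and the plain fprintf() logging are illustrative stand-ins, not QEMU's definitions.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint64_t size;          /* card capacity in bytes */
    uint32_t card_status;   /* R1 card status bits */
} CardState;

#define ADDRESS_ERROR (1u << 30)    /* stand-in for the R1 status bit */

/* Returns true when [addr, addr + length) lies inside the card. */
static bool card_address_in_range(CardState *sd, const char *desc,
                                  uint64_t addr, uint32_t length)
{
    if (addr + length > sd->size) {
        fprintf(stderr, "%s: offset %" PRIu64 " beyond card size %" PRIu64 "\n",
                desc, addr, sd->size);
        sd->card_status |= ADDRESS_ERROR;  /* reported in the next response */
        return false;
    }
    return true;
}

int main(void)
{
    CardState card = { .size = 1024 * 1024, .card_status = 0 };

    /* A 512-byte read ending exactly at the card boundary is accepted... */
    bool ok = card_address_in_range(&card, "READ_BLOCK", card.size - 512, 512);
    /* ...but one block past the end trips the error path. */
    bool bad = card_address_in_range(&card, "READ_BLOCK", card.size, 512);

    printf("ok=%d bad=%d status=0x%08" PRIx32 "\n", ok, bad, card.card_status);
    return 0;
}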
# allwinner-sdhost.c allwinner_sdhost_set_inserted(bool inserted) "inserted %u" @@ -55,10 +55,6 @@ sdcard_write_data(const char *proto, const char *cmd_desc, uint8_t cmd, uint8_t sdcard_read_data(const char *proto, const char *cmd_desc, uint8_t cmd, uint32_t length) "%s %20s/ CMD%02d len %" PRIu32 sdcard_set_voltage(uint16_t millivolts) "%u mV" -# milkymist-memcard.c -milkymist_memcard_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_memcard_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" - # pxa2xx_mmci.c pxa2xx_mmci_read(uint8_t size, uint32_t addr, uint32_t value) "size %d addr 0x%02x value 0x%08x" pxa2xx_mmci_write(uint8_t size, uint32_t addr, uint32_t value) "size %d addr 0x%02x value 0x%08x" @@ -72,3 +68,7 @@ pl181_fifo_push(uint32_t data) "FIFO push 0x%08" PRIx32 pl181_fifo_pop(uint32_t data) "FIFO pop 0x%08" PRIx32 pl181_fifo_transfer_complete(void) "FIFO transfer complete" pl181_data_engine_idle(void) "data engine idle" + +# aspeed_sdhci.c +aspeed_sdhci_read(uint64_t addr, uint32_t size, uint64_t data) "@0x%" PRIx64 " size %u: 0x%" PRIx64 +aspeed_sdhci_write(uint64_t addr, uint32_t size, uint64_t data) "@0x%" PRIx64 " size %u: 0x%" PRIx64 diff --git a/hw/sensor/Kconfig b/hw/sensor/Kconfig new file mode 100644 index 00000000000..9c8a049b068 --- /dev/null +++ b/hw/sensor/Kconfig @@ -0,0 +1,23 @@ +config TMP105 + bool + depends on I2C + +config TMP421 + bool + depends on I2C + +config DPS310 + bool + depends on I2C + +config EMC141X + bool + depends on I2C + +config ADM1272 + bool + depends on I2C + +config MAX34451 + bool + depends on I2C diff --git a/hw/sensor/adm1272.c b/hw/sensor/adm1272.c new file mode 100644 index 00000000000..7310c769be2 --- /dev/null +++ b/hw/sensor/adm1272.c @@ -0,0 +1,543 @@ +/* + * Analog Devices ADM1272 High Voltage Positive Hot Swap Controller and Digital + * Power Monitor with PMBus + * + * Copyright 2021 Google LLC + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include +#include "hw/i2c/pmbus_device.h" +#include "hw/irq.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "qemu/log.h" +#include "qemu/module.h" + +#define TYPE_ADM1272 "adm1272" +#define ADM1272(obj) OBJECT_CHECK(ADM1272State, (obj), TYPE_ADM1272) + +#define ADM1272_RESTART_TIME 0xCC +#define ADM1272_MFR_PEAK_IOUT 0xD0 +#define ADM1272_MFR_PEAK_VIN 0xD1 +#define ADM1272_MFR_PEAK_VOUT 0xD2 +#define ADM1272_MFR_PMON_CONTROL 0xD3 +#define ADM1272_MFR_PMON_CONFIG 0xD4 +#define ADM1272_MFR_ALERT1_CONFIG 0xD5 +#define ADM1272_MFR_ALERT2_CONFIG 0xD6 +#define ADM1272_MFR_PEAK_TEMPERATURE 0xD7 +#define ADM1272_MFR_DEVICE_CONFIG 0xD8 +#define ADM1272_MFR_POWER_CYCLE 0xD9 +#define ADM1272_MFR_PEAK_PIN 0xDA +#define ADM1272_MFR_READ_PIN_EXT 0xDB +#define ADM1272_MFR_READ_EIN_EXT 0xDC + +#define ADM1272_HYSTERESIS_LOW 0xF2 +#define ADM1272_HYSTERESIS_HIGH 0xF3 +#define ADM1272_STATUS_HYSTERESIS 0xF4 +#define ADM1272_STATUS_GPIO 0xF5 +#define ADM1272_STRT_UP_IOUT_LIM 0xF6 + +/* Defaults */ +#define ADM1272_OPERATION_DEFAULT 0x80 +#define ADM1272_CAPABILITY_DEFAULT 0xB0 +#define ADM1272_CAPABILITY_NO_PEC 0x30 +#define ADM1272_DIRECT_MODE 0x40 +#define ADM1272_HIGH_LIMIT_DEFAULT 0x0FFF +#define ADM1272_PIN_OP_DEFAULT 0x7FFF +#define ADM1272_PMBUS_REVISION_DEFAULT 0x22 +#define ADM1272_MFR_ID_DEFAULT "ADI" +#define ADM1272_MODEL_DEFAULT "ADM1272-A1" +#define ADM1272_MFR_DEFAULT_REVISION "25" +#define ADM1272_DEFAULT_DATE "160301" +#define ADM1272_RESTART_TIME_DEFAULT 
0x64 +#define ADM1272_PMON_CONTROL_DEFAULT 0x1 +#define ADM1272_PMON_CONFIG_DEFAULT 0x3F35 +#define ADM1272_DEVICE_CONFIG_DEFAULT 0x8 +#define ADM1272_HYSTERESIS_HIGH_DEFAULT 0xFFFF +#define ADM1272_STRT_UP_IOUT_LIM_DEFAULT 0x000F +#define ADM1272_VOLT_DEFAULT 12000 +#define ADM1272_IOUT_DEFAULT 25000 +#define ADM1272_PWR_DEFAULT 300 /* 12V 25A */ +#define ADM1272_SHUNT 300 /* micro-ohms */ +#define ADM1272_VOLTAGE_COEFF_DEFAULT 1 +#define ADM1272_CURRENT_COEFF_DEFAULT 3 +#define ADM1272_PWR_COEFF_DEFAULT 7 +#define ADM1272_IOUT_OFFSET 0x5000 +#define ADM1272_IOUT_OFFSET 0x5000 + + +typedef struct ADM1272State { + PMBusDevice parent; + + uint64_t ein_ext; + uint32_t pin_ext; + uint8_t restart_time; + + uint16_t peak_vin; + uint16_t peak_vout; + uint16_t peak_iout; + uint16_t peak_temperature; + uint16_t peak_pin; + + uint8_t pmon_control; + uint16_t pmon_config; + uint16_t alert1_config; + uint16_t alert2_config; + uint16_t device_config; + + uint16_t hysteresis_low; + uint16_t hysteresis_high; + uint8_t status_hysteresis; + uint8_t status_gpio; + + uint16_t strt_up_iout_lim; + +} ADM1272State; + +static const PMBusCoefficients adm1272_coefficients[] = { + [0] = { 6770, 0, -2 }, /* voltage, vrange 60V */ + [1] = { 4062, 0, -2 }, /* voltage, vrange 100V */ + [2] = { 1326, 20480, -1 }, /* current, vsense range 15mV */ + [3] = { 663, 20480, -1 }, /* current, vsense range 30mV */ + [4] = { 3512, 0, -2 }, /* power, vrange 60V, irange 15mV */ + [5] = { 21071, 0, -3 }, /* power, vrange 100V, irange 15mV */ + [6] = { 17561, 0, -3 }, /* power, vrange 60V, irange 30mV */ + [7] = { 10535, 0, -3 }, /* power, vrange 100V, irange 30mV */ + [8] = { 42, 31871, -1 }, /* temperature */ +}; + +static void adm1272_check_limits(ADM1272State *s) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(s); + + pmbus_check_limits(pmdev); + + if (pmdev->pages[0].read_vout > s->peak_vout) { + s->peak_vout = pmdev->pages[0].read_vout; + } + + if (pmdev->pages[0].read_vin > s->peak_vin) { + s->peak_vin = pmdev->pages[0].read_vin; + } + + if (pmdev->pages[0].read_iout > s->peak_iout) { + s->peak_iout = pmdev->pages[0].read_iout; + } + + if (pmdev->pages[0].read_temperature_1 > s->peak_temperature) { + s->peak_temperature = pmdev->pages[0].read_temperature_1; + } + + if (pmdev->pages[0].read_pin > s->peak_pin) { + s->peak_pin = pmdev->pages[0].read_pin; + } +} + +static uint16_t adm1272_millivolts_to_direct(uint32_t value) +{ + PMBusCoefficients c = adm1272_coefficients[ADM1272_VOLTAGE_COEFF_DEFAULT]; + c.b = c.b * 1000; + c.R = c.R - 3; + return pmbus_data2direct_mode(c, value); +} + +static uint32_t adm1272_direct_to_millivolts(uint16_t value) +{ + PMBusCoefficients c = adm1272_coefficients[ADM1272_VOLTAGE_COEFF_DEFAULT]; + c.b = c.b * 1000; + c.R = c.R - 3; + return pmbus_direct_mode2data(c, value); +} + +static uint16_t adm1272_milliamps_to_direct(uint32_t value) +{ + PMBusCoefficients c = adm1272_coefficients[ADM1272_CURRENT_COEFF_DEFAULT]; + /* Y = (m * r_sense * x - b) * 10^R */ + c.m = c.m * ADM1272_SHUNT / 1000; /* micro-ohms */ + c.b = c.b * 1000; + c.R = c.R - 3; + return pmbus_data2direct_mode(c, value); +} + +static uint32_t adm1272_direct_to_milliamps(uint16_t value) +{ + PMBusCoefficients c = adm1272_coefficients[ADM1272_CURRENT_COEFF_DEFAULT]; + c.m = c.m * ADM1272_SHUNT / 1000; + c.b = c.b * 1000; + c.R = c.R - 3; + return pmbus_direct_mode2data(c, value); +} + +static uint16_t adm1272_watts_to_direct(uint32_t value) +{ + PMBusCoefficients c = adm1272_coefficients[ADM1272_PWR_COEFF_DEFAULT]; + c.m = c.m * ADM1272_SHUNT 
/ 1000; + return pmbus_data2direct_mode(c, value); +} + +static uint32_t adm1272_direct_to_watts(uint16_t value) +{ + PMBusCoefficients c = adm1272_coefficients[ADM1272_PWR_COEFF_DEFAULT]; + c.m = c.m * ADM1272_SHUNT / 1000; + return pmbus_direct_mode2data(c, value); +} + +static void adm1272_exit_reset(Object *obj) +{ + ADM1272State *s = ADM1272(obj); + PMBusDevice *pmdev = PMBUS_DEVICE(obj); + + pmdev->page = 0; + pmdev->pages[0].operation = ADM1272_OPERATION_DEFAULT; + + + pmdev->capability = ADM1272_CAPABILITY_NO_PEC; + pmdev->pages[0].revision = ADM1272_PMBUS_REVISION_DEFAULT; + pmdev->pages[0].vout_mode = ADM1272_DIRECT_MODE; + pmdev->pages[0].vout_ov_warn_limit = ADM1272_HIGH_LIMIT_DEFAULT; + pmdev->pages[0].vout_uv_warn_limit = 0; + pmdev->pages[0].iout_oc_warn_limit = ADM1272_HIGH_LIMIT_DEFAULT; + pmdev->pages[0].ot_fault_limit = ADM1272_HIGH_LIMIT_DEFAULT; + pmdev->pages[0].ot_warn_limit = ADM1272_HIGH_LIMIT_DEFAULT; + pmdev->pages[0].vin_ov_warn_limit = ADM1272_HIGH_LIMIT_DEFAULT; + pmdev->pages[0].vin_uv_warn_limit = 0; + pmdev->pages[0].pin_op_warn_limit = ADM1272_PIN_OP_DEFAULT; + + pmdev->pages[0].status_word = 0; + pmdev->pages[0].status_vout = 0; + pmdev->pages[0].status_iout = 0; + pmdev->pages[0].status_input = 0; + pmdev->pages[0].status_temperature = 0; + pmdev->pages[0].status_mfr_specific = 0; + + pmdev->pages[0].read_vin + = adm1272_millivolts_to_direct(ADM1272_VOLT_DEFAULT); + pmdev->pages[0].read_vout + = adm1272_millivolts_to_direct(ADM1272_VOLT_DEFAULT); + pmdev->pages[0].read_iout + = adm1272_milliamps_to_direct(ADM1272_IOUT_DEFAULT); + pmdev->pages[0].read_temperature_1 = 0; + pmdev->pages[0].read_pin = adm1272_watts_to_direct(ADM1272_PWR_DEFAULT); + pmdev->pages[0].revision = ADM1272_PMBUS_REVISION_DEFAULT; + pmdev->pages[0].mfr_id = ADM1272_MFR_ID_DEFAULT; + pmdev->pages[0].mfr_model = ADM1272_MODEL_DEFAULT; + pmdev->pages[0].mfr_revision = ADM1272_MFR_DEFAULT_REVISION; + pmdev->pages[0].mfr_date = ADM1272_DEFAULT_DATE; + + s->pin_ext = 0; + s->ein_ext = 0; + s->restart_time = ADM1272_RESTART_TIME_DEFAULT; + + s->peak_vin = 0; + s->peak_vout = 0; + s->peak_iout = 0; + s->peak_temperature = 0; + s->peak_pin = 0; + + s->pmon_control = ADM1272_PMON_CONTROL_DEFAULT; + s->pmon_config = ADM1272_PMON_CONFIG_DEFAULT; + s->alert1_config = 0; + s->alert2_config = 0; + s->device_config = ADM1272_DEVICE_CONFIG_DEFAULT; + + s->hysteresis_low = 0; + s->hysteresis_high = ADM1272_HYSTERESIS_HIGH_DEFAULT; + s->status_hysteresis = 0; + s->status_gpio = 0; + + s->strt_up_iout_lim = ADM1272_STRT_UP_IOUT_LIM_DEFAULT; +} + +static uint8_t adm1272_read_byte(PMBusDevice *pmdev) +{ + ADM1272State *s = ADM1272(pmdev); + + switch (pmdev->code) { + case ADM1272_RESTART_TIME: + pmbus_send8(pmdev, s->restart_time); + break; + + case ADM1272_MFR_PEAK_IOUT: + pmbus_send16(pmdev, s->peak_iout); + break; + + case ADM1272_MFR_PEAK_VIN: + pmbus_send16(pmdev, s->peak_vin); + break; + + case ADM1272_MFR_PEAK_VOUT: + pmbus_send16(pmdev, s->peak_vout); + break; + + case ADM1272_MFR_PMON_CONTROL: + pmbus_send8(pmdev, s->pmon_control); + break; + + case ADM1272_MFR_PMON_CONFIG: + pmbus_send16(pmdev, s->pmon_config); + break; + + case ADM1272_MFR_ALERT1_CONFIG: + pmbus_send16(pmdev, s->alert1_config); + break; + + case ADM1272_MFR_ALERT2_CONFIG: + pmbus_send16(pmdev, s->alert2_config); + break; + + case ADM1272_MFR_PEAK_TEMPERATURE: + pmbus_send16(pmdev, s->peak_temperature); + break; + + case ADM1272_MFR_DEVICE_CONFIG: + pmbus_send16(pmdev, s->device_config); + break; + + case ADM1272_MFR_PEAK_PIN: 
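/*
 * Worked example for the DIRECT-format helpers above (illustrative only;
 * the exact rounding is whatever pmbus_data2direct_mode() and
 * pmbus_direct_mode2data() implement).  PMBus DIRECT encoding is
 *
 *     Y = (m * X + b) * 10^R          (real-world value X -> register Y)
 *     X = (Y * 10^-R - b) / m         (register Y -> real-world value X)
 *
 * adm1272_millivolts_to_direct() uses coefficient set [1] = { 4062, 0, -2 }
 * rescaled for millivolts (b *= 1000, R -= 3), so a 12000 mV reading encodes
 * to about (4062 * 12000) * 10^-5 ~= 487, and a register value of 487 decodes
 * back to (487 * 10^5) / 4062 ~= 11990 mV.
 */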
+ pmbus_send16(pmdev, s->peak_pin); + break; + + case ADM1272_MFR_READ_PIN_EXT: + pmbus_send32(pmdev, s->pin_ext); + break; + + case ADM1272_MFR_READ_EIN_EXT: + pmbus_send64(pmdev, s->ein_ext); + break; + + case ADM1272_HYSTERESIS_LOW: + pmbus_send16(pmdev, s->hysteresis_low); + break; + + case ADM1272_HYSTERESIS_HIGH: + pmbus_send16(pmdev, s->hysteresis_high); + break; + + case ADM1272_STATUS_HYSTERESIS: + pmbus_send16(pmdev, s->status_hysteresis); + break; + + case ADM1272_STATUS_GPIO: + pmbus_send16(pmdev, s->status_gpio); + break; + + case ADM1272_STRT_UP_IOUT_LIM: + pmbus_send16(pmdev, s->strt_up_iout_lim); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: reading from unsupported register: 0x%02x\n", + __func__, pmdev->code); + return 0xFF; + break; + } + + return 0; +} + +static int adm1272_write_data(PMBusDevice *pmdev, const uint8_t *buf, + uint8_t len) +{ + ADM1272State *s = ADM1272(pmdev); + + if (len == 0) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: writing empty data\n", __func__); + return -1; + } + + pmdev->code = buf[0]; /* PMBus command code */ + + if (len == 1) { + return 0; + } + + /* Exclude command code from buffer */ + buf++; + len--; + + switch (pmdev->code) { + + case ADM1272_RESTART_TIME: + s->restart_time = pmbus_receive8(pmdev); + break; + + case ADM1272_MFR_PMON_CONTROL: + s->pmon_control = pmbus_receive8(pmdev); + break; + + case ADM1272_MFR_PMON_CONFIG: + s->pmon_config = pmbus_receive16(pmdev); + break; + + case ADM1272_MFR_ALERT1_CONFIG: + s->alert1_config = pmbus_receive16(pmdev); + break; + + case ADM1272_MFR_ALERT2_CONFIG: + s->alert2_config = pmbus_receive16(pmdev); + break; + + case ADM1272_MFR_DEVICE_CONFIG: + s->device_config = pmbus_receive16(pmdev); + break; + + case ADM1272_MFR_POWER_CYCLE: + adm1272_exit_reset((Object *)s); + break; + + case ADM1272_HYSTERESIS_LOW: + s->hysteresis_low = pmbus_receive16(pmdev); + break; + + case ADM1272_HYSTERESIS_HIGH: + s->hysteresis_high = pmbus_receive16(pmdev); + break; + + case ADM1272_STRT_UP_IOUT_LIM: + s->strt_up_iout_lim = pmbus_receive16(pmdev); + adm1272_check_limits(s); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: writing to unsupported register: 0x%02x\n", + __func__, pmdev->code); + break; + } + return 0; +} + +static void adm1272_get(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) +{ + uint16_t value; + + if (strcmp(name, "vin") == 0 || strcmp(name, "vout") == 0) { + value = adm1272_direct_to_millivolts(*(uint16_t *)opaque); + } else if (strcmp(name, "iout") == 0) { + value = adm1272_direct_to_milliamps(*(uint16_t *)opaque); + } else if (strcmp(name, "pin") == 0) { + value = adm1272_direct_to_watts(*(uint16_t *)opaque); + } else { + value = *(uint16_t *)opaque; + } + + visit_type_uint16(v, name, &value, errp); +} + +static void adm1272_set(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) +{ + ADM1272State *s = ADM1272(obj); + uint16_t *internal = opaque; + uint16_t value; + + if (!visit_type_uint16(v, name, &value, errp)) { + return; + } + + if (strcmp(name, "vin") == 0 || strcmp(name, "vout") == 0) { + *internal = adm1272_millivolts_to_direct(value); + } else if (strcmp(name, "iout") == 0) { + *internal = adm1272_milliamps_to_direct(value); + } else if (strcmp(name, "pin") == 0) { + *internal = adm1272_watts_to_direct(value); + } else { + *internal = value; + } + + adm1272_check_limits(s); +} + +static const VMStateDescription vmstate_adm1272 = { + .name = "ADM1272", + .version_id = 0, + .minimum_version_id = 0, + .fields = 
(VMStateField[]){ + VMSTATE_PMBUS_DEVICE(parent, ADM1272State), + VMSTATE_UINT64(ein_ext, ADM1272State), + VMSTATE_UINT32(pin_ext, ADM1272State), + VMSTATE_UINT8(restart_time, ADM1272State), + + VMSTATE_UINT16(peak_vin, ADM1272State), + VMSTATE_UINT16(peak_vout, ADM1272State), + VMSTATE_UINT16(peak_iout, ADM1272State), + VMSTATE_UINT16(peak_temperature, ADM1272State), + VMSTATE_UINT16(peak_pin, ADM1272State), + + VMSTATE_UINT8(pmon_control, ADM1272State), + VMSTATE_UINT16(pmon_config, ADM1272State), + VMSTATE_UINT16(alert1_config, ADM1272State), + VMSTATE_UINT16(alert2_config, ADM1272State), + VMSTATE_UINT16(device_config, ADM1272State), + + VMSTATE_UINT16(hysteresis_low, ADM1272State), + VMSTATE_UINT16(hysteresis_high, ADM1272State), + VMSTATE_UINT8(status_hysteresis, ADM1272State), + VMSTATE_UINT8(status_gpio, ADM1272State), + + VMSTATE_UINT16(strt_up_iout_lim, ADM1272State), + VMSTATE_END_OF_LIST() + } +}; + +static void adm1272_init(Object *obj) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(obj); + uint64_t flags = PB_HAS_VOUT_MODE | PB_HAS_VOUT | PB_HAS_VIN | PB_HAS_IOUT | + PB_HAS_PIN | PB_HAS_TEMPERATURE | PB_HAS_MFR_INFO; + + pmbus_page_config(pmdev, 0, flags); + + object_property_add(obj, "vin", "uint16", + adm1272_get, + adm1272_set, NULL, &pmdev->pages[0].read_vin); + + object_property_add(obj, "vout", "uint16", + adm1272_get, + adm1272_set, NULL, &pmdev->pages[0].read_vout); + + object_property_add(obj, "iout", "uint16", + adm1272_get, + adm1272_set, NULL, &pmdev->pages[0].read_iout); + + object_property_add(obj, "pin", "uint16", + adm1272_get, + adm1272_set, NULL, &pmdev->pages[0].read_pin); + +} + +static void adm1272_class_init(ObjectClass *klass, void *data) +{ + ResettableClass *rc = RESETTABLE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + PMBusDeviceClass *k = PMBUS_DEVICE_CLASS(klass); + + dc->desc = "Analog Devices ADM1272 Hot Swap controller"; + dc->vmsd = &vmstate_adm1272; + k->write_data = adm1272_write_data; + k->receive_byte = adm1272_read_byte; + k->device_num_pages = 1; + + rc->phases.exit = adm1272_exit_reset; +} + +static const TypeInfo adm1272_info = { + .name = TYPE_ADM1272, + .parent = TYPE_PMBUS_DEVICE, + .instance_size = sizeof(ADM1272State), + .instance_init = adm1272_init, + .class_init = adm1272_class_init, +}; + +static void adm1272_register_types(void) +{ + type_register_static(&adm1272_info); +} + +type_init(adm1272_register_types) diff --git a/hw/sensor/dps310.c b/hw/sensor/dps310.c new file mode 100644 index 00000000000..d60a18ac41b --- /dev/null +++ b/hw/sensor/dps310.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2017-2021 Joel Stanley , IBM Corporation + * + * Infineon DPS310 temperature and humidity sensor + * + * https://www.infineon.com/cms/en/product/sensor/pressure-sensors/pressure-sensors-for-iot/dps310/ + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "hw/hw.h" +#include "hw/i2c/i2c.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "migration/vmstate.h" + +#define NUM_REGISTERS 0x33 + +typedef struct DPS310State { + /*< private >*/ + I2CSlave i2c; + + /*< public >*/ + uint8_t regs[NUM_REGISTERS]; + + uint8_t len; + uint8_t pointer; + +} DPS310State; + +#define TYPE_DPS310 "dps310" +#define DPS310(obj) OBJECT_CHECK(DPS310State, (obj), TYPE_DPS310) + +#define DPS310_PRS_B2 0x00 +#define DPS310_PRS_B1 0x01 +#define DPS310_PRS_B0 0x02 +#define DPS310_TMP_B2 0x03 +#define DPS310_TMP_B1 0x04 +#define DPS310_TMP_B0 0x05 +#define DPS310_PRS_CFG 0x06 +#define DPS310_TMP_CFG 
0x07 +#define DPS310_TMP_RATE_BITS (0x70) +#define DPS310_MEAS_CFG 0x08 +#define DPS310_MEAS_CTRL_BITS (0x07) +#define DPS310_PRESSURE_EN BIT(0) +#define DPS310_TEMP_EN BIT(1) +#define DPS310_BACKGROUND BIT(2) +#define DPS310_PRS_RDY BIT(4) +#define DPS310_TMP_RDY BIT(5) +#define DPS310_SENSOR_RDY BIT(6) +#define DPS310_COEF_RDY BIT(7) +#define DPS310_CFG_REG 0x09 +#define DPS310_RESET 0x0c +#define DPS310_RESET_MAGIC (BIT(0) | BIT(3)) +#define DPS310_COEF_BASE 0x10 +#define DPS310_COEF_LAST 0x21 +#define DPS310_COEF_SRC 0x28 + +static void dps310_reset(DeviceState *dev) +{ + DPS310State *s = DPS310(dev); + + static const uint8_t regs_reset_state[sizeof(s->regs)] = { + 0xfe, 0x2f, 0xee, 0x02, 0x69, 0xa6, 0x00, 0x80, 0xc7, 0x00, 0x00, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x0e, 0x1e, 0xdd, 0x13, 0xca, 0x5f, 0x21, 0x52, + 0xf9, 0xc6, 0x04, 0xd1, 0xdb, 0x47, 0x00, 0x5b, 0xfb, 0x3a, 0x00, 0x00, + 0x20, 0x49, 0x4e, 0xa5, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x60, 0x15, 0x02 + }; + + memcpy(s->regs, regs_reset_state, sizeof(s->regs)); + s->pointer = 0; + + /* TODO: assert these after some timeout ? */ + s->regs[DPS310_MEAS_CFG] = DPS310_COEF_RDY | DPS310_SENSOR_RDY + | DPS310_TMP_RDY | DPS310_PRS_RDY; +} + +static uint8_t dps310_read(DPS310State *s, uint8_t reg) +{ + if (reg >= sizeof(s->regs)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: register 0x%02x out of bounds\n", + __func__, s->pointer); + return 0xFF; + } + + switch (reg) { + case DPS310_PRS_B2: + case DPS310_PRS_B1: + case DPS310_PRS_B0: + case DPS310_TMP_B2: + case DPS310_TMP_B1: + case DPS310_TMP_B0: + case DPS310_PRS_CFG: + case DPS310_TMP_CFG: + case DPS310_MEAS_CFG: + case DPS310_CFG_REG: + case DPS310_COEF_BASE...DPS310_COEF_LAST: + case DPS310_COEF_SRC: + case 0x32: /* Undocumented register to indicate workaround not required */ + return s->regs[reg]; + default: + qemu_log_mask(LOG_UNIMP, "%s: register 0x%02x unimplemented\n", + __func__, reg); + return 0xFF; + } +} + +static void dps310_write(DPS310State *s, uint8_t reg, uint8_t data) +{ + if (reg >= sizeof(s->regs)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: register %d out of bounds\n", + __func__, s->pointer); + return; + } + + switch (reg) { + case DPS310_RESET: + if (data == DPS310_RESET_MAGIC) { + device_cold_reset(DEVICE(s)); + } + break; + case DPS310_PRS_CFG: + case DPS310_TMP_CFG: + case DPS310_MEAS_CFG: + case DPS310_CFG_REG: + s->regs[reg] = data; + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: register 0x%02x unimplemented\n", + __func__, reg); + return; + } +} + +static uint8_t dps310_rx(I2CSlave *i2c) +{ + DPS310State *s = DPS310(i2c); + + if (s->len == 1) { + return dps310_read(s, s->pointer++); + } else { + return 0xFF; + } +} + +static int dps310_tx(I2CSlave *i2c, uint8_t data) +{ + DPS310State *s = DPS310(i2c); + + if (s->len == 0) { + /* + * first byte is the register pointer for a read or write + * operation + */ + s->pointer = data; + s->len++; + } else if (s->len == 1) { + dps310_write(s, s->pointer++, data); + } + + return 0; +} + +static int dps310_event(I2CSlave *i2c, enum i2c_event event) +{ + DPS310State *s = DPS310(i2c); + + switch (event) { + case I2C_START_SEND: + s->pointer = 0xFF; + s->len = 0; + break; + case I2C_START_RECV: + if (s->len != 1) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid recv sequence\n", + __func__); + } + break; + default: + break; + } + + return 0; +} + +static const VMStateDescription vmstate_dps310 = { + .name = "DPS310", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + 
VMSTATE_UINT8(len, DPS310State), + VMSTATE_UINT8_ARRAY(regs, DPS310State, NUM_REGISTERS), + VMSTATE_UINT8(pointer, DPS310State), + VMSTATE_I2C_SLAVE(i2c, DPS310State), + VMSTATE_END_OF_LIST() + } +}; + +static void dps310_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + I2CSlaveClass *k = I2C_SLAVE_CLASS(klass); + + k->event = dps310_event; + k->recv = dps310_rx; + k->send = dps310_tx; + dc->reset = dps310_reset; + dc->vmsd = &vmstate_dps310; +} + +static const TypeInfo dps310_info = { + .name = TYPE_DPS310, + .parent = TYPE_I2C_SLAVE, + .instance_size = sizeof(DPS310State), + .class_init = dps310_class_init, +}; + +static void dps310_register_types(void) +{ + type_register_static(&dps310_info); +} + +type_init(dps310_register_types) diff --git a/hw/misc/emc141x.c b/hw/sensor/emc141x.c similarity index 99% rename from hw/misc/emc141x.c rename to hw/sensor/emc141x.c index f7c53d48a42..7ce8f4e9794 100644 --- a/hw/misc/emc141x.c +++ b/hw/sensor/emc141x.c @@ -25,7 +25,7 @@ #include "qapi/visitor.h" #include "qemu/module.h" #include "qom/object.h" -#include "hw/misc/emc141x_regs.h" +#include "hw/sensor/emc141x_regs.h" #define SENSORS_COUNT_MAX 4 diff --git a/hw/sensor/max34451.c b/hw/sensor/max34451.c new file mode 100644 index 00000000000..a91d8bd487c --- /dev/null +++ b/hw/sensor/max34451.c @@ -0,0 +1,775 @@ +/* + * Maxim MAX34451 PMBus 16-Channel V/I monitor and 12-Channel Sequencer/Marginer + * + * Copyright 2021 Google LLC + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/i2c/pmbus_device.h" +#include "hw/irq.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "qemu/log.h" +#include "qemu/module.h" + +#define TYPE_MAX34451 "max34451" +#define MAX34451(obj) OBJECT_CHECK(MAX34451State, (obj), TYPE_MAX34451) + +#define MAX34451_MFR_MODE 0xD1 +#define MAX34451_MFR_PSEN_CONFIG 0xD2 +#define MAX34451_MFR_VOUT_PEAK 0xD4 +#define MAX34451_MFR_IOUT_PEAK 0xD5 +#define MAX34451_MFR_TEMPERATURE_PEAK 0xD6 +#define MAX34451_MFR_VOUT_MIN 0xD7 +#define MAX34451_MFR_NV_LOG_CONFIG 0xD8 +#define MAX34451_MFR_FAULT_RESPONSE 0xD9 +#define MAX34451_MFR_FAULT_RETRY 0xDA +#define MAX34451_MFR_NV_FAULT_LOG 0xDC +#define MAX34451_MFR_TIME_COUNT 0xDD +#define MAX34451_MFR_MARGIN_CONFIG 0xDF +#define MAX34451_MFR_FW_SERIAL 0xE0 +#define MAX34451_MFR_IOUT_AVG 0xE2 +#define MAX34451_MFR_CHANNEL_CONFIG 0xE4 +#define MAX34451_MFR_TON_SEQ_MAX 0xE6 +#define MAX34451_MFR_PWM_CONFIG 0xE7 +#define MAX34451_MFR_SEQ_CONFIG 0xE8 +#define MAX34451_MFR_STORE_ALL 0xEE +#define MAX34451_MFR_RESTORE_ALL 0xEF +#define MAX34451_MFR_TEMP_SENSOR_CONFIG 0xF0 +#define MAX34451_MFR_STORE_SINGLE 0xFC +#define MAX34451_MFR_CRC 0xFE + +#define MAX34451_NUM_MARGINED_PSU 12 +#define MAX34451_NUM_PWR_DEVICES 16 +#define MAX34451_NUM_TEMP_DEVICES 5 +#define MAX34451_NUM_PAGES 21 + +#define DEFAULT_OP_ON 0x80 +#define DEFAULT_CAPABILITY 0x20 +#define DEFAULT_ON_OFF_CONFIG 0x1a +#define DEFAULT_VOUT_MODE 0x40 +#define DEFAULT_TEMPERATURE 2500 +#define DEFAULT_SCALE 0x7FFF +#define DEFAULT_OV_LIMIT 0x7FFF +#define DEFAULT_OC_LIMIT 0x7FFF +#define DEFAULT_OT_LIMIT 0x7FFF +#define DEFAULT_VMIN 0x7FFF +#define DEFAULT_TON_FAULT_LIMIT 0xFFFF +#define DEFAULT_CHANNEL_CONFIG 0x20 +#define DEFAULT_TEXT 0x3130313031303130 + +/** + * MAX34451State: + * @code: The command code received + * @page: Each page corresponds to a device monitored by the Max 34451 + * The page register determines the available commands depending on device 
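Before the full per-page table that follows, the page ranges can be summarized in code form; this mirrors the "pmdev->page < 12", "< 16" and "15 < page && page < 21" guards used by the command handlers later in this file. The helper below is purely illustrative and not part of the device model.

typedef enum {
    MAX34451_PAGE_MARGINED_PSU,   /* pages 0-11: RS0-RS11, PSENx, PWM/DS4424 */
    MAX34451_PAGE_ADC_OR_GPI,     /* pages 12-15: ADC channels 12-15 or GPI */
    MAX34451_PAGE_TEMP_SENSOR,    /* pages 16-20: internal sensor + 4x DS75LV */
    MAX34451_PAGE_RESERVED,       /* pages 21-254: reserved */
    MAX34451_PAGE_ALL,            /* page 255: applies to every page */
} Max34451PageKind;

static Max34451PageKind max34451_page_kind(uint8_t page)
{
    if (page < 12) {
        return MAX34451_PAGE_MARGINED_PSU;
    } else if (page < 16) {
        return MAX34451_PAGE_ADC_OR_GPI;
    } else if (page < 21) {
        return MAX34451_PAGE_TEMP_SENSOR;
    } else if (page < 255) {
        return MAX34451_PAGE_RESERVED;
    }
    return MAX34451_PAGE_ALL;
}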
+ ___________________________________________________________________________ + | 0 | Power supply monitored by RS0, controlled by PSEN0, and | + | | margined with PWM0. | + |_______|___________________________________________________________________| + | 1 | Power supply monitored by RS1, controlled by PSEN1, and | + | | margined with PWM1. | + |_______|___________________________________________________________________| + | 2 | Power supply monitored by RS2, controlled by PSEN2, and | + | | margined with PWM2. | + |_______|___________________________________________________________________| + | 3 | Power supply monitored by RS3, controlled by PSEN3, and | + | | margined with PWM3. | + |_______|___________________________________________________________________| + | 4 | Power supply monitored by RS4, controlled by PSEN4, and | + | | margined with PWM4. | + |_______|___________________________________________________________________| + | 5 | Power supply monitored by RS5, controlled by PSEN5, and | + | | margined with PWM5. | + |_______|___________________________________________________________________| + | 6 | Power supply monitored by RS6, controlled by PSEN6, and | + | | margined with PWM6. | + |_______|___________________________________________________________________| + | 7 | Power supply monitored by RS7, controlled by PSEN7, and | + | | margined with PWM7. | + |_______|___________________________________________________________________| + | 8 | Power supply monitored by RS8, controlled by PSEN8, and | + | | optionally margined by OUT0 of external DS4424 at I2C address A0h.| + |_______|___________________________________________________________________| + | 9 | Power supply monitored by RS9, controlled by PSEN9, and | + | | optionally margined by OUT1 of external DS4424 at I2C address A0h.| + |_______|___________________________________________________________________| + | 10 | Power supply monitored by RS10, controlled by PSEN10, and | + | | optionally margined by OUT2 of external DS4424 at I2C address A0h.| + |_______|___________________________________________________________________| + | 11 | Power supply monitored by RS11, controlled by PSEN11, and | + | | optionally margined by OUT3 of external DS4424 at I2C address A0h.| + |_______|___________________________________________________________________| + | 12 | ADC channel 12 (monitors voltage or current) or GPI. | + |_______|___________________________________________________________________| + | 13 | ADC channel 13 (monitors voltage or current) or GPI. | + |_______|___________________________________________________________________| + | 14 | ADC channel 14 (monitors voltage or current) or GPI. | + |_______|___________________________________________________________________| + | 15 | ADC channel 15 (monitors voltage or current) or GPI. | + |_______|___________________________________________________________________| + | 16 | Internal temperature sensor. | + |_______|___________________________________________________________________| + | 17 | External DS75LV temperature sensor with I2C address 90h. | + |_______|___________________________________________________________________| + | 18 | External DS75LV temperature sensor with I2C address 92h. | + |_______|___________________________________________________________________| + | 19 | External DS75LV temperature sensor with I2C address 94h. 
| + |_______|___________________________________________________________________| + | 20 | External DS75LV temperature sensor with I2C address 96h. | + |_______|___________________________________________________________________| + | 21=E2=80=93254| Reserved. | + |_______|___________________________________________________________________| + | 255 | Applies to all pages. | + |_______|___________________________________________________________________| + * + * @operation: Turn on and off power supplies + * @on_off_config: Configure the power supply on and off transition behaviour + * @write_protect: protect against changes to the device's memory + * @vout_margin_high: the voltage when OPERATION is set to margin high + * @vout_margin_low: the voltage when OPERATION is set to margin low + * @vout_scale: scale ADC reading to actual device reading if different + * @iout_cal_gain: set ratio of the voltage at the ADC input to sensed current + */ +typedef struct MAX34451State { + PMBusDevice parent; + + uint16_t power_good_on[MAX34451_NUM_PWR_DEVICES]; + uint16_t power_good_off[MAX34451_NUM_PWR_DEVICES]; + uint16_t ton_delay[MAX34451_NUM_MARGINED_PSU]; + uint16_t ton_max_fault_limit[MAX34451_NUM_MARGINED_PSU]; + uint16_t toff_delay[MAX34451_NUM_MARGINED_PSU]; + uint8_t status_mfr_specific[MAX34451_NUM_PWR_DEVICES]; + /* Manufacturer specific function */ + uint64_t mfr_location; + uint64_t mfr_date; + uint64_t mfr_serial; + uint16_t mfr_mode; + uint32_t psen_config[MAX34451_NUM_MARGINED_PSU]; + uint16_t vout_peak[MAX34451_NUM_PWR_DEVICES]; + uint16_t iout_peak[MAX34451_NUM_PWR_DEVICES]; + uint16_t temperature_peak[MAX34451_NUM_TEMP_DEVICES]; + uint16_t vout_min[MAX34451_NUM_PWR_DEVICES]; + uint16_t nv_log_config; + uint32_t fault_response[MAX34451_NUM_PWR_DEVICES]; + uint16_t fault_retry; + uint32_t fault_log; + uint32_t time_count; + uint16_t margin_config[MAX34451_NUM_MARGINED_PSU]; + uint16_t fw_serial; + uint16_t iout_avg[MAX34451_NUM_PWR_DEVICES]; + uint16_t channel_config[MAX34451_NUM_PWR_DEVICES]; + uint16_t ton_seq_max[MAX34451_NUM_MARGINED_PSU]; + uint32_t pwm_config[MAX34451_NUM_MARGINED_PSU]; + uint32_t seq_config[MAX34451_NUM_MARGINED_PSU]; + uint16_t temp_sensor_config[MAX34451_NUM_TEMP_DEVICES]; + uint16_t store_single; + uint16_t crc; +} MAX34451State; + + +static void max34451_check_limits(MAX34451State *s) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(s); + + pmbus_check_limits(pmdev); + + for (int i = 0; i < MAX34451_NUM_PWR_DEVICES; i++) { + if (pmdev->pages[i].read_vout == 0) { /* PSU disabled */ + continue; + } + + if (pmdev->pages[i].read_vout > s->vout_peak[i]) { + s->vout_peak[i] = pmdev->pages[i].read_vout; + } + + if (pmdev->pages[i].read_vout < s->vout_min[i]) { + s->vout_min[i] = pmdev->pages[i].read_vout; + } + + if (pmdev->pages[i].read_iout > s->iout_peak[i]) { + s->iout_peak[i] = pmdev->pages[i].read_iout; + } + } + + for (int i = 0; i < MAX34451_NUM_TEMP_DEVICES; i++) { + if (pmdev->pages[i + 16].read_temperature_1 > s->temperature_peak[i]) { + s->temperature_peak[i] = pmdev->pages[i + 16].read_temperature_1; + } + } +} + +static uint8_t max34451_read_byte(PMBusDevice *pmdev) +{ + MAX34451State *s = MAX34451(pmdev); + switch (pmdev->code) { + + case PMBUS_POWER_GOOD_ON: + if (pmdev->page < 16) { + pmbus_send16(pmdev, s->power_good_on[pmdev->page]); + } + break; + + case PMBUS_POWER_GOOD_OFF: + if (pmdev->page < 16) { + pmbus_send16(pmdev, s->power_good_off[pmdev->page]); + } + break; + + case PMBUS_TON_DELAY: + if (pmdev->page < 12) { + pmbus_send16(pmdev, 
s->ton_delay[pmdev->page]); + } + break; + + case PMBUS_TON_MAX_FAULT_LIMIT: + if (pmdev->page < 12) { + pmbus_send16(pmdev, s->ton_max_fault_limit[pmdev->page]); + } + break; + + case PMBUS_TOFF_DELAY: + if (pmdev->page < 12) { + pmbus_send16(pmdev, s->toff_delay[pmdev->page]); + } + break; + + case PMBUS_STATUS_MFR_SPECIFIC: + if (pmdev->page < 16) { + pmbus_send8(pmdev, s->status_mfr_specific[pmdev->page]); + } + break; + + case PMBUS_MFR_ID: + pmbus_send8(pmdev, 0x4d); /* Maxim */ + break; + + case PMBUS_MFR_MODEL: + pmbus_send8(pmdev, 0x59); + break; + + case PMBUS_MFR_LOCATION: + pmbus_send64(pmdev, s->mfr_location); + break; + + case PMBUS_MFR_DATE: + pmbus_send64(pmdev, s->mfr_date); + break; + + case PMBUS_MFR_SERIAL: + pmbus_send64(pmdev, s->mfr_serial); + break; + + case MAX34451_MFR_MODE: + pmbus_send16(pmdev, s->mfr_mode); + break; + + case MAX34451_MFR_PSEN_CONFIG: + if (pmdev->page < 12) { + pmbus_send32(pmdev, s->psen_config[pmdev->page]); + } + break; + + case MAX34451_MFR_VOUT_PEAK: + if (pmdev->page < 16) { + pmbus_send16(pmdev, s->vout_peak[pmdev->page]); + } + break; + + case MAX34451_MFR_IOUT_PEAK: + if (pmdev->page < 16) { + pmbus_send16(pmdev, s->iout_peak[pmdev->page]); + } + break; + + case MAX34451_MFR_TEMPERATURE_PEAK: + if (15 < pmdev->page && pmdev->page < 21) { + pmbus_send16(pmdev, s->temperature_peak[pmdev->page % 16]); + } else { + pmbus_send16(pmdev, s->temperature_peak[0]); + } + break; + + case MAX34451_MFR_VOUT_MIN: + if (pmdev->page < 16) { + pmbus_send16(pmdev, s->vout_min[pmdev->page]); + } + break; + + case MAX34451_MFR_NV_LOG_CONFIG: + pmbus_send16(pmdev, s->nv_log_config); + break; + + case MAX34451_MFR_FAULT_RESPONSE: + if (pmdev->page < 16) { + pmbus_send32(pmdev, s->fault_response[pmdev->page]); + } + break; + + case MAX34451_MFR_FAULT_RETRY: + pmbus_send32(pmdev, s->fault_retry); + break; + + case MAX34451_MFR_NV_FAULT_LOG: + pmbus_send32(pmdev, s->fault_log); + break; + + case MAX34451_MFR_TIME_COUNT: + pmbus_send32(pmdev, s->time_count); + break; + + case MAX34451_MFR_MARGIN_CONFIG: + if (pmdev->page < 12) { + pmbus_send16(pmdev, s->margin_config[pmdev->page]); + } + break; + + case MAX34451_MFR_FW_SERIAL: + if (pmdev->page == 255) { + pmbus_send16(pmdev, 1); /* Firmware revision */ + } + break; + + case MAX34451_MFR_IOUT_AVG: + if (pmdev->page < 16) { + pmbus_send16(pmdev, s->iout_avg[pmdev->page]); + } + break; + + case MAX34451_MFR_CHANNEL_CONFIG: + if (pmdev->page < 16) { + pmbus_send16(pmdev, s->channel_config[pmdev->page]); + } + break; + + case MAX34451_MFR_TON_SEQ_MAX: + if (pmdev->page < 12) { + pmbus_send16(pmdev, s->ton_seq_max[pmdev->page]); + } + break; + + case MAX34451_MFR_PWM_CONFIG: + if (pmdev->page < 12) { + pmbus_send32(pmdev, s->pwm_config[pmdev->page]); + } + break; + + case MAX34451_MFR_SEQ_CONFIG: + if (pmdev->page < 12) { + pmbus_send32(pmdev, s->seq_config[pmdev->page]); + } + break; + + case MAX34451_MFR_TEMP_SENSOR_CONFIG: + if (15 < pmdev->page && pmdev->page < 21) { + pmbus_send32(pmdev, s->temp_sensor_config[pmdev->page % 16]); + } + break; + + case MAX34451_MFR_STORE_SINGLE: + pmbus_send32(pmdev, s->store_single); + break; + + case MAX34451_MFR_CRC: + pmbus_send32(pmdev, s->crc); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: reading from unsupported register: 0x%02x\n", + __func__, pmdev->code); + break; + } + return 0xFF; +} + +static int max34451_write_data(PMBusDevice *pmdev, const uint8_t *buf, + uint8_t len) +{ + MAX34451State *s = MAX34451(pmdev); + + if (len == 0) { + 
qemu_log_mask(LOG_GUEST_ERROR, "%s: writing empty data\n", __func__); + return -1; + } + + pmdev->code = buf[0]; /* PMBus command code */ + + if (len == 1) { + return 0; + } + + /* Exclude command code from buffer */ + buf++; + len--; + uint8_t index = pmdev->page; + + switch (pmdev->code) { + case MAX34451_MFR_STORE_ALL: + case MAX34451_MFR_RESTORE_ALL: + case MAX34451_MFR_STORE_SINGLE: + /* + * TODO: hardware behaviour is to move the contents of volatile + * memory to non-volatile memory. + */ + break; + + case PMBUS_POWER_GOOD_ON: /* R/W word */ + if (pmdev->page < MAX34451_NUM_PWR_DEVICES) { + s->power_good_on[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case PMBUS_POWER_GOOD_OFF: /* R/W word */ + if (pmdev->page < MAX34451_NUM_PWR_DEVICES) { + s->power_good_off[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case PMBUS_TON_DELAY: /* R/W word */ + if (pmdev->page < 12) { + s->ton_delay[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case PMBUS_TON_MAX_FAULT_LIMIT: /* R/W word */ + if (pmdev->page < 12) { + s->ton_max_fault_limit[pmdev->page] + = pmbus_receive16(pmdev); + } + break; + + case PMBUS_TOFF_DELAY: /* R/W word */ + if (pmdev->page < 12) { + s->toff_delay[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case PMBUS_MFR_LOCATION: /* R/W 64 */ + s->mfr_location = pmbus_receive64(pmdev); + break; + + case PMBUS_MFR_DATE: /* R/W 64 */ + s->mfr_date = pmbus_receive64(pmdev); + break; + + case PMBUS_MFR_SERIAL: /* R/W 64 */ + s->mfr_serial = pmbus_receive64(pmdev); + break; + + case MAX34451_MFR_MODE: /* R/W word */ + s->mfr_mode = pmbus_receive16(pmdev); + break; + + case MAX34451_MFR_PSEN_CONFIG: /* R/W 32 */ + if (pmdev->page < 12) { + s->psen_config[pmdev->page] = pmbus_receive32(pmdev); + } + break; + + case MAX34451_MFR_VOUT_PEAK: /* R/W word */ + if (pmdev->page < 16) { + s->vout_peak[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_IOUT_PEAK: /* R/W word */ + if (pmdev->page < 16) { + s->iout_peak[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_TEMPERATURE_PEAK: /* R/W word */ + if (15 < pmdev->page && pmdev->page < 21) { + s->temperature_peak[pmdev->page % 16] + = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_VOUT_MIN: /* R/W word */ + if (pmdev->page < 16) { + s->vout_min[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_NV_LOG_CONFIG: /* R/W word */ + s->nv_log_config = pmbus_receive16(pmdev); + break; + + case MAX34451_MFR_FAULT_RESPONSE: /* R/W 32 */ + if (pmdev->page < 16) { + s->fault_response[pmdev->page] = pmbus_receive32(pmdev); + } + break; + + case MAX34451_MFR_FAULT_RETRY: /* R/W word */ + s->fault_retry = pmbus_receive16(pmdev); + break; + + case MAX34451_MFR_TIME_COUNT: /* R/W 32 */ + s->time_count = pmbus_receive32(pmdev); + break; + + case MAX34451_MFR_MARGIN_CONFIG: /* R/W word */ + if (pmdev->page < 12) { + s->margin_config[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_CHANNEL_CONFIG: /* R/W word */ + if (pmdev->page < 16) { + s->channel_config[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_TON_SEQ_MAX: /* R/W word */ + if (pmdev->page < 12) { + s->ton_seq_max[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_PWM_CONFIG: /* R/W 32 */ + if (pmdev->page < 12) { + s->pwm_config[pmdev->page] = pmbus_receive32(pmdev); + } + break; + + case MAX34451_MFR_SEQ_CONFIG: /* R/W 32 */ + if (pmdev->page < 12) { + s->seq_config[pmdev->page] = pmbus_receive32(pmdev); + } + 
break; + + case MAX34451_MFR_TEMP_SENSOR_CONFIG: /* R/W word */ + if (15 < pmdev->page && pmdev->page < 21) { + s->temp_sensor_config[pmdev->page % 16] + = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_CRC: /* R/W word */ + s->crc = pmbus_receive16(pmdev); + break; + + case MAX34451_MFR_NV_FAULT_LOG: + case MAX34451_MFR_FW_SERIAL: + case MAX34451_MFR_IOUT_AVG: + /* Read only commands */ + pmdev->pages[index].status_word |= PMBUS_STATUS_CML; + pmdev->pages[index].status_cml |= PB_CML_FAULT_INVALID_DATA; + qemu_log_mask(LOG_GUEST_ERROR, + "%s: writing to read-only register 0x%02x\n", + __func__, pmdev->code); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: writing to unsupported register: 0x%02x\n", + __func__, pmdev->code); + break; + } + + return 0; +} + +static void max34451_get(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + visit_type_uint16(v, name, (uint16_t *)opaque, errp); +} + +static void max34451_set(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + MAX34451State *s = MAX34451(obj); + uint16_t *internal = opaque; + uint16_t value; + if (!visit_type_uint16(v, name, &value, errp)) { + return; + } + + *internal = value; + max34451_check_limits(s); +} + +/* used to init uint16_t arrays */ +static inline void *memset_word(void *s, uint16_t c, size_t n) +{ + size_t i; + uint16_t *p = s; + + for (i = 0; i < n; i++) { + p[i] = c; + } + + return s; +} + +static void max34451_exit_reset(Object *obj) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(obj); + MAX34451State *s = MAX34451(obj); + pmdev->capability = DEFAULT_CAPABILITY; + + for (int i = 0; i < MAX34451_NUM_PAGES; i++) { + pmdev->pages[i].operation = DEFAULT_OP_ON; + pmdev->pages[i].on_off_config = DEFAULT_ON_OFF_CONFIG; + pmdev->pages[i].revision = 0x11; + pmdev->pages[i].vout_mode = DEFAULT_VOUT_MODE; + } + + for (int i = 0; i < MAX34451_NUM_PWR_DEVICES; i++) { + pmdev->pages[i].vout_scale_monitor = DEFAULT_SCALE; + pmdev->pages[i].vout_ov_fault_limit = DEFAULT_OV_LIMIT; + pmdev->pages[i].vout_ov_warn_limit = DEFAULT_OV_LIMIT; + pmdev->pages[i].iout_oc_warn_limit = DEFAULT_OC_LIMIT; + pmdev->pages[i].iout_oc_fault_limit = DEFAULT_OC_LIMIT; + } + + for (int i = 0; i < MAX34451_NUM_MARGINED_PSU; i++) { + pmdev->pages[i].ton_max_fault_limit = DEFAULT_TON_FAULT_LIMIT; + } + + for (int i = 16; i < MAX34451_NUM_TEMP_DEVICES + 16; i++) { + pmdev->pages[i].read_temperature_1 = DEFAULT_TEMPERATURE; + pmdev->pages[i].ot_warn_limit = DEFAULT_OT_LIMIT; + pmdev->pages[i].ot_fault_limit = DEFAULT_OT_LIMIT; + } + + memset_word(s->ton_max_fault_limit, DEFAULT_TON_FAULT_LIMIT, + MAX34451_NUM_MARGINED_PSU); + memset_word(s->channel_config, DEFAULT_CHANNEL_CONFIG, + MAX34451_NUM_PWR_DEVICES); + memset_word(s->vout_min, DEFAULT_VMIN, MAX34451_NUM_PWR_DEVICES); + + s->mfr_location = DEFAULT_TEXT; + s->mfr_date = DEFAULT_TEXT; + s->mfr_serial = DEFAULT_TEXT; +} + +static const VMStateDescription vmstate_max34451 = { + .name = TYPE_MAX34451, + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]){ + VMSTATE_PMBUS_DEVICE(parent, MAX34451State), + VMSTATE_UINT16_ARRAY(power_good_on, MAX34451State, + MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16_ARRAY(power_good_off, MAX34451State, + MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16_ARRAY(ton_delay, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + VMSTATE_UINT16_ARRAY(ton_max_fault_limit, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + VMSTATE_UINT16_ARRAY(toff_delay, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + 
VMSTATE_UINT8_ARRAY(status_mfr_specific, MAX34451State, + MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT64(mfr_location, MAX34451State), + VMSTATE_UINT64(mfr_date, MAX34451State), + VMSTATE_UINT64(mfr_serial, MAX34451State), + VMSTATE_UINT16(mfr_mode, MAX34451State), + VMSTATE_UINT32_ARRAY(psen_config, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + VMSTATE_UINT16_ARRAY(vout_peak, MAX34451State, + MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16_ARRAY(iout_peak, MAX34451State, + MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16_ARRAY(temperature_peak, MAX34451State, + MAX34451_NUM_TEMP_DEVICES), + VMSTATE_UINT16_ARRAY(vout_min, MAX34451State, MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16(nv_log_config, MAX34451State), + VMSTATE_UINT32_ARRAY(fault_response, MAX34451State, + MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16(fault_retry, MAX34451State), + VMSTATE_UINT32(fault_log, MAX34451State), + VMSTATE_UINT32(time_count, MAX34451State), + VMSTATE_UINT16_ARRAY(margin_config, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + VMSTATE_UINT16(fw_serial, MAX34451State), + VMSTATE_UINT16_ARRAY(iout_avg, MAX34451State, MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16_ARRAY(channel_config, MAX34451State, + MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16_ARRAY(ton_seq_max, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + VMSTATE_UINT32_ARRAY(pwm_config, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + VMSTATE_UINT32_ARRAY(seq_config, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + VMSTATE_UINT16_ARRAY(temp_sensor_config, MAX34451State, + MAX34451_NUM_TEMP_DEVICES), + VMSTATE_UINT16(store_single, MAX34451State), + VMSTATE_UINT16(crc, MAX34451State), + VMSTATE_END_OF_LIST() + } +}; + +static void max34451_init(Object *obj) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(obj); + uint64_t psu_flags = PB_HAS_VOUT | PB_HAS_IOUT | PB_HAS_VOUT_MODE | + PB_HAS_IOUT_GAIN; + + for (int i = 0; i < MAX34451_NUM_PWR_DEVICES; i++) { + pmbus_page_config(pmdev, i, psu_flags); + } + + for (int i = 0; i < MAX34451_NUM_MARGINED_PSU; i++) { + pmbus_page_config(pmdev, i, psu_flags | PB_HAS_VOUT_MARGIN); + } + + for (int i = 16; i < MAX34451_NUM_TEMP_DEVICES + 16; i++) { + pmbus_page_config(pmdev, i, PB_HAS_TEMPERATURE | PB_HAS_VOUT_MODE); + } + + /* get and set the voltage in millivolts, max is 32767 mV */ + for (int i = 0; i < MAX34451_NUM_PWR_DEVICES; i++) { + object_property_add(obj, "vout[*]", "uint16", + max34451_get, + max34451_set, NULL, &pmdev->pages[i].read_vout); + } + + /* + * get and set the temperature of the internal temperature sensor in + * centidegrees Celcius i.e.: 2500 -> 25.00 C, max is 327.67 C + */ + for (int i = 0; i < MAX34451_NUM_TEMP_DEVICES; i++) { + object_property_add(obj, "temperature[*]", "uint16", + max34451_get, + max34451_set, + NULL, + &pmdev->pages[i + 16].read_temperature_1); + } + +} + +static void max34451_class_init(ObjectClass *klass, void *data) +{ + ResettableClass *rc = RESETTABLE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + PMBusDeviceClass *k = PMBUS_DEVICE_CLASS(klass); + dc->desc = "Maxim MAX34451 16-Channel V/I monitor"; + dc->vmsd = &vmstate_max34451; + k->write_data = max34451_write_data; + k->receive_byte = max34451_read_byte; + k->device_num_pages = MAX34451_NUM_PAGES; + rc->phases.exit = max34451_exit_reset; +} + +static const TypeInfo max34451_info = { + .name = TYPE_MAX34451, + .parent = TYPE_PMBUS_DEVICE, + .instance_size = sizeof(MAX34451State), + .instance_init = max34451_init, + .class_init = max34451_class_init, +}; + +static void max34451_register_types(void) +{ + 
type_register_static(&max34451_info); +} + +type_init(max34451_register_types) diff --git a/hw/sensor/meson.build b/hw/sensor/meson.build new file mode 100644 index 00000000000..059c4ca935b --- /dev/null +++ b/hw/sensor/meson.build @@ -0,0 +1,6 @@ +softmmu_ss.add(when: 'CONFIG_TMP105', if_true: files('tmp105.c')) +softmmu_ss.add(when: 'CONFIG_TMP421', if_true: files('tmp421.c')) +softmmu_ss.add(when: 'CONFIG_DPS310', if_true: files('dps310.c')) +softmmu_ss.add(when: 'CONFIG_EMC141X', if_true: files('emc141x.c')) +softmmu_ss.add(when: 'CONFIG_ADM1272', if_true: files('adm1272.c')) +softmmu_ss.add(when: 'CONFIG_MAX34451', if_true: files('max34451.c')) diff --git a/hw/misc/tmp105.c b/hw/sensor/tmp105.c similarity index 99% rename from hw/misc/tmp105.c rename to hw/sensor/tmp105.c index d299d9b21b7..20564494899 100644 --- a/hw/misc/tmp105.c +++ b/hw/sensor/tmp105.c @@ -22,7 +22,7 @@ #include "hw/i2c/i2c.h" #include "hw/irq.h" #include "migration/vmstate.h" -#include "tmp105.h" +#include "hw/sensor/tmp105.h" #include "qapi/error.h" #include "qapi/visitor.h" #include "qemu/module.h" diff --git a/hw/misc/tmp421.c b/hw/sensor/tmp421.c similarity index 100% rename from hw/misc/tmp421.c rename to hw/sensor/tmp421.c diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c index 443820901d4..72759413f37 100644 --- a/hw/sh4/r2d.c +++ b/hw/sh4/r2d.c @@ -26,6 +26,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "qapi/error.h" +#include "qemu/error-report.h" #include "cpu.h" #include "hw/sysbus.h" #include "hw/sh4/sh.h" @@ -42,7 +43,6 @@ #include "hw/loader.h" #include "hw/usb.h" #include "hw/block/flash.h" -#include "exec/address-spaces.h" #define FLASH_BASE 0x00000000 #define FLASH_SIZE (16 * MiB) @@ -57,10 +57,10 @@ #define LINUX_LOAD_OFFSET 0x0800000 #define INITRD_LOAD_OFFSET 0x1800000 -#define PA_IRLMSK 0x00 -#define PA_POWOFF 0x30 -#define PA_VERREG 0x32 -#define PA_OUTPORT 0x36 +#define PA_IRLMSK 0x00 +#define PA_POWOFF 0x30 +#define PA_VERREG 0x32 +#define PA_OUTPORT 0x36 typedef struct { uint16_t bcr; @@ -97,38 +97,41 @@ enum r2d_fpga_irq { }; static const struct { short irl; uint16_t msk; } irqtab[NR_IRQS] = { - [CF_IDE] = { 1, 1<<9 }, - [CF_CD] = { 2, 1<<8 }, - [PCI_INTA] = { 9, 1<<14 }, - [PCI_INTB] = { 10, 1<<13 }, - [PCI_INTC] = { 3, 1<<12 }, - [PCI_INTD] = { 0, 1<<11 }, - [SM501] = { 4, 1<<10 }, - [KEY] = { 5, 1<<6 }, - [RTC_A] = { 6, 1<<5 }, - [RTC_T] = { 7, 1<<4 }, - [SDCARD] = { 8, 1<<7 }, - [EXT] = { 11, 1<<0 }, - [TP] = { 12, 1<<15 }, + [CF_IDE] = { 1, 1 << 9 }, + [CF_CD] = { 2, 1 << 8 }, + [PCI_INTA] = { 9, 1 << 14 }, + [PCI_INTB] = { 10, 1 << 13 }, + [PCI_INTC] = { 3, 1 << 12 }, + [PCI_INTD] = { 0, 1 << 11 }, + [SM501] = { 4, 1 << 10 }, + [KEY] = { 5, 1 << 6 }, + [RTC_A] = { 6, 1 << 5 }, + [RTC_T] = { 7, 1 << 4 }, + [SDCARD] = { 8, 1 << 7 }, + [EXT] = { 11, 1 << 0 }, + [TP] = { 12, 1 << 15 }, }; static void update_irl(r2d_fpga_t *fpga) { int i, irl = 15; - for (i = 0; i < NR_IRQS; i++) - if (fpga->irlmon & fpga->irlmsk & irqtab[i].msk) - if (irqtab[i].irl < irl) - irl = irqtab[i].irl; + for (i = 0; i < NR_IRQS; i++) { + if ((fpga->irlmon & fpga->irlmsk & irqtab[i].msk) && + irqtab[i].irl < irl) { + irl = irqtab[i].irl; + } + } qemu_set_irq(fpga->irl, irl ^ 15); } static void r2d_fpga_irq_set(void *opaque, int n, int level) { r2d_fpga_t *fpga = opaque; - if (level) + if (level) { fpga->irlmon |= irqtab[n].msk; - else + } else { fpga->irlmon &= ~irqtab[n].msk; + } update_irl(fpga); } @@ -307,7 +310,7 @@ static void r2d_init(MachineState *machine) /* NIC: rtl8139 on-board, and 2 slots. 
*/ for (i = 0; i < nb_nics; i++) pci_nic_init_nofail(&nd_table[i], pci_bus, - "rtl8139", i==0 ? "2" : NULL); + "rtl8139", i == 0 ? "2" : NULL); /* USB keyboard */ usb_create_simple(usb_bus_find(-1), "usb-kbd"); @@ -322,8 +325,8 @@ static void r2d_init(MachineState *machine) SDRAM_BASE + LINUX_LOAD_OFFSET, INITRD_LOAD_OFFSET - LINUX_LOAD_OFFSET); if (kernel_size < 0) { - fprintf(stderr, "qemu: could not load kernel '%s'\n", kernel_filename); - exit(1); + error_report("qemu: could not load kernel '%s'", kernel_filename); + exit(1); } /* initialization which should be done by firmware */ @@ -331,7 +334,8 @@ static void r2d_init(MachineState *machine) MEMTXATTRS_UNSPECIFIED, NULL); /* cs3 SDRAM */ address_space_stw(&address_space_memory, SH7750_BCR2, 3 << (3 * 2), MEMTXATTRS_UNSPECIFIED, NULL); /* cs3 32bit */ - reset_info->vector = (SDRAM_BASE + LINUX_LOAD_OFFSET) | 0xa0000000; /* Start from P2 area */ + /* Start from P2 area */ + reset_info->vector = (SDRAM_BASE + LINUX_LOAD_OFFSET) | 0xa0000000; } if (initrd_filename) { @@ -342,8 +346,8 @@ static void r2d_init(MachineState *machine) SDRAM_SIZE - INITRD_LOAD_OFFSET); if (initrd_size < 0) { - fprintf(stderr, "qemu: could not load initrd '%s'\n", initrd_filename); - exit(1); + error_report("qemu: could not load initrd '%s'", initrd_filename); + exit(1); } /* initialization which should be done by firmware */ @@ -353,8 +357,10 @@ static void r2d_init(MachineState *machine) } if (kernel_cmdline) { - /* I see no evidence that this .kernel_cmdline buffer requires - NUL-termination, so using strncpy should be ok. */ + /* + * I see no evidence that this .kernel_cmdline buffer requires + * NUL-termination, so using strncpy should be ok. + */ strncpy(boot_params.kernel_cmdline, kernel_cmdline, sizeof(boot_params.kernel_cmdline)); } diff --git a/hw/sh4/sh7750.c b/hw/sh4/sh7750.c index f8ac3ec6e32..43dfb6497b5 100644 --- a/hw/sh4/sh7750.c +++ b/hw/sh4/sh7750.c @@ -24,15 +24,19 @@ */ #include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/sysbus.h" #include "hw/irq.h" #include "hw/sh4/sh.h" #include "sysemu/sysemu.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" #include "sh7750_regs.h" #include "sh7750_regnames.h" #include "hw/sh4/sh_intc.h" #include "hw/timer/tmu012.h" -#include "cpu.h" #include "exec/exec-all.h" +#include "trace.h" #define NB_DEVICES 4 @@ -61,17 +65,17 @@ typedef struct SH7750State { uint16_t gpioic; uint32_t pctra; uint32_t pctrb; - uint16_t portdira; /* Cached */ - uint16_t portpullupa; /* Cached */ - uint16_t portdirb; /* Cached */ - uint16_t portpullupb; /* Cached */ + uint16_t portdira; /* Cached */ + uint16_t portpullupa; /* Cached */ + uint16_t portdirb; /* Cached */ + uint16_t portpullupb; /* Cached */ uint16_t pdtra; uint16_t pdtrb; - uint16_t periph_pdtra; /* Imposed by the peripherals */ - uint16_t periph_portdira; /* Direction seen from the peripherals */ - uint16_t periph_pdtrb; /* Imposed by the peripherals */ - uint16_t periph_portdirb; /* Direction seen from the peripherals */ - sh7750_io_device *devices[NB_DEVICES]; /* External peripherals */ + uint16_t periph_pdtra; /* Imposed by the peripherals */ + uint16_t periph_portdira; /* Direction seen from the peripherals */ + uint16_t periph_pdtrb; /* Imposed by the peripherals */ + uint16_t periph_portdirb; /* Direction seen from the peripherals */ + sh7750_io_device *devices[NB_DEVICES]; /* External peripherals */ /* Cache */ uint32_t ccr; @@ -79,143 +83,145 @@ typedef struct SH7750State { struct intc_desc intc; } SH7750State; -static 
inline int has_bcr3_and_bcr4(SH7750State * s) +static inline int has_bcr3_and_bcr4(SH7750State *s) { return s->cpu->env.features & SH_FEATURE_BCR3_AND_BCR4; } -/********************************************************************** - I/O ports -**********************************************************************/ -int sh7750_register_io_device(SH7750State * s, sh7750_io_device * device) +/* + * I/O ports + */ + +int sh7750_register_io_device(SH7750State *s, sh7750_io_device *device) { int i; for (i = 0; i < NB_DEVICES; i++) { - if (s->devices[i] == NULL) { - s->devices[i] = device; - return 0; - } + if (s->devices[i] == NULL) { + s->devices[i] = device; + return 0; + } } return -1; } static uint16_t portdir(uint32_t v) { -#define EVENPORTMASK(n) ((v & (1<<((n)<<1))) >> (n)) +#define EVENPORTMASK(n) ((v & (1 << ((n) << 1))) >> (n)) return - EVENPORTMASK(15) | EVENPORTMASK(14) | EVENPORTMASK(13) | - EVENPORTMASK(12) | EVENPORTMASK(11) | EVENPORTMASK(10) | - EVENPORTMASK(9) | EVENPORTMASK(8) | EVENPORTMASK(7) | - EVENPORTMASK(6) | EVENPORTMASK(5) | EVENPORTMASK(4) | - EVENPORTMASK(3) | EVENPORTMASK(2) | EVENPORTMASK(1) | - EVENPORTMASK(0); + EVENPORTMASK(15) | EVENPORTMASK(14) | EVENPORTMASK(13) | + EVENPORTMASK(12) | EVENPORTMASK(11) | EVENPORTMASK(10) | + EVENPORTMASK(9) | EVENPORTMASK(8) | EVENPORTMASK(7) | + EVENPORTMASK(6) | EVENPORTMASK(5) | EVENPORTMASK(4) | + EVENPORTMASK(3) | EVENPORTMASK(2) | EVENPORTMASK(1) | + EVENPORTMASK(0); } static uint16_t portpullup(uint32_t v) { -#define ODDPORTMASK(n) ((v & (1<<(((n)<<1)+1))) >> (n)) +#define ODDPORTMASK(n) ((v & (1 << (((n) << 1) + 1))) >> (n)) return - ODDPORTMASK(15) | ODDPORTMASK(14) | ODDPORTMASK(13) | - ODDPORTMASK(12) | ODDPORTMASK(11) | ODDPORTMASK(10) | - ODDPORTMASK(9) | ODDPORTMASK(8) | ODDPORTMASK(7) | ODDPORTMASK(6) | - ODDPORTMASK(5) | ODDPORTMASK(4) | ODDPORTMASK(3) | ODDPORTMASK(2) | - ODDPORTMASK(1) | ODDPORTMASK(0); + ODDPORTMASK(15) | ODDPORTMASK(14) | ODDPORTMASK(13) | + ODDPORTMASK(12) | ODDPORTMASK(11) | ODDPORTMASK(10) | + ODDPORTMASK(9) | ODDPORTMASK(8) | ODDPORTMASK(7) | ODDPORTMASK(6) | + ODDPORTMASK(5) | ODDPORTMASK(4) | ODDPORTMASK(3) | ODDPORTMASK(2) | + ODDPORTMASK(1) | ODDPORTMASK(0); } -static uint16_t porta_lines(SH7750State * s) +static uint16_t porta_lines(SH7750State *s) { - return (s->portdira & s->pdtra) | /* CPU */ - (s->periph_portdira & s->periph_pdtra) | /* Peripherals */ - (~(s->portdira | s->periph_portdira) & s->portpullupa); /* Pullups */ + return (s->portdira & s->pdtra) | /* CPU */ + (s->periph_portdira & s->periph_pdtra) | /* Peripherals */ + (~(s->portdira | s->periph_portdira) & s->portpullupa); /* Pullups */ } -static uint16_t portb_lines(SH7750State * s) +static uint16_t portb_lines(SH7750State *s) { - return (s->portdirb & s->pdtrb) | /* CPU */ - (s->periph_portdirb & s->periph_pdtrb) | /* Peripherals */ - (~(s->portdirb | s->periph_portdirb) & s->portpullupb); /* Pullups */ + return (s->portdirb & s->pdtrb) | /* CPU */ + (s->periph_portdirb & s->periph_pdtrb) | /* Peripherals */ + (~(s->portdirb | s->periph_portdirb) & s->portpullupb); /* Pullups */ } -static void gen_port_interrupts(SH7750State * s) +static void gen_port_interrupts(SH7750State *s) { /* XXXXX interrupts not generated */ } -static void porta_changed(SH7750State * s, uint16_t prev) +static void porta_changed(SH7750State *s, uint16_t prev) { uint16_t currenta, changes; int i, r = 0; -#if 0 - fprintf(stderr, "porta changed from 0x%04x to 0x%04x\n", - prev, porta_lines(s)); - fprintf(stderr, "pdtra=0x%04x, 
pctra=0x%08x\n", s->pdtra, s->pctra); -#endif currenta = porta_lines(s); - if (currenta == prev) - return; + if (currenta == prev) { + return; + } + trace_sh7750_porta(prev, currenta, s->pdtra, s->pctra); changes = currenta ^ prev; for (i = 0; i < NB_DEVICES; i++) { - if (s->devices[i] && (s->devices[i]->portamask_trigger & changes)) { - r |= s->devices[i]->port_change_cb(currenta, portb_lines(s), - &s->periph_pdtra, - &s->periph_portdira, - &s->periph_pdtrb, - &s->periph_portdirb); - } + if (s->devices[i] && (s->devices[i]->portamask_trigger & changes)) { + r |= s->devices[i]->port_change_cb(currenta, portb_lines(s), + &s->periph_pdtra, + &s->periph_portdira, + &s->periph_pdtrb, + &s->periph_portdirb); + } } - if (r) - gen_port_interrupts(s); + if (r) { + gen_port_interrupts(s); + } } -static void portb_changed(SH7750State * s, uint16_t prev) +static void portb_changed(SH7750State *s, uint16_t prev) { uint16_t currentb, changes; int i, r = 0; currentb = portb_lines(s); - if (currentb == prev) - return; + if (currentb == prev) { + return; + } + trace_sh7750_portb(prev, currentb, s->pdtrb, s->pctrb); changes = currentb ^ prev; for (i = 0; i < NB_DEVICES; i++) { - if (s->devices[i] && (s->devices[i]->portbmask_trigger & changes)) { - r |= s->devices[i]->port_change_cb(portb_lines(s), currentb, - &s->periph_pdtra, - &s->periph_portdira, - &s->periph_pdtrb, - &s->periph_portdirb); - } + if (s->devices[i] && (s->devices[i]->portbmask_trigger & changes)) { + r |= s->devices[i]->port_change_cb(portb_lines(s), currentb, + &s->periph_pdtra, + &s->periph_portdira, + &s->periph_pdtrb, + &s->periph_portdirb); + } } - if (r) - gen_port_interrupts(s); + if (r) { + gen_port_interrupts(s); + } } -/********************************************************************** - Memory -**********************************************************************/ +/* + * Memory + */ static void error_access(const char *kind, hwaddr addr) { fprintf(stderr, "%s to %s (0x" TARGET_FMT_plx ") not supported\n", - kind, regname(addr), addr); + kind, regname(addr), addr); } static void ignore_access(const char *kind, hwaddr addr) { fprintf(stderr, "%s to %s (0x" TARGET_FMT_plx ") ignored\n", - kind, regname(addr), addr); + kind, regname(addr), addr); } static uint32_t sh7750_mem_readb(void *opaque, hwaddr addr) { switch (addr) { default: - error_access("byte read", addr); + error_access("byte read", addr); abort(); } } @@ -226,30 +232,31 @@ static uint32_t sh7750_mem_readw(void *opaque, hwaddr addr) switch (addr) { case SH7750_BCR2_A7: - return s->bcr2; + return s->bcr2; case SH7750_BCR3_A7: - if(!has_bcr3_and_bcr4(s)) - error_access("word read", addr); - return s->bcr3; + if (!has_bcr3_and_bcr4(s)) { + error_access("word read", addr); + } + return s->bcr3; case SH7750_FRQCR_A7: - return 0; + return 0; case SH7750_PCR_A7: - return s->pcr; + return s->pcr; case SH7750_RFCR_A7: - fprintf(stderr, - "Read access to refresh count register, incrementing\n"); - return s->rfcr++; + fprintf(stderr, + "Read access to refresh count register, incrementing\n"); + return s->rfcr++; case SH7750_PDTRA_A7: - return porta_lines(s); + return porta_lines(s); case SH7750_PDTRB_A7: - return portb_lines(s); + return portb_lines(s); case SH7750_RTCOR_A7: case SH7750_RTCNT_A7: case SH7750_RTCSR_A7: - ignore_access("word read", addr); - return 0; + ignore_access("word read", addr); + return 0; default: - error_access("word read", addr); + error_access("word read", addr); abort(); } } @@ -261,11 +268,12 @@ static uint32_t sh7750_mem_readl(void *opaque, 
hwaddr addr) switch (addr) { case SH7750_BCR1_A7: - return s->bcr1; + return s->bcr1; case SH7750_BCR4_A7: - if(!has_bcr3_and_bcr4(s)) - error_access("long read", addr); - return s->bcr4; + if (!has_bcr3_and_bcr4(s)) { + error_access("long read", addr); + } + return s->bcr4; case SH7750_WCR1_A7: case SH7750_WCR2_A7: case SH7750_WCR3_A7: @@ -289,31 +297,31 @@ static uint32_t sh7750_mem_readl(void *opaque, hwaddr addr) case SH7750_INTEVT_A7: return s->cpu->env.intevt; case SH7750_CCR_A7: - return s->ccr; - case 0x1f000030: /* Processor version */ + return s->ccr; + case 0x1f000030: /* Processor version */ scc = SUPERH_CPU_GET_CLASS(s->cpu); return scc->pvr; - case 0x1f000040: /* Cache version */ + case 0x1f000040: /* Cache version */ scc = SUPERH_CPU_GET_CLASS(s->cpu); return scc->cvr; - case 0x1f000044: /* Processor revision */ + case 0x1f000044: /* Processor revision */ scc = SUPERH_CPU_GET_CLASS(s->cpu); return scc->prr; default: - error_access("long read", addr); + error_access("long read", addr); abort(); } } #define is_in_sdrmx(a, x) (a >= SH7750_SDMR ## x ## _A7 \ - && a <= (SH7750_SDMR ## x ## _A7 + SH7750_SDMR ## x ## _REGNB)) + && a <= (SH7750_SDMR ## x ## _A7 + SH7750_SDMR ## x ## _REGNB)) static void sh7750_mem_writeb(void *opaque, hwaddr addr, - uint32_t mem_value) + uint32_t mem_value) { if (is_in_sdrmx(addr, 2) || is_in_sdrmx(addr, 3)) { - ignore_access("byte write", addr); - return; + ignore_access("byte write", addr); + return; } error_access("byte write", addr); @@ -321,94 +329,96 @@ static void sh7750_mem_writeb(void *opaque, hwaddr addr, } static void sh7750_mem_writew(void *opaque, hwaddr addr, - uint32_t mem_value) + uint32_t mem_value) { SH7750State *s = opaque; uint16_t temp; switch (addr) { - /* SDRAM controller */ + /* SDRAM controller */ case SH7750_BCR2_A7: s->bcr2 = mem_value; return; case SH7750_BCR3_A7: - if(!has_bcr3_and_bcr4(s)) - error_access("word write", addr); - s->bcr3 = mem_value; - return; + if (!has_bcr3_and_bcr4(s)) { + error_access("word write", addr); + } + s->bcr3 = mem_value; + return; case SH7750_PCR_A7: - s->pcr = mem_value; - return; + s->pcr = mem_value; + return; case SH7750_RTCNT_A7: case SH7750_RTCOR_A7: case SH7750_RTCSR_A7: - ignore_access("word write", addr); - return; - /* IO ports */ + ignore_access("word write", addr); + return; + /* IO ports */ case SH7750_PDTRA_A7: - temp = porta_lines(s); - s->pdtra = mem_value; - porta_changed(s, temp); - return; + temp = porta_lines(s); + s->pdtra = mem_value; + porta_changed(s, temp); + return; case SH7750_PDTRB_A7: - temp = portb_lines(s); - s->pdtrb = mem_value; - portb_changed(s, temp); - return; + temp = portb_lines(s); + s->pdtrb = mem_value; + portb_changed(s, temp); + return; case SH7750_RFCR_A7: - fprintf(stderr, "Write access to refresh count register\n"); - s->rfcr = mem_value; - return; + fprintf(stderr, "Write access to refresh count register\n"); + s->rfcr = mem_value; + return; case SH7750_GPIOIC_A7: - s->gpioic = mem_value; - if (mem_value != 0) { - fprintf(stderr, "I/O interrupts not implemented\n"); + s->gpioic = mem_value; + if (mem_value != 0) { + fprintf(stderr, "I/O interrupts not implemented\n"); abort(); - } - return; + } + return; default: - error_access("word write", addr); + error_access("word write", addr); abort(); } } static void sh7750_mem_writel(void *opaque, hwaddr addr, - uint32_t mem_value) + uint32_t mem_value) { SH7750State *s = opaque; uint16_t temp; switch (addr) { - /* SDRAM controller */ + /* SDRAM controller */ case SH7750_BCR1_A7: s->bcr1 = mem_value; 
return; case SH7750_BCR4_A7: - if(!has_bcr3_and_bcr4(s)) - error_access("long write", addr); - s->bcr4 = mem_value; - return; + if (!has_bcr3_and_bcr4(s)) { + error_access("long write", addr); + } + s->bcr4 = mem_value; + return; case SH7750_WCR1_A7: case SH7750_WCR2_A7: case SH7750_WCR3_A7: case SH7750_MCR_A7: - ignore_access("long write", addr); - return; - /* IO ports */ + ignore_access("long write", addr); + return; + /* IO ports */ case SH7750_PCTRA_A7: - temp = porta_lines(s); - s->pctra = mem_value; - s->portdira = portdir(mem_value); - s->portpullupa = portpullup(mem_value); - porta_changed(s, temp); - return; + temp = porta_lines(s); + s->pctra = mem_value; + s->portdira = portdir(mem_value); + s->portpullupa = portpullup(mem_value); + porta_changed(s, temp); + return; case SH7750_PCTRB_A7: - temp = portb_lines(s); - s->pctrb = mem_value; - s->portdirb = portdir(mem_value); - s->portpullupb = portpullup(mem_value); - portb_changed(s, temp); - return; + temp = portb_lines(s); + s->pctrb = mem_value; + s->portdirb = portdir(mem_value); + s->portpullupb = portpullup(mem_value); + portb_changed(s, temp); + return; case SH7750_MMUCR_A7: if (mem_value & MMUCR_TI) { cpu_sh4_invalidate_tlb(&s->cpu->env); @@ -444,10 +454,10 @@ static void sh7750_mem_writel(void *opaque, hwaddr addr, s->cpu->env.intevt = mem_value & 0x000007ff; return; case SH7750_CCR_A7: - s->ccr = mem_value; - return; + s->ccr = mem_value; + return; default: - error_access("long write", addr); + error_access("long write", addr); abort(); } } @@ -492,161 +502,161 @@ static const MemoryRegionOps sh7750_mem_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -/* sh775x interrupt controller tables for sh_intc.c +/* + * sh775x interrupt controller tables for sh_intc.c * stolen from linux/arch/sh/kernel/cpu/sh4/setup-sh7750.c */ enum { - UNUSED = 0, - - /* interrupt sources */ - IRL_0, IRL_1, IRL_2, IRL_3, IRL_4, IRL_5, IRL_6, IRL_7, - IRL_8, IRL_9, IRL_A, IRL_B, IRL_C, IRL_D, IRL_E, - IRL0, IRL1, IRL2, IRL3, - HUDI, GPIOI, - DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2, DMAC_DMTE3, - DMAC_DMTE4, DMAC_DMTE5, DMAC_DMTE6, DMAC_DMTE7, - DMAC_DMAE, - PCIC0_PCISERR, PCIC1_PCIERR, PCIC1_PCIPWDWN, PCIC1_PCIPWON, - PCIC1_PCIDMA0, PCIC1_PCIDMA1, PCIC1_PCIDMA2, PCIC1_PCIDMA3, - TMU3, TMU4, TMU0, TMU1, TMU2_TUNI, TMU2_TICPI, - RTC_ATI, RTC_PRI, RTC_CUI, - SCI1_ERI, SCI1_RXI, SCI1_TXI, SCI1_TEI, - SCIF_ERI, SCIF_RXI, SCIF_BRI, SCIF_TXI, - WDT, - REF_RCMI, REF_ROVI, - - /* interrupt groups */ - DMAC, PCIC1, TMU2, RTC, SCI1, SCIF, REF, - /* irl bundle */ - IRL, - - NR_SOURCES, + UNUSED = 0, + + /* interrupt sources */ + IRL_0, IRL_1, IRL_2, IRL_3, IRL_4, IRL_5, IRL_6, IRL_7, + IRL_8, IRL_9, IRL_A, IRL_B, IRL_C, IRL_D, IRL_E, + IRL0, IRL1, IRL2, IRL3, + HUDI, GPIOI, + DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2, DMAC_DMTE3, + DMAC_DMTE4, DMAC_DMTE5, DMAC_DMTE6, DMAC_DMTE7, + DMAC_DMAE, + PCIC0_PCISERR, PCIC1_PCIERR, PCIC1_PCIPWDWN, PCIC1_PCIPWON, + PCIC1_PCIDMA0, PCIC1_PCIDMA1, PCIC1_PCIDMA2, PCIC1_PCIDMA3, + TMU3, TMU4, TMU0, TMU1, TMU2_TUNI, TMU2_TICPI, + RTC_ATI, RTC_PRI, RTC_CUI, + SCI1_ERI, SCI1_RXI, SCI1_TXI, SCI1_TEI, + SCIF_ERI, SCIF_RXI, SCIF_BRI, SCIF_TXI, + WDT, + REF_RCMI, REF_ROVI, + + /* interrupt groups */ + DMAC, PCIC1, TMU2, RTC, SCI1, SCIF, REF, + /* irl bundle */ + IRL, + + NR_SOURCES, }; static struct intc_vect vectors[] = { - INTC_VECT(HUDI, 0x600), INTC_VECT(GPIOI, 0x620), - INTC_VECT(TMU0, 0x400), INTC_VECT(TMU1, 0x420), - INTC_VECT(TMU2_TUNI, 0x440), INTC_VECT(TMU2_TICPI, 0x460), - INTC_VECT(RTC_ATI, 0x480), INTC_VECT(RTC_PRI, 0x4a0), - 
INTC_VECT(RTC_CUI, 0x4c0), - INTC_VECT(SCI1_ERI, 0x4e0), INTC_VECT(SCI1_RXI, 0x500), - INTC_VECT(SCI1_TXI, 0x520), INTC_VECT(SCI1_TEI, 0x540), - INTC_VECT(SCIF_ERI, 0x700), INTC_VECT(SCIF_RXI, 0x720), - INTC_VECT(SCIF_BRI, 0x740), INTC_VECT(SCIF_TXI, 0x760), - INTC_VECT(WDT, 0x560), - INTC_VECT(REF_RCMI, 0x580), INTC_VECT(REF_ROVI, 0x5a0), + INTC_VECT(HUDI, 0x600), INTC_VECT(GPIOI, 0x620), + INTC_VECT(TMU0, 0x400), INTC_VECT(TMU1, 0x420), + INTC_VECT(TMU2_TUNI, 0x440), INTC_VECT(TMU2_TICPI, 0x460), + INTC_VECT(RTC_ATI, 0x480), INTC_VECT(RTC_PRI, 0x4a0), + INTC_VECT(RTC_CUI, 0x4c0), + INTC_VECT(SCI1_ERI, 0x4e0), INTC_VECT(SCI1_RXI, 0x500), + INTC_VECT(SCI1_TXI, 0x520), INTC_VECT(SCI1_TEI, 0x540), + INTC_VECT(SCIF_ERI, 0x700), INTC_VECT(SCIF_RXI, 0x720), + INTC_VECT(SCIF_BRI, 0x740), INTC_VECT(SCIF_TXI, 0x760), + INTC_VECT(WDT, 0x560), + INTC_VECT(REF_RCMI, 0x580), INTC_VECT(REF_ROVI, 0x5a0), }; static struct intc_group groups[] = { - INTC_GROUP(TMU2, TMU2_TUNI, TMU2_TICPI), - INTC_GROUP(RTC, RTC_ATI, RTC_PRI, RTC_CUI), - INTC_GROUP(SCI1, SCI1_ERI, SCI1_RXI, SCI1_TXI, SCI1_TEI), - INTC_GROUP(SCIF, SCIF_ERI, SCIF_RXI, SCIF_BRI, SCIF_TXI), - INTC_GROUP(REF, REF_RCMI, REF_ROVI), + INTC_GROUP(TMU2, TMU2_TUNI, TMU2_TICPI), + INTC_GROUP(RTC, RTC_ATI, RTC_PRI, RTC_CUI), + INTC_GROUP(SCI1, SCI1_ERI, SCI1_RXI, SCI1_TXI, SCI1_TEI), + INTC_GROUP(SCIF, SCIF_ERI, SCIF_RXI, SCIF_BRI, SCIF_TXI), + INTC_GROUP(REF, REF_RCMI, REF_ROVI), }; static struct intc_prio_reg prio_registers[] = { - { 0xffd00004, 0, 16, 4, /* IPRA */ { TMU0, TMU1, TMU2, RTC } }, - { 0xffd00008, 0, 16, 4, /* IPRB */ { WDT, REF, SCI1, 0 } }, - { 0xffd0000c, 0, 16, 4, /* IPRC */ { GPIOI, DMAC, SCIF, HUDI } }, - { 0xffd00010, 0, 16, 4, /* IPRD */ { IRL0, IRL1, IRL2, IRL3 } }, - { 0xfe080000, 0, 32, 4, /* INTPRI00 */ { 0, 0, 0, 0, - TMU4, TMU3, - PCIC1, PCIC0_PCISERR } }, + { 0xffd00004, 0, 16, 4, /* IPRA */ { TMU0, TMU1, TMU2, RTC } }, + { 0xffd00008, 0, 16, 4, /* IPRB */ { WDT, REF, SCI1, 0 } }, + { 0xffd0000c, 0, 16, 4, /* IPRC */ { GPIOI, DMAC, SCIF, HUDI } }, + { 0xffd00010, 0, 16, 4, /* IPRD */ { IRL0, IRL1, IRL2, IRL3 } }, + { 0xfe080000, 0, 32, 4, /* INTPRI00 */ { 0, 0, 0, 0, TMU4, TMU3, + PCIC1, PCIC0_PCISERR } }, }; /* SH7750, SH7750S, SH7751 and SH7091 all have 4-channel DMA controllers */ static struct intc_vect vectors_dma4[] = { - INTC_VECT(DMAC_DMTE0, 0x640), INTC_VECT(DMAC_DMTE1, 0x660), - INTC_VECT(DMAC_DMTE2, 0x680), INTC_VECT(DMAC_DMTE3, 0x6a0), - INTC_VECT(DMAC_DMAE, 0x6c0), + INTC_VECT(DMAC_DMTE0, 0x640), INTC_VECT(DMAC_DMTE1, 0x660), + INTC_VECT(DMAC_DMTE2, 0x680), INTC_VECT(DMAC_DMTE3, 0x6a0), + INTC_VECT(DMAC_DMAE, 0x6c0), }; static struct intc_group groups_dma4[] = { - INTC_GROUP(DMAC, DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2, - DMAC_DMTE3, DMAC_DMAE), + INTC_GROUP(DMAC, DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2, + DMAC_DMTE3, DMAC_DMAE), }; /* SH7750R and SH7751R both have 8-channel DMA controllers */ static struct intc_vect vectors_dma8[] = { - INTC_VECT(DMAC_DMTE0, 0x640), INTC_VECT(DMAC_DMTE1, 0x660), - INTC_VECT(DMAC_DMTE2, 0x680), INTC_VECT(DMAC_DMTE3, 0x6a0), - INTC_VECT(DMAC_DMTE4, 0x780), INTC_VECT(DMAC_DMTE5, 0x7a0), - INTC_VECT(DMAC_DMTE6, 0x7c0), INTC_VECT(DMAC_DMTE7, 0x7e0), - INTC_VECT(DMAC_DMAE, 0x6c0), + INTC_VECT(DMAC_DMTE0, 0x640), INTC_VECT(DMAC_DMTE1, 0x660), + INTC_VECT(DMAC_DMTE2, 0x680), INTC_VECT(DMAC_DMTE3, 0x6a0), + INTC_VECT(DMAC_DMTE4, 0x780), INTC_VECT(DMAC_DMTE5, 0x7a0), + INTC_VECT(DMAC_DMTE6, 0x7c0), INTC_VECT(DMAC_DMTE7, 0x7e0), + INTC_VECT(DMAC_DMAE, 0x6c0), }; static struct intc_group 
groups_dma8[] = { - INTC_GROUP(DMAC, DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2, - DMAC_DMTE3, DMAC_DMTE4, DMAC_DMTE5, - DMAC_DMTE6, DMAC_DMTE7, DMAC_DMAE), + INTC_GROUP(DMAC, DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2, + DMAC_DMTE3, DMAC_DMTE4, DMAC_DMTE5, + DMAC_DMTE6, DMAC_DMTE7, DMAC_DMAE), }; /* SH7750R, SH7751 and SH7751R all have two extra timer channels */ static struct intc_vect vectors_tmu34[] = { - INTC_VECT(TMU3, 0xb00), INTC_VECT(TMU4, 0xb80), + INTC_VECT(TMU3, 0xb00), INTC_VECT(TMU4, 0xb80), }; static struct intc_mask_reg mask_registers[] = { - { 0xfe080040, 0xfe080060, 32, /* INTMSK00 / INTMSKCLR00 */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, TMU4, TMU3, - PCIC1_PCIERR, PCIC1_PCIPWDWN, PCIC1_PCIPWON, - PCIC1_PCIDMA0, PCIC1_PCIDMA1, PCIC1_PCIDMA2, - PCIC1_PCIDMA3, PCIC0_PCISERR } }, + { 0xfe080040, 0xfe080060, 32, /* INTMSK00 / INTMSKCLR00 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, TMU4, TMU3, + PCIC1_PCIERR, PCIC1_PCIPWDWN, PCIC1_PCIPWON, + PCIC1_PCIDMA0, PCIC1_PCIDMA1, PCIC1_PCIDMA2, + PCIC1_PCIDMA3, PCIC0_PCISERR } }, }; /* SH7750S, SH7750R, SH7751 and SH7751R all have IRLM priority registers */ static struct intc_vect vectors_irlm[] = { - INTC_VECT(IRL0, 0x240), INTC_VECT(IRL1, 0x2a0), - INTC_VECT(IRL2, 0x300), INTC_VECT(IRL3, 0x360), + INTC_VECT(IRL0, 0x240), INTC_VECT(IRL1, 0x2a0), + INTC_VECT(IRL2, 0x300), INTC_VECT(IRL3, 0x360), }; /* SH7751 and SH7751R both have PCI */ static struct intc_vect vectors_pci[] = { - INTC_VECT(PCIC0_PCISERR, 0xa00), INTC_VECT(PCIC1_PCIERR, 0xae0), - INTC_VECT(PCIC1_PCIPWDWN, 0xac0), INTC_VECT(PCIC1_PCIPWON, 0xaa0), - INTC_VECT(PCIC1_PCIDMA0, 0xa80), INTC_VECT(PCIC1_PCIDMA1, 0xa60), - INTC_VECT(PCIC1_PCIDMA2, 0xa40), INTC_VECT(PCIC1_PCIDMA3, 0xa20), + INTC_VECT(PCIC0_PCISERR, 0xa00), INTC_VECT(PCIC1_PCIERR, 0xae0), + INTC_VECT(PCIC1_PCIPWDWN, 0xac0), INTC_VECT(PCIC1_PCIPWON, 0xaa0), + INTC_VECT(PCIC1_PCIDMA0, 0xa80), INTC_VECT(PCIC1_PCIDMA1, 0xa60), + INTC_VECT(PCIC1_PCIDMA2, 0xa40), INTC_VECT(PCIC1_PCIDMA3, 0xa20), }; static struct intc_group groups_pci[] = { - INTC_GROUP(PCIC1, PCIC1_PCIERR, PCIC1_PCIPWDWN, PCIC1_PCIPWON, - PCIC1_PCIDMA0, PCIC1_PCIDMA1, PCIC1_PCIDMA2, PCIC1_PCIDMA3), + INTC_GROUP(PCIC1, PCIC1_PCIERR, PCIC1_PCIPWDWN, PCIC1_PCIPWON, + PCIC1_PCIDMA0, PCIC1_PCIDMA1, PCIC1_PCIDMA2, PCIC1_PCIDMA3), }; static struct intc_vect vectors_irl[] = { - INTC_VECT(IRL_0, 0x200), - INTC_VECT(IRL_1, 0x220), - INTC_VECT(IRL_2, 0x240), - INTC_VECT(IRL_3, 0x260), - INTC_VECT(IRL_4, 0x280), - INTC_VECT(IRL_5, 0x2a0), - INTC_VECT(IRL_6, 0x2c0), - INTC_VECT(IRL_7, 0x2e0), - INTC_VECT(IRL_8, 0x300), - INTC_VECT(IRL_9, 0x320), - INTC_VECT(IRL_A, 0x340), - INTC_VECT(IRL_B, 0x360), - INTC_VECT(IRL_C, 0x380), - INTC_VECT(IRL_D, 0x3a0), - INTC_VECT(IRL_E, 0x3c0), + INTC_VECT(IRL_0, 0x200), + INTC_VECT(IRL_1, 0x220), + INTC_VECT(IRL_2, 0x240), + INTC_VECT(IRL_3, 0x260), + INTC_VECT(IRL_4, 0x280), + INTC_VECT(IRL_5, 0x2a0), + INTC_VECT(IRL_6, 0x2c0), + INTC_VECT(IRL_7, 0x2e0), + INTC_VECT(IRL_8, 0x300), + INTC_VECT(IRL_9, 0x320), + INTC_VECT(IRL_A, 0x340), + INTC_VECT(IRL_B, 0x360), + INTC_VECT(IRL_C, 0x380), + INTC_VECT(IRL_D, 0x3a0), + INTC_VECT(IRL_E, 0x3c0), }; static struct intc_group groups_irl[] = { - INTC_GROUP(IRL, IRL_0, IRL_1, IRL_2, IRL_3, IRL_4, IRL_5, IRL_6, - IRL_7, IRL_8, IRL_9, IRL_A, IRL_B, IRL_C, IRL_D, IRL_E), + INTC_GROUP(IRL, IRL_0, IRL_1, IRL_2, IRL_3, IRL_4, IRL_5, IRL_6, + IRL_7, IRL_8, IRL_9, IRL_A, IRL_B, IRL_C, IRL_D, IRL_E), }; 
-/********************************************************************** - Memory mapped cache and TLB -**********************************************************************/ +/* + * Memory mapped cache and TLB + */ #define MM_REGION_MASK 0x07000000 #define MM_ICACHE_ADDR (0) @@ -680,7 +690,7 @@ static uint64_t sh7750_mmct_read(void *opaque, hwaddr addr, case MM_ICACHE_ADDR: case MM_ICACHE_DATA: /* do nothing */ - break; + break; case MM_ITLB_ADDR: ret = cpu_sh4_read_mmaped_itlb_addr(&s->cpu->env, addr); break; @@ -690,7 +700,7 @@ static uint64_t sh7750_mmct_read(void *opaque, hwaddr addr, case MM_OCACHE_ADDR: case MM_OCACHE_DATA: /* do nothing */ - break; + break; case MM_UTLB_ADDR: ret = cpu_sh4_read_mmaped_utlb_addr(&s->cpu->env, addr); break; @@ -723,27 +733,27 @@ static void sh7750_mmct_write(void *opaque, hwaddr addr, case MM_ICACHE_ADDR: case MM_ICACHE_DATA: /* do nothing */ - break; + break; case MM_ITLB_ADDR: cpu_sh4_write_mmaped_itlb_addr(&s->cpu->env, addr, mem_value); break; case MM_ITLB_DATA: cpu_sh4_write_mmaped_itlb_data(&s->cpu->env, addr, mem_value); abort(); - break; + break; case MM_OCACHE_ADDR: case MM_OCACHE_DATA: /* do nothing */ - break; + break; case MM_UTLB_ADDR: cpu_sh4_write_mmaped_utlb_addr(&s->cpu->env, addr, mem_value); - break; + break; case MM_UTLB_DATA: cpu_sh4_write_mmaped_utlb_data(&s->cpu->env, addr, mem_value); - break; + break; default: abort(); - break; + break; } } @@ -756,10 +766,13 @@ static const MemoryRegionOps sh7750_mmct_ops = { SH7750State *sh7750_init(SuperHCPU *cpu, MemoryRegion *sysmem) { SH7750State *s; + DeviceState *dev; + SysBusDevice *sb; + MemoryRegion *mr, *alias; s = g_malloc0(sizeof(SH7750State)); s->cpu = cpu; - s->periph_freq = 60000000; /* 60MHz */ + s->periph_freq = 60000000; /* 60MHz */ memory_region_init_io(&s->iomem, NULL, &sh7750_mem_ops, s, "memory", 0x1fc01000); @@ -792,81 +805,100 @@ SH7750State *sh7750_init(SuperHCPU *cpu, MemoryRegion *sysmem) memory_region_add_subregion(sysmem, 0xf0000000, &s->mmct_iomem); sh_intc_init(sysmem, &s->intc, NR_SOURCES, - _INTC_ARRAY(mask_registers), - _INTC_ARRAY(prio_registers)); + _INTC_ARRAY(mask_registers), + _INTC_ARRAY(prio_registers)); sh_intc_register_sources(&s->intc, - _INTC_ARRAY(vectors), - _INTC_ARRAY(groups)); + _INTC_ARRAY(vectors), + _INTC_ARRAY(groups)); cpu->env.intc_handle = &s->intc; - sh_serial_init(sysmem, 0x1fe00000, - 0, s->periph_freq, serial_hd(0), - s->intc.irqs[SCI1_ERI], - s->intc.irqs[SCI1_RXI], - s->intc.irqs[SCI1_TXI], - s->intc.irqs[SCI1_TEI], - NULL); - sh_serial_init(sysmem, 0x1fe80000, - SH_SERIAL_FEAT_SCIF, - s->periph_freq, serial_hd(1), - s->intc.irqs[SCIF_ERI], - s->intc.irqs[SCIF_RXI], - s->intc.irqs[SCIF_TXI], - NULL, - s->intc.irqs[SCIF_BRI]); + /* SCI */ + dev = qdev_new(TYPE_SH_SERIAL); + dev->id = g_strdup("sci"); + qdev_prop_set_chr(dev, "chardev", serial_hd(0)); + sb = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(sb, &error_fatal); + sysbus_mmio_map(sb, 0, 0xffe00000); + alias = g_malloc(sizeof(*alias)); + mr = sysbus_mmio_get_region(sb, 0); + memory_region_init_alias(alias, OBJECT(dev), "sci-a7", mr, + 0, memory_region_size(mr)); + memory_region_add_subregion(sysmem, A7ADDR(0xffe00000), alias); + qdev_connect_gpio_out_named(dev, "eri", 0, s->intc.irqs[SCI1_ERI]); + qdev_connect_gpio_out_named(dev, "rxi", 0, s->intc.irqs[SCI1_RXI]); + qdev_connect_gpio_out_named(dev, "txi", 0, s->intc.irqs[SCI1_TXI]); + qdev_connect_gpio_out_named(dev, "tei", 0, s->intc.irqs[SCI1_TEI]); + + /* SCIF */ + dev = qdev_new(TYPE_SH_SERIAL); + dev->id = 
g_strdup("scif"); + qdev_prop_set_chr(dev, "chardev", serial_hd(1)); + qdev_prop_set_uint8(dev, "features", SH_SERIAL_FEAT_SCIF); + sb = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(sb, &error_fatal); + sysbus_mmio_map(sb, 0, 0xffe80000); + alias = g_malloc(sizeof(*alias)); + mr = sysbus_mmio_get_region(sb, 0); + memory_region_init_alias(alias, OBJECT(dev), "scif-a7", mr, + 0, memory_region_size(mr)); + memory_region_add_subregion(sysmem, A7ADDR(0xffe80000), alias); + qdev_connect_gpio_out_named(dev, "eri", 0, s->intc.irqs[SCIF_ERI]); + qdev_connect_gpio_out_named(dev, "rxi", 0, s->intc.irqs[SCIF_RXI]); + qdev_connect_gpio_out_named(dev, "txi", 0, s->intc.irqs[SCIF_TXI]); + qdev_connect_gpio_out_named(dev, "bri", 0, s->intc.irqs[SCIF_BRI]); tmu012_init(sysmem, 0x1fd80000, - TMU012_FEAT_TOCR | TMU012_FEAT_3CHAN | TMU012_FEAT_EXTCLK, - s->periph_freq, - s->intc.irqs[TMU0], - s->intc.irqs[TMU1], - s->intc.irqs[TMU2_TUNI], - s->intc.irqs[TMU2_TICPI]); + TMU012_FEAT_TOCR | TMU012_FEAT_3CHAN | TMU012_FEAT_EXTCLK, + s->periph_freq, + s->intc.irqs[TMU0], + s->intc.irqs[TMU1], + s->intc.irqs[TMU2_TUNI], + s->intc.irqs[TMU2_TICPI]); if (cpu->env.id & (SH_CPU_SH7750 | SH_CPU_SH7750S | SH_CPU_SH7751)) { sh_intc_register_sources(&s->intc, - _INTC_ARRAY(vectors_dma4), - _INTC_ARRAY(groups_dma4)); + _INTC_ARRAY(vectors_dma4), + _INTC_ARRAY(groups_dma4)); } if (cpu->env.id & (SH_CPU_SH7750R | SH_CPU_SH7751R)) { sh_intc_register_sources(&s->intc, - _INTC_ARRAY(vectors_dma8), - _INTC_ARRAY(groups_dma8)); + _INTC_ARRAY(vectors_dma8), + _INTC_ARRAY(groups_dma8)); } if (cpu->env.id & (SH_CPU_SH7750R | SH_CPU_SH7751 | SH_CPU_SH7751R)) { sh_intc_register_sources(&s->intc, - _INTC_ARRAY(vectors_tmu34), - NULL, 0); + _INTC_ARRAY(vectors_tmu34), + NULL, 0); tmu012_init(sysmem, 0x1e100000, 0, s->periph_freq, - s->intc.irqs[TMU3], - s->intc.irqs[TMU4], - NULL, NULL); + s->intc.irqs[TMU3], + s->intc.irqs[TMU4], + NULL, NULL); } if (cpu->env.id & (SH_CPU_SH7751_ALL)) { sh_intc_register_sources(&s->intc, - _INTC_ARRAY(vectors_pci), - _INTC_ARRAY(groups_pci)); + _INTC_ARRAY(vectors_pci), + _INTC_ARRAY(groups_pci)); } if (cpu->env.id & (SH_CPU_SH7750S | SH_CPU_SH7750R | SH_CPU_SH7751_ALL)) { sh_intc_register_sources(&s->intc, - _INTC_ARRAY(vectors_irlm), - NULL, 0); + _INTC_ARRAY(vectors_irlm), + NULL, 0); } sh_intc_register_sources(&s->intc, - _INTC_ARRAY(vectors_irl), - _INTC_ARRAY(groups_irl)); + _INTC_ARRAY(vectors_irl), + _INTC_ARRAY(groups_irl)); return s; } qemu_irq sh7750_irl(SH7750State *s) { - sh_intc_toggle_source(sh_intc_source(&s->intc, IRL), 1, 0); /* enable */ - return qemu_allocate_irq(sh_intc_set_irl, sh_intc_source(&s->intc, IRL), 0); + sh_intc_toggle_source(&s->intc.sources[IRL], 1, 0); /* enable */ + return qemu_allocate_irq(sh_intc_set_irl, &s->intc.sources[IRL], 0); } diff --git a/hw/sh4/sh7750_regnames.c b/hw/sh4/sh7750_regnames.c index 0630fe3cf4a..e531d46a8ed 100644 --- a/hw/sh4/sh7750_regnames.c +++ b/hw/sh4/sh7750_regnames.c @@ -12,85 +12,87 @@ typedef struct { static regname_t regnames[] = { REGNAME(SH7750_PTEH_A7) - REGNAME(SH7750_PTEL_A7) - REGNAME(SH7750_PTEA_A7) - REGNAME(SH7750_TTB_A7) - REGNAME(SH7750_TEA_A7) - REGNAME(SH7750_MMUCR_A7) - REGNAME(SH7750_CCR_A7) - REGNAME(SH7750_QACR0_A7) - REGNAME(SH7750_QACR1_A7) - REGNAME(SH7750_TRA_A7) - REGNAME(SH7750_EXPEVT_A7) - REGNAME(SH7750_INTEVT_A7) - REGNAME(SH7750_STBCR_A7) - REGNAME(SH7750_STBCR2_A7) - REGNAME(SH7750_FRQCR_A7) - REGNAME(SH7750_WTCNT_A7) - REGNAME(SH7750_WTCSR_A7) - REGNAME(SH7750_R64CNT_A7) - REGNAME(SH7750_RSECCNT_A7) - 
REGNAME(SH7750_RMINCNT_A7) - REGNAME(SH7750_RHRCNT_A7) - REGNAME(SH7750_RWKCNT_A7) - REGNAME(SH7750_RDAYCNT_A7) - REGNAME(SH7750_RMONCNT_A7) - REGNAME(SH7750_RYRCNT_A7) - REGNAME(SH7750_RSECAR_A7) - REGNAME(SH7750_RMINAR_A7) - REGNAME(SH7750_RHRAR_A7) - REGNAME(SH7750_RWKAR_A7) - REGNAME(SH7750_RDAYAR_A7) - REGNAME(SH7750_RMONAR_A7) - REGNAME(SH7750_RCR1_A7) - REGNAME(SH7750_RCR2_A7) - REGNAME(SH7750_BCR1_A7) - REGNAME(SH7750_BCR2_A7) - REGNAME(SH7750_WCR1_A7) - REGNAME(SH7750_WCR2_A7) - REGNAME(SH7750_WCR3_A7) - REGNAME(SH7750_MCR_A7) - REGNAME(SH7750_PCR_A7) - REGNAME(SH7750_RTCSR_A7) - REGNAME(SH7750_RTCNT_A7) - REGNAME(SH7750_RTCOR_A7) - REGNAME(SH7750_RFCR_A7) - REGNAME(SH7750_SAR0_A7) - REGNAME(SH7750_SAR1_A7) - REGNAME(SH7750_SAR2_A7) - REGNAME(SH7750_SAR3_A7) - REGNAME(SH7750_DAR0_A7) - REGNAME(SH7750_DAR1_A7) - REGNAME(SH7750_DAR2_A7) - REGNAME(SH7750_DAR3_A7) - REGNAME(SH7750_DMATCR0_A7) - REGNAME(SH7750_DMATCR1_A7) - REGNAME(SH7750_DMATCR2_A7) - REGNAME(SH7750_DMATCR3_A7) - REGNAME(SH7750_CHCR0_A7) - REGNAME(SH7750_CHCR1_A7) - REGNAME(SH7750_CHCR2_A7) - REGNAME(SH7750_CHCR3_A7) - REGNAME(SH7750_DMAOR_A7) - REGNAME(SH7750_PCTRA_A7) - REGNAME(SH7750_PDTRA_A7) - REGNAME(SH7750_PCTRB_A7) - REGNAME(SH7750_PDTRB_A7) - REGNAME(SH7750_GPIOIC_A7) - REGNAME(SH7750_ICR_A7) - REGNAME(SH7750_BCR3_A7) - REGNAME(SH7750_BCR4_A7) - REGNAME(SH7750_SDMR2_A7) - REGNAME(SH7750_SDMR3_A7) {(uint32_t) - 1, NULL} + REGNAME(SH7750_PTEL_A7) + REGNAME(SH7750_PTEA_A7) + REGNAME(SH7750_TTB_A7) + REGNAME(SH7750_TEA_A7) + REGNAME(SH7750_MMUCR_A7) + REGNAME(SH7750_CCR_A7) + REGNAME(SH7750_QACR0_A7) + REGNAME(SH7750_QACR1_A7) + REGNAME(SH7750_TRA_A7) + REGNAME(SH7750_EXPEVT_A7) + REGNAME(SH7750_INTEVT_A7) + REGNAME(SH7750_STBCR_A7) + REGNAME(SH7750_STBCR2_A7) + REGNAME(SH7750_FRQCR_A7) + REGNAME(SH7750_WTCNT_A7) + REGNAME(SH7750_WTCSR_A7) + REGNAME(SH7750_R64CNT_A7) + REGNAME(SH7750_RSECCNT_A7) + REGNAME(SH7750_RMINCNT_A7) + REGNAME(SH7750_RHRCNT_A7) + REGNAME(SH7750_RWKCNT_A7) + REGNAME(SH7750_RDAYCNT_A7) + REGNAME(SH7750_RMONCNT_A7) + REGNAME(SH7750_RYRCNT_A7) + REGNAME(SH7750_RSECAR_A7) + REGNAME(SH7750_RMINAR_A7) + REGNAME(SH7750_RHRAR_A7) + REGNAME(SH7750_RWKAR_A7) + REGNAME(SH7750_RDAYAR_A7) + REGNAME(SH7750_RMONAR_A7) + REGNAME(SH7750_RCR1_A7) + REGNAME(SH7750_RCR2_A7) + REGNAME(SH7750_BCR1_A7) + REGNAME(SH7750_BCR2_A7) + REGNAME(SH7750_WCR1_A7) + REGNAME(SH7750_WCR2_A7) + REGNAME(SH7750_WCR3_A7) + REGNAME(SH7750_MCR_A7) + REGNAME(SH7750_PCR_A7) + REGNAME(SH7750_RTCSR_A7) + REGNAME(SH7750_RTCNT_A7) + REGNAME(SH7750_RTCOR_A7) + REGNAME(SH7750_RFCR_A7) + REGNAME(SH7750_SAR0_A7) + REGNAME(SH7750_SAR1_A7) + REGNAME(SH7750_SAR2_A7) + REGNAME(SH7750_SAR3_A7) + REGNAME(SH7750_DAR0_A7) + REGNAME(SH7750_DAR1_A7) + REGNAME(SH7750_DAR2_A7) + REGNAME(SH7750_DAR3_A7) + REGNAME(SH7750_DMATCR0_A7) + REGNAME(SH7750_DMATCR1_A7) + REGNAME(SH7750_DMATCR2_A7) + REGNAME(SH7750_DMATCR3_A7) + REGNAME(SH7750_CHCR0_A7) + REGNAME(SH7750_CHCR1_A7) + REGNAME(SH7750_CHCR2_A7) + REGNAME(SH7750_CHCR3_A7) + REGNAME(SH7750_DMAOR_A7) + REGNAME(SH7750_PCTRA_A7) + REGNAME(SH7750_PDTRA_A7) + REGNAME(SH7750_PCTRB_A7) + REGNAME(SH7750_PDTRB_A7) + REGNAME(SH7750_GPIOIC_A7) + REGNAME(SH7750_ICR_A7) + REGNAME(SH7750_BCR3_A7) + REGNAME(SH7750_BCR4_A7) + REGNAME(SH7750_SDMR2_A7) + REGNAME(SH7750_SDMR3_A7) + { (uint32_t)-1, NULL } }; const char *regname(uint32_t addr) { unsigned int i; - for (i = 0; regnames[i].regaddr != (uint32_t) - 1; i++) { - if (regnames[i].regaddr == addr) - return regnames[i].regname; + for (i = 0; regnames[i].regaddr != 
(uint32_t)-1; i++) { + if (regnames[i].regaddr == addr) { + return regnames[i].regname; + } } return ""; diff --git a/hw/sh4/sh7750_regs.h b/hw/sh4/sh7750_regs.h index ab073dadc74..beb571d5e9b 100644 --- a/hw/sh4/sh7750_regs.h +++ b/hw/sh4/sh7750_regs.h @@ -43,9 +43,8 @@ * All register has 2 addresses: in 0xff000000 - 0xffffffff (P4 address) and * in 0x1f000000 - 0x1fffffff (area 7 address) */ -#define SH7750_P4_BASE 0xff000000 /* Accessible only in - privileged mode */ -#define SH7750_A7_BASE 0x1f000000 /* Accessible only using TLB */ +#define SH7750_P4_BASE 0xff000000 /* Accessible only in privileged mode */ +#define SH7750_A7_BASE 0x1f000000 /* Accessible only using TLB */ #define SH7750_P4_REG32(ofs) (SH7750_P4_BASE + (ofs)) #define SH7750_A7_REG32(ofs) (SH7750_A7_BASE + (ofs)) @@ -55,84 +54,84 @@ */ /* Page Table Entry High register - PTEH */ -#define SH7750_PTEH_REGOFS 0x000000 /* offset */ +#define SH7750_PTEH_REGOFS 0x000000 /* offset */ #define SH7750_PTEH SH7750_P4_REG32(SH7750_PTEH_REGOFS) #define SH7750_PTEH_A7 SH7750_A7_REG32(SH7750_PTEH_REGOFS) -#define SH7750_PTEH_VPN 0xfffffd00 /* Virtual page number */ +#define SH7750_PTEH_VPN 0xfffffd00 /* Virtual page number */ #define SH7750_PTEH_VPN_S 10 -#define SH7750_PTEH_ASID 0x000000ff /* Address space identifier */ +#define SH7750_PTEH_ASID 0x000000ff /* Address space identifier */ #define SH7750_PTEH_ASID_S 0 /* Page Table Entry Low register - PTEL */ -#define SH7750_PTEL_REGOFS 0x000004 /* offset */ +#define SH7750_PTEL_REGOFS 0x000004 /* offset */ #define SH7750_PTEL SH7750_P4_REG32(SH7750_PTEL_REGOFS) #define SH7750_PTEL_A7 SH7750_A7_REG32(SH7750_PTEL_REGOFS) -#define SH7750_PTEL_PPN 0x1ffffc00 /* Physical page number */ +#define SH7750_PTEL_PPN 0x1ffffc00 /* Physical page number */ #define SH7750_PTEL_PPN_S 10 -#define SH7750_PTEL_V 0x00000100 /* Validity (0-entry is invalid) */ -#define SH7750_PTEL_SZ1 0x00000080 /* Page size bit 1 */ -#define SH7750_PTEL_SZ0 0x00000010 /* Page size bit 0 */ -#define SH7750_PTEL_SZ_1KB 0x00000000 /* 1-kbyte page */ -#define SH7750_PTEL_SZ_4KB 0x00000010 /* 4-kbyte page */ -#define SH7750_PTEL_SZ_64KB 0x00000080 /* 64-kbyte page */ -#define SH7750_PTEL_SZ_1MB 0x00000090 /* 1-Mbyte page */ -#define SH7750_PTEL_PR 0x00000060 /* Protection Key Data */ -#define SH7750_PTEL_PR_ROPO 0x00000000 /* read-only in priv mode */ -#define SH7750_PTEL_PR_RWPO 0x00000020 /* read-write in priv mode */ -#define SH7750_PTEL_PR_ROPU 0x00000040 /* read-only in priv or user mode */ -#define SH7750_PTEL_PR_RWPU 0x00000060 /* read-write in priv or user mode */ -#define SH7750_PTEL_C 0x00000008 /* Cacheability - (0 - page not cacheable) */ -#define SH7750_PTEL_D 0x00000004 /* Dirty bit (1 - write has been - performed to a page) */ -#define SH7750_PTEL_SH 0x00000002 /* Share Status bit (1 - page are - shared by processes) */ -#define SH7750_PTEL_WT 0x00000001 /* Write-through bit, specifies the - cache write mode: - 0 - Copy-back mode - 1 - Write-through mode */ +#define SH7750_PTEL_V 0x00000100 /* Validity (0-entry is invalid) */ +#define SH7750_PTEL_SZ1 0x00000080 /* Page size bit 1 */ +#define SH7750_PTEL_SZ0 0x00000010 /* Page size bit 0 */ +#define SH7750_PTEL_SZ_1KB 0x00000000 /* 1-kbyte page */ +#define SH7750_PTEL_SZ_4KB 0x00000010 /* 4-kbyte page */ +#define SH7750_PTEL_SZ_64KB 0x00000080 /* 64-kbyte page */ +#define SH7750_PTEL_SZ_1MB 0x00000090 /* 1-Mbyte page */ +#define SH7750_PTEL_PR 0x00000060 /* Protection Key Data */ +#define SH7750_PTEL_PR_ROPO 0x00000000 /* read-only in priv mode */ +#define 
SH7750_PTEL_PR_RWPO 0x00000020 /* read-write in priv mode */ +#define SH7750_PTEL_PR_ROPU 0x00000040 /* read-only in priv or user mode */ +#define SH7750_PTEL_PR_RWPU 0x00000060 /* read-write in priv or user mode */ +#define SH7750_PTEL_C 0x00000008 /* Cacheability */ + /* (0 - page not cacheable) */ +#define SH7750_PTEL_D 0x00000004 /* Dirty bit (1 - write has been */ + /* performed to a page) */ +#define SH7750_PTEL_SH 0x00000002 /* Share Status bit (1 - page are */ + /* shared by processes) */ +#define SH7750_PTEL_WT 0x00000001 /* Write-through bit, specifies the */ + /* cache write mode: */ + /* 0 - Copy-back mode */ + /* 1 - Write-through mode */ /* Page Table Entry Assistance register - PTEA */ -#define SH7750_PTEA_REGOFS 0x000034 /* offset */ +#define SH7750_PTEA_REGOFS 0x000034 /* offset */ #define SH7750_PTEA SH7750_P4_REG32(SH7750_PTEA_REGOFS) #define SH7750_PTEA_A7 SH7750_A7_REG32(SH7750_PTEA_REGOFS) -#define SH7750_PTEA_TC 0x00000008 /* Timing Control bit - 0 - use area 5 wait states - 1 - use area 6 wait states */ -#define SH7750_PTEA_SA 0x00000007 /* Space Attribute bits: */ -#define SH7750_PTEA_SA_UNDEF 0x00000000 /* 0 - undefined */ -#define SH7750_PTEA_SA_IOVAR 0x00000001 /* 1 - variable-size I/O space */ -#define SH7750_PTEA_SA_IO8 0x00000002 /* 2 - 8-bit I/O space */ -#define SH7750_PTEA_SA_IO16 0x00000003 /* 3 - 16-bit I/O space */ -#define SH7750_PTEA_SA_CMEM8 0x00000004 /* 4 - 8-bit common memory space */ -#define SH7750_PTEA_SA_CMEM16 0x00000005 /* 5 - 16-bit common memory space */ -#define SH7750_PTEA_SA_AMEM8 0x00000006 /* 6 - 8-bit attr memory space */ -#define SH7750_PTEA_SA_AMEM16 0x00000007 /* 7 - 16-bit attr memory space */ +#define SH7750_PTEA_TC 0x00000008 /* Timing Control bit */ + /* 0 - use area 5 wait states */ + /* 1 - use area 6 wait states */ +#define SH7750_PTEA_SA 0x00000007 /* Space Attribute bits: */ +#define SH7750_PTEA_SA_UNDEF 0x00000000 /* 0 - undefined */ +#define SH7750_PTEA_SA_IOVAR 0x00000001 /* 1 - variable-size I/O space */ +#define SH7750_PTEA_SA_IO8 0x00000002 /* 2 - 8-bit I/O space */ +#define SH7750_PTEA_SA_IO16 0x00000003 /* 3 - 16-bit I/O space */ +#define SH7750_PTEA_SA_CMEM8 0x00000004 /* 4 - 8-bit common memory space */ +#define SH7750_PTEA_SA_CMEM16 0x00000005 /* 5 - 16-bit common memory space */ +#define SH7750_PTEA_SA_AMEM8 0x00000006 /* 6 - 8-bit attr memory space */ +#define SH7750_PTEA_SA_AMEM16 0x00000007 /* 7 - 16-bit attr memory space */ /* Translation table base register */ -#define SH7750_TTB_REGOFS 0x000008 /* offset */ +#define SH7750_TTB_REGOFS 0x000008 /* offset */ #define SH7750_TTB SH7750_P4_REG32(SH7750_TTB_REGOFS) #define SH7750_TTB_A7 SH7750_A7_REG32(SH7750_TTB_REGOFS) /* TLB exeption address register - TEA */ -#define SH7750_TEA_REGOFS 0x00000c /* offset */ +#define SH7750_TEA_REGOFS 0x00000c /* offset */ #define SH7750_TEA SH7750_P4_REG32(SH7750_TEA_REGOFS) #define SH7750_TEA_A7 SH7750_A7_REG32(SH7750_TEA_REGOFS) /* MMU control register - MMUCR */ -#define SH7750_MMUCR_REGOFS 0x000010 /* offset */ +#define SH7750_MMUCR_REGOFS 0x000010 /* offset */ #define SH7750_MMUCR SH7750_P4_REG32(SH7750_MMUCR_REGOFS) #define SH7750_MMUCR_A7 SH7750_A7_REG32(SH7750_MMUCR_REGOFS) -#define SH7750_MMUCR_AT 0x00000001 /* Address translation bit */ -#define SH7750_MMUCR_TI 0x00000004 /* TLB invalidate */ -#define SH7750_MMUCR_SV 0x00000100 /* Single Virtual Mode bit */ -#define SH7750_MMUCR_SQMD 0x00000200 /* Store Queue Mode bit */ -#define SH7750_MMUCR_URC 0x0000FC00 /* UTLB Replace Counter */ +#define SH7750_MMUCR_AT 
0x00000001 /* Address translation bit */ +#define SH7750_MMUCR_TI 0x00000004 /* TLB invalidate */ +#define SH7750_MMUCR_SV 0x00000100 /* Single Virtual Mode bit */ +#define SH7750_MMUCR_SQMD 0x00000200 /* Store Queue Mode bit */ +#define SH7750_MMUCR_URC 0x0000FC00 /* UTLB Replace Counter */ #define SH7750_MMUCR_URC_S 10 -#define SH7750_MMUCR_URB 0x00FC0000 /* UTLB Replace Boundary */ +#define SH7750_MMUCR_URB 0x00FC0000 /* UTLB Replace Boundary */ #define SH7750_MMUCR_URB_S 18 -#define SH7750_MMUCR_LRUI 0xFC000000 /* Least Recently Used ITLB */ +#define SH7750_MMUCR_LRUI 0xFC000000 /* Least Recently Used ITLB */ #define SH7750_MMUCR_LRUI_S 26 @@ -145,30 +144,30 @@ */ /* Cache Control Register - CCR */ -#define SH7750_CCR_REGOFS 0x00001c /* offset */ +#define SH7750_CCR_REGOFS 0x00001c /* offset */ #define SH7750_CCR SH7750_P4_REG32(SH7750_CCR_REGOFS) #define SH7750_CCR_A7 SH7750_A7_REG32(SH7750_CCR_REGOFS) -#define SH7750_CCR_IIX 0x00008000 /* IC index enable bit */ -#define SH7750_CCR_ICI 0x00000800 /* IC invalidation bit: - set it to clear IC */ -#define SH7750_CCR_ICE 0x00000100 /* IC enable bit */ -#define SH7750_CCR_OIX 0x00000080 /* OC index enable bit */ -#define SH7750_CCR_ORA 0x00000020 /* OC RAM enable bit - if you set OCE = 0, - you should set ORA = 0 */ -#define SH7750_CCR_OCI 0x00000008 /* OC invalidation bit */ -#define SH7750_CCR_CB 0x00000004 /* Copy-back bit for P1 area */ -#define SH7750_CCR_WT 0x00000002 /* Write-through bit for P0,U0,P3 area */ -#define SH7750_CCR_OCE 0x00000001 /* OC enable bit */ +#define SH7750_CCR_IIX 0x00008000 /* IC index enable bit */ +#define SH7750_CCR_ICI 0x00000800 /* IC invalidation bit: */ + /* set it to clear IC */ +#define SH7750_CCR_ICE 0x00000100 /* IC enable bit */ +#define SH7750_CCR_OIX 0x00000080 /* OC index enable bit */ +#define SH7750_CCR_ORA 0x00000020 /* OC RAM enable bit */ + /* if you set OCE = 0, */ + /* you should set ORA = 0 */ +#define SH7750_CCR_OCI 0x00000008 /* OC invalidation bit */ +#define SH7750_CCR_CB 0x00000004 /* Copy-back bit for P1 area */ +#define SH7750_CCR_WT 0x00000002 /* Write-through bit for P0,U0,P3 area */ +#define SH7750_CCR_OCE 0x00000001 /* OC enable bit */ /* Queue address control register 0 - QACR0 */ -#define SH7750_QACR0_REGOFS 0x000038 /* offset */ +#define SH7750_QACR0_REGOFS 0x000038 /* offset */ #define SH7750_QACR0 SH7750_P4_REG32(SH7750_QACR0_REGOFS) #define SH7750_QACR0_A7 SH7750_A7_REG32(SH7750_QACR0_REGOFS) /* Queue address control register 1 - QACR1 */ -#define SH7750_QACR1_REGOFS 0x00003c /* offset */ +#define SH7750_QACR1_REGOFS 0x00003c /* offset */ #define SH7750_QACR1 SH7750_P4_REG32(SH7750_QACR1_REGOFS) #define SH7750_QACR1_A7 SH7750_A7_REG32(SH7750_QACR1_REGOFS) @@ -178,11 +177,11 @@ */ /* Immediate data for TRAPA instruction - TRA */ -#define SH7750_TRA_REGOFS 0x000020 /* offset */ +#define SH7750_TRA_REGOFS 0x000020 /* offset */ #define SH7750_TRA SH7750_P4_REG32(SH7750_TRA_REGOFS) #define SH7750_TRA_A7 SH7750_A7_REG32(SH7750_TRA_REGOFS) -#define SH7750_TRA_IMM 0x000003fd /* Immediate data operand */ +#define SH7750_TRA_IMM 0x000003fd /* Immediate data operand */ #define SH7750_TRA_IMM_S 2 /* Exeption event register - EXPEVT */ @@ -190,14 +189,14 @@ #define SH7750_EXPEVT SH7750_P4_REG32(SH7750_EXPEVT_REGOFS) #define SH7750_EXPEVT_A7 SH7750_A7_REG32(SH7750_EXPEVT_REGOFS) -#define SH7750_EXPEVT_EX 0x00000fff /* Exeption code */ +#define SH7750_EXPEVT_EX 0x00000fff /* Exeption code */ #define SH7750_EXPEVT_EX_S 0 /* Interrupt event register */ #define SH7750_INTEVT_REGOFS 
0x000028 #define SH7750_INTEVT SH7750_P4_REG32(SH7750_INTEVT_REGOFS) #define SH7750_INTEVT_A7 SH7750_A7_REG32(SH7750_INTEVT_REGOFS) -#define SH7750_INTEVT_EX 0x00000fff /* Exeption code */ +#define SH7750_INTEVT_EX 0x00000fff /* Exeption code */ #define SH7750_INTEVT_EX_S 0 /* @@ -206,683 +205,684 @@ #define SH7750_EVT_TO_NUM(evt) ((evt) >> 5) /* Reset exception category */ -#define SH7750_EVT_POWER_ON_RST 0x000 /* Power-on reset */ -#define SH7750_EVT_MANUAL_RST 0x020 /* Manual reset */ -#define SH7750_EVT_TLB_MULT_HIT 0x140 /* TLB multiple-hit exception */ +#define SH7750_EVT_POWER_ON_RST 0x000 /* Power-on reset */ +#define SH7750_EVT_MANUAL_RST 0x020 /* Manual reset */ +#define SH7750_EVT_TLB_MULT_HIT 0x140 /* TLB multiple-hit exception */ /* General exception category */ -#define SH7750_EVT_USER_BREAK 0x1E0 /* User break */ -#define SH7750_EVT_IADDR_ERR 0x0E0 /* Instruction address error */ -#define SH7750_EVT_TLB_READ_MISS 0x040 /* ITLB miss exception / - DTLB miss exception (read) */ -#define SH7750_EVT_TLB_READ_PROTV 0x0A0 /* ITLB protection violation / - DTLB protection violation (read) */ -#define SH7750_EVT_ILLEGAL_INSTR 0x180 /* General Illegal Instruction - exception */ -#define SH7750_EVT_SLOT_ILLEGAL_INSTR 0x1A0 /* Slot Illegal Instruction - exception */ -#define SH7750_EVT_FPU_DISABLE 0x800 /* General FPU disable exception */ -#define SH7750_EVT_SLOT_FPU_DISABLE 0x820 /* Slot FPU disable exception */ -#define SH7750_EVT_DATA_READ_ERR 0x0E0 /* Data address error (read) */ -#define SH7750_EVT_DATA_WRITE_ERR 0x100 /* Data address error (write) */ -#define SH7750_EVT_DTLB_WRITE_MISS 0x060 /* DTLB miss exception (write) */ -#define SH7750_EVT_DTLB_WRITE_PROTV 0x0C0 /* DTLB protection violation - exception (write) */ -#define SH7750_EVT_FPU_EXCEPTION 0x120 /* FPU exception */ -#define SH7750_EVT_INITIAL_PGWRITE 0x080 /* Initial Page Write exception */ -#define SH7750_EVT_TRAPA 0x160 /* Unconditional trap (TRAPA) */ +#define SH7750_EVT_USER_BREAK 0x1E0 /* User break */ +#define SH7750_EVT_IADDR_ERR 0x0E0 /* Instruction address error */ +#define SH7750_EVT_TLB_READ_MISS 0x040 /* ITLB miss exception / */ + /* DTLB miss exception (read) */ +#define SH7750_EVT_TLB_READ_PROTV 0x0A0 /* ITLB protection violation, */ + /* DTLB protection violation */ + /* (read) */ +#define SH7750_EVT_ILLEGAL_INSTR 0x180 /* General Illegal Instruction */ + /* exception */ +#define SH7750_EVT_SLOT_ILLEGAL_INSTR 0x1A0 /* Slot Illegal Instruction */ + /* exception */ +#define SH7750_EVT_FPU_DISABLE 0x800 /* General FPU disable exception */ +#define SH7750_EVT_SLOT_FPU_DISABLE 0x820 /* Slot FPU disable exception */ +#define SH7750_EVT_DATA_READ_ERR 0x0E0 /* Data address error (read) */ +#define SH7750_EVT_DATA_WRITE_ERR 0x100 /* Data address error (write) */ +#define SH7750_EVT_DTLB_WRITE_MISS 0x060 /* DTLB miss exception (write) */ +#define SH7750_EVT_DTLB_WRITE_PROTV 0x0C0 /* DTLB protection violation */ + /* exception (write) */ +#define SH7750_EVT_FPU_EXCEPTION 0x120 /* FPU exception */ +#define SH7750_EVT_INITIAL_PGWRITE 0x080 /* Initial Page Write exception */ +#define SH7750_EVT_TRAPA 0x160 /* Unconditional trap (TRAPA) */ /* Interrupt exception category */ -#define SH7750_EVT_NMI 0x1C0 /* Non-maskable interrupt */ -#define SH7750_EVT_IRQ0 0x200 /* External Interrupt 0 */ -#define SH7750_EVT_IRQ1 0x220 /* External Interrupt 1 */ -#define SH7750_EVT_IRQ2 0x240 /* External Interrupt 2 */ -#define SH7750_EVT_IRQ3 0x260 /* External Interrupt 3 */ -#define SH7750_EVT_IRQ4 0x280 /* External Interrupt 4 */ 
-#define SH7750_EVT_IRQ5 0x2A0 /* External Interrupt 5 */ -#define SH7750_EVT_IRQ6 0x2C0 /* External Interrupt 6 */ -#define SH7750_EVT_IRQ7 0x2E0 /* External Interrupt 7 */ -#define SH7750_EVT_IRQ8 0x300 /* External Interrupt 8 */ -#define SH7750_EVT_IRQ9 0x320 /* External Interrupt 9 */ -#define SH7750_EVT_IRQA 0x340 /* External Interrupt A */ -#define SH7750_EVT_IRQB 0x360 /* External Interrupt B */ -#define SH7750_EVT_IRQC 0x380 /* External Interrupt C */ -#define SH7750_EVT_IRQD 0x3A0 /* External Interrupt D */ -#define SH7750_EVT_IRQE 0x3C0 /* External Interrupt E */ +#define SH7750_EVT_NMI 0x1C0 /* Non-maskable interrupt */ +#define SH7750_EVT_IRQ0 0x200 /* External Interrupt 0 */ +#define SH7750_EVT_IRQ1 0x220 /* External Interrupt 1 */ +#define SH7750_EVT_IRQ2 0x240 /* External Interrupt 2 */ +#define SH7750_EVT_IRQ3 0x260 /* External Interrupt 3 */ +#define SH7750_EVT_IRQ4 0x280 /* External Interrupt 4 */ +#define SH7750_EVT_IRQ5 0x2A0 /* External Interrupt 5 */ +#define SH7750_EVT_IRQ6 0x2C0 /* External Interrupt 6 */ +#define SH7750_EVT_IRQ7 0x2E0 /* External Interrupt 7 */ +#define SH7750_EVT_IRQ8 0x300 /* External Interrupt 8 */ +#define SH7750_EVT_IRQ9 0x320 /* External Interrupt 9 */ +#define SH7750_EVT_IRQA 0x340 /* External Interrupt A */ +#define SH7750_EVT_IRQB 0x360 /* External Interrupt B */ +#define SH7750_EVT_IRQC 0x380 /* External Interrupt C */ +#define SH7750_EVT_IRQD 0x3A0 /* External Interrupt D */ +#define SH7750_EVT_IRQE 0x3C0 /* External Interrupt E */ /* Peripheral Module Interrupts - Timer Unit (TMU) */ -#define SH7750_EVT_TUNI0 0x400 /* TMU Underflow Interrupt 0 */ -#define SH7750_EVT_TUNI1 0x420 /* TMU Underflow Interrupt 1 */ -#define SH7750_EVT_TUNI2 0x440 /* TMU Underflow Interrupt 2 */ -#define SH7750_EVT_TICPI2 0x460 /* TMU Input Capture Interrupt 2 */ +#define SH7750_EVT_TUNI0 0x400 /* TMU Underflow Interrupt 0 */ +#define SH7750_EVT_TUNI1 0x420 /* TMU Underflow Interrupt 1 */ +#define SH7750_EVT_TUNI2 0x440 /* TMU Underflow Interrupt 2 */ +#define SH7750_EVT_TICPI2 0x460 /* TMU Input Capture Interrupt 2 */ /* Peripheral Module Interrupts - Real-Time Clock (RTC) */ -#define SH7750_EVT_RTC_ATI 0x480 /* Alarm Interrupt Request */ -#define SH7750_EVT_RTC_PRI 0x4A0 /* Periodic Interrupt Request */ -#define SH7750_EVT_RTC_CUI 0x4C0 /* Carry Interrupt Request */ +#define SH7750_EVT_RTC_ATI 0x480 /* Alarm Interrupt Request */ +#define SH7750_EVT_RTC_PRI 0x4A0 /* Periodic Interrupt Request */ +#define SH7750_EVT_RTC_CUI 0x4C0 /* Carry Interrupt Request */ /* Peripheral Module Interrupts - Serial Communication Interface (SCI) */ -#define SH7750_EVT_SCI_ERI 0x4E0 /* Receive Error */ -#define SH7750_EVT_SCI_RXI 0x500 /* Receive Data Register Full */ -#define SH7750_EVT_SCI_TXI 0x520 /* Transmit Data Register Empty */ -#define SH7750_EVT_SCI_TEI 0x540 /* Transmit End */ +#define SH7750_EVT_SCI_ERI 0x4E0 /* Receive Error */ +#define SH7750_EVT_SCI_RXI 0x500 /* Receive Data Register Full */ +#define SH7750_EVT_SCI_TXI 0x520 /* Transmit Data Register Empty */ +#define SH7750_EVT_SCI_TEI 0x540 /* Transmit End */ /* Peripheral Module Interrupts - Watchdog Timer (WDT) */ -#define SH7750_EVT_WDT_ITI 0x560 /* Interval Timer Interrupt - (used when WDT operates in - interval timer mode) */ +#define SH7750_EVT_WDT_ITI 0x560 /* Interval Timer Interrupt */ + /* (used when WDT operates in */ + /* interval timer mode) */ /* Peripheral Module Interrupts - Memory Refresh Unit (REF) */ -#define SH7750_EVT_REF_RCMI 0x580 /* Compare-match Interrupt */ -#define SH7750_EVT_REF_ROVI 
0x5A0 /* Refresh Counter Overflow - interrupt */ +#define SH7750_EVT_REF_RCMI 0x580 /* Compare-match Interrupt */ +#define SH7750_EVT_REF_ROVI 0x5A0 /* Refresh Counter Overflow */ + /* interrupt */ /* Peripheral Module Interrupts - Hitachi User Debug Interface (H-UDI) */ -#define SH7750_EVT_HUDI 0x600 /* UDI interrupt */ +#define SH7750_EVT_HUDI 0x600 /* UDI interrupt */ /* Peripheral Module Interrupts - General-Purpose I/O (GPIO) */ -#define SH7750_EVT_GPIO 0x620 /* GPIO Interrupt */ +#define SH7750_EVT_GPIO 0x620 /* GPIO Interrupt */ /* Peripheral Module Interrupts - DMA Controller (DMAC) */ -#define SH7750_EVT_DMAC_DMTE0 0x640 /* DMAC 0 Transfer End Interrupt */ -#define SH7750_EVT_DMAC_DMTE1 0x660 /* DMAC 1 Transfer End Interrupt */ -#define SH7750_EVT_DMAC_DMTE2 0x680 /* DMAC 2 Transfer End Interrupt */ -#define SH7750_EVT_DMAC_DMTE3 0x6A0 /* DMAC 3 Transfer End Interrupt */ -#define SH7750_EVT_DMAC_DMAE 0x6C0 /* DMAC Address Error Interrupt */ - -/* Peripheral Module Interrupts - Serial Communication Interface with FIFO */ -/* (SCIF) */ -#define SH7750_EVT_SCIF_ERI 0x700 /* Receive Error */ -#define SH7750_EVT_SCIF_RXI 0x720 /* Receive FIFO Data Full or - Receive Data ready interrupt */ -#define SH7750_EVT_SCIF_BRI 0x740 /* Break or overrun error */ -#define SH7750_EVT_SCIF_TXI 0x760 /* Transmit FIFO Data Empty */ +#define SH7750_EVT_DMAC_DMTE0 0x640 /* DMAC 0 Transfer End Interrupt */ +#define SH7750_EVT_DMAC_DMTE1 0x660 /* DMAC 1 Transfer End Interrupt */ +#define SH7750_EVT_DMAC_DMTE2 0x680 /* DMAC 2 Transfer End Interrupt */ +#define SH7750_EVT_DMAC_DMTE3 0x6A0 /* DMAC 3 Transfer End Interrupt */ +#define SH7750_EVT_DMAC_DMAE 0x6C0 /* DMAC Address Error Interrupt */ + +/* Peripheral Module Interrupts Serial Communication Interface w/ FIFO (SCIF) */ +#define SH7750_EVT_SCIF_ERI 0x700 /* Receive Error */ +#define SH7750_EVT_SCIF_RXI 0x720 /* Receive FIFO Data Full or */ + /* Receive Data ready interrupt */ +#define SH7750_EVT_SCIF_BRI 0x740 /* Break or overrun error */ +#define SH7750_EVT_SCIF_TXI 0x760 /* Transmit FIFO Data Empty */ /* * Power Management */ -#define SH7750_STBCR_REGOFS 0xC00004 /* offset */ +#define SH7750_STBCR_REGOFS 0xC00004 /* offset */ #define SH7750_STBCR SH7750_P4_REG32(SH7750_STBCR_REGOFS) #define SH7750_STBCR_A7 SH7750_A7_REG32(SH7750_STBCR_REGOFS) -#define SH7750_STBCR_STBY 0x80 /* Specifies a transition to standby mode: - 0 - Transition to SLEEP mode on SLEEP - 1 - Transition to STANDBY mode on SLEEP */ -#define SH7750_STBCR_PHZ 0x40 /* State of peripheral module pins in - standby mode: - 0 - normal state - 1 - high-impendance state */ +#define SH7750_STBCR_STBY 0x80 /* Specifies a transition to standby mode: */ + /* 0 Transition to SLEEP mode on SLEEP */ + /* 1 Transition to STANDBY mode on SLEEP */ +#define SH7750_STBCR_PHZ 0x40 /* State of peripheral module pins in */ + /* standby mode: */ + /* 0 normal state */ + /* 1 high-impendance state */ -#define SH7750_STBCR_PPU 0x20 /* Peripheral module pins pull-up controls */ -#define SH7750_STBCR_MSTP4 0x10 /* Stopping the clock supply to DMAC */ +#define SH7750_STBCR_PPU 0x20 /* Peripheral module pins pull-up controls */ +#define SH7750_STBCR_MSTP4 0x10 /* Stopping the clock supply to DMAC */ #define SH7750_STBCR_DMAC_STP SH7750_STBCR_MSTP4 -#define SH7750_STBCR_MSTP3 0x08 /* Stopping the clock supply to SCIF */ +#define SH7750_STBCR_MSTP3 0x08 /* Stopping the clock supply to SCIF */ #define SH7750_STBCR_SCIF_STP SH7750_STBCR_MSTP3 -#define SH7750_STBCR_MSTP2 0x04 /* Stopping the clock supply to TMU */ 
+#define SH7750_STBCR_MSTP2 0x04 /* Stopping the clock supply to TMU */ #define SH7750_STBCR_TMU_STP SH7750_STBCR_MSTP2 -#define SH7750_STBCR_MSTP1 0x02 /* Stopping the clock supply to RTC */ +#define SH7750_STBCR_MSTP1 0x02 /* Stopping the clock supply to RTC */ #define SH7750_STBCR_RTC_STP SH7750_STBCR_MSTP1 -#define SH7750_STBCR_MSPT0 0x01 /* Stopping the clock supply to SCI */ +#define SH7750_STBCR_MSPT0 0x01 /* Stopping the clock supply to SCI */ #define SH7750_STBCR_SCI_STP SH7750_STBCR_MSTP0 #define SH7750_STBCR_STBY 0x80 -#define SH7750_STBCR2_REGOFS 0xC00010 /* offset */ +#define SH7750_STBCR2_REGOFS 0xC00010 /* offset */ #define SH7750_STBCR2 SH7750_P4_REG32(SH7750_STBCR2_REGOFS) #define SH7750_STBCR2_A7 SH7750_A7_REG32(SH7750_STBCR2_REGOFS) -#define SH7750_STBCR2_DSLP 0x80 /* Specifies transition to deep sleep mode: - 0 - transition to sleep or standby mode - as it is specified in STBY bit - 1 - transition to deep sleep mode on - execution of SLEEP instruction */ -#define SH7750_STBCR2_MSTP6 0x02 /* Stopping the clock supply to Store Queue - in the cache controller */ +#define SH7750_STBCR2_DSLP 0x80 /* Specifies transition to deep sleep mode */ + /* 0 transition to sleep or standby mode */ + /* as it is specified in STBY bit */ + /* 1 transition to deep sleep mode on */ + /* execution of SLEEP instruction */ +#define SH7750_STBCR2_MSTP6 0x02 /* Stopping the clock supply to the */ + /* Store Queue in the cache controller */ #define SH7750_STBCR2_SQ_STP SH7750_STBCR2_MSTP6 -#define SH7750_STBCR2_MSTP5 0x01 /* Stopping the clock supply to the User - Break Controller (UBC) */ +#define SH7750_STBCR2_MSTP5 0x01 /* Stopping the clock supply to the */ + /* User Break Controller (UBC) */ #define SH7750_STBCR2_UBC_STP SH7750_STBCR2_MSTP5 /* * Clock Pulse Generator (CPG) */ -#define SH7750_FRQCR_REGOFS 0xC00000 /* offset */ +#define SH7750_FRQCR_REGOFS 0xC00000 /* offset */ #define SH7750_FRQCR SH7750_P4_REG32(SH7750_FRQCR_REGOFS) #define SH7750_FRQCR_A7 SH7750_A7_REG32(SH7750_FRQCR_REGOFS) -#define SH7750_FRQCR_CKOEN 0x0800 /* Clock Output Enable - 0 - CKIO pin goes to HiZ/pullup - 1 - Clock is output from CKIO */ -#define SH7750_FRQCR_PLL1EN 0x0400 /* PLL circuit 1 enable */ -#define SH7750_FRQCR_PLL2EN 0x0200 /* PLL circuit 2 enable */ - -#define SH7750_FRQCR_IFC 0x01C0 /* CPU clock frequency division ratio: */ -#define SH7750_FRQCR_IFCDIV1 0x0000 /* 0 - * 1 */ -#define SH7750_FRQCR_IFCDIV2 0x0040 /* 1 - * 1/2 */ -#define SH7750_FRQCR_IFCDIV3 0x0080 /* 2 - * 1/3 */ -#define SH7750_FRQCR_IFCDIV4 0x00C0 /* 3 - * 1/4 */ -#define SH7750_FRQCR_IFCDIV6 0x0100 /* 4 - * 1/6 */ -#define SH7750_FRQCR_IFCDIV8 0x0140 /* 5 - * 1/8 */ - -#define SH7750_FRQCR_BFC 0x0038 /* Bus clock frequency division ratio: */ -#define SH7750_FRQCR_BFCDIV1 0x0000 /* 0 - * 1 */ -#define SH7750_FRQCR_BFCDIV2 0x0008 /* 1 - * 1/2 */ -#define SH7750_FRQCR_BFCDIV3 0x0010 /* 2 - * 1/3 */ -#define SH7750_FRQCR_BFCDIV4 0x0018 /* 3 - * 1/4 */ -#define SH7750_FRQCR_BFCDIV6 0x0020 /* 4 - * 1/6 */ -#define SH7750_FRQCR_BFCDIV8 0x0028 /* 5 - * 1/8 */ - -#define SH7750_FRQCR_PFC 0x0007 /* Peripheral module clock frequency - division ratio: */ -#define SH7750_FRQCR_PFCDIV2 0x0000 /* 0 - * 1/2 */ -#define SH7750_FRQCR_PFCDIV3 0x0001 /* 1 - * 1/3 */ -#define SH7750_FRQCR_PFCDIV4 0x0002 /* 2 - * 1/4 */ -#define SH7750_FRQCR_PFCDIV6 0x0003 /* 3 - * 1/6 */ -#define SH7750_FRQCR_PFCDIV8 0x0004 /* 4 - * 1/8 */ +#define SH7750_FRQCR_CKOEN 0x0800 /* Clock Output Enable */ + /* 0 - CKIO pin goes to HiZ/pullup */ + /* 1 - Clock is output from 
CKIO */ +#define SH7750_FRQCR_PLL1EN 0x0400 /* PLL circuit 1 enable */ +#define SH7750_FRQCR_PLL2EN 0x0200 /* PLL circuit 2 enable */ + +#define SH7750_FRQCR_IFC 0x01C0 /* CPU clock frequency division ratio: */ +#define SH7750_FRQCR_IFCDIV1 0x0000 /* 0 - * 1 */ +#define SH7750_FRQCR_IFCDIV2 0x0040 /* 1 - * 1/2 */ +#define SH7750_FRQCR_IFCDIV3 0x0080 /* 2 - * 1/3 */ +#define SH7750_FRQCR_IFCDIV4 0x00C0 /* 3 - * 1/4 */ +#define SH7750_FRQCR_IFCDIV6 0x0100 /* 4 - * 1/6 */ +#define SH7750_FRQCR_IFCDIV8 0x0140 /* 5 - * 1/8 */ + +#define SH7750_FRQCR_BFC 0x0038 /* Bus clock frequency division ratio: */ +#define SH7750_FRQCR_BFCDIV1 0x0000 /* 0 - * 1 */ +#define SH7750_FRQCR_BFCDIV2 0x0008 /* 1 - * 1/2 */ +#define SH7750_FRQCR_BFCDIV3 0x0010 /* 2 - * 1/3 */ +#define SH7750_FRQCR_BFCDIV4 0x0018 /* 3 - * 1/4 */ +#define SH7750_FRQCR_BFCDIV6 0x0020 /* 4 - * 1/6 */ +#define SH7750_FRQCR_BFCDIV8 0x0028 /* 5 - * 1/8 */ + +#define SH7750_FRQCR_PFC 0x0007 /* Peripheral module clock frequency */ + /* division ratio: */ +#define SH7750_FRQCR_PFCDIV2 0x0000 /* 0 - * 1/2 */ +#define SH7750_FRQCR_PFCDIV3 0x0001 /* 1 - * 1/3 */ +#define SH7750_FRQCR_PFCDIV4 0x0002 /* 2 - * 1/4 */ +#define SH7750_FRQCR_PFCDIV6 0x0003 /* 3 - * 1/6 */ +#define SH7750_FRQCR_PFCDIV8 0x0004 /* 4 - * 1/8 */ /* * Watchdog Timer (WDT) */ /* Watchdog Timer Counter register - WTCNT */ -#define SH7750_WTCNT_REGOFS 0xC00008 /* offset */ +#define SH7750_WTCNT_REGOFS 0xC00008 /* offset */ #define SH7750_WTCNT SH7750_P4_REG32(SH7750_WTCNT_REGOFS) #define SH7750_WTCNT_A7 SH7750_A7_REG32(SH7750_WTCNT_REGOFS) -#define SH7750_WTCNT_KEY 0x5A00 /* When WTCNT byte register written, - you have to set the upper byte to - 0x5A */ +#define SH7750_WTCNT_KEY 0x5A00 /* When WTCNT byte register written, you */ + /* have to set the upper byte to 0x5A */ /* Watchdog Timer Control/Status register - WTCSR */ -#define SH7750_WTCSR_REGOFS 0xC0000C /* offset */ +#define SH7750_WTCSR_REGOFS 0xC0000C /* offset */ #define SH7750_WTCSR SH7750_P4_REG32(SH7750_WTCSR_REGOFS) #define SH7750_WTCSR_A7 SH7750_A7_REG32(SH7750_WTCSR_REGOFS) -#define SH7750_WTCSR_KEY 0xA500 /* When WTCSR byte register written, - you have to set the upper byte to - 0xA5 */ -#define SH7750_WTCSR_TME 0x80 /* Timer enable (1-upcount start) */ -#define SH7750_WTCSR_MODE 0x40 /* Timer Mode Select: */ -#define SH7750_WTCSR_MODE_WT 0x40 /* Watchdog Timer Mode */ -#define SH7750_WTCSR_MODE_IT 0x00 /* Interval Timer Mode */ -#define SH7750_WTCSR_RSTS 0x20 /* Reset Select: */ -#define SH7750_WTCSR_RST_MAN 0x20 /* Manual Reset */ -#define SH7750_WTCSR_RST_PWR 0x00 /* Power-on Reset */ -#define SH7750_WTCSR_WOVF 0x10 /* Watchdog Timer Overflow Flag */ -#define SH7750_WTCSR_IOVF 0x08 /* Interval Timer Overflow Flag */ -#define SH7750_WTCSR_CKS 0x07 /* Clock Select: */ -#define SH7750_WTCSR_CKS_DIV32 0x00 /* 1/32 of frequency divider 2 input */ -#define SH7750_WTCSR_CKS_DIV64 0x01 /* 1/64 */ -#define SH7750_WTCSR_CKS_DIV128 0x02 /* 1/128 */ -#define SH7750_WTCSR_CKS_DIV256 0x03 /* 1/256 */ -#define SH7750_WTCSR_CKS_DIV512 0x04 /* 1/512 */ -#define SH7750_WTCSR_CKS_DIV1024 0x05 /* 1/1024 */ -#define SH7750_WTCSR_CKS_DIV2048 0x06 /* 1/2048 */ -#define SH7750_WTCSR_CKS_DIV4096 0x07 /* 1/4096 */ +#define SH7750_WTCSR_KEY 0xA500 /* When WTCSR byte register written, you */ + /* have to set the upper byte to 0xA5 */ +#define SH7750_WTCSR_TME 0x80 /* Timer enable (1-upcount start) */ +#define SH7750_WTCSR_MODE 0x40 /* Timer Mode Select: */ +#define SH7750_WTCSR_MODE_WT 0x40 /* Watchdog Timer Mode */ +#define 
SH7750_WTCSR_MODE_IT 0x00 /* Interval Timer Mode */ +#define SH7750_WTCSR_RSTS 0x20 /* Reset Select: */ +#define SH7750_WTCSR_RST_MAN 0x20 /* Manual Reset */ +#define SH7750_WTCSR_RST_PWR 0x00 /* Power-on Reset */ +#define SH7750_WTCSR_WOVF 0x10 /* Watchdog Timer Overflow Flag */ +#define SH7750_WTCSR_IOVF 0x08 /* Interval Timer Overflow Flag */ +#define SH7750_WTCSR_CKS 0x07 /* Clock Select: */ +#define SH7750_WTCSR_CKS_DIV32 0x00 /* 1/32 of frequency divider 2 input */ +#define SH7750_WTCSR_CKS_DIV64 0x01 /* 1/64 */ +#define SH7750_WTCSR_CKS_DIV128 0x02 /* 1/128 */ +#define SH7750_WTCSR_CKS_DIV256 0x03 /* 1/256 */ +#define SH7750_WTCSR_CKS_DIV512 0x04 /* 1/512 */ +#define SH7750_WTCSR_CKS_DIV1024 0x05 /* 1/1024 */ +#define SH7750_WTCSR_CKS_DIV2048 0x06 /* 1/2048 */ +#define SH7750_WTCSR_CKS_DIV4096 0x07 /* 1/4096 */ /* * Real-Time Clock (RTC) */ /* 64-Hz Counter Register (byte, read-only) - R64CNT */ -#define SH7750_R64CNT_REGOFS 0xC80000 /* offset */ +#define SH7750_R64CNT_REGOFS 0xC80000 /* offset */ #define SH7750_R64CNT SH7750_P4_REG32(SH7750_R64CNT_REGOFS) #define SH7750_R64CNT_A7 SH7750_A7_REG32(SH7750_R64CNT_REGOFS) /* Second Counter Register (byte, BCD-coded) - RSECCNT */ -#define SH7750_RSECCNT_REGOFS 0xC80004 /* offset */ +#define SH7750_RSECCNT_REGOFS 0xC80004 /* offset */ #define SH7750_RSECCNT SH7750_P4_REG32(SH7750_RSECCNT_REGOFS) #define SH7750_RSECCNT_A7 SH7750_A7_REG32(SH7750_RSECCNT_REGOFS) /* Minute Counter Register (byte, BCD-coded) - RMINCNT */ -#define SH7750_RMINCNT_REGOFS 0xC80008 /* offset */ +#define SH7750_RMINCNT_REGOFS 0xC80008 /* offset */ #define SH7750_RMINCNT SH7750_P4_REG32(SH7750_RMINCNT_REGOFS) #define SH7750_RMINCNT_A7 SH7750_A7_REG32(SH7750_RMINCNT_REGOFS) /* Hour Counter Register (byte, BCD-coded) - RHRCNT */ -#define SH7750_RHRCNT_REGOFS 0xC8000C /* offset */ +#define SH7750_RHRCNT_REGOFS 0xC8000C /* offset */ #define SH7750_RHRCNT SH7750_P4_REG32(SH7750_RHRCNT_REGOFS) #define SH7750_RHRCNT_A7 SH7750_A7_REG32(SH7750_RHRCNT_REGOFS) /* Day-of-Week Counter Register (byte) - RWKCNT */ -#define SH7750_RWKCNT_REGOFS 0xC80010 /* offset */ +#define SH7750_RWKCNT_REGOFS 0xC80010 /* offset */ #define SH7750_RWKCNT SH7750_P4_REG32(SH7750_RWKCNT_REGOFS) #define SH7750_RWKCNT_A7 SH7750_A7_REG32(SH7750_RWKCNT_REGOFS) -#define SH7750_RWKCNT_SUN 0 /* Sunday */ -#define SH7750_RWKCNT_MON 1 /* Monday */ -#define SH7750_RWKCNT_TUE 2 /* Tuesday */ -#define SH7750_RWKCNT_WED 3 /* Wednesday */ -#define SH7750_RWKCNT_THU 4 /* Thursday */ -#define SH7750_RWKCNT_FRI 5 /* Friday */ -#define SH7750_RWKCNT_SAT 6 /* Saturday */ +#define SH7750_RWKCNT_SUN 0 /* Sunday */ +#define SH7750_RWKCNT_MON 1 /* Monday */ +#define SH7750_RWKCNT_TUE 2 /* Tuesday */ +#define SH7750_RWKCNT_WED 3 /* Wednesday */ +#define SH7750_RWKCNT_THU 4 /* Thursday */ +#define SH7750_RWKCNT_FRI 5 /* Friday */ +#define SH7750_RWKCNT_SAT 6 /* Saturday */ /* Day Counter Register (byte, BCD-coded) - RDAYCNT */ -#define SH7750_RDAYCNT_REGOFS 0xC80014 /* offset */ +#define SH7750_RDAYCNT_REGOFS 0xC80014 /* offset */ #define SH7750_RDAYCNT SH7750_P4_REG32(SH7750_RDAYCNT_REGOFS) #define SH7750_RDAYCNT_A7 SH7750_A7_REG32(SH7750_RDAYCNT_REGOFS) /* Month Counter Register (byte, BCD-coded) - RMONCNT */ -#define SH7750_RMONCNT_REGOFS 0xC80018 /* offset */ +#define SH7750_RMONCNT_REGOFS 0xC80018 /* offset */ #define SH7750_RMONCNT SH7750_P4_REG32(SH7750_RMONCNT_REGOFS) #define SH7750_RMONCNT_A7 SH7750_A7_REG32(SH7750_RMONCNT_REGOFS) /* Year Counter Register (half, BCD-coded) - RYRCNT */ -#define SH7750_RYRCNT_REGOFS 
0xC8001C /* offset */ +#define SH7750_RYRCNT_REGOFS 0xC8001C /* offset */ #define SH7750_RYRCNT SH7750_P4_REG32(SH7750_RYRCNT_REGOFS) #define SH7750_RYRCNT_A7 SH7750_A7_REG32(SH7750_RYRCNT_REGOFS) /* Second Alarm Register (byte, BCD-coded) - RSECAR */ -#define SH7750_RSECAR_REGOFS 0xC80020 /* offset */ +#define SH7750_RSECAR_REGOFS 0xC80020 /* offset */ #define SH7750_RSECAR SH7750_P4_REG32(SH7750_RSECAR_REGOFS) #define SH7750_RSECAR_A7 SH7750_A7_REG32(SH7750_RSECAR_REGOFS) -#define SH7750_RSECAR_ENB 0x80 /* Second Alarm Enable */ +#define SH7750_RSECAR_ENB 0x80 /* Second Alarm Enable */ /* Minute Alarm Register (byte, BCD-coded) - RMINAR */ -#define SH7750_RMINAR_REGOFS 0xC80024 /* offset */ +#define SH7750_RMINAR_REGOFS 0xC80024 /* offset */ #define SH7750_RMINAR SH7750_P4_REG32(SH7750_RMINAR_REGOFS) #define SH7750_RMINAR_A7 SH7750_A7_REG32(SH7750_RMINAR_REGOFS) -#define SH7750_RMINAR_ENB 0x80 /* Minute Alarm Enable */ +#define SH7750_RMINAR_ENB 0x80 /* Minute Alarm Enable */ /* Hour Alarm Register (byte, BCD-coded) - RHRAR */ -#define SH7750_RHRAR_REGOFS 0xC80028 /* offset */ +#define SH7750_RHRAR_REGOFS 0xC80028 /* offset */ #define SH7750_RHRAR SH7750_P4_REG32(SH7750_RHRAR_REGOFS) #define SH7750_RHRAR_A7 SH7750_A7_REG32(SH7750_RHRAR_REGOFS) -#define SH7750_RHRAR_ENB 0x80 /* Hour Alarm Enable */ +#define SH7750_RHRAR_ENB 0x80 /* Hour Alarm Enable */ /* Day-of-Week Alarm Register (byte) - RWKAR */ -#define SH7750_RWKAR_REGOFS 0xC8002C /* offset */ +#define SH7750_RWKAR_REGOFS 0xC8002C /* offset */ #define SH7750_RWKAR SH7750_P4_REG32(SH7750_RWKAR_REGOFS) #define SH7750_RWKAR_A7 SH7750_A7_REG32(SH7750_RWKAR_REGOFS) -#define SH7750_RWKAR_ENB 0x80 /* Day-of-week Alarm Enable */ +#define SH7750_RWKAR_ENB 0x80 /* Day-of-week Alarm Enable */ -#define SH7750_RWKAR_SUN 0 /* Sunday */ -#define SH7750_RWKAR_MON 1 /* Monday */ -#define SH7750_RWKAR_TUE 2 /* Tuesday */ -#define SH7750_RWKAR_WED 3 /* Wednesday */ -#define SH7750_RWKAR_THU 4 /* Thursday */ -#define SH7750_RWKAR_FRI 5 /* Friday */ -#define SH7750_RWKAR_SAT 6 /* Saturday */ +#define SH7750_RWKAR_SUN 0 /* Sunday */ +#define SH7750_RWKAR_MON 1 /* Monday */ +#define SH7750_RWKAR_TUE 2 /* Tuesday */ +#define SH7750_RWKAR_WED 3 /* Wednesday */ +#define SH7750_RWKAR_THU 4 /* Thursday */ +#define SH7750_RWKAR_FRI 5 /* Friday */ +#define SH7750_RWKAR_SAT 6 /* Saturday */ /* Day Alarm Register (byte, BCD-coded) - RDAYAR */ -#define SH7750_RDAYAR_REGOFS 0xC80030 /* offset */ +#define SH7750_RDAYAR_REGOFS 0xC80030 /* offset */ #define SH7750_RDAYAR SH7750_P4_REG32(SH7750_RDAYAR_REGOFS) #define SH7750_RDAYAR_A7 SH7750_A7_REG32(SH7750_RDAYAR_REGOFS) -#define SH7750_RDAYAR_ENB 0x80 /* Day Alarm Enable */ +#define SH7750_RDAYAR_ENB 0x80 /* Day Alarm Enable */ /* Month Counter Register (byte, BCD-coded) - RMONAR */ -#define SH7750_RMONAR_REGOFS 0xC80034 /* offset */ +#define SH7750_RMONAR_REGOFS 0xC80034 /* offset */ #define SH7750_RMONAR SH7750_P4_REG32(SH7750_RMONAR_REGOFS) #define SH7750_RMONAR_A7 SH7750_A7_REG32(SH7750_RMONAR_REGOFS) -#define SH7750_RMONAR_ENB 0x80 /* Month Alarm Enable */ +#define SH7750_RMONAR_ENB 0x80 /* Month Alarm Enable */ /* RTC Control Register 1 (byte) - RCR1 */ -#define SH7750_RCR1_REGOFS 0xC80038 /* offset */ +#define SH7750_RCR1_REGOFS 0xC80038 /* offset */ #define SH7750_RCR1 SH7750_P4_REG32(SH7750_RCR1_REGOFS) #define SH7750_RCR1_A7 SH7750_A7_REG32(SH7750_RCR1_REGOFS) -#define SH7750_RCR1_CF 0x80 /* Carry Flag */ -#define SH7750_RCR1_CIE 0x10 /* Carry Interrupt Enable */ -#define SH7750_RCR1_AIE 0x08 /* Alarm 
Interrupt Enable */ -#define SH7750_RCR1_AF 0x01 /* Alarm Flag */ +#define SH7750_RCR1_CF 0x80 /* Carry Flag */ +#define SH7750_RCR1_CIE 0x10 /* Carry Interrupt Enable */ +#define SH7750_RCR1_AIE 0x08 /* Alarm Interrupt Enable */ +#define SH7750_RCR1_AF 0x01 /* Alarm Flag */ /* RTC Control Register 2 (byte) - RCR2 */ -#define SH7750_RCR2_REGOFS 0xC8003C /* offset */ +#define SH7750_RCR2_REGOFS 0xC8003C /* offset */ #define SH7750_RCR2 SH7750_P4_REG32(SH7750_RCR2_REGOFS) #define SH7750_RCR2_A7 SH7750_A7_REG32(SH7750_RCR2_REGOFS) -#define SH7750_RCR2_PEF 0x80 /* Periodic Interrupt Flag */ -#define SH7750_RCR2_PES 0x70 /* Periodic Interrupt Enable: */ -#define SH7750_RCR2_PES_DIS 0x00 /* Periodic Interrupt Disabled */ -#define SH7750_RCR2_PES_DIV256 0x10 /* Generated at 1/256 sec interval */ -#define SH7750_RCR2_PES_DIV64 0x20 /* Generated at 1/64 sec interval */ -#define SH7750_RCR2_PES_DIV16 0x30 /* Generated at 1/16 sec interval */ -#define SH7750_RCR2_PES_DIV4 0x40 /* Generated at 1/4 sec interval */ -#define SH7750_RCR2_PES_DIV2 0x50 /* Generated at 1/2 sec interval */ -#define SH7750_RCR2_PES_x1 0x60 /* Generated at 1 sec interval */ -#define SH7750_RCR2_PES_x2 0x70 /* Generated at 2 sec interval */ -#define SH7750_RCR2_RTCEN 0x08 /* RTC Crystal Oscillator is Operated */ -#define SH7750_RCR2_ADJ 0x04 /* 30-Second Adjastment */ -#define SH7750_RCR2_RESET 0x02 /* Frequency divider circuits are reset */ -#define SH7750_RCR2_START 0x01 /* 0 - sec, min, hr, day-of-week, month, - year counters are stopped - 1 - sec, min, hr, day-of-week, month, - year counters operate normally */ +#define SH7750_RCR2_PEF 0x80 /* Periodic Interrupt Flag */ +#define SH7750_RCR2_PES 0x70 /* Periodic Interrupt Enable: */ +#define SH7750_RCR2_PES_DIS 0x00 /* Periodic Interrupt Disabled */ +#define SH7750_RCR2_PES_DIV256 0x10 /* Generated at 1/256 sec interval */ +#define SH7750_RCR2_PES_DIV64 0x20 /* Generated at 1/64 sec interval */ +#define SH7750_RCR2_PES_DIV16 0x30 /* Generated at 1/16 sec interval */ +#define SH7750_RCR2_PES_DIV4 0x40 /* Generated at 1/4 sec interval */ +#define SH7750_RCR2_PES_DIV2 0x50 /* Generated at 1/2 sec interval */ +#define SH7750_RCR2_PES_x1 0x60 /* Generated at 1 sec interval */ +#define SH7750_RCR2_PES_x2 0x70 /* Generated at 2 sec interval */ +#define SH7750_RCR2_RTCEN 0x08 /* RTC Crystal Oscillator is Operated */ +#define SH7750_RCR2_ADJ 0x04 /* 30-Second Adjastment */ +#define SH7750_RCR2_RESET 0x02 /* Frequency divider circuits are reset */ +#define SH7750_RCR2_START 0x01 /* 0 - sec, min, hr, day-of-week, month, */ + /* year counters are stopped */ + /* 1 - sec, min, hr, day-of-week, month, */ + /* year counters operate normally */ /* * Bus State Controller - BSC */ /* Bus Control Register 1 - BCR1 */ -#define SH7750_BCR1_REGOFS 0x800000 /* offset */ +#define SH7750_BCR1_REGOFS 0x800000 /* offset */ #define SH7750_BCR1 SH7750_P4_REG32(SH7750_BCR1_REGOFS) #define SH7750_BCR1_A7 SH7750_A7_REG32(SH7750_BCR1_REGOFS) -#define SH7750_BCR1_ENDIAN 0x80000000 /* Endianness (1 - little endian) */ -#define SH7750_BCR1_MASTER 0x40000000 /* Master/Slave mode (1-master) */ -#define SH7750_BCR1_A0MPX 0x20000000 /* Area 0 Memory Type (0-SRAM,1-MPX) */ -#define SH7750_BCR1_IPUP 0x02000000 /* Input Pin Pull-up Control: - 0 - pull-up resistor is on for - control input pins - 1 - pull-up resistor is off */ -#define SH7750_BCR1_OPUP 0x01000000 /* Output Pin Pull-up Control: - 0 - pull-up resistor is on for - control output pins - 1 - pull-up resistor is off */ -#define SH7750_BCR1_A1MBC 0x00200000 
/* Area 1 SRAM Byte Control Mode: - 0 - Area 1 SRAM is set to - normal mode - 1 - Area 1 SRAM is set to byte - control mode */ -#define SH7750_BCR1_A4MBC 0x00100000 /* Area 4 SRAM Byte Control Mode: - 0 - Area 4 SRAM is set to - normal mode - 1 - Area 4 SRAM is set to byte - control mode */ -#define SH7750_BCR1_BREQEN 0x00080000 /* BREQ Enable: - 0 - External requests are not - accepted - 1 - External requests are - accepted */ -#define SH7750_BCR1_PSHR 0x00040000 /* Partial Sharing Bit: - 0 - Master Mode - 1 - Partial-sharing Mode */ -#define SH7750_BCR1_MEMMPX 0x00020000 /* Area 1 to 6 MPX Interface: - 0 - SRAM/burst ROM interface - 1 - MPX interface */ -#define SH7750_BCR1_HIZMEM 0x00008000 /* High Impendance Control. Specifies - the state of A[25:0], BS\, CSn\, - RD/WR\, CE2A\, CE2B\ in standby - mode and when bus is released: - 0 - signals go to High-Z mode - 1 - signals driven */ -#define SH7750_BCR1_HIZCNT 0x00004000 /* High Impendance Control. Specifies - the state of the RAS\, RAS2\, WEn\, - CASn\, DQMn, RD\, CASS\, FRAME\, - RD2\ signals in standby mode and - when bus is released: - 0 - signals go to High-Z mode - 1 - signals driven */ -#define SH7750_BCR1_A0BST 0x00003800 /* Area 0 Burst ROM Control */ -#define SH7750_BCR1_A0BST_SRAM 0x0000 /* Area 0 accessed as SRAM i/f */ -#define SH7750_BCR1_A0BST_ROM4 0x0800 /* Area 0 accessed as burst ROM - interface, 4 cosequtive access */ -#define SH7750_BCR1_A0BST_ROM8 0x1000 /* Area 0 accessed as burst ROM - interface, 8 cosequtive access */ -#define SH7750_BCR1_A0BST_ROM16 0x1800 /* Area 0 accessed as burst ROM - interface, 16 cosequtive access */ -#define SH7750_BCR1_A0BST_ROM32 0x2000 /* Area 0 accessed as burst ROM - interface, 32 cosequtive access */ - -#define SH7750_BCR1_A5BST 0x00000700 /* Area 5 Burst ROM Control */ -#define SH7750_BCR1_A5BST_SRAM 0x0000 /* Area 5 accessed as SRAM i/f */ -#define SH7750_BCR1_A5BST_ROM4 0x0100 /* Area 5 accessed as burst ROM - interface, 4 cosequtive access */ -#define SH7750_BCR1_A5BST_ROM8 0x0200 /* Area 5 accessed as burst ROM - interface, 8 cosequtive access */ -#define SH7750_BCR1_A5BST_ROM16 0x0300 /* Area 5 accessed as burst ROM - interface, 16 cosequtive access */ -#define SH7750_BCR1_A5BST_ROM32 0x0400 /* Area 5 accessed as burst ROM - interface, 32 cosequtive access */ - -#define SH7750_BCR1_A6BST 0x000000E0 /* Area 6 Burst ROM Control */ -#define SH7750_BCR1_A6BST_SRAM 0x0000 /* Area 6 accessed as SRAM i/f */ -#define SH7750_BCR1_A6BST_ROM4 0x0020 /* Area 6 accessed as burst ROM - interface, 4 cosequtive access */ -#define SH7750_BCR1_A6BST_ROM8 0x0040 /* Area 6 accessed as burst ROM - interface, 8 cosequtive access */ -#define SH7750_BCR1_A6BST_ROM16 0x0060 /* Area 6 accessed as burst ROM - interface, 16 cosequtive access */ -#define SH7750_BCR1_A6BST_ROM32 0x0080 /* Area 6 accessed as burst ROM - interface, 32 cosequtive access */ - -#define SH7750_BCR1_DRAMTP 0x001C /* Area 2 and 3 Memory Type */ -#define SH7750_BCR1_DRAMTP_2SRAM_3SRAM 0x0000 /* Area 2 and 3 are SRAM or MPX - interface. 
*/ -#define SH7750_BCR1_DRAMTP_2SRAM_3SDRAM 0x0008 /* Area 2 - SRAM/MPX, Area 3 - - synchronous DRAM */ -#define SH7750_BCR1_DRAMTP_2SDRAM_3SDRAM 0x000C /* Area 2 and 3 are synchronous - DRAM interface */ -#define SH7750_BCR1_DRAMTP_2SRAM_3DRAM 0x0010 /* Area 2 - SRAM/MPX, Area 3 - - DRAM interface */ -#define SH7750_BCR1_DRAMTP_2DRAM_3DRAM 0x0014 /* Area 2 and 3 are DRAM - interface */ - -#define SH7750_BCR1_A56PCM 0x00000001 /* Area 5 and 6 Bus Type: - 0 - SRAM interface - 1 - PCMCIA interface */ +#define SH7750_BCR1_ENDIAN 0x80000000 /* Endianness (1 - little endian) */ +#define SH7750_BCR1_MASTER 0x40000000 /* Master/Slave mode (1-master) */ +#define SH7750_BCR1_A0MPX 0x20000000 /* Area 0 Memory Type (0-SRAM,1-MPX) */ +#define SH7750_BCR1_IPUP 0x02000000 /* Input Pin Pull-up Control: */ + /* 0 - pull-up resistor is on for */ + /* control input pins */ + /* 1 - pull-up resistor is off */ +#define SH7750_BCR1_OPUP 0x01000000 /* Output Pin Pull-up Control: */ + /* 0 - pull-up resistor is on for */ + /* control output pins */ + /* 1 - pull-up resistor is off */ +#define SH7750_BCR1_A1MBC 0x00200000 /* Area 1 SRAM Byte Control Mode: */ + /* 0 - Area 1 SRAM is set to */ + /* normal mode */ + /* 1 - Area 1 SRAM is set to byte */ + /* control mode */ +#define SH7750_BCR1_A4MBC 0x00100000 /* Area 4 SRAM Byte Control Mode: */ + /* 0 - Area 4 SRAM is set to */ + /* normal mode */ + /* 1 - Area 4 SRAM is set to byte */ + /* control mode */ +#define SH7750_BCR1_BREQEN 0x00080000 /* BREQ Enable: */ + /* 0 - External requests are not */ + /* accepted */ + /* 1 - External requests are */ + /* accepted */ +#define SH7750_BCR1_PSHR 0x00040000 /* Partial Sharing Bit: */ + /* 0 - Master Mode */ + /* 1 - Partial-sharing Mode */ +#define SH7750_BCR1_MEMMPX 0x00020000 /* Area 1 to 6 MPX Interface: */ + /* 0 - SRAM/burst ROM interface */ + /* 1 - MPX interface */ +#define SH7750_BCR1_HIZMEM 0x00008000 /* High Impendance Control. */ + /* Specifies the state of A[25:0], */ + /* BS\, CSn\, RD/WR\, CE2A\, CE2B\ */ + /* in standby mode and when bus is */ + /* released: */ + /* 0 - signals go to High-Z mode */ + /* 1 - signals driven */ +#define SH7750_BCR1_HIZCNT 0x00004000 /* High Impendance Control. 
*/ + /* Specifies the state of the */ + /* RAS\, RAS2\, WEn\, CASn\, DQMn, */ + /* RD\, CASS\, FRAME\, RD2\ */ + /* signals in standby mode and */ + /* when bus is released: */ + /* 0 - signals go to High-Z mode */ + /* 1 - signals driven */ +#define SH7750_BCR1_A0BST 0x00003800 /* Area 0 Burst ROM Control */ +#define SH7750_BCR1_A0BST_SRAM 0x0000 /* Area 0 accessed as SRAM i/f */ +#define SH7750_BCR1_A0BST_ROM4 0x0800 /* Area 0 accessed as burst ROM */ + /* interface, 4 cosequtive access */ +#define SH7750_BCR1_A0BST_ROM8 0x1000 /* Area 0 accessed as burst ROM */ + /* interface, 8 cosequtive access */ +#define SH7750_BCR1_A0BST_ROM16 0x1800 /* Area 0 accessed as burst ROM */ + /* interface, 16 cosequtive access */ +#define SH7750_BCR1_A0BST_ROM32 0x2000 /* Area 0 accessed as burst ROM */ + /* interface, 32 cosequtive access */ + +#define SH7750_BCR1_A5BST 0x00000700 /* Area 5 Burst ROM Control */ +#define SH7750_BCR1_A5BST_SRAM 0x0000 /* Area 5 accessed as SRAM i/f */ +#define SH7750_BCR1_A5BST_ROM4 0x0100 /* Area 5 accessed as burst ROM */ + /* interface, 4 cosequtive access */ +#define SH7750_BCR1_A5BST_ROM8 0x0200 /* Area 5 accessed as burst ROM */ + /* interface, 8 cosequtive access */ +#define SH7750_BCR1_A5BST_ROM16 0x0300 /* Area 5 accessed as burst ROM */ + /* interface, 16 cosequtive access */ +#define SH7750_BCR1_A5BST_ROM32 0x0400 /* Area 5 accessed as burst ROM */ + /* interface, 32 cosequtive access */ + +#define SH7750_BCR1_A6BST 0x000000E0 /* Area 6 Burst ROM Control */ +#define SH7750_BCR1_A6BST_SRAM 0x0000 /* Area 6 accessed as SRAM i/f */ +#define SH7750_BCR1_A6BST_ROM4 0x0020 /* Area 6 accessed as burst ROM */ + /* interface, 4 cosequtive access */ +#define SH7750_BCR1_A6BST_ROM8 0x0040 /* Area 6 accessed as burst ROM */ + /* interface, 8 cosequtive access */ +#define SH7750_BCR1_A6BST_ROM16 0x0060 /* Area 6 accessed as burst ROM */ + /* interface, 16 cosequtive access */ +#define SH7750_BCR1_A6BST_ROM32 0x0080 /* Area 6 accessed as burst ROM */ + /* interface, 32 cosequtive access */ + +#define SH7750_BCR1_DRAMTP 0x001C /* Area 2 and 3 Memory Type */ +#define SH7750_BCR1_DRAMTP_2SRAM_3SRAM 0x0000 /* Area 2 and 3 are SRAM or */ + /* MPX interface. 
*/ +#define SH7750_BCR1_DRAMTP_2SRAM_3SDRAM 0x0008 /* Area 2 - SRAM/MPX, Area 3 */ + /* synchronous DRAM */ +#define SH7750_BCR1_DRAMTP_2SDRAM_3SDRAM 0x000C /* Area 2 and 3 are */ + /* synchronous DRAM interface */ +#define SH7750_BCR1_DRAMTP_2SRAM_3DRAM 0x0010 /* Area 2 - SRAM/MPX, Area 3 */ + /* DRAM interface */ +#define SH7750_BCR1_DRAMTP_2DRAM_3DRAM 0x0014 /* Area 2 and 3 are DRAM */ + /* interface */ + +#define SH7750_BCR1_A56PCM 0x00000001 /* Area 5 and 6 Bus Type: */ + /* 0 - SRAM interface */ + /* 1 - PCMCIA interface */ /* Bus Control Register 2 (half) - BCR2 */ -#define SH7750_BCR2_REGOFS 0x800004 /* offset */ +#define SH7750_BCR2_REGOFS 0x800004 /* offset */ #define SH7750_BCR2 SH7750_P4_REG32(SH7750_BCR2_REGOFS) #define SH7750_BCR2_A7 SH7750_A7_REG32(SH7750_BCR2_REGOFS) -#define SH7750_BCR2_A0SZ 0xC000 /* Area 0 Bus Width */ +#define SH7750_BCR2_A0SZ 0xC000 /* Area 0 Bus Width */ #define SH7750_BCR2_A0SZ_S 14 -#define SH7750_BCR2_A6SZ 0x3000 /* Area 6 Bus Width */ +#define SH7750_BCR2_A6SZ 0x3000 /* Area 6 Bus Width */ #define SH7750_BCR2_A6SZ_S 12 -#define SH7750_BCR2_A5SZ 0x0C00 /* Area 5 Bus Width */ +#define SH7750_BCR2_A5SZ 0x0C00 /* Area 5 Bus Width */ #define SH7750_BCR2_A5SZ_S 10 -#define SH7750_BCR2_A4SZ 0x0300 /* Area 4 Bus Width */ +#define SH7750_BCR2_A4SZ 0x0300 /* Area 4 Bus Width */ #define SH7750_BCR2_A4SZ_S 8 -#define SH7750_BCR2_A3SZ 0x00C0 /* Area 3 Bus Width */ +#define SH7750_BCR2_A3SZ 0x00C0 /* Area 3 Bus Width */ #define SH7750_BCR2_A3SZ_S 6 -#define SH7750_BCR2_A2SZ 0x0030 /* Area 2 Bus Width */ +#define SH7750_BCR2_A2SZ 0x0030 /* Area 2 Bus Width */ #define SH7750_BCR2_A2SZ_S 4 -#define SH7750_BCR2_A1SZ 0x000C /* Area 1 Bus Width */ +#define SH7750_BCR2_A1SZ 0x000C /* Area 1 Bus Width */ #define SH7750_BCR2_A1SZ_S 2 -#define SH7750_BCR2_SZ_64 0 /* 64 bits */ -#define SH7750_BCR2_SZ_8 1 /* 8 bits */ -#define SH7750_BCR2_SZ_16 2 /* 16 bits */ -#define SH7750_BCR2_SZ_32 3 /* 32 bits */ -#define SH7750_BCR2_PORTEN 0x0001 /* Port Function Enable : - 0 - D51-D32 are not used as a port - 1 - D51-D32 are used as a port */ +#define SH7750_BCR2_SZ_64 0 /* 64 bits */ +#define SH7750_BCR2_SZ_8 1 /* 8 bits */ +#define SH7750_BCR2_SZ_16 2 /* 16 bits */ +#define SH7750_BCR2_SZ_32 3 /* 32 bits */ +#define SH7750_BCR2_PORTEN 0x0001 /* Port Function Enable */ + /* 0 - D51-D32 are not used as a port */ + /* 1 - D51-D32 are used as a port */ /* Wait Control Register 1 - WCR1 */ -#define SH7750_WCR1_REGOFS 0x800008 /* offset */ +#define SH7750_WCR1_REGOFS 0x800008 /* offset */ #define SH7750_WCR1 SH7750_P4_REG32(SH7750_WCR1_REGOFS) #define SH7750_WCR1_A7 SH7750_A7_REG32(SH7750_WCR1_REGOFS) -#define SH7750_WCR1_DMAIW 0x70000000 /* DACK Device Inter-Cycle Idle - specification */ +#define SH7750_WCR1_DMAIW 0x70000000 /* DACK Device Inter-Cycle Idle */ + /* specification */ #define SH7750_WCR1_DMAIW_S 28 -#define SH7750_WCR1_A6IW 0x07000000 /* Area 6 Inter-Cycle Idle spec. */ +#define SH7750_WCR1_A6IW 0x07000000 /* Area 6 Inter-Cycle Idle spec. */ #define SH7750_WCR1_A6IW_S 24 -#define SH7750_WCR1_A5IW 0x00700000 /* Area 5 Inter-Cycle Idle spec. */ +#define SH7750_WCR1_A5IW 0x00700000 /* Area 5 Inter-Cycle Idle spec. */ #define SH7750_WCR1_A5IW_S 20 -#define SH7750_WCR1_A4IW 0x00070000 /* Area 4 Inter-Cycle Idle spec. */ +#define SH7750_WCR1_A4IW 0x00070000 /* Area 4 Inter-Cycle Idle spec. */ #define SH7750_WCR1_A4IW_S 16 -#define SH7750_WCR1_A3IW 0x00007000 /* Area 3 Inter-Cycle Idle spec. */ +#define SH7750_WCR1_A3IW 0x00007000 /* Area 3 Inter-Cycle Idle spec. 
*/ #define SH7750_WCR1_A3IW_S 12 -#define SH7750_WCR1_A2IW 0x00000700 /* Area 2 Inter-Cycle Idle spec. */ +#define SH7750_WCR1_A2IW 0x00000700 /* Area 2 Inter-Cycle Idle spec. */ #define SH7750_WCR1_A2IW_S 8 -#define SH7750_WCR1_A1IW 0x00000070 /* Area 1 Inter-Cycle Idle spec. */ +#define SH7750_WCR1_A1IW 0x00000070 /* Area 1 Inter-Cycle Idle spec. */ #define SH7750_WCR1_A1IW_S 4 -#define SH7750_WCR1_A0IW 0x00000007 /* Area 0 Inter-Cycle Idle spec. */ +#define SH7750_WCR1_A0IW 0x00000007 /* Area 0 Inter-Cycle Idle spec. */ #define SH7750_WCR1_A0IW_S 0 /* Wait Control Register 2 - WCR2 */ -#define SH7750_WCR2_REGOFS 0x80000C /* offset */ +#define SH7750_WCR2_REGOFS 0x80000C /* offset */ #define SH7750_WCR2 SH7750_P4_REG32(SH7750_WCR2_REGOFS) #define SH7750_WCR2_A7 SH7750_A7_REG32(SH7750_WCR2_REGOFS) -#define SH7750_WCR2_A6W 0xE0000000 /* Area 6 Wait Control */ +#define SH7750_WCR2_A6W 0xE0000000 /* Area 6 Wait Control */ #define SH7750_WCR2_A6W_S 29 -#define SH7750_WCR2_A6B 0x1C000000 /* Area 6 Burst Pitch */ +#define SH7750_WCR2_A6B 0x1C000000 /* Area 6 Burst Pitch */ #define SH7750_WCR2_A6B_S 26 -#define SH7750_WCR2_A5W 0x03800000 /* Area 5 Wait Control */ +#define SH7750_WCR2_A5W 0x03800000 /* Area 5 Wait Control */ #define SH7750_WCR2_A5W_S 23 -#define SH7750_WCR2_A5B 0x00700000 /* Area 5 Burst Pitch */ +#define SH7750_WCR2_A5B 0x00700000 /* Area 5 Burst Pitch */ #define SH7750_WCR2_A5B_S 20 -#define SH7750_WCR2_A4W 0x000E0000 /* Area 4 Wait Control */ +#define SH7750_WCR2_A4W 0x000E0000 /* Area 4 Wait Control */ #define SH7750_WCR2_A4W_S 17 -#define SH7750_WCR2_A3W 0x0000E000 /* Area 3 Wait Control */ +#define SH7750_WCR2_A3W 0x0000E000 /* Area 3 Wait Control */ #define SH7750_WCR2_A3W_S 13 -#define SH7750_WCR2_A2W 0x00000E00 /* Area 2 Wait Control */ +#define SH7750_WCR2_A2W 0x00000E00 /* Area 2 Wait Control */ #define SH7750_WCR2_A2W_S 9 -#define SH7750_WCR2_A1W 0x000001C0 /* Area 1 Wait Control */ +#define SH7750_WCR2_A1W 0x000001C0 /* Area 1 Wait Control */ #define SH7750_WCR2_A1W_S 6 -#define SH7750_WCR2_A0W 0x00000038 /* Area 0 Wait Control */ +#define SH7750_WCR2_A0W 0x00000038 /* Area 0 Wait Control */ #define SH7750_WCR2_A0W_S 3 -#define SH7750_WCR2_A0B 0x00000007 /* Area 0 Burst Pitch */ +#define SH7750_WCR2_A0B 0x00000007 /* Area 0 Burst Pitch */ #define SH7750_WCR2_A0B_S 0 -#define SH7750_WCR2_WS0 0 /* 0 wait states inserted */ -#define SH7750_WCR2_WS1 1 /* 1 wait states inserted */ -#define SH7750_WCR2_WS2 2 /* 2 wait states inserted */ -#define SH7750_WCR2_WS3 3 /* 3 wait states inserted */ -#define SH7750_WCR2_WS6 4 /* 6 wait states inserted */ -#define SH7750_WCR2_WS9 5 /* 9 wait states inserted */ -#define SH7750_WCR2_WS12 6 /* 12 wait states inserted */ -#define SH7750_WCR2_WS15 7 /* 15 wait states inserted */ - -#define SH7750_WCR2_BPWS0 0 /* 0 wait states inserted from 2nd access */ -#define SH7750_WCR2_BPWS1 1 /* 1 wait states inserted from 2nd access */ -#define SH7750_WCR2_BPWS2 2 /* 2 wait states inserted from 2nd access */ -#define SH7750_WCR2_BPWS3 3 /* 3 wait states inserted from 2nd access */ -#define SH7750_WCR2_BPWS4 4 /* 4 wait states inserted from 2nd access */ -#define SH7750_WCR2_BPWS5 5 /* 5 wait states inserted from 2nd access */ -#define SH7750_WCR2_BPWS6 6 /* 6 wait states inserted from 2nd access */ -#define SH7750_WCR2_BPWS7 7 /* 7 wait states inserted from 2nd access */ +#define SH7750_WCR2_WS0 0 /* 0 wait states inserted */ +#define SH7750_WCR2_WS1 1 /* 1 wait states inserted */ +#define SH7750_WCR2_WS2 2 /* 2 wait states inserted */ +#define 
SH7750_WCR2_WS3 3 /* 3 wait states inserted */ +#define SH7750_WCR2_WS6 4 /* 6 wait states inserted */ +#define SH7750_WCR2_WS9 5 /* 9 wait states inserted */ +#define SH7750_WCR2_WS12 6 /* 12 wait states inserted */ +#define SH7750_WCR2_WS15 7 /* 15 wait states inserted */ + +#define SH7750_WCR2_BPWS0 0 /* 0 wait states inserted from 2nd access */ +#define SH7750_WCR2_BPWS1 1 /* 1 wait states inserted from 2nd access */ +#define SH7750_WCR2_BPWS2 2 /* 2 wait states inserted from 2nd access */ +#define SH7750_WCR2_BPWS3 3 /* 3 wait states inserted from 2nd access */ +#define SH7750_WCR2_BPWS4 4 /* 4 wait states inserted from 2nd access */ +#define SH7750_WCR2_BPWS5 5 /* 5 wait states inserted from 2nd access */ +#define SH7750_WCR2_BPWS6 6 /* 6 wait states inserted from 2nd access */ +#define SH7750_WCR2_BPWS7 7 /* 7 wait states inserted from 2nd access */ /* DRAM CAS\ Assertion Delay (area 3,2) */ -#define SH7750_WCR2_DRAM_CAS_ASW1 0 /* 1 cycle */ -#define SH7750_WCR2_DRAM_CAS_ASW2 1 /* 2 cycles */ -#define SH7750_WCR2_DRAM_CAS_ASW3 2 /* 3 cycles */ -#define SH7750_WCR2_DRAM_CAS_ASW4 3 /* 4 cycles */ -#define SH7750_WCR2_DRAM_CAS_ASW7 4 /* 7 cycles */ -#define SH7750_WCR2_DRAM_CAS_ASW10 5 /* 10 cycles */ -#define SH7750_WCR2_DRAM_CAS_ASW13 6 /* 13 cycles */ -#define SH7750_WCR2_DRAM_CAS_ASW16 7 /* 16 cycles */ +#define SH7750_WCR2_DRAM_CAS_ASW1 0 /* 1 cycle */ +#define SH7750_WCR2_DRAM_CAS_ASW2 1 /* 2 cycles */ +#define SH7750_WCR2_DRAM_CAS_ASW3 2 /* 3 cycles */ +#define SH7750_WCR2_DRAM_CAS_ASW4 3 /* 4 cycles */ +#define SH7750_WCR2_DRAM_CAS_ASW7 4 /* 7 cycles */ +#define SH7750_WCR2_DRAM_CAS_ASW10 5 /* 10 cycles */ +#define SH7750_WCR2_DRAM_CAS_ASW13 6 /* 13 cycles */ +#define SH7750_WCR2_DRAM_CAS_ASW16 7 /* 16 cycles */ /* SDRAM CAS\ Latency Cycles */ -#define SH7750_WCR2_SDRAM_CAS_LAT1 1 /* 1 cycle */ -#define SH7750_WCR2_SDRAM_CAS_LAT2 2 /* 2 cycles */ -#define SH7750_WCR2_SDRAM_CAS_LAT3 3 /* 3 cycles */ -#define SH7750_WCR2_SDRAM_CAS_LAT4 4 /* 4 cycles */ -#define SH7750_WCR2_SDRAM_CAS_LAT5 5 /* 5 cycles */ +#define SH7750_WCR2_SDRAM_CAS_LAT1 1 /* 1 cycle */ +#define SH7750_WCR2_SDRAM_CAS_LAT2 2 /* 2 cycles */ +#define SH7750_WCR2_SDRAM_CAS_LAT3 3 /* 3 cycles */ +#define SH7750_WCR2_SDRAM_CAS_LAT4 4 /* 4 cycles */ +#define SH7750_WCR2_SDRAM_CAS_LAT5 5 /* 5 cycles */ /* Wait Control Register 3 - WCR3 */ -#define SH7750_WCR3_REGOFS 0x800010 /* offset */ +#define SH7750_WCR3_REGOFS 0x800010 /* offset */ #define SH7750_WCR3 SH7750_P4_REG32(SH7750_WCR3_REGOFS) #define SH7750_WCR3_A7 SH7750_A7_REG32(SH7750_WCR3_REGOFS) -#define SH7750_WCR3_A6S 0x04000000 /* Area 6 Write Strobe Setup time */ -#define SH7750_WCR3_A6H 0x03000000 /* Area 6 Data Hold Time */ +#define SH7750_WCR3_A6S 0x04000000 /* Area 6 Write Strobe Setup time */ +#define SH7750_WCR3_A6H 0x03000000 /* Area 6 Data Hold Time */ #define SH7750_WCR3_A6H_S 24 -#define SH7750_WCR3_A5S 0x00400000 /* Area 5 Write Strobe Setup time */ -#define SH7750_WCR3_A5H 0x00300000 /* Area 5 Data Hold Time */ +#define SH7750_WCR3_A5S 0x00400000 /* Area 5 Write Strobe Setup time */ +#define SH7750_WCR3_A5H 0x00300000 /* Area 5 Data Hold Time */ #define SH7750_WCR3_A5H_S 20 -#define SH7750_WCR3_A4S 0x00040000 /* Area 4 Write Strobe Setup time */ -#define SH7750_WCR3_A4H 0x00030000 /* Area 4 Data Hold Time */ +#define SH7750_WCR3_A4S 0x00040000 /* Area 4 Write Strobe Setup time */ +#define SH7750_WCR3_A4H 0x00030000 /* Area 4 Data Hold Time */ #define SH7750_WCR3_A4H_S 16 -#define SH7750_WCR3_A3S 0x00004000 /* Area 3 Write Strobe Setup time */ 
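As a usage sketch (not part of the patch itself), the WCR2 field masks, shift constants and wait-state codes above are typically combined as shown below. It assumes the SH7750_P4_REG32()-based register macros defined earlier in this header expand to plain addresses, and the wait-state choices are illustrative only.

#include <stdint.h>

static inline void sh7750_wcr2_example(void)
{
    /* Illustrative only: 6 waits for area 6, 3 waits and burst pitch 1 for area 0. */
    uint32_t wcr2 = ((uint32_t)SH7750_WCR2_WS6   << SH7750_WCR2_A6W_S) |
                    ((uint32_t)SH7750_WCR2_WS3   << SH7750_WCR2_A0W_S) |
                    ((uint32_t)SH7750_WCR2_BPWS1 << SH7750_WCR2_A0B_S);

    /* Assumes SH7750_WCR2 is a bare P4 address, as the macros above suggest. */
    *(volatile uint32_t *)SH7750_WCR2 = wcr2;
}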
-#define SH7750_WCR3_A3H 0x00003000 /* Area 3 Data Hold Time */ +#define SH7750_WCR3_A3S 0x00004000 /* Area 3 Write Strobe Setup time */ +#define SH7750_WCR3_A3H 0x00003000 /* Area 3 Data Hold Time */ #define SH7750_WCR3_A3H_S 12 -#define SH7750_WCR3_A2S 0x00000400 /* Area 2 Write Strobe Setup time */ -#define SH7750_WCR3_A2H 0x00000300 /* Area 2 Data Hold Time */ +#define SH7750_WCR3_A2S 0x00000400 /* Area 2 Write Strobe Setup time */ +#define SH7750_WCR3_A2H 0x00000300 /* Area 2 Data Hold Time */ #define SH7750_WCR3_A2H_S 8 -#define SH7750_WCR3_A1S 0x00000040 /* Area 1 Write Strobe Setup time */ -#define SH7750_WCR3_A1H 0x00000030 /* Area 1 Data Hold Time */ +#define SH7750_WCR3_A1S 0x00000040 /* Area 1 Write Strobe Setup time */ +#define SH7750_WCR3_A1H 0x00000030 /* Area 1 Data Hold Time */ #define SH7750_WCR3_A1H_S 4 -#define SH7750_WCR3_A0S 0x00000004 /* Area 0 Write Strobe Setup time */ -#define SH7750_WCR3_A0H 0x00000003 /* Area 0 Data Hold Time */ +#define SH7750_WCR3_A0S 0x00000004 /* Area 0 Write Strobe Setup time */ +#define SH7750_WCR3_A0H 0x00000003 /* Area 0 Data Hold Time */ #define SH7750_WCR3_A0H_S 0 -#define SH7750_WCR3_DHWS_0 0 /* 0 wait states data hold time */ -#define SH7750_WCR3_DHWS_1 1 /* 1 wait states data hold time */ -#define SH7750_WCR3_DHWS_2 2 /* 2 wait states data hold time */ -#define SH7750_WCR3_DHWS_3 3 /* 3 wait states data hold time */ +#define SH7750_WCR3_DHWS_0 0 /* 0 wait states data hold time */ +#define SH7750_WCR3_DHWS_1 1 /* 1 wait states data hold time */ +#define SH7750_WCR3_DHWS_2 2 /* 2 wait states data hold time */ +#define SH7750_WCR3_DHWS_3 3 /* 3 wait states data hold time */ -#define SH7750_MCR_REGOFS 0x800014 /* offset */ +#define SH7750_MCR_REGOFS 0x800014 /* offset */ #define SH7750_MCR SH7750_P4_REG32(SH7750_MCR_REGOFS) #define SH7750_MCR_A7 SH7750_A7_REG32(SH7750_MCR_REGOFS) -#define SH7750_MCR_RASD 0x80000000 /* RAS Down mode */ -#define SH7750_MCR_MRSET 0x40000000 /* SDRAM Mode Register Set */ -#define SH7750_MCR_PALL 0x00000000 /* SDRAM Precharge All cmd. Mode */ -#define SH7750_MCR_TRC 0x38000000 /* RAS Precharge Time at End of - Refresh: */ -#define SH7750_MCR_TRC_0 0x00000000 /* 0 */ -#define SH7750_MCR_TRC_3 0x08000000 /* 3 */ -#define SH7750_MCR_TRC_6 0x10000000 /* 6 */ -#define SH7750_MCR_TRC_9 0x18000000 /* 9 */ -#define SH7750_MCR_TRC_12 0x20000000 /* 12 */ -#define SH7750_MCR_TRC_15 0x28000000 /* 15 */ -#define SH7750_MCR_TRC_18 0x30000000 /* 18 */ -#define SH7750_MCR_TRC_21 0x38000000 /* 21 */ - -#define SH7750_MCR_TCAS 0x00800000 /* CAS Negation Period */ -#define SH7750_MCR_TCAS_1 0x00000000 /* 1 */ -#define SH7750_MCR_TCAS_2 0x00800000 /* 2 */ - -#define SH7750_MCR_TPC 0x00380000 /* DRAM: RAS Precharge Period - SDRAM: minimum number of cycles - until the next bank active cmd - is output after precharging */ +#define SH7750_MCR_RASD 0x80000000 /* RAS Down mode */ +#define SH7750_MCR_MRSET 0x40000000 /* SDRAM Mode Register Set */ +#define SH7750_MCR_PALL 0x00000000 /* SDRAM Precharge All cmd. 
Mode */ +#define SH7750_MCR_TRC 0x38000000 /* RAS Precharge Time at End of */ + /* Refresh: */ +#define SH7750_MCR_TRC_0 0x00000000 /* 0 */ +#define SH7750_MCR_TRC_3 0x08000000 /* 3 */ +#define SH7750_MCR_TRC_6 0x10000000 /* 6 */ +#define SH7750_MCR_TRC_9 0x18000000 /* 9 */ +#define SH7750_MCR_TRC_12 0x20000000 /* 12 */ +#define SH7750_MCR_TRC_15 0x28000000 /* 15 */ +#define SH7750_MCR_TRC_18 0x30000000 /* 18 */ +#define SH7750_MCR_TRC_21 0x38000000 /* 21 */ + +#define SH7750_MCR_TCAS 0x00800000 /* CAS Negation Period */ +#define SH7750_MCR_TCAS_1 0x00000000 /* 1 */ +#define SH7750_MCR_TCAS_2 0x00800000 /* 2 */ + +#define SH7750_MCR_TPC 0x00380000 /* DRAM: RAS Precharge Period */ + /* SDRAM: minimum number of cycles */ + /* until the next bank active cmd */ + /* is output after precharging */ #define SH7750_MCR_TPC_S 19 -#define SH7750_MCR_TPC_SDRAM_1 0x00000000 /* 1 cycle */ -#define SH7750_MCR_TPC_SDRAM_2 0x00080000 /* 2 cycles */ -#define SH7750_MCR_TPC_SDRAM_3 0x00100000 /* 3 cycles */ -#define SH7750_MCR_TPC_SDRAM_4 0x00180000 /* 4 cycles */ -#define SH7750_MCR_TPC_SDRAM_5 0x00200000 /* 5 cycles */ -#define SH7750_MCR_TPC_SDRAM_6 0x00280000 /* 6 cycles */ -#define SH7750_MCR_TPC_SDRAM_7 0x00300000 /* 7 cycles */ -#define SH7750_MCR_TPC_SDRAM_8 0x00380000 /* 8 cycles */ - -#define SH7750_MCR_RCD 0x00030000 /* DRAM: RAS-CAS Assertion Delay time - SDRAM: bank active-read/write cmd - delay time */ -#define SH7750_MCR_RCD_DRAM_2 0x00000000 /* DRAM delay 2 clocks */ -#define SH7750_MCR_RCD_DRAM_3 0x00010000 /* DRAM delay 3 clocks */ -#define SH7750_MCR_RCD_DRAM_4 0x00020000 /* DRAM delay 4 clocks */ -#define SH7750_MCR_RCD_DRAM_5 0x00030000 /* DRAM delay 5 clocks */ -#define SH7750_MCR_RCD_SDRAM_2 0x00010000 /* DRAM delay 2 clocks */ -#define SH7750_MCR_RCD_SDRAM_3 0x00020000 /* DRAM delay 3 clocks */ -#define SH7750_MCR_RCD_SDRAM_4 0x00030000 /* DRAM delay 4 clocks */ - -#define SH7750_MCR_TRWL 0x0000E000 /* SDRAM Write Precharge Delay */ -#define SH7750_MCR_TRWL_1 0x00000000 /* 1 */ -#define SH7750_MCR_TRWL_2 0x00002000 /* 2 */ -#define SH7750_MCR_TRWL_3 0x00004000 /* 3 */ -#define SH7750_MCR_TRWL_4 0x00006000 /* 4 */ -#define SH7750_MCR_TRWL_5 0x00008000 /* 5 */ - -#define SH7750_MCR_TRAS 0x00001C00 /* DRAM: CAS-Before-RAS Refresh RAS - asserting period - SDRAM: Command interval after - synchronous DRAM refresh */ -#define SH7750_MCR_TRAS_DRAM_2 0x00000000 /* 2 */ -#define SH7750_MCR_TRAS_DRAM_3 0x00000400 /* 3 */ -#define SH7750_MCR_TRAS_DRAM_4 0x00000800 /* 4 */ -#define SH7750_MCR_TRAS_DRAM_5 0x00000C00 /* 5 */ -#define SH7750_MCR_TRAS_DRAM_6 0x00001000 /* 6 */ -#define SH7750_MCR_TRAS_DRAM_7 0x00001400 /* 7 */ -#define SH7750_MCR_TRAS_DRAM_8 0x00001800 /* 8 */ -#define SH7750_MCR_TRAS_DRAM_9 0x00001C00 /* 9 */ - -#define SH7750_MCR_TRAS_SDRAM_TRC_4 0x00000000 /* 4 + TRC */ -#define SH7750_MCR_TRAS_SDRAM_TRC_5 0x00000400 /* 5 + TRC */ -#define SH7750_MCR_TRAS_SDRAM_TRC_6 0x00000800 /* 6 + TRC */ -#define SH7750_MCR_TRAS_SDRAM_TRC_7 0x00000C00 /* 7 + TRC */ -#define SH7750_MCR_TRAS_SDRAM_TRC_8 0x00001000 /* 8 + TRC */ -#define SH7750_MCR_TRAS_SDRAM_TRC_9 0x00001400 /* 9 + TRC */ -#define SH7750_MCR_TRAS_SDRAM_TRC_10 0x00001800 /* 10 + TRC */ -#define SH7750_MCR_TRAS_SDRAM_TRC_11 0x00001C00 /* 11 + TRC */ - -#define SH7750_MCR_BE 0x00000200 /* Burst Enable */ -#define SH7750_MCR_SZ 0x00000180 /* Memory Data Size */ -#define SH7750_MCR_SZ_64 0x00000000 /* 64 bits */ -#define SH7750_MCR_SZ_16 0x00000100 /* 16 bits */ -#define SH7750_MCR_SZ_32 0x00000180 /* 32 bits */ - -#define 
SH7750_MCR_AMX 0x00000078 /* Address Multiplexing */ +#define SH7750_MCR_TPC_SDRAM_1 0x00000000 /* 1 cycle */ +#define SH7750_MCR_TPC_SDRAM_2 0x00080000 /* 2 cycles */ +#define SH7750_MCR_TPC_SDRAM_3 0x00100000 /* 3 cycles */ +#define SH7750_MCR_TPC_SDRAM_4 0x00180000 /* 4 cycles */ +#define SH7750_MCR_TPC_SDRAM_5 0x00200000 /* 5 cycles */ +#define SH7750_MCR_TPC_SDRAM_6 0x00280000 /* 6 cycles */ +#define SH7750_MCR_TPC_SDRAM_7 0x00300000 /* 7 cycles */ +#define SH7750_MCR_TPC_SDRAM_8 0x00380000 /* 8 cycles */ + +#define SH7750_MCR_RCD 0x00030000 /* DRAM: RAS-CAS Assertion Delay */ + /* time */ + /* SDRAM: bank active-read/write */ + /* command delay time */ +#define SH7750_MCR_RCD_DRAM_2 0x00000000 /* DRAM delay 2 clocks */ +#define SH7750_MCR_RCD_DRAM_3 0x00010000 /* DRAM delay 3 clocks */ +#define SH7750_MCR_RCD_DRAM_4 0x00020000 /* DRAM delay 4 clocks */ +#define SH7750_MCR_RCD_DRAM_5 0x00030000 /* DRAM delay 5 clocks */ +#define SH7750_MCR_RCD_SDRAM_2 0x00010000 /* DRAM delay 2 clocks */ +#define SH7750_MCR_RCD_SDRAM_3 0x00020000 /* DRAM delay 3 clocks */ +#define SH7750_MCR_RCD_SDRAM_4 0x00030000 /* DRAM delay 4 clocks */ + +#define SH7750_MCR_TRWL 0x0000E000 /* SDRAM Write Precharge Delay */ +#define SH7750_MCR_TRWL_1 0x00000000 /* 1 */ +#define SH7750_MCR_TRWL_2 0x00002000 /* 2 */ +#define SH7750_MCR_TRWL_3 0x00004000 /* 3 */ +#define SH7750_MCR_TRWL_4 0x00006000 /* 4 */ +#define SH7750_MCR_TRWL_5 0x00008000 /* 5 */ + +#define SH7750_MCR_TRAS 0x00001C00 /* DRAM: CAS-Before-RAS Refresh RAS */ + /* asserting period */ + /* SDRAM: Command interval after */ + /* synchronous DRAM refresh */ +#define SH7750_MCR_TRAS_DRAM_2 0x00000000 /* 2 */ +#define SH7750_MCR_TRAS_DRAM_3 0x00000400 /* 3 */ +#define SH7750_MCR_TRAS_DRAM_4 0x00000800 /* 4 */ +#define SH7750_MCR_TRAS_DRAM_5 0x00000C00 /* 5 */ +#define SH7750_MCR_TRAS_DRAM_6 0x00001000 /* 6 */ +#define SH7750_MCR_TRAS_DRAM_7 0x00001400 /* 7 */ +#define SH7750_MCR_TRAS_DRAM_8 0x00001800 /* 8 */ +#define SH7750_MCR_TRAS_DRAM_9 0x00001C00 /* 9 */ + +#define SH7750_MCR_TRAS_SDRAM_TRC_4 0x00000000 /* 4 + TRC */ +#define SH7750_MCR_TRAS_SDRAM_TRC_5 0x00000400 /* 5 + TRC */ +#define SH7750_MCR_TRAS_SDRAM_TRC_6 0x00000800 /* 6 + TRC */ +#define SH7750_MCR_TRAS_SDRAM_TRC_7 0x00000C00 /* 7 + TRC */ +#define SH7750_MCR_TRAS_SDRAM_TRC_8 0x00001000 /* 8 + TRC */ +#define SH7750_MCR_TRAS_SDRAM_TRC_9 0x00001400 /* 9 + TRC */ +#define SH7750_MCR_TRAS_SDRAM_TRC_10 0x00001800 /* 10 + TRC */ +#define SH7750_MCR_TRAS_SDRAM_TRC_11 0x00001C00 /* 11 + TRC */ + +#define SH7750_MCR_BE 0x00000200 /* Burst Enable */ +#define SH7750_MCR_SZ 0x00000180 /* Memory Data Size */ +#define SH7750_MCR_SZ_64 0x00000000 /* 64 bits */ +#define SH7750_MCR_SZ_16 0x00000100 /* 16 bits */ +#define SH7750_MCR_SZ_32 0x00000180 /* 32 bits */ + +#define SH7750_MCR_AMX 0x00000078 /* Address Multiplexing */ #define SH7750_MCR_AMX_S 3 -#define SH7750_MCR_AMX_DRAM_8BIT_COL 0x00000000 /* 8-bit column addr */ -#define SH7750_MCR_AMX_DRAM_9BIT_COL 0x00000008 /* 9-bit column addr */ -#define SH7750_MCR_AMX_DRAM_10BIT_COL 0x00000010 /* 10-bit column addr */ -#define SH7750_MCR_AMX_DRAM_11BIT_COL 0x00000018 /* 11-bit column addr */ -#define SH7750_MCR_AMX_DRAM_12BIT_COL 0x00000020 /* 12-bit column addr */ +#define SH7750_MCR_AMX_DRAM_8BIT_COL 0x00000000 /* 8-bit column addr */ +#define SH7750_MCR_AMX_DRAM_9BIT_COL 0x00000008 /* 9-bit column addr */ +#define SH7750_MCR_AMX_DRAM_10BIT_COL 0x00000010 /* 10-bit column addr */ +#define SH7750_MCR_AMX_DRAM_11BIT_COL 0x00000018 /* 11-bit column addr */ 
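Similarly, a minimal sketch (again not part of the patch) of how the MCR field constants above might be combined for an SDRAM configuration; the timing values are placeholders rather than recommendations, since real settings depend on the memory devices and bus clock, and the bare-address assumption from the previous sketch applies here as well.

#include <stdint.h>

static inline void sh7750_mcr_sdram_example(void)
{
    uint32_t mcr = SH7750_MCR_SZ_32             /* 32-bit memory data size */
                 | SH7750_MCR_BE                /* burst enable */
                 | SH7750_MCR_TRWL_2            /* SDRAM write precharge delay: 2 */
                 | SH7750_MCR_RCD_SDRAM_2       /* bank active to read/write cmd: 2 cycles */
                 | SH7750_MCR_TPC_SDRAM_3       /* precharge to next bank active: 3 cycles */
                 | SH7750_MCR_TRAS_SDRAM_TRC_4; /* command interval after refresh: 4 + TRC */

    /* Assumes SH7750_MCR is a bare P4 address, as the macros above suggest. */
    *(volatile uint32_t *)SH7750_MCR = mcr;
}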
+#define SH7750_MCR_AMX_DRAM_12BIT_COL 0x00000020 /* 12-bit column addr */ /* See SH7750 Hardware Manual for SDRAM address multiplexor selection */ -#define SH7750_MCR_RFSH 0x00000004 /* Refresh Control */ -#define SH7750_MCR_RMODE 0x00000002 /* Refresh Mode: */ -#define SH7750_MCR_RMODE_NORMAL 0x00000000 /* Normal Refresh Mode */ -#define SH7750_MCR_RMODE_SELF 0x00000002 /* Self-Refresh Mode */ -#define SH7750_MCR_RMODE_EDO 0x00000001 /* EDO Mode */ +#define SH7750_MCR_RFSH 0x00000004 /* Refresh Control */ +#define SH7750_MCR_RMODE 0x00000002 /* Refresh Mode: */ +#define SH7750_MCR_RMODE_NORMAL 0x00000000 /* Normal Refresh Mode */ +#define SH7750_MCR_RMODE_SELF 0x00000002 /* Self-Refresh Mode */ +#define SH7750_MCR_RMODE_EDO 0x00000001 /* EDO Mode */ /* SDRAM Mode Set address */ #define SH7750_SDRAM_MODE_A2_BASE 0xFF900000 @@ -894,119 +894,119 @@ /* PCMCIA Control Register (half) - PCR */ -#define SH7750_PCR_REGOFS 0x800018 /* offset */ +#define SH7750_PCR_REGOFS 0x800018 /* offset */ #define SH7750_PCR SH7750_P4_REG32(SH7750_PCR_REGOFS) #define SH7750_PCR_A7 SH7750_A7_REG32(SH7750_PCR_REGOFS) -#define SH7750_PCR_A5PCW 0xC000 /* Area 5 PCMCIA Wait - Number of wait - states to be added to the number of - waits specified by WCR2 in a low-speed - PCMCIA wait cycle */ -#define SH7750_PCR_A5PCW_0 0x0000 /* 0 waits inserted */ -#define SH7750_PCR_A5PCW_15 0x4000 /* 15 waits inserted */ -#define SH7750_PCR_A5PCW_30 0x8000 /* 30 waits inserted */ -#define SH7750_PCR_A5PCW_50 0xC000 /* 50 waits inserted */ - -#define SH7750_PCR_A6PCW 0x3000 /* Area 6 PCMCIA Wait - Number of wait - states to be added to the number of - waits specified by WCR2 in a low-speed - PCMCIA wait cycle */ -#define SH7750_PCR_A6PCW_0 0x0000 /* 0 waits inserted */ -#define SH7750_PCR_A6PCW_15 0x1000 /* 15 waits inserted */ -#define SH7750_PCR_A6PCW_30 0x2000 /* 30 waits inserted */ -#define SH7750_PCR_A6PCW_50 0x3000 /* 50 waits inserted */ - -#define SH7750_PCR_A5TED 0x0E00 /* Area 5 Address-OE\/WE\ Assertion Delay, - delay time from address output to - OE\/WE\ assertion on the connected - PCMCIA interface */ +#define SH7750_PCR_A5PCW 0xC000 /* Area 5 PCMCIA Wait - Number of wait */ + /* states to be added to the number of */ + /* waits specified by WCR2 in a */ + /* low-speed PCMCIA wait cycle */ +#define SH7750_PCR_A5PCW_0 0x0000 /* 0 waits inserted */ +#define SH7750_PCR_A5PCW_15 0x4000 /* 15 waits inserted */ +#define SH7750_PCR_A5PCW_30 0x8000 /* 30 waits inserted */ +#define SH7750_PCR_A5PCW_50 0xC000 /* 50 waits inserted */ + +#define SH7750_PCR_A6PCW 0x3000 /* Area 6 PCMCIA Wait - Number of wait */ + /* states to be added to the number of */ + /* waits specified by WCR2 in a */ + /* low-speed PCMCIA wait cycle */ +#define SH7750_PCR_A6PCW_0 0x0000 /* 0 waits inserted */ +#define SH7750_PCR_A6PCW_15 0x1000 /* 15 waits inserted */ +#define SH7750_PCR_A6PCW_30 0x2000 /* 30 waits inserted */ +#define SH7750_PCR_A6PCW_50 0x3000 /* 50 waits inserted */ + +#define SH7750_PCR_A5TED 0x0E00 /* Area 5 Addr-OE\/WE\ Assertion Delay */ + /* delay time from address output to */ + /* OE\/WE\ assertion on the connected */ + /* PCMCIA interface */ #define SH7750_PCR_A5TED_S 9 -#define SH7750_PCR_A6TED 0x01C0 /* Area 6 Address-OE\/WE\ Assertion Delay */ +#define SH7750_PCR_A6TED 0x01C0 /* Area 6 Addr-OE\/WE\ Assertion Delay */ #define SH7750_PCR_A6TED_S 6 -#define SH7750_PCR_TED_0WS 0 /* 0 Waits inserted */ -#define SH7750_PCR_TED_1WS 1 /* 1 Waits inserted */ -#define SH7750_PCR_TED_2WS 2 /* 2 Waits inserted */ -#define 
SH7750_PCR_TED_3WS 3 /* 3 Waits inserted */ -#define SH7750_PCR_TED_6WS 4 /* 6 Waits inserted */ -#define SH7750_PCR_TED_9WS 5 /* 9 Waits inserted */ -#define SH7750_PCR_TED_12WS 6 /* 12 Waits inserted */ -#define SH7750_PCR_TED_15WS 7 /* 15 Waits inserted */ - -#define SH7750_PCR_A5TEH 0x0038 /* Area 5 OE\/WE\ Negation Address delay, - address hold delay time from OE\/WE\ - negation in a write on the connected - PCMCIA interface */ +#define SH7750_PCR_TED_0WS 0 /* 0 Waits inserted */ +#define SH7750_PCR_TED_1WS 1 /* 1 Waits inserted */ +#define SH7750_PCR_TED_2WS 2 /* 2 Waits inserted */ +#define SH7750_PCR_TED_3WS 3 /* 3 Waits inserted */ +#define SH7750_PCR_TED_6WS 4 /* 6 Waits inserted */ +#define SH7750_PCR_TED_9WS 5 /* 9 Waits inserted */ +#define SH7750_PCR_TED_12WS 6 /* 12 Waits inserted */ +#define SH7750_PCR_TED_15WS 7 /* 15 Waits inserted */ + +#define SH7750_PCR_A5TEH 0x0038 /* Area 5 OE\/WE\ Negation Addr delay, */ + /* address hold delay time from OE\/WE\ */ + /* negation in a write on the connected */ + /* PCMCIA interface */ #define SH7750_PCR_A5TEH_S 3 -#define SH7750_PCR_A6TEH 0x0007 /* Area 6 OE\/WE\ Negation Address delay */ +#define SH7750_PCR_A6TEH 0x0007 /* Area 6 OE\/WE\ Negation Address delay */ #define SH7750_PCR_A6TEH_S 0 -#define SH7750_PCR_TEH_0WS 0 /* 0 Waits inserted */ -#define SH7750_PCR_TEH_1WS 1 /* 1 Waits inserted */ -#define SH7750_PCR_TEH_2WS 2 /* 2 Waits inserted */ -#define SH7750_PCR_TEH_3WS 3 /* 3 Waits inserted */ -#define SH7750_PCR_TEH_6WS 4 /* 6 Waits inserted */ -#define SH7750_PCR_TEH_9WS 5 /* 9 Waits inserted */ -#define SH7750_PCR_TEH_12WS 6 /* 12 Waits inserted */ -#define SH7750_PCR_TEH_15WS 7 /* 15 Waits inserted */ +#define SH7750_PCR_TEH_0WS 0 /* 0 Waits inserted */ +#define SH7750_PCR_TEH_1WS 1 /* 1 Waits inserted */ +#define SH7750_PCR_TEH_2WS 2 /* 2 Waits inserted */ +#define SH7750_PCR_TEH_3WS 3 /* 3 Waits inserted */ +#define SH7750_PCR_TEH_6WS 4 /* 6 Waits inserted */ +#define SH7750_PCR_TEH_9WS 5 /* 9 Waits inserted */ +#define SH7750_PCR_TEH_12WS 6 /* 12 Waits inserted */ +#define SH7750_PCR_TEH_15WS 7 /* 15 Waits inserted */ /* Refresh Timer Control/Status Register (half) - RTSCR */ -#define SH7750_RTCSR_REGOFS 0x80001C /* offset */ +#define SH7750_RTCSR_REGOFS 0x80001C /* offset */ #define SH7750_RTCSR SH7750_P4_REG32(SH7750_RTCSR_REGOFS) #define SH7750_RTCSR_A7 SH7750_A7_REG32(SH7750_RTCSR_REGOFS) -#define SH7750_RTCSR_KEY 0xA500 /* RTCSR write key */ -#define SH7750_RTCSR_CMF 0x0080 /* Compare-Match Flag (indicates a - match between the refresh timer - counter and refresh time constant) */ -#define SH7750_RTCSR_CMIE 0x0040 /* Compare-Match Interrupt Enable */ -#define SH7750_RTCSR_CKS 0x0038 /* Refresh Counter Clock Selects */ -#define SH7750_RTCSR_CKS_DIS 0x0000 /* Clock Input Disabled */ -#define SH7750_RTCSR_CKS_CKIO_DIV4 0x0008 /* Bus Clock / 4 */ -#define SH7750_RTCSR_CKS_CKIO_DIV16 0x0010 /* Bus Clock / 16 */ -#define SH7750_RTCSR_CKS_CKIO_DIV64 0x0018 /* Bus Clock / 64 */ -#define SH7750_RTCSR_CKS_CKIO_DIV256 0x0020 /* Bus Clock / 256 */ -#define SH7750_RTCSR_CKS_CKIO_DIV1024 0x0028 /* Bus Clock / 1024 */ -#define SH7750_RTCSR_CKS_CKIO_DIV2048 0x0030 /* Bus Clock / 2048 */ -#define SH7750_RTCSR_CKS_CKIO_DIV4096 0x0038 /* Bus Clock / 4096 */ - -#define SH7750_RTCSR_OVF 0x0004 /* Refresh Count Overflow Flag */ -#define SH7750_RTCSR_OVIE 0x0002 /* Refresh Count Overflow Interrupt - Enable */ -#define SH7750_RTCSR_LMTS 0x0001 /* Refresh Count Overflow Limit Select */ -#define SH7750_RTCSR_LMTS_1024 0x0000 /* Count Limit 
is 1024 */ -#define SH7750_RTCSR_LMTS_512 0x0001 /* Count Limit is 512 */ +#define SH7750_RTCSR_KEY 0xA500 /* RTCSR write key */ +#define SH7750_RTCSR_CMF 0x0080 /* Compare-Match Flag (indicates a */ + /* match between the refresh timer */ + /* counter and refresh time constant) */ +#define SH7750_RTCSR_CMIE 0x0040 /* Compare-Match Interrupt Enable */ +#define SH7750_RTCSR_CKS 0x0038 /* Refresh Counter Clock Selects */ +#define SH7750_RTCSR_CKS_DIS 0x0000 /* Clock Input Disabled */ +#define SH7750_RTCSR_CKS_CKIO_DIV4 0x0008 /* Bus Clock / 4 */ +#define SH7750_RTCSR_CKS_CKIO_DIV16 0x0010 /* Bus Clock / 16 */ +#define SH7750_RTCSR_CKS_CKIO_DIV64 0x0018 /* Bus Clock / 64 */ +#define SH7750_RTCSR_CKS_CKIO_DIV256 0x0020 /* Bus Clock / 256 */ +#define SH7750_RTCSR_CKS_CKIO_DIV1024 0x0028 /* Bus Clock / 1024 */ +#define SH7750_RTCSR_CKS_CKIO_DIV2048 0x0030 /* Bus Clock / 2048 */ +#define SH7750_RTCSR_CKS_CKIO_DIV4096 0x0038 /* Bus Clock / 4096 */ + +#define SH7750_RTCSR_OVF 0x0004 /* Refresh Count Overflow Flag */ +#define SH7750_RTCSR_OVIE 0x0002 /* Refresh Count Overflow Interrupt */ + /* Enable */ +#define SH7750_RTCSR_LMTS 0x0001 /* Refresh Count Overflow Limit Select */ +#define SH7750_RTCSR_LMTS_1024 0x0000 /* Count Limit is 1024 */ +#define SH7750_RTCSR_LMTS_512 0x0001 /* Count Limit is 512 */ /* Refresh Timer Counter (half) - RTCNT */ -#define SH7750_RTCNT_REGOFS 0x800020 /* offset */ +#define SH7750_RTCNT_REGOFS 0x800020 /* offset */ #define SH7750_RTCNT SH7750_P4_REG32(SH7750_RTCNT_REGOFS) #define SH7750_RTCNT_A7 SH7750_A7_REG32(SH7750_RTCNT_REGOFS) -#define SH7750_RTCNT_KEY 0xA500 /* RTCNT write key */ +#define SH7750_RTCNT_KEY 0xA500 /* RTCNT write key */ /* Refresh Time Constant Register (half) - RTCOR */ -#define SH7750_RTCOR_REGOFS 0x800024 /* offset */ +#define SH7750_RTCOR_REGOFS 0x800024 /* offset */ #define SH7750_RTCOR SH7750_P4_REG32(SH7750_RTCOR_REGOFS) #define SH7750_RTCOR_A7 SH7750_A7_REG32(SH7750_RTCOR_REGOFS) -#define SH7750_RTCOR_KEY 0xA500 /* RTCOR write key */ +#define SH7750_RTCOR_KEY 0xA500 /* RTCOR write key */ /* Refresh Count Register (half) - RFCR */ -#define SH7750_RFCR_REGOFS 0x800028 /* offset */ +#define SH7750_RFCR_REGOFS 0x800028 /* offset */ #define SH7750_RFCR SH7750_P4_REG32(SH7750_RFCR_REGOFS) #define SH7750_RFCR_A7 SH7750_A7_REG32(SH7750_RFCR_REGOFS) -#define SH7750_RFCR_KEY 0xA400 /* RFCR write key */ +#define SH7750_RFCR_KEY 0xA400 /* RFCR write key */ /* Synchronous DRAM mode registers - SDMR */ -#define SH7750_SDMR2_REGOFS 0x900000 /* base offset */ -#define SH7750_SDMR2_REGNB 0x0FFC /* nb of register */ +#define SH7750_SDMR2_REGOFS 0x900000 /* base offset */ +#define SH7750_SDMR2_REGNB 0x0FFC /* nb of register */ #define SH7750_SDMR2 SH7750_P4_REG32(SH7750_SDMR2_REGOFS) #define SH7750_SDMR2_A7 SH7750_A7_REG32(SH7750_SDMR2_REGOFS) -#define SH7750_SDMR3_REGOFS 0x940000 /* offset */ -#define SH7750_SDMR3_REGNB 0x0FFC /* nb of register */ +#define SH7750_SDMR3_REGOFS 0x940000 /* offset */ +#define SH7750_SDMR3_REGNB 0x0FFC /* nb of register */ #define SH7750_SDMR3 SH7750_P4_REG32(SH7750_SDMR3_REGOFS) #define SH7750_SDMR3_A7 SH7750_A7_REG32(SH7750_SDMR3_REGOFS) @@ -1015,7 +1015,7 @@ */ /* DMA Source Address Register - SAR0, SAR1, SAR2, SAR3 */ -#define SH7750_SAR_REGOFS(n) (0xA00000 + ((n)*16)) /* offset */ +#define SH7750_SAR_REGOFS(n) (0xA00000 + ((n) * 16)) /* offset */ #define SH7750_SAR(n) SH7750_P4_REG32(SH7750_SAR_REGOFS(n)) #define SH7750_SAR_A7(n) SH7750_A7_REG32(SH7750_SAR_REGOFS(n)) #define SH7750_SAR0 SH7750_SAR(0) @@ -1028,7 +1028,7 @@ 
#define SH7750_SAR3_A7 SH7750_SAR_A7(3) /* DMA Destination Address Register - DAR0, DAR1, DAR2, DAR3 */ -#define SH7750_DAR_REGOFS(n) (0xA00004 + ((n)*16)) /* offset */ +#define SH7750_DAR_REGOFS(n) (0xA00004 + ((n) * 16)) /* offset */ #define SH7750_DAR(n) SH7750_P4_REG32(SH7750_DAR_REGOFS(n)) #define SH7750_DAR_A7(n) SH7750_A7_REG32(SH7750_DAR_REGOFS(n)) #define SH7750_DAR0 SH7750_DAR(0) @@ -1041,7 +1041,7 @@ #define SH7750_DAR3_A7 SH7750_DAR_A7(3) /* DMA Transfer Count Register - DMATCR0, DMATCR1, DMATCR2, DMATCR3 */ -#define SH7750_DMATCR_REGOFS(n) (0xA00008 + ((n)*16)) /* offset */ +#define SH7750_DMATCR_REGOFS(n) (0xA00008 + ((n) * 16)) /* offset */ #define SH7750_DMATCR(n) SH7750_P4_REG32(SH7750_DMATCR_REGOFS(n)) #define SH7750_DMATCR_A7(n) SH7750_A7_REG32(SH7750_DMATCR_REGOFS(n)) #define SH7750_DMATCR0_P4 SH7750_DMATCR(0) @@ -1054,7 +1054,7 @@ #define SH7750_DMATCR3_A7 SH7750_DMATCR_A7(3) /* DMA Channel Control Register - CHCR0, CHCR1, CHCR2, CHCR3 */ -#define SH7750_CHCR_REGOFS(n) (0xA0000C + ((n)*16)) /* offset */ +#define SH7750_CHCR_REGOFS(n) (0xA0000C + ((n) * 16)) /* offset */ #define SH7750_CHCR(n) SH7750_P4_REG32(SH7750_CHCR_REGOFS(n)) #define SH7750_CHCR_A7(n) SH7750_A7_REG32(SH7750_CHCR_REGOFS(n)) #define SH7750_CHCR0 SH7750_CHCR(0) @@ -1066,227 +1066,227 @@ #define SH7750_CHCR2_A7 SH7750_CHCR_A7(2) #define SH7750_CHCR3_A7 SH7750_CHCR_A7(3) -#define SH7750_CHCR_SSA 0xE0000000 /* Source Address Space Attribute */ -#define SH7750_CHCR_SSA_PCMCIA 0x00000000 /* Reserved in PCMCIA access */ -#define SH7750_CHCR_SSA_DYNBSZ 0x20000000 /* Dynamic Bus Sizing I/O space */ -#define SH7750_CHCR_SSA_IO8 0x40000000 /* 8-bit I/O space */ -#define SH7750_CHCR_SSA_IO16 0x60000000 /* 16-bit I/O space */ -#define SH7750_CHCR_SSA_CMEM8 0x80000000 /* 8-bit common memory space */ -#define SH7750_CHCR_SSA_CMEM16 0xA0000000 /* 16-bit common memory space */ -#define SH7750_CHCR_SSA_AMEM8 0xC0000000 /* 8-bit attribute memory space */ -#define SH7750_CHCR_SSA_AMEM16 0xE0000000 /* 16-bit attribute memory space */ - -#define SH7750_CHCR_STC 0x10000000 /* Source Address Wait Control Select, - specifies CS5 or CS6 space wait - control for PCMCIA access */ - -#define SH7750_CHCR_DSA 0x0E000000 /* Source Address Space Attribute */ -#define SH7750_CHCR_DSA_PCMCIA 0x00000000 /* Reserved in PCMCIA access */ -#define SH7750_CHCR_DSA_DYNBSZ 0x02000000 /* Dynamic Bus Sizing I/O space */ -#define SH7750_CHCR_DSA_IO8 0x04000000 /* 8-bit I/O space */ -#define SH7750_CHCR_DSA_IO16 0x06000000 /* 16-bit I/O space */ -#define SH7750_CHCR_DSA_CMEM8 0x08000000 /* 8-bit common memory space */ -#define SH7750_CHCR_DSA_CMEM16 0x0A000000 /* 16-bit common memory space */ -#define SH7750_CHCR_DSA_AMEM8 0x0C000000 /* 8-bit attribute memory space */ -#define SH7750_CHCR_DSA_AMEM16 0x0E000000 /* 16-bit attribute memory space */ - -#define SH7750_CHCR_DTC 0x01000000 /* Destination Address Wait Control - Select, specifies CS5 or CS6 - space wait control for PCMCIA - access */ - -#define SH7750_CHCR_DS 0x00080000 /* DREQ\ Select : */ -#define SH7750_CHCR_DS_LOWLVL 0x00000000 /* Low Level Detection */ -#define SH7750_CHCR_DS_FALL 0x00080000 /* Falling Edge Detection */ - -#define SH7750_CHCR_RL 0x00040000 /* Request Check Level: */ -#define SH7750_CHCR_RL_ACTH 0x00000000 /* DRAK is an active high out */ -#define SH7750_CHCR_RL_ACTL 0x00040000 /* DRAK is an active low out */ - -#define SH7750_CHCR_AM 0x00020000 /* Acknowledge Mode: */ -#define SH7750_CHCR_AM_RD 0x00000000 /* DACK is output in read cycle */ -#define SH7750_CHCR_AM_WR 
0x00020000 /* DACK is output in write cycle */ - -#define SH7750_CHCR_AL 0x00010000 /* Acknowledge Level: */ -#define SH7750_CHCR_AL_ACTH 0x00000000 /* DACK is an active high out */ -#define SH7750_CHCR_AL_ACTL 0x00010000 /* DACK is an active low out */ - -#define SH7750_CHCR_DM 0x0000C000 /* Destination Address Mode: */ -#define SH7750_CHCR_DM_FIX 0x00000000 /* Destination Addr Fixed */ -#define SH7750_CHCR_DM_INC 0x00004000 /* Destination Addr Incremented */ -#define SH7750_CHCR_DM_DEC 0x00008000 /* Destination Addr Decremented */ - -#define SH7750_CHCR_SM 0x00003000 /* Source Address Mode: */ -#define SH7750_CHCR_SM_FIX 0x00000000 /* Source Addr Fixed */ -#define SH7750_CHCR_SM_INC 0x00001000 /* Source Addr Incremented */ -#define SH7750_CHCR_SM_DEC 0x00002000 /* Source Addr Decremented */ - -#define SH7750_CHCR_RS 0x00000F00 /* Request Source Select: */ -#define SH7750_CHCR_RS_ER_DA_EA_TO_EA 0x000 /* External Request, Dual Address - Mode (External Addr Space-> - External Addr Space) */ -#define SH7750_CHCR_RS_ER_SA_EA_TO_ED 0x200 /* External Request, Single - Address Mode (External Addr - Space -> External Device) */ -#define SH7750_CHCR_RS_ER_SA_ED_TO_EA 0x300 /* External Request, Single - Address Mode, (External - Device -> External Addr - Space) */ -#define SH7750_CHCR_RS_AR_EA_TO_EA 0x400 /* Auto-Request (External Addr - Space -> External Addr Space) */ - -#define SH7750_CHCR_RS_AR_EA_TO_OCP 0x500 /* Auto-Request (External Addr - Space -> On-chip Peripheral - Module) */ -#define SH7750_CHCR_RS_AR_OCP_TO_EA 0x600 /* Auto-Request (On-chip - Peripheral Module -> - External Addr Space */ -#define SH7750_CHCR_RS_SCITX_EA_TO_SC 0x800 /* SCI Transmit-Data-Empty intr - transfer request (external - address space -> SCTDR1) */ -#define SH7750_CHCR_RS_SCIRX_SC_TO_EA 0x900 /* SCI Receive-Data-Full intr - transfer request (SCRDR1 -> - External Addr Space) */ -#define SH7750_CHCR_RS_SCIFTX_EA_TO_SC 0xA00 /* SCIF Transmit-Data-Empty intr - transfer request (external - address space -> SCFTDR1) */ -#define SH7750_CHCR_RS_SCIFRX_SC_TO_EA 0xB00 /* SCIF Receive-Data-Full intr - transfer request (SCFRDR2 -> - External Addr Space) */ -#define SH7750_CHCR_RS_TMU2_EA_TO_EA 0xC00 /* TMU Channel 2 (input capture - interrupt), (external address - space -> external address - space) */ -#define SH7750_CHCR_RS_TMU2_EA_TO_OCP 0xD00 /* TMU Channel 2 (input capture - interrupt), (external address - space -> on-chip peripheral - module) */ -#define SH7750_CHCR_RS_TMU2_OCP_TO_EA 0xE00 /* TMU Channel 2 (input capture - interrupt), (on-chip - peripheral module -> external - address space) */ - -#define SH7750_CHCR_TM 0x00000080 /* Transmit mode: */ -#define SH7750_CHCR_TM_CSTEAL 0x00000000 /* Cycle Steal Mode */ -#define SH7750_CHCR_TM_BURST 0x00000080 /* Burst Mode */ - -#define SH7750_CHCR_TS 0x00000070 /* Transmit Size: */ -#define SH7750_CHCR_TS_QUAD 0x00000000 /* Quadword Size (64 bits) */ -#define SH7750_CHCR_TS_BYTE 0x00000010 /* Byte Size (8 bit) */ -#define SH7750_CHCR_TS_WORD 0x00000020 /* Word Size (16 bit) */ -#define SH7750_CHCR_TS_LONG 0x00000030 /* Longword Size (32 bit) */ -#define SH7750_CHCR_TS_BLOCK 0x00000040 /* 32-byte block transfer */ - -#define SH7750_CHCR_IE 0x00000004 /* Interrupt Enable */ -#define SH7750_CHCR_TE 0x00000002 /* Transfer End */ -#define SH7750_CHCR_DE 0x00000001 /* DMAC Enable */ +#define SH7750_CHCR_SSA 0xE0000000 /* Source Address Space Attribute */ +#define SH7750_CHCR_SSA_PCMCIA 0x00000000 /* Reserved in PCMCIA access */ +#define SH7750_CHCR_SSA_DYNBSZ 0x20000000 /* 
Dynamic Bus Sizing I/O space */ +#define SH7750_CHCR_SSA_IO8 0x40000000 /* 8-bit I/O space */ +#define SH7750_CHCR_SSA_IO16 0x60000000 /* 16-bit I/O space */ +#define SH7750_CHCR_SSA_CMEM8 0x80000000 /* 8-bit common memory space */ +#define SH7750_CHCR_SSA_CMEM16 0xA0000000 /* 16-bit common memory space */ +#define SH7750_CHCR_SSA_AMEM8 0xC0000000 /* 8-bit attribute memory space */ +#define SH7750_CHCR_SSA_AMEM16 0xE0000000 /* 16-bit attribute memory space */ + +#define SH7750_CHCR_STC 0x10000000 /* Source Addr Wait Control Select */ + /* specifies CS5 or CS6 space wait */ + /* control for PCMCIA access */ + +#define SH7750_CHCR_DSA 0x0E000000 /* Source Address Space Attribute */ +#define SH7750_CHCR_DSA_PCMCIA 0x00000000 /* Reserved in PCMCIA access */ +#define SH7750_CHCR_DSA_DYNBSZ 0x02000000 /* Dynamic Bus Sizing I/O space */ +#define SH7750_CHCR_DSA_IO8 0x04000000 /* 8-bit I/O space */ +#define SH7750_CHCR_DSA_IO16 0x06000000 /* 16-bit I/O space */ +#define SH7750_CHCR_DSA_CMEM8 0x08000000 /* 8-bit common memory space */ +#define SH7750_CHCR_DSA_CMEM16 0x0A000000 /* 16-bit common memory space */ +#define SH7750_CHCR_DSA_AMEM8 0x0C000000 /* 8-bit attribute memory space */ +#define SH7750_CHCR_DSA_AMEM16 0x0E000000 /* 16-bit attribute memory space */ + +#define SH7750_CHCR_DTC 0x01000000 /* Destination Address Wait Control */ + /* Select, specifies CS5 or CS6 */ + /* space wait control for PCMCIA */ + /* access */ + +#define SH7750_CHCR_DS 0x00080000 /* DREQ\ Select : */ +#define SH7750_CHCR_DS_LOWLVL 0x00000000 /* Low Level Detection */ +#define SH7750_CHCR_DS_FALL 0x00080000 /* Falling Edge Detection */ + +#define SH7750_CHCR_RL 0x00040000 /* Request Check Level: */ +#define SH7750_CHCR_RL_ACTH 0x00000000 /* DRAK is an active high out */ +#define SH7750_CHCR_RL_ACTL 0x00040000 /* DRAK is an active low out */ + +#define SH7750_CHCR_AM 0x00020000 /* Acknowledge Mode: */ +#define SH7750_CHCR_AM_RD 0x00000000 /* DACK is output in read cycle */ +#define SH7750_CHCR_AM_WR 0x00020000 /* DACK is output in write cycle */ + +#define SH7750_CHCR_AL 0x00010000 /* Acknowledge Level: */ +#define SH7750_CHCR_AL_ACTH 0x00000000 /* DACK is an active high out */ +#define SH7750_CHCR_AL_ACTL 0x00010000 /* DACK is an active low out */ + +#define SH7750_CHCR_DM 0x0000C000 /* Destination Address Mode: */ +#define SH7750_CHCR_DM_FIX 0x00000000 /* Destination Addr Fixed */ +#define SH7750_CHCR_DM_INC 0x00004000 /* Destination Addr Incremented */ +#define SH7750_CHCR_DM_DEC 0x00008000 /* Destination Addr Decremented */ + +#define SH7750_CHCR_SM 0x00003000 /* Source Address Mode: */ +#define SH7750_CHCR_SM_FIX 0x00000000 /* Source Addr Fixed */ +#define SH7750_CHCR_SM_INC 0x00001000 /* Source Addr Incremented */ +#define SH7750_CHCR_SM_DEC 0x00002000 /* Source Addr Decremented */ + +#define SH7750_CHCR_RS 0x00000F00 /* Request Source Select: */ +#define SH7750_CHCR_RS_ER_DA_EA_TO_EA 0x000 /* External Request, Dual Addr */ + /* Mode, External Addr Space */ + /* -> External Addr Space) */ +#define SH7750_CHCR_RS_ER_SA_EA_TO_ED 0x200 /* External Request, Single */ + /* Address Mode (Ext. Addr */ + /* Space -> External Device) */ +#define SH7750_CHCR_RS_ER_SA_ED_TO_EA 0x300 /* External Request, Single */ + /* Address Mode, (External */ + /* Device -> External Addr */ + /* Space) */ +#define SH7750_CHCR_RS_AR_EA_TO_EA 0x400 /* Auto-Request (External Addr */ + /* Space -> Ext. 
Addr Space) */ + +#define SH7750_CHCR_RS_AR_EA_TO_OCP 0x500 /* Auto-Request (External Addr */ + /* Space -> On-chip */ + /* Peripheral Module) */ +#define SH7750_CHCR_RS_AR_OCP_TO_EA 0x600 /* Auto-Request (On-chip */ + /* Peripheral Module -> */ + /* External Addr Space */ +#define SH7750_CHCR_RS_SCITX_EA_TO_SC 0x800 /* SCI Transmit-Data-Empty intr */ + /* transfer request (external */ + /* address space -> SCTDR1) */ +#define SH7750_CHCR_RS_SCIRX_SC_TO_EA 0x900 /* SCI Receive-Data-Full intr */ + /* transfer request (SCRDR1 */ + /* -> External Addr Space) */ +#define SH7750_CHCR_RS_SCIFTX_EA_TO_SC 0xA00 /* SCIF TX-Data-Empty intr */ + /* transfer request (external */ + /* address space -> SCFTDR1) */ +#define SH7750_CHCR_RS_SCIFRX_SC_TO_EA 0xB00 /* SCIF Receive-Data-Full intr */ + /* transfer request (SCFRDR2 */ + /* -> External Addr Space) */ +#define SH7750_CHCR_RS_TMU2_EA_TO_EA 0xC00 /* TMU Channel 2 (input capture */ + /* interrupt), (external */ + /* address space -> external */ + /* address space) */ +#define SH7750_CHCR_RS_TMU2_EA_TO_OCP 0xD00 /* TMU Channel 2 (input capture */ + /* interrupt), (external */ + /* address space -> on-chip */ + /* peripheral module) */ +#define SH7750_CHCR_RS_TMU2_OCP_TO_EA 0xE00 /* TMU Channel 2 (input capture */ + /* interrupt), (on-chip */ + /* peripheral module -> */ + /* external address space) */ + +#define SH7750_CHCR_TM 0x00000080 /* Transmit mode: */ +#define SH7750_CHCR_TM_CSTEAL 0x00000000 /* Cycle Steal Mode */ +#define SH7750_CHCR_TM_BURST 0x00000080 /* Burst Mode */ + +#define SH7750_CHCR_TS 0x00000070 /* Transmit Size: */ +#define SH7750_CHCR_TS_QUAD 0x00000000 /* Quadword Size (64 bits) */ +#define SH7750_CHCR_TS_BYTE 0x00000010 /* Byte Size (8 bit) */ +#define SH7750_CHCR_TS_WORD 0x00000020 /* Word Size (16 bit) */ +#define SH7750_CHCR_TS_LONG 0x00000030 /* Longword Size (32 bit) */ +#define SH7750_CHCR_TS_BLOCK 0x00000040 /* 32-byte block transfer */ + +#define SH7750_CHCR_IE 0x00000004 /* Interrupt Enable */ +#define SH7750_CHCR_TE 0x00000002 /* Transfer End */ +#define SH7750_CHCR_DE 0x00000001 /* DMAC Enable */ /* DMA Operation Register - DMAOR */ -#define SH7750_DMAOR_REGOFS 0xA00040 /* offset */ +#define SH7750_DMAOR_REGOFS 0xA00040 /* offset */ #define SH7750_DMAOR SH7750_P4_REG32(SH7750_DMAOR_REGOFS) #define SH7750_DMAOR_A7 SH7750_A7_REG32(SH7750_DMAOR_REGOFS) -#define SH7750_DMAOR_DDT 0x00008000 /* On-Demand Data Transfer Mode */ +#define SH7750_DMAOR_DDT 0x00008000 /* On-Demand Data Transfer Mode */ -#define SH7750_DMAOR_PR 0x00000300 /* Priority Mode: */ -#define SH7750_DMAOR_PR_0123 0x00000000 /* CH0 > CH1 > CH2 > CH3 */ -#define SH7750_DMAOR_PR_0231 0x00000100 /* CH0 > CH2 > CH3 > CH1 */ -#define SH7750_DMAOR_PR_2013 0x00000200 /* CH2 > CH0 > CH1 > CH3 */ -#define SH7750_DMAOR_PR_RR 0x00000300 /* Round-robin mode */ +#define SH7750_DMAOR_PR 0x00000300 /* Priority Mode: */ +#define SH7750_DMAOR_PR_0123 0x00000000 /* CH0 > CH1 > CH2 > CH3 */ +#define SH7750_DMAOR_PR_0231 0x00000100 /* CH0 > CH2 > CH3 > CH1 */ +#define SH7750_DMAOR_PR_2013 0x00000200 /* CH2 > CH0 > CH1 > CH3 */ +#define SH7750_DMAOR_PR_RR 0x00000300 /* Round-robin mode */ -#define SH7750_DMAOR_COD 0x00000010 /* Check Overrun for DREQ\ */ -#define SH7750_DMAOR_AE 0x00000004 /* Address Error flag */ -#define SH7750_DMAOR_NMIF 0x00000002 /* NMI Flag */ -#define SH7750_DMAOR_DME 0x00000001 /* DMAC Master Enable */ +#define SH7750_DMAOR_COD 0x00000010 /* Check Overrun for DREQ\ */ +#define SH7750_DMAOR_AE 0x00000004 /* Address Error flag */ +#define 
SH7750_DMAOR_NMIF 0x00000002 /* NMI Flag */ +#define SH7750_DMAOR_DME 0x00000001 /* DMAC Master Enable */ /* * I/O Ports */ /* Port Control Register A - PCTRA */ -#define SH7750_PCTRA_REGOFS 0x80002C /* offset */ +#define SH7750_PCTRA_REGOFS 0x80002C /* offset */ #define SH7750_PCTRA SH7750_P4_REG32(SH7750_PCTRA_REGOFS) #define SH7750_PCTRA_A7 SH7750_A7_REG32(SH7750_PCTRA_REGOFS) -#define SH7750_PCTRA_PBPUP(n) 0 /* Bit n is pulled up */ -#define SH7750_PCTRA_PBNPUP(n) (1 << ((n)*2+1)) /* Bit n is not pulled up */ -#define SH7750_PCTRA_PBINP(n) 0 /* Bit n is an input */ -#define SH7750_PCTRA_PBOUT(n) (1 << ((n)*2)) /* Bit n is an output */ +#define SH7750_PCTRA_PBPUP(n) 0 /* Bit n is pulled up */ +#define SH7750_PCTRA_PBNPUP(n) (1 << ((n) * 2 + 1)) /* Bit n is not pulled up */ +#define SH7750_PCTRA_PBINP(n) 0 /* Bit n is an input */ +#define SH7750_PCTRA_PBOUT(n) (1 << ((n) * 2)) /* Bit n is an output */ /* Port Data Register A - PDTRA(half) */ -#define SH7750_PDTRA_REGOFS 0x800030 /* offset */ +#define SH7750_PDTRA_REGOFS 0x800030 /* offset */ #define SH7750_PDTRA SH7750_P4_REG32(SH7750_PDTRA_REGOFS) #define SH7750_PDTRA_A7 SH7750_A7_REG32(SH7750_PDTRA_REGOFS) #define SH7750_PDTRA_BIT(n) (1 << (n)) /* Port Control Register B - PCTRB */ -#define SH7750_PCTRB_REGOFS 0x800040 /* offset */ +#define SH7750_PCTRB_REGOFS 0x800040 /* offset */ #define SH7750_PCTRB SH7750_P4_REG32(SH7750_PCTRB_REGOFS) #define SH7750_PCTRB_A7 SH7750_A7_REG32(SH7750_PCTRB_REGOFS) -#define SH7750_PCTRB_PBPUP(n) 0 /* Bit n is pulled up */ -#define SH7750_PCTRB_PBNPUP(n) (1 << ((n-16)*2+1)) /* Bit n is not pulled up */ -#define SH7750_PCTRB_PBINP(n) 0 /* Bit n is an input */ -#define SH7750_PCTRB_PBOUT(n) (1 << ((n-16)*2)) /* Bit n is an output */ +#define SH7750_PCTRB_PBPUP(n) 0 /* Bit n is pulled up */ +#define SH7750_PCTRB_PBNPUP(n) (1 << ((n - 16) * 2 + 1)) /* Bit n is not pulled up */ +#define SH7750_PCTRB_PBINP(n) 0 /* Bit n is an input */ +#define SH7750_PCTRB_PBOUT(n) (1 << ((n - 16) * 2)) /* Bit n is an output */ /* Port Data Register B - PDTRB(half) */ -#define SH7750_PDTRB_REGOFS 0x800044 /* offset */ +#define SH7750_PDTRB_REGOFS 0x800044 /* offset */ #define SH7750_PDTRB SH7750_P4_REG32(SH7750_PDTRB_REGOFS) #define SH7750_PDTRB_A7 SH7750_A7_REG32(SH7750_PDTRB_REGOFS) -#define SH7750_PDTRB_BIT(n) (1 << ((n)-16)) +#define SH7750_PDTRB_BIT(n) (1 << ((n) - 16)) /* GPIO Interrupt Control Register - GPIOIC(half) */ -#define SH7750_GPIOIC_REGOFS 0x800048 /* offset */ +#define SH7750_GPIOIC_REGOFS 0x800048 /* offset */ #define SH7750_GPIOIC SH7750_P4_REG32(SH7750_GPIOIC_REGOFS) #define SH7750_GPIOIC_A7 SH7750_A7_REG32(SH7750_GPIOIC_REGOFS) -#define SH7750_GPIOIC_PTIREN(n) (1 << (n)) /* Port n is used as a GPIO int */ +#define SH7750_GPIOIC_PTIREN(n) (1 << (n)) /* Port n is used as a GPIO int */ /* * Interrupt Controller - INTC */ /* Interrupt Control Register - ICR (half) */ -#define SH7750_ICR_REGOFS 0xD00000 /* offset */ +#define SH7750_ICR_REGOFS 0xD00000 /* offset */ #define SH7750_ICR SH7750_P4_REG32(SH7750_ICR_REGOFS) #define SH7750_ICR_A7 SH7750_A7_REG32(SH7750_ICR_REGOFS) -#define SH7750_ICR_NMIL 0x8000 /* NMI Input Level */ -#define SH7750_ICR_MAI 0x4000 /* NMI Interrupt Mask */ +#define SH7750_ICR_NMIL 0x8000 /* NMI Input Level */ +#define SH7750_ICR_MAI 0x4000 /* NMI Interrupt Mask */ -#define SH7750_ICR_NMIB 0x0200 /* NMI Block Mode: */ -#define SH7750_ICR_NMIB_BLK 0x0000 /* NMI requests held pending while - SR.BL bit is set to 1 */ -#define SH7750_ICR_NMIB_NBLK 0x0200 /* NMI requests detected when SR.BL 
bit - set to 1 */ +#define SH7750_ICR_NMIB 0x0200 /* NMI Block Mode: */ +#define SH7750_ICR_NMIB_BLK 0x0000 /* NMI requests held pending while */ + /* SR.BL bit is set to 1 */ +#define SH7750_ICR_NMIB_NBLK 0x0200 /* NMI requests detected when SR.BL */ + /* bit set to 1 */ -#define SH7750_ICR_NMIE 0x0100 /* NMI Edge Select: */ -#define SH7750_ICR_NMIE_FALL 0x0000 /* Interrupt request detected on falling - edge of NMI input */ -#define SH7750_ICR_NMIE_RISE 0x0100 /* Interrupt request detected on rising - edge of NMI input */ +#define SH7750_ICR_NMIE 0x0100 /* NMI Edge Select: */ +#define SH7750_ICR_NMIE_FALL 0x0000 /* Interrupt request detected on */ + /* falling edge of NMI input */ +#define SH7750_ICR_NMIE_RISE 0x0100 /* Interrupt request detected on */ + /* rising edge of NMI input */ -#define SH7750_ICR_IRLM 0x0080 /* IRL Pin Mode: */ -#define SH7750_ICR_IRLM_ENC 0x0000 /* IRL\ pins used as a level-encoded - interrupt requests */ -#define SH7750_ICR_IRLM_RAW 0x0080 /* IRL\ pins used as a four independent - interrupt requests */ +#define SH7750_ICR_IRLM 0x0080 /* IRL Pin Mode: */ +#define SH7750_ICR_IRLM_ENC 0x0000 /* IRL\ pins used as a level-encoded */ + /* interrupt requests */ +#define SH7750_ICR_IRLM_RAW 0x0080 /* IRL\ pins used as a four */ + /* independent interrupt requests */ /* * User Break Controller registers */ -#define SH7750_BARA 0x200000 /* Break address regiser A */ -#define SH7750_BAMRA 0x200004 /* Break address mask regiser A */ -#define SH7750_BBRA 0x200008 /* Break bus cycle regiser A */ -#define SH7750_BARB 0x20000c /* Break address regiser B */ -#define SH7750_BAMRB 0x200010 /* Break address mask regiser B */ -#define SH7750_BBRB 0x200014 /* Break bus cycle regiser B */ -#define SH7750_BASRB 0x000018 /* Break ASID regiser B */ -#define SH7750_BDRB 0x200018 /* Break data regiser B */ -#define SH7750_BDMRB 0x20001c /* Break data mask regiser B */ -#define SH7750_BRCR 0x200020 /* Break control register */ - -#define SH7750_BRCR_UDBE 0x0001 /* User break debug enable bit */ +#define SH7750_BARA 0x200000 /* Break address regiser A */ +#define SH7750_BAMRA 0x200004 /* Break address mask regiser A */ +#define SH7750_BBRA 0x200008 /* Break bus cycle regiser A */ +#define SH7750_BARB 0x20000c /* Break address regiser B */ +#define SH7750_BAMRB 0x200010 /* Break address mask regiser B */ +#define SH7750_BBRB 0x200014 /* Break bus cycle regiser B */ +#define SH7750_BASRB 0x000018 /* Break ASID regiser B */ +#define SH7750_BDRB 0x200018 /* Break data regiser B */ +#define SH7750_BDMRB 0x20001c /* Break data mask regiser B */ +#define SH7750_BRCR 0x200020 /* Break control register */ + +#define SH7750_BRCR_UDBE 0x0001 /* User break debug enable bit */ /* * Missing in RTEMS, added for QEMU diff --git a/hw/sh4/shix.c b/hw/sh4/shix.c index d9a9fcbc598..aa812512f0c 100644 --- a/hw/sh4/shix.c +++ b/hw/sh4/shix.c @@ -22,20 +22,18 @@ * THE SOFTWARE. 
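
The CHCR and DMAOR definitions reflowed above are field selectors over two 32-bit DMAC registers. As a purely illustrative sketch (not part of the patch; the variable names are invented), composing a channel-control value for an auto-requested memory-to-memory transfer from these masks would look like:

    /* Illustrative only: auto-request memory-to-memory copy, both addresses
     * incrementing, 32-byte block units, interrupt at transfer end. */
    uint32_t chcr = SH7750_CHCR_RS_AR_EA_TO_EA /* request source: auto, mem -> mem */
                  | SH7750_CHCR_SM_INC         /* source address increments */
                  | SH7750_CHCR_DM_INC         /* destination address increments */
                  | SH7750_CHCR_TS_BLOCK       /* 32-byte block transfer size */
                  | SH7750_CHCR_IE             /* interrupt enable at transfer end */
                  | SH7750_CHCR_DE;            /* channel enable */

    /* Master enable and round-robin priority in the operation register. */
    uint32_t dmaor = SH7750_DMAOR_PR_RR | SH7750_DMAOR_DME;

The hunks themselves only rewrap the comments into QEMU's /* ... */ style and fix the macro spacing; the bit values are unchanged.
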
*/ /* - Shix 2.0 board by Alexis Polti, described at - https://web.archive.org/web/20070917001736/perso.enst.fr/~polti/realisations/shix20 - - More information in target/sh4/README.sh4 -*/ + * Shix 2.0 board by Alexis Polti, described at + * https://web.archive.org/web/20070917001736/perso.enst.fr/~polti/realisations/shix20 + * + * More information in target/sh4/README.sh4 + */ #include "qemu/osdep.h" #include "qapi/error.h" #include "cpu.h" #include "hw/sh4/sh.h" -#include "sysemu/sysemu.h" #include "sysemu/qtest.h" #include "hw/boards.h" #include "hw/loader.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #define BIOS_FILENAME "shix_bios.bin" @@ -50,7 +48,7 @@ static void shix_init(MachineState *machine) MemoryRegion *rom = g_new(MemoryRegion, 1); MemoryRegion *sdram = g_new(MemoryRegion, 2); const char *bios_name = machine->firmware ?: BIOS_FILENAME; - + cpu = SUPERH_CPU(cpu_create(machine->cpu_type)); /* Allocate memory space */ diff --git a/hw/sh4/trace-events b/hw/sh4/trace-events new file mode 100644 index 00000000000..4b61cd56c89 --- /dev/null +++ b/hw/sh4/trace-events @@ -0,0 +1,3 @@ +# sh7750.c +sh7750_porta(uint16_t prev, uint16_t cur, uint16_t pdtr, uint16_t pctr) "porta changed from 0x%04x to 0x%04x\npdtra=0x%04x, pctra=0x%08x" +sh7750_portb(uint16_t prev, uint16_t cur, uint16_t pdtr, uint16_t pctr) "portb changed from 0x%04x to 0x%04x\npdtrb=0x%04x, pctrb=0x%08x" diff --git a/hw/sh4/trace.h b/hw/sh4/trace.h new file mode 100644 index 00000000000..e2c13323b7a --- /dev/null +++ b/hw/sh4/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_sh4.h" diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index f22c4f5b734..7397e567373 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -27,6 +27,7 @@ #include "hw/firmware/smbios.h" #include "hw/loader.h" #include "hw/boards.h" +#include "hw/pci/pci_bus.h" #include "smbios_build.h" /* legacy structures and constants for <= 2.0 machines */ @@ -118,6 +119,28 @@ static struct { uint16_t speed; } type17; +static QEnumLookup type41_kind_lookup = { + .array = (const char *const[]) { + "other", + "unknown", + "video", + "scsi", + "ethernet", + "tokenring", + "sound", + "pata", + "sata", + "sas", + }, + .size = 10 +}; +struct type41_instance { + const char *designation, *pcidev; + uint8_t instance, kind; + QTAILQ_ENTRY(type41_instance) next; +}; +static QTAILQ_HEAD(, type41_instance) type41 = QTAILQ_HEAD_INITIALIZER(type41); + static QemuOptsList qemu_smbios_opts = { .name = "smbios", .head = QTAILQ_HEAD_INITIALIZER(qemu_smbios_opts.head), @@ -358,6 +381,32 @@ static const QemuOptDesc qemu_smbios_type17_opts[] = { { /* end of list */ } }; +static const QemuOptDesc qemu_smbios_type41_opts[] = { + { + .name = "type", + .type = QEMU_OPT_NUMBER, + .help = "SMBIOS element type", + },{ + .name = "designation", + .type = QEMU_OPT_STRING, + .help = "reference designation string", + },{ + .name = "kind", + .type = QEMU_OPT_STRING, + .help = "device type", + .def_value_str = "other", + },{ + .name = "instance", + .type = QEMU_OPT_NUMBER, + .help = "device type instance", + },{ + .name = "pcidev", + .type = QEMU_OPT_STRING, + .help = "PCI device", + }, + { /* end of list */ } +}; + static void smbios_register_config(void) { qemu_add_opts(&qemu_smbios_opts); @@ -773,6 +822,53 @@ static void smbios_build_type_32_table(void) SMBIOS_BUILD_TABLE_POST; } +static void smbios_build_type_41_table(Error **errp) +{ + unsigned instance = 0; + struct type41_instance *t41; + + QTAILQ_FOREACH(t41, &type41, next) { + SMBIOS_BUILD_TABLE_PRE(41, 
0x2900 + instance, true); + + SMBIOS_TABLE_SET_STR(41, reference_designation_str, t41->designation); + t->device_type = t41->kind; + t->device_type_instance = t41->instance; + t->segment_group_number = cpu_to_le16(0); + t->bus_number = 0; + t->device_number = 0; + + if (t41->pcidev) { + PCIDevice *pdev = NULL; + int rc = pci_qdev_find_device(t41->pcidev, &pdev); + if (rc != 0) { + error_setg(errp, + "No PCI device %s for SMBIOS type 41 entry %s", + t41->pcidev, t41->designation); + return; + } + /* + * We only handle the case were the device is attached to + * the PCI root bus. The general case is more complex as + * bridges are enumerated later and the table would need + * to be updated at this moment. + */ + if (!pci_bus_is_root(pci_get_bus(pdev))) { + error_setg(errp, + "Cannot create type 41 entry for PCI device %s: " + "not attached to the root bus", + t41->pcidev); + return; + } + t->segment_group_number = cpu_to_le16(0); + t->bus_number = pci_dev_bus_num(pdev); + t->device_number = pdev->devfn; + } + + SMBIOS_BUILD_TABLE_POST; + instance++; + } +} + static void smbios_build_type_127_table(void) { SMBIOS_BUILD_TABLE_PRE(127, 0x7F00, true); /* required */ @@ -883,7 +979,8 @@ void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, const unsigned int mem_array_size, uint8_t **tables, size_t *tables_len, - uint8_t **anchor, size_t *anchor_len) + uint8_t **anchor, size_t *anchor_len, + Error **errp) { unsigned i, dimm_cnt; @@ -928,6 +1025,7 @@ void smbios_get_tables(MachineState *ms, smbios_build_type_32_table(); smbios_build_type_38_table(); + smbios_build_type_41_table(errp); smbios_build_type_127_table(); smbios_validate_table(ms); @@ -1224,6 +1322,30 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) save_opt(&type17.part, opts, "part"); type17.speed = qemu_opt_get_number(opts, "speed", 0); return; + case 41: { + struct type41_instance *t; + Error *local_err = NULL; + + if (!qemu_opts_validate(opts, qemu_smbios_type41_opts, errp)) { + return; + } + t = g_new0(struct type41_instance, 1); + save_opt(&t->designation, opts, "designation"); + t->kind = qapi_enum_parse(&type41_kind_lookup, + qemu_opt_get(opts, "kind"), + 0, &local_err) + 1; + t->kind |= 0x80; /* enabled */ + if (local_err != NULL) { + error_propagate(errp, local_err); + g_free(t); + return; + } + t->instance = qemu_opt_get_number(opts, "instance", 1); + save_opt(&t->pcidev, opts, "pcidev"); + + QTAILQ_INSERT_TAIL(&type41, t, next); + return; + } default: error_setg(errp, "Don't know how to build fields for SMBIOS type %ld", diff --git a/hw/sparc/Kconfig b/hw/sparc/Kconfig index 8dcb10086fd..79d58beb7a6 100644 --- a/hw/sparc/Kconfig +++ b/hw/sparc/Kconfig @@ -8,7 +8,7 @@ config SUN4M select UNIMP select ESCC select ESP - select FDC + select FDC_SYSBUS select SLAVIO select LANCE select M48T59 diff --git a/hw/sparc/leon3.c b/hw/sparc/leon3.c index 7e16eea9e67..7b4dec17211 100644 --- a/hw/sparc/leon3.c +++ b/hw/sparc/leon3.c @@ -40,7 +40,6 @@ #include "hw/loader.h" #include "elf.h" #include "trace.h" -#include "exec/address-spaces.h" #include "hw/sparc/grlib.h" #include "hw/misc/grlib_ahb_apb_pnp.h" @@ -137,7 +136,36 @@ static void main_cpu_reset(void *opaque) env->regbase[6] = s->sp; } -void leon3_irq_ack(void *irq_manager, int intno) +static void leon3_cache_control_int(CPUSPARCState *env) +{ + uint32_t state = 0; + + if (env->cache_control & CACHE_CTRL_IF) { + /* Instruction cache state */ + state = env->cache_control & CACHE_STATE_MASK; + if (state == CACHE_ENABLED) { + state = 
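
The type-41 option table and the case 41 branch above define the user-facing interface: -smbios type=41 with designation, kind, instance and pcidev keys, where pcidev is looked up by qdev id via pci_qdev_find_device() and must resolve to a device sitting on the PCI root bus. A hypothetical invocation (machine choice, netdev and device ids invented for illustration) that tags a user-created NIC as onboard Ethernet instance 1 could look like:

    qemu-system-x86_64 -machine q35 \
        -netdev user,id=net0 \
        -device e1000e,netdev=net0,id=onboard-lan \
        -smbios 'type=41,designation=Onboard LAN,instance=1,kind=ethernet,pcidev=onboard-lan'

The parsed kind is incremented by one and OR-ed with 0x80, which follows the SMBIOS type 41 device-type encoding (1 = other, 5 = ethernet, ..., bit 7 = device enabled).
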
CACHE_FROZEN; + trace_int_helper_icache_freeze(); + } + + env->cache_control &= ~CACHE_STATE_MASK; + env->cache_control |= state; + } + + if (env->cache_control & CACHE_CTRL_DF) { + /* Data cache state */ + state = (env->cache_control >> 2) & CACHE_STATE_MASK; + if (state == CACHE_ENABLED) { + state = CACHE_FROZEN; + trace_int_helper_dcache_freeze(); + } + + env->cache_control &= ~(CACHE_STATE_MASK << 2); + env->cache_control |= (state << 2); + } +} + +static void leon3_irq_ack(void *irq_manager, int intno) { grlib_irqmp_ack((DeviceState *)irq_manager, intno); } @@ -181,6 +209,12 @@ static void leon3_set_pil_in(void *opaque, int n, int level) } } +static void leon3_irq_manager(CPUSPARCState *env, void *irq_manager, int intno) +{ + leon3_irq_ack(irq_manager, intno); + leon3_cache_control_int(env); +} + static void leon3_generic_hw_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c index 1a00816d9a8..7f3a7c00278 100644 --- a/hw/sparc/sun4m.c +++ b/hw/sparc/sun4m.c @@ -107,6 +107,17 @@ struct sun4m_hwdef { uint8_t nvram_machine_id; }; +struct Sun4mMachineClass { + /*< private >*/ + MachineClass parent_obj; + /*< public >*/ + const struct sun4m_hwdef *hwdef; +}; +typedef struct Sun4mMachineClass Sun4mMachineClass; + +#define TYPE_SUN4M_MACHINE MACHINE_TYPE_NAME("sun4m-common") +DECLARE_CLASS_CHECKERS(Sun4mMachineClass, SUN4M_MACHINE, TYPE_SUN4M_MACHINE) + const char *fw_cfg_arch_key_name(uint16_t key) { static const struct { @@ -159,38 +170,6 @@ static void nvram_init(Nvram *nvram, uint8_t *macaddr, } } -void cpu_check_irqs(CPUSPARCState *env) -{ - CPUState *cs; - - /* We should be holding the BQL before we mess with IRQs */ - g_assert(qemu_mutex_iothread_locked()); - - if (env->pil_in && (env->interrupt_index == 0 || - (env->interrupt_index & ~15) == TT_EXTINT)) { - unsigned int i; - - for (i = 15; i > 0; i--) { - if (env->pil_in & (1 << i)) { - int old_interrupt = env->interrupt_index; - - env->interrupt_index = TT_EXTINT | i; - if (old_interrupt != env->interrupt_index) { - cs = env_cpu(env); - trace_sun4m_cpu_interrupt(i); - cpu_interrupt(cs, CPU_INTERRUPT_HARD); - } - break; - } - } - } else if (!env->pil_in && (env->interrupt_index & ~15) == TT_EXTINT) { - cs = env_cpu(env); - trace_sun4m_cpu_reset_interrupt(env->interrupt_index & 15); - env->interrupt_index = 0; - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } -} - static void cpu_kick_irq(SPARCCPU *cpu) { CPUSPARCState *env = &cpu->env; @@ -824,11 +803,11 @@ static void cpu_devinit(const char *cpu_type, unsigned int id, cpu = SPARC_CPU(object_new(cpu_type)); env = &cpu->env; - cpu_sparc_set_id(env, id); qemu_register_reset(sun4m_cpu_reset, cpu); object_property_set_bool(OBJECT(cpu), "start-powered-off", id != 0, &error_fatal); qdev_realize_and_unref(DEVICE(cpu), NULL, &error_fatal); + cpu_sparc_set_id(env, id); *cpu_irqs = qemu_allocate_irqs(cpu_set_irq, cpu, MAX_PILS); env->prom_addr = prom_addr; } @@ -837,9 +816,9 @@ static void dummy_fdc_tc(void *opaque, int irq, int level) { } -static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, - MachineState *machine) +static void sun4m_hw_init(MachineState *machine) { + const struct sun4m_hwdef *hwdef = SUN4M_MACHINE_GET_CLASS(machine)->hwdef; DeviceState *slavio_intctl; unsigned int i; Nvram *nvram; @@ -1127,9 +1106,22 @@ enum { ss600mp_id, }; -static const struct sun4m_hwdef sun4m_hwdefs[] = { - /* SS-5 */ - { +static void sun4m_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = 
MACHINE_CLASS(oc); + + mc->init = sun4m_hw_init; + mc->block_default_type = IF_SCSI; + mc->default_boot_order = "c"; + mc->default_display = "tcx"; + mc->default_ram_id = "sun4m.ram"; +} + +static void ss5_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef ss5_hwdef = { .iommu_base = 0x10000000, .iommu_pad_base = 0x10004000, .iommu_pad_len = 0x0fffb000, @@ -1154,9 +1146,19 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .machine_id = ss5_id, .iommu_version = 0x05000000, .max_mem = 0x10000000, - }, - /* SS-10 */ - { + }; + + mc->desc = "Sun4m platform, SPARCstation 5"; + mc->is_default = true; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904"); + smc->hwdef = &ss5_hwdef; +} + +static void ss10_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef ss10_hwdef = { .iommu_base = 0xfe0000000ULL, .tcx_base = 0xe20000000ULL, .slavio_base = 0xff0000000ULL, @@ -1170,18 +1172,28 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .dma_base = 0xef0400000ULL, .esp_base = 0xef0800000ULL, .le_base = 0xef0c00000ULL, - .apc_base = 0xefa000000ULL, // XXX should not exist + .apc_base = 0xefa000000ULL, /* XXX should not exist */ .aux1_base = 0xff1800000ULL, .aux2_base = 0xff1a01000ULL, .ecc_base = 0xf00000000ULL, - .ecc_version = 0x10000000, // version 0, implementation 1 + .ecc_version = 0x10000000, /* version 0, implementation 1 */ .nvram_machine_id = 0x72, .machine_id = ss10_id, .iommu_version = 0x03000000, .max_mem = 0xf00000000ULL, - }, - /* SS-600MP */ - { + }; + + mc->desc = "Sun4m platform, SPARCstation 10"; + mc->max_cpus = 4; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-SuperSparc-II"); + smc->hwdef = &ss10_hwdef; +} + +static void ss600mp_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef ss600mp_hwdef = { .iommu_base = 0xfe0000000ULL, .tcx_base = 0xe20000000ULL, .slavio_base = 0xff0000000ULL, @@ -1193,18 +1205,28 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .dma_base = 0xef0081000ULL, .esp_base = 0xef0080000ULL, .le_base = 0xef0060000ULL, - .apc_base = 0xefa000000ULL, // XXX should not exist + .apc_base = 0xefa000000ULL, /* XXX should not exist */ .aux1_base = 0xff1800000ULL, - .aux2_base = 0xff1a01000ULL, // XXX should not exist + .aux2_base = 0xff1a01000ULL, /* XXX should not exist */ .ecc_base = 0xf00000000ULL, - .ecc_version = 0x00000000, // version 0, implementation 0 + .ecc_version = 0x00000000, /* version 0, implementation 0 */ .nvram_machine_id = 0x71, .machine_id = ss600mp_id, .iommu_version = 0x01000000, .max_mem = 0xf00000000ULL, - }, - /* SS-20 */ - { + }; + + mc->desc = "Sun4m platform, SPARCserver 600MP"; + mc->max_cpus = 4; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-SuperSparc-II"); + smc->hwdef = &ss600mp_hwdef; +} + +static void ss20_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef ss20_hwdef = { .iommu_base = 0xfe0000000ULL, .tcx_base = 0xe20000000ULL, .slavio_base = 0xff0000000ULL, @@ -1219,7 +1241,7 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .esp_base = 0xef0800000ULL, .le_base = 0xef0c00000ULL, .bpp_base = 0xef4800000ULL, - .apc_base = 0xefa000000ULL, // XXX should 
not exist + .apc_base = 0xefa000000ULL, /* XXX should not exist */ .aux1_base = 0xff1800000ULL, .aux2_base = 0xff1a01000ULL, .dbri_base = 0xee0000000ULL, @@ -1238,14 +1260,24 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { } }, .ecc_base = 0xf00000000ULL, - .ecc_version = 0x20000000, // version 0, implementation 2 + .ecc_version = 0x20000000, /* version 0, implementation 2 */ .nvram_machine_id = 0x72, .machine_id = ss20_id, .iommu_version = 0x13000000, .max_mem = 0xf00000000ULL, - }, - /* Voyager */ - { + }; + + mc->desc = "Sun4m platform, SPARCstation 20"; + mc->max_cpus = 4; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-SuperSparc-II"); + smc->hwdef = &ss20_hwdef; +} + +static void voyager_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef voyager_hwdef = { .iommu_base = 0x10000000, .tcx_base = 0x50000000, .slavio_base = 0x70000000, @@ -1259,16 +1291,25 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .dma_base = 0x78400000, .esp_base = 0x78800000, .le_base = 0x78c00000, - .apc_base = 0x71300000, // pmc + .apc_base = 0x71300000, /* pmc */ .aux1_base = 0x71900000, .aux2_base = 0x71910000, .nvram_machine_id = 0x80, .machine_id = vger_id, .iommu_version = 0x05000000, .max_mem = 0x10000000, - }, - /* LX */ - { + }; + + mc->desc = "Sun4m platform, SPARCstation Voyager"; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904"); + smc->hwdef = &voyager_hwdef; +} + +static void ss_lx_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef ss_lx_hwdef = { .iommu_base = 0x10000000, .iommu_pad_base = 0x10004000, .iommu_pad_len = 0x0fffb000, @@ -1290,9 +1331,18 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .machine_id = lx_id, .iommu_version = 0x04000000, .max_mem = 0x10000000, - }, - /* SS-4 */ - { + }; + + mc->desc = "Sun4m platform, SPARCstation LX"; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-MicroSparc-I"); + smc->hwdef = &ss_lx_hwdef; +} + +static void ss4_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef ss4_hwdef = { .iommu_base = 0x10000000, .tcx_base = 0x50000000, .cs_base = 0x6c000000, @@ -1314,9 +1364,18 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .machine_id = ss4_id, .iommu_version = 0x05000000, .max_mem = 0x10000000, - }, - /* SPARCClassic */ - { + }; + + mc->desc = "Sun4m platform, SPARCstation 4"; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904"); + smc->hwdef = &ss4_hwdef; +} + +static void scls_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef scls_hwdef = { .iommu_base = 0x10000000, .tcx_base = 0x50000000, .slavio_base = 0x70000000, @@ -1337,11 +1396,20 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .machine_id = scls_id, .iommu_version = 0x05000000, .max_mem = 0x10000000, - }, - /* SPARCbook */ - { + }; + + mc->desc = "Sun4m platform, SPARCClassic"; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-MicroSparc-I"); + smc->hwdef = &scls_hwdef; +} + +static void sbook_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef sbook_hwdef = { 
.iommu_base = 0x10000000, - .tcx_base = 0x50000000, // XXX + .tcx_base = 0x50000000, /* XXX */ .slavio_base = 0x70000000, .ms_kb_base = 0x71000000, .serial_base = 0x71100000, @@ -1360,254 +1428,67 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .machine_id = sbook_id, .iommu_version = 0x05000000, .max_mem = 0x10000000, - }, -}; - -/* SPARCstation 5 hardware initialisation */ -static void ss5_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[0], machine); -} - -/* SPARCstation 10 hardware initialisation */ -static void ss10_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[1], machine); -} - -/* SPARCserver 600MP hardware initialisation */ -static void ss600mp_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[2], machine); -} - -/* SPARCstation 20 hardware initialisation */ -static void ss20_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[3], machine); -} - -/* SPARCstation Voyager hardware initialisation */ -static void vger_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[4], machine); -} - -/* SPARCstation LX hardware initialisation */ -static void ss_lx_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[5], machine); -} - -/* SPARCstation 4 hardware initialisation */ -static void ss4_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[6], machine); -} - -/* SPARCClassic hardware initialisation */ -static void scls_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[7], machine); -} - -/* SPARCbook hardware initialisation */ -static void sbook_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[8], machine); -} - -static void ss5_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCstation 5"; - mc->init = ss5_init; - mc->block_default_type = IF_SCSI; - mc->is_default = true; - mc->default_boot_order = "c"; - mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo ss5_type = { - .name = MACHINE_TYPE_NAME("SS-5"), - .parent = TYPE_MACHINE, - .class_init = ss5_class_init, -}; - -static void ss10_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCstation 10"; - mc->init = ss10_init; - mc->block_default_type = IF_SCSI; - mc->max_cpus = 4; - mc->default_boot_order = "c"; - mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-SuperSparc-II"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo ss10_type = { - .name = MACHINE_TYPE_NAME("SS-10"), - .parent = TYPE_MACHINE, - .class_init = ss10_class_init, -}; - -static void ss600mp_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCserver 600MP"; - mc->init = ss600mp_init; - mc->block_default_type = IF_SCSI; - mc->max_cpus = 4; - mc->default_boot_order = "c"; - mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-SuperSparc-II"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo ss600mp_type = { - .name = MACHINE_TYPE_NAME("SS-600MP"), - .parent = TYPE_MACHINE, - .class_init = ss600mp_class_init, -}; - -static void ss20_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCstation 20"; - mc->init = ss20_init; - mc->block_default_type = IF_SCSI; - mc->max_cpus = 4; - mc->default_boot_order = "c"; - 
mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-SuperSparc-II"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo ss20_type = { - .name = MACHINE_TYPE_NAME("SS-20"), - .parent = TYPE_MACHINE, - .class_init = ss20_class_init, -}; - -static void voyager_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCstation Voyager"; - mc->init = vger_init; - mc->block_default_type = IF_SCSI; - mc->default_boot_order = "c"; - mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo voyager_type = { - .name = MACHINE_TYPE_NAME("Voyager"), - .parent = TYPE_MACHINE, - .class_init = voyager_class_init, -}; - -static void ss_lx_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCstation LX"; - mc->init = ss_lx_init; - mc->block_default_type = IF_SCSI; - mc->default_boot_order = "c"; - mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-MicroSparc-I"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo ss_lx_type = { - .name = MACHINE_TYPE_NAME("LX"), - .parent = TYPE_MACHINE, - .class_init = ss_lx_class_init, -}; - -static void ss4_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCstation 4"; - mc->init = ss4_init; - mc->block_default_type = IF_SCSI; - mc->default_boot_order = "c"; - mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo ss4_type = { - .name = MACHINE_TYPE_NAME("SS-4"), - .parent = TYPE_MACHINE, - .class_init = ss4_class_init, -}; - -static void scls_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCClassic"; - mc->init = scls_init; - mc->block_default_type = IF_SCSI; - mc->default_boot_order = "c"; - mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-MicroSparc-I"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo scls_type = { - .name = MACHINE_TYPE_NAME("SPARCClassic"), - .parent = TYPE_MACHINE, - .class_init = scls_class_init, -}; - -static void sbook_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); + }; mc->desc = "Sun4m platform, SPARCbook"; - mc->init = sbook_init; - mc->block_default_type = IF_SCSI; - mc->default_boot_order = "c"; mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-MicroSparc-I"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; + smc->hwdef = &sbook_hwdef; } -static const TypeInfo sbook_type = { - .name = MACHINE_TYPE_NAME("SPARCbook"), - .parent = TYPE_MACHINE, - .class_init = sbook_class_init, +static const TypeInfo sun4m_machine_types[] = { + { + .name = MACHINE_TYPE_NAME("SS-5"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = ss5_class_init, + }, { + .name = MACHINE_TYPE_NAME("SS-10"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = ss10_class_init, + }, { + .name = MACHINE_TYPE_NAME("SS-600MP"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = ss600mp_class_init, + }, { + .name = MACHINE_TYPE_NAME("SS-20"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = ss20_class_init, + }, { + .name = MACHINE_TYPE_NAME("Voyager"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = voyager_class_init, + }, { + .name = 
MACHINE_TYPE_NAME("LX"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = ss_lx_class_init, + }, { + .name = MACHINE_TYPE_NAME("SS-4"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = ss4_class_init, + }, { + .name = MACHINE_TYPE_NAME("SPARCClassic"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = scls_class_init, + }, { + .name = MACHINE_TYPE_NAME("SPARCbook"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = sbook_class_init, + }, { + .name = TYPE_SUN4M_MACHINE, + .parent = TYPE_MACHINE, + .class_size = sizeof(Sun4mMachineClass), + .class_init = sun4m_machine_class_init, + .abstract = true, + } }; +DEFINE_TYPES(sun4m_machine_types) + static void sun4m_register_types(void) { type_register_static(&idreg_info); type_register_static(&afx_info); type_register_static(&prom_info); type_register_static(&ram_info); - - type_register_static(&ss5_type); - type_register_static(&ss10_type); - type_register_static(&ss600mp_type); - type_register_static(&ss20_type); - type_register_static(&voyager_type); - type_register_static(&ss_lx_type); - type_register_static(&ss4_type); - type_register_static(&scls_type); - type_register_static(&sbook_type); } type_init(sun4m_register_types) diff --git a/hw/sparc/trace-events b/hw/sparc/trace-events index 355b07ae057..00b0212c3bd 100644 --- a/hw/sparc/trace-events +++ b/hw/sparc/trace-events @@ -1,8 +1,6 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # sun4m.c -sun4m_cpu_interrupt(unsigned int level) "Set CPU IRQ %d" -sun4m_cpu_reset_interrupt(unsigned int level) "Reset CPU IRQ %d" sun4m_cpu_set_irq_raise(int level) "Raise CPU IRQ %d" sun4m_cpu_set_irq_lower(int level) "Lower CPU IRQ %d" @@ -19,3 +17,5 @@ sun4m_iommu_bad_addr(uint64_t addr) "bad addr 0x%"PRIx64 # leon3.c leon3_set_irq(int intno) "Set CPU IRQ %d" leon3_reset_irq(int intno) "Reset CPU IRQ %d" +int_helper_icache_freeze(void) "Instruction cache: freeze" +int_helper_dcache_freeze(void) "Data cache: freeze" diff --git a/hw/sparc64/Kconfig b/hw/sparc64/Kconfig index 980a201bb73..7e557ad17b0 100644 --- a/hw/sparc64/Kconfig +++ b/hw/sparc64/Kconfig @@ -6,7 +6,7 @@ config SUN4U imply PARALLEL select M48T59 select ISA_BUS - select FDC + select FDC_ISA select SERIAL_ISA select PCI_SABRE select IDE_CMD646 diff --git a/hw/sparc64/niagara.c b/hw/sparc64/niagara.c index a87d55f6bb1..f3e42d03266 100644 --- a/hw/sparc64/niagara.c +++ b/hw/sparc64/niagara.c @@ -31,7 +31,6 @@ #include "hw/loader.h" #include "hw/sparc/sparc64.h" #include "hw/rtc/sun4v-rtc.h" -#include "exec/address-spaces.h" #include "sysemu/block-backend.h" #include "qemu/error-report.h" #include "sysemu/qtest.h" diff --git a/hw/sparc64/sparc64.c b/hw/sparc64/sparc64.c index e3f9219a101..8654e955eb1 100644 --- a/hw/sparc64/sparc64.c +++ b/hw/sparc64/sparc64.c @@ -26,7 +26,6 @@ #include "qemu/osdep.h" #include "cpu.h" #include "hw/boards.h" -#include "hw/char/serial.h" #include "hw/sparc/sparc64.h" #include "qemu/timer.h" #include "sysemu/reset.h" @@ -35,68 +34,6 @@ #define TICK_MAX 0x7fffffffffffffffULL -void cpu_check_irqs(CPUSPARCState *env) -{ - CPUState *cs; - uint32_t pil = env->pil_in | - (env->softint & ~(SOFTINT_TIMER | SOFTINT_STIMER)); - - /* We should be holding the BQL before we mess with IRQs */ - g_assert(qemu_mutex_iothread_locked()); - - /* TT_IVEC has a higher priority (16) than TT_EXTINT (31..17) */ - if (env->ivec_status & 0x20) { - return; - } - cs = env_cpu(env); - /* check if TM or SM in SOFTINT are set - setting these also causes interrupt 14 */ - if 
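
With the sun4m_hwdefs[] array and the per-board *_init() trampolines removed, each board is now a subclass of the abstract TYPE_SUN4M_MACHINE and carries its hwdef in the class structure, which sun4m_hw_init() fetches through SUN4M_MACHINE_GET_CLASS(machine)->hwdef. As a sketch only (every name below is invented), a further variant would need nothing more than a class_init and one entry in sun4m_machine_types[]:

    /* Hypothetical example, not part of the patch. */
    static void example_class_init(ObjectClass *oc, void *data)
    {
        MachineClass *mc = MACHINE_CLASS(oc);
        Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc);
        static const struct sun4m_hwdef example_hwdef = {
            .iommu_base = 0x10000000,
            .max_mem    = 0x10000000,
            /* ... remaining fields as required by sun4m_hw_init() ... */
        };

        mc->desc = "Sun4m platform, example board";
        mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904");
        smc->hwdef = &example_hwdef;
    }
    /* plus, in sun4m_machine_types[]:
     *   { .name = MACHINE_TYPE_NAME("Example"),
     *     .parent = TYPE_SUN4M_MACHINE,
     *     .class_init = example_class_init },
     */

The shared defaults (SCSI block devices, boot order "c", tcx display, "sun4m.ram" RAM id) come from sun4m_machine_class_init() on the abstract parent type.
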
(env->softint & (SOFTINT_TIMER | SOFTINT_STIMER)) { - pil |= 1 << 14; - } - - /* The bit corresponding to psrpil is (1<< psrpil), the next bit - is (2 << psrpil). */ - if (pil < (2 << env->psrpil)) { - if (cs->interrupt_request & CPU_INTERRUPT_HARD) { - trace_sparc64_cpu_check_irqs_reset_irq(env->interrupt_index); - env->interrupt_index = 0; - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } - return; - } - - if (cpu_interrupts_enabled(env)) { - - unsigned int i; - - for (i = 15; i > env->psrpil; i--) { - if (pil & (1 << i)) { - int old_interrupt = env->interrupt_index; - int new_interrupt = TT_EXTINT | i; - - if (unlikely(env->tl > 0 && cpu_tsptr(env)->tt > new_interrupt - && ((cpu_tsptr(env)->tt & 0x1f0) == TT_EXTINT))) { - trace_sparc64_cpu_check_irqs_noset_irq(env->tl, - cpu_tsptr(env)->tt, - new_interrupt); - } else if (old_interrupt != new_interrupt) { - env->interrupt_index = new_interrupt; - trace_sparc64_cpu_check_irqs_set_irq(i, old_interrupt, - new_interrupt); - cpu_interrupt(cs, CPU_INTERRUPT_HARD); - } - break; - } - } - } else if (cs->interrupt_request & CPU_INTERRUPT_HARD) { - trace_sparc64_cpu_check_irqs_disabled(pil, env->pil_in, env->softint, - env->interrupt_index); - env->interrupt_index = 0; - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } -} - static void cpu_kick_irq(SPARCCPU *cpu) { CPUState *cs = CPU(cpu); diff --git a/hw/sparc64/trace-events b/hw/sparc64/trace-events index a0b29987d2b..3eb4bacf796 100644 --- a/hw/sparc64/trace-events +++ b/hw/sparc64/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # sun4u.c ebus_isa_irq_handler(int n, int level) "Set ISA IRQ %d level %d" @@ -9,10 +9,6 @@ sun4u_iommu_mem_write(uint64_t addr, uint64_t val, int size) "addr: 0x%"PRIx64" sun4u_iommu_translate(uint64_t addr, uint64_t trans_addr, uint64_t tte) "xlate 0x%"PRIx64" => pa 0x%"PRIx64" tte: 0x%"PRIx64 # sparc64.c -sparc64_cpu_check_irqs_reset_irq(int intno) "Reset CPU IRQ (current interrupt 0x%x)" -sparc64_cpu_check_irqs_noset_irq(uint32_t tl, uint32_t tt, int intno) "Not setting CPU IRQ: TL=%d current 0x%x >= pending 0x%x" -sparc64_cpu_check_irqs_set_irq(unsigned int i, int old, int new) "Set CPU IRQ %d old=0x%x new=0x%x" -sparc64_cpu_check_irqs_disabled(uint32_t pil, uint32_t pil_in, uint32_t softint, int intno) "Interrupts disabled, pil=0x%08x pil_in=0x%08x softint=0x%08x current interrupt 0x%x" sparc64_cpu_ivec_raise_irq(int irq) "Raise IVEC IRQ %d" sparc64_cpu_ivec_lower_irq(int irq) "Lower IVEC IRQ %d" sparc64_cpu_tick_irq_disabled(void) "tick_irq: softint disabled" diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c index 16addee4dc8..ff154eb84f8 100644 --- a/hw/ssi/aspeed_smc.c +++ b/hw/ssi/aspeed_smc.c @@ -29,7 +29,6 @@ #include "qemu/module.h" #include "qemu/error-report.h" #include "qapi/error.h" -#include "exec/address-spaces.h" #include "qemu/units.h" #include "trace.h" @@ -125,8 +124,17 @@ /* SPI dummy cycle data */ #define R_DUMMY_DATA (0x54 / 4) +/* FMC_WDT2 Control/Status Register for Alternate Boot (AST2600) */ +#define R_FMC_WDT2_CTRL (0x64 / 4) +#define FMC_WDT2_CTRL_ALT_BOOT_MODE BIT(6) /* O: 2 chips 1: 1 chip */ +#define FMC_WDT2_CTRL_SINGLE_BOOT_MODE BIT(5) +#define FMC_WDT2_CTRL_BOOT_SOURCE BIT(4) /* O: primary 1: alternate */ +#define FMC_WDT2_CTRL_EN BIT(0) + /* DMA Control/Status Register */ #define R_DMA_CTRL (0x80 / 4) +#define DMA_CTRL_REQUEST (1 << 31) +#define DMA_CTRL_GRANT (1 << 30) #define DMA_CTRL_DELAY_MASK 0xf #define DMA_CTRL_DELAY_SHIFT 8 
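
The new FMC_WDT2_CTRL and DMA_CTRL_REQUEST/GRANT definitions are single-bit flags, so checking them reduces to plain masking. A minimal sketch, with invented helper names that are not part of this patch:

    /* Illustrative only. */
    static inline bool fmc_wdt2_alt_boot_selected(uint32_t wdt2_ctrl)
    {
        /* Watchdog armed and boot source switched to the alternate chip. */
        return (wdt2_ctrl & FMC_WDT2_CTRL_EN) &&
               (wdt2_ctrl & FMC_WDT2_CTRL_BOOT_SOURCE);
    }

    static inline bool dma_ctrl_granted(uint32_t dma_ctrl)
    {
        /* True once the controller has granted the requested DMA access. */
        return !!(dma_ctrl & DMA_CTRL_GRANT);
    }
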
#define DMA_CTRL_FREQ_MASK 0xf @@ -161,11 +169,6 @@ #define ASPEED_SMC_R_SPI_MAX (0x20 / 4) #define ASPEED_SMC_R_SMC_MAX (0x20 / 4) -#define ASPEED_SOC_SMC_FLASH_BASE 0x10000000 -#define ASPEED_SOC_FMC_FLASH_BASE 0x20000000 -#define ASPEED_SOC_SPI_FLASH_BASE 0x30000000 -#define ASPEED_SOC_SPI2_FLASH_BASE 0x38000000 - /* * DMA DRAM addresses should be 4 bytes aligned and the valid address * range is 0x40000000 - 0x5FFFFFFF (AST2400) @@ -178,10 +181,8 @@ * 0: 4 bytes * 0x7FFFFF: 32M bytes */ -#define DMA_DRAM_ADDR(s, val) ((s)->sdram_base | \ - ((val) & (s)->ctrl->dma_dram_mask)) -#define DMA_FLASH_ADDR(s, val) ((s)->ctrl->flash_window_base | \ - ((val) & (s)->ctrl->dma_flash_mask)) +#define DMA_DRAM_ADDR(asc, val) ((val) & (asc)->dma_dram_mask) +#define DMA_FLASH_ADDR(asc, val) ((val) & (asc)->dma_flash_mask) #define DMA_LENGTH(val) ((val) & 0x01FFFFFC) /* Flash opcodes. */ @@ -195,311 +196,48 @@ * controller. These can be changed when board is initialized with the * Segment Address Registers. */ -static const AspeedSegments aspeed_segments_legacy[] = { - { 0x10000000, 32 * 1024 * 1024 }, -}; - -static const AspeedSegments aspeed_segments_fmc[] = { - { 0x20000000, 64 * 1024 * 1024 }, /* start address is readonly */ - { 0x24000000, 32 * 1024 * 1024 }, - { 0x26000000, 32 * 1024 * 1024 }, - { 0x28000000, 32 * 1024 * 1024 }, - { 0x2A000000, 32 * 1024 * 1024 } -}; - -static const AspeedSegments aspeed_segments_spi[] = { - { 0x30000000, 64 * 1024 * 1024 }, -}; - -static const AspeedSegments aspeed_segments_ast2500_fmc[] = { - { 0x20000000, 128 * 1024 * 1024 }, /* start address is readonly */ - { 0x28000000, 32 * 1024 * 1024 }, - { 0x2A000000, 32 * 1024 * 1024 }, -}; - -static const AspeedSegments aspeed_segments_ast2500_spi1[] = { - { 0x30000000, 32 * 1024 * 1024 }, /* start address is readonly */ - { 0x32000000, 96 * 1024 * 1024 }, /* end address is readonly */ -}; - -static const AspeedSegments aspeed_segments_ast2500_spi2[] = { - { 0x38000000, 32 * 1024 * 1024 }, /* start address is readonly */ - { 0x3A000000, 96 * 1024 * 1024 }, /* end address is readonly */ -}; -static uint32_t aspeed_smc_segment_to_reg(const AspeedSMCState *s, - const AspeedSegments *seg); -static void aspeed_smc_reg_to_segment(const AspeedSMCState *s, uint32_t reg, - AspeedSegments *seg); - -/* - * AST2600 definitions - */ -#define ASPEED26_SOC_FMC_FLASH_BASE 0x20000000 -#define ASPEED26_SOC_SPI_FLASH_BASE 0x30000000 -#define ASPEED26_SOC_SPI2_FLASH_BASE 0x50000000 +static const AspeedSegments aspeed_2500_spi1_segments[]; +static const AspeedSegments aspeed_2500_spi2_segments[]; -static const AspeedSegments aspeed_segments_ast2600_fmc[] = { - { 0x0, 128 * MiB }, /* start address is readonly */ - { 128 * MiB, 128 * MiB }, /* default is disabled but needed for -kernel */ - { 0x0, 0 }, /* disabled */ -}; - -static const AspeedSegments aspeed_segments_ast2600_spi1[] = { - { 0x0, 128 * MiB }, /* start address is readonly */ - { 0x0, 0 }, /* disabled */ -}; - -static const AspeedSegments aspeed_segments_ast2600_spi2[] = { - { 0x0, 128 * MiB }, /* start address is readonly */ - { 0x0, 0 }, /* disabled */ - { 0x0, 0 }, /* disabled */ -}; - -static uint32_t aspeed_2600_smc_segment_to_reg(const AspeedSMCState *s, - const AspeedSegments *seg); -static void aspeed_2600_smc_reg_to_segment(const AspeedSMCState *s, - uint32_t reg, AspeedSegments *seg); - -static const AspeedSMCController controllers[] = { - { - .name = "aspeed.smc-ast2400", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - .r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - 
.nregs_timings = 1, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 1, - .segments = aspeed_segments_legacy, - .flash_window_base = ASPEED_SOC_SMC_FLASH_BASE, - .flash_window_size = 0x6000000, - .has_dma = false, - .nregs = ASPEED_SMC_R_SMC_MAX, - .segment_to_reg = aspeed_smc_segment_to_reg, - .reg_to_segment = aspeed_smc_reg_to_segment, - }, { - .name = "aspeed.fmc-ast2400", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - .r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - .nregs_timings = 1, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 5, - .segments = aspeed_segments_fmc, - .flash_window_base = ASPEED_SOC_FMC_FLASH_BASE, - .flash_window_size = 0x10000000, - .has_dma = true, - .dma_flash_mask = 0x0FFFFFFC, - .dma_dram_mask = 0x1FFFFFFC, - .nregs = ASPEED_SMC_R_MAX, - .segment_to_reg = aspeed_smc_segment_to_reg, - .reg_to_segment = aspeed_smc_reg_to_segment, - }, { - .name = "aspeed.spi1-ast2400", - .r_conf = R_SPI_CONF, - .r_ce_ctrl = 0xff, - .r_ctrl0 = R_SPI_CTRL0, - .r_timings = R_SPI_TIMINGS, - .nregs_timings = 1, - .conf_enable_w0 = SPI_CONF_ENABLE_W0, - .max_peripherals = 1, - .segments = aspeed_segments_spi, - .flash_window_base = ASPEED_SOC_SPI_FLASH_BASE, - .flash_window_size = 0x10000000, - .has_dma = false, - .nregs = ASPEED_SMC_R_SPI_MAX, - .segment_to_reg = aspeed_smc_segment_to_reg, - .reg_to_segment = aspeed_smc_reg_to_segment, - }, { - .name = "aspeed.fmc-ast2500", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - .r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - .nregs_timings = 1, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 3, - .segments = aspeed_segments_ast2500_fmc, - .flash_window_base = ASPEED_SOC_FMC_FLASH_BASE, - .flash_window_size = 0x10000000, - .has_dma = true, - .dma_flash_mask = 0x0FFFFFFC, - .dma_dram_mask = 0x3FFFFFFC, - .nregs = ASPEED_SMC_R_MAX, - .segment_to_reg = aspeed_smc_segment_to_reg, - .reg_to_segment = aspeed_smc_reg_to_segment, - }, { - .name = "aspeed.spi1-ast2500", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - .r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - .nregs_timings = 1, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 2, - .segments = aspeed_segments_ast2500_spi1, - .flash_window_base = ASPEED_SOC_SPI_FLASH_BASE, - .flash_window_size = 0x8000000, - .has_dma = false, - .nregs = ASPEED_SMC_R_MAX, - .segment_to_reg = aspeed_smc_segment_to_reg, - .reg_to_segment = aspeed_smc_reg_to_segment, - }, { - .name = "aspeed.spi2-ast2500", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - .r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - .nregs_timings = 1, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 2, - .segments = aspeed_segments_ast2500_spi2, - .flash_window_base = ASPEED_SOC_SPI2_FLASH_BASE, - .flash_window_size = 0x8000000, - .has_dma = false, - .nregs = ASPEED_SMC_R_MAX, - .segment_to_reg = aspeed_smc_segment_to_reg, - .reg_to_segment = aspeed_smc_reg_to_segment, - }, { - .name = "aspeed.fmc-ast2600", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - .r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - .nregs_timings = 1, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 3, - .segments = aspeed_segments_ast2600_fmc, - .flash_window_base = ASPEED26_SOC_FMC_FLASH_BASE, - .flash_window_size = 0x10000000, - .has_dma = true, - .dma_flash_mask = 0x0FFFFFFC, - .dma_dram_mask = 0x3FFFFFFC, - .nregs = ASPEED_SMC_R_MAX, - .segment_to_reg = aspeed_2600_smc_segment_to_reg, - .reg_to_segment = aspeed_2600_smc_reg_to_segment, - }, { - .name = "aspeed.spi1-ast2600", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - 
.r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - .nregs_timings = 2, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 2, - .segments = aspeed_segments_ast2600_spi1, - .flash_window_base = ASPEED26_SOC_SPI_FLASH_BASE, - .flash_window_size = 0x10000000, - .has_dma = true, - .dma_flash_mask = 0x0FFFFFFC, - .dma_dram_mask = 0x3FFFFFFC, - .nregs = ASPEED_SMC_R_MAX, - .segment_to_reg = aspeed_2600_smc_segment_to_reg, - .reg_to_segment = aspeed_2600_smc_reg_to_segment, - }, { - .name = "aspeed.spi2-ast2600", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - .r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - .nregs_timings = 3, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 3, - .segments = aspeed_segments_ast2600_spi2, - .flash_window_base = ASPEED26_SOC_SPI2_FLASH_BASE, - .flash_window_size = 0x10000000, - .has_dma = true, - .dma_flash_mask = 0x0FFFFFFC, - .dma_dram_mask = 0x3FFFFFFC, - .nregs = ASPEED_SMC_R_MAX, - .segment_to_reg = aspeed_2600_smc_segment_to_reg, - .reg_to_segment = aspeed_2600_smc_reg_to_segment, - }, -}; - -/* - * The Segment Registers of the AST2400 and AST2500 have a 8MB - * unit. The address range of a flash SPI peripheral is encoded with - * absolute addresses which should be part of the overall controller - * window. - */ -static uint32_t aspeed_smc_segment_to_reg(const AspeedSMCState *s, - const AspeedSegments *seg) -{ - uint32_t reg = 0; - reg |= ((seg->addr >> 23) & SEG_START_MASK) << SEG_START_SHIFT; - reg |= (((seg->addr + seg->size) >> 23) & SEG_END_MASK) << SEG_END_SHIFT; - return reg; -} +#define ASPEED_SMC_FEATURE_DMA 0x1 +#define ASPEED_SMC_FEATURE_DMA_GRANT 0x2 +#define ASPEED_SMC_FEATURE_WDT_CONTROL 0x4 -static void aspeed_smc_reg_to_segment(const AspeedSMCState *s, - uint32_t reg, AspeedSegments *seg) +static inline bool aspeed_smc_has_dma(const AspeedSMCClass *asc) { - seg->addr = ((reg >> SEG_START_SHIFT) & SEG_START_MASK) << 23; - seg->size = (((reg >> SEG_END_SHIFT) & SEG_END_MASK) << 23) - seg->addr; + return !!(asc->features & ASPEED_SMC_FEATURE_DMA); } -/* - * The Segment Registers of the AST2600 have a 1MB unit. The address - * range of a flash SPI peripheral is encoded with offsets in the overall - * controller window. The previous SoC AST2400 and AST2500 used - * absolute addresses. Only bits [27:20] are relevant and the end - * address is an upper bound limit. - */ -#define AST2600_SEG_ADDR_MASK 0x0ff00000 - -static uint32_t aspeed_2600_smc_segment_to_reg(const AspeedSMCState *s, - const AspeedSegments *seg) +static inline bool aspeed_smc_has_wdt_control(const AspeedSMCClass *asc) { - uint32_t reg = 0; - - /* Disabled segments have a nil register */ - if (!seg->size) { - return 0; - } - - reg |= (seg->addr & AST2600_SEG_ADDR_MASK) >> 16; /* start offset */ - reg |= (seg->addr + seg->size - 1) & AST2600_SEG_ADDR_MASK; /* end offset */ - return reg; + return !!(asc->features & ASPEED_SMC_FEATURE_WDT_CONTROL); } -static void aspeed_2600_smc_reg_to_segment(const AspeedSMCState *s, - uint32_t reg, AspeedSegments *seg) -{ - uint32_t start_offset = (reg << 16) & AST2600_SEG_ADDR_MASK; - uint32_t end_offset = reg & AST2600_SEG_ADDR_MASK; - - if (reg) { - seg->addr = s->ctrl->flash_window_base + start_offset; - seg->size = end_offset + MiB - start_offset; - } else { - seg->addr = s->ctrl->flash_window_base; - seg->size = 0; - } -} +#define aspeed_smc_error(fmt, ...) 
\ + qemu_log_mask(LOG_GUEST_ERROR, "%s: " fmt "\n", __func__, ## __VA_ARGS__) static bool aspeed_smc_flash_overlap(const AspeedSMCState *s, const AspeedSegments *new, int cs) { + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); AspeedSegments seg; int i; - for (i = 0; i < s->ctrl->max_peripherals; i++) { + for (i = 0; i < asc->max_peripherals; i++) { if (i == cs) { continue; } - s->ctrl->reg_to_segment(s, s->regs[R_SEG_ADDR0 + i], &seg); + asc->reg_to_segment(s, s->regs[R_SEG_ADDR0 + i], &seg); if (new->addr + new->size > seg.addr && new->addr < seg.addr + seg.size) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: new segment CS%d [ 0x%" - HWADDR_PRIx" - 0x%"HWADDR_PRIx" ] overlaps with " - "CS%d [ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]\n", - s->ctrl->name, cs, new->addr, new->addr + new->size, - i, seg.addr, seg.addr + seg.size); + aspeed_smc_error("new segment CS%d [ 0x%" + HWADDR_PRIx" - 0x%"HWADDR_PRIx" ] overlaps with " + "CS%d [ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]", + cs, new->addr, new->addr + new->size, + i, seg.addr, seg.addr + seg.size); return true; } } @@ -509,14 +247,15 @@ static bool aspeed_smc_flash_overlap(const AspeedSMCState *s, static void aspeed_smc_flash_set_segment_region(AspeedSMCState *s, int cs, uint64_t regval) { + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); AspeedSMCFlash *fl = &s->flashes[cs]; AspeedSegments seg; - s->ctrl->reg_to_segment(s, regval, &seg); + asc->reg_to_segment(s, regval, &seg); memory_region_transaction_begin(); memory_region_set_size(&fl->mmio, seg.size); - memory_region_set_address(&fl->mmio, seg.addr - s->ctrl->flash_window_base); + memory_region_set_address(&fl->mmio, seg.addr - asc->flash_window_base); memory_region_set_enabled(&fl->mmio, !!seg.size); memory_region_transaction_commit(); @@ -526,53 +265,52 @@ static void aspeed_smc_flash_set_segment_region(AspeedSMCState *s, int cs, static void aspeed_smc_flash_set_segment(AspeedSMCState *s, int cs, uint64_t new) { + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); AspeedSegments seg; - s->ctrl->reg_to_segment(s, new, &seg); + asc->reg_to_segment(s, new, &seg); trace_aspeed_smc_flash_set_segment(cs, new, seg.addr, seg.addr + seg.size); /* The start address of CS0 is read-only */ - if (cs == 0 && seg.addr != s->ctrl->flash_window_base) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Tried to change CS0 start address to 0x%" - HWADDR_PRIx "\n", s->ctrl->name, seg.addr); - seg.addr = s->ctrl->flash_window_base; - new = s->ctrl->segment_to_reg(s, &seg); + if (cs == 0 && seg.addr != asc->flash_window_base) { + aspeed_smc_error("Tried to change CS0 start address to 0x%" + HWADDR_PRIx, seg.addr); + seg.addr = asc->flash_window_base; + new = asc->segment_to_reg(s, &seg); } /* * The end address of the AST2500 spi controllers is also * read-only. 
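
Per-controller behaviour is moving from the old controllers[] table into AspeedSMCClass: capabilities become bits in asc->features tested through aspeed_smc_has_dma() / aspeed_smc_has_wdt_control(), and guest errors go through the aspeed_smc_error() wrapper. As an illustrative sketch only (the function name is invented; the real per-SoC class_init functions are outside this hunk), a class would advertise its capabilities like this:

    static void example_smc_class_init(ObjectClass *klass, void *data)
    {
        AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass);

        asc->max_peripherals = 3;
        asc->features = ASPEED_SMC_FEATURE_DMA | ASPEED_SMC_FEATURE_WDT_CONTROL;
        /* aspeed_smc_has_dma(asc) / aspeed_smc_has_wdt_control(asc) then
         * gate the DMA and FMC_WDT2 register handling for this SoC. */
    }
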
*/ - if ((s->ctrl->segments == aspeed_segments_ast2500_spi1 || - s->ctrl->segments == aspeed_segments_ast2500_spi2) && - cs == s->ctrl->max_peripherals && - seg.addr + seg.size != s->ctrl->segments[cs].addr + - s->ctrl->segments[cs].size) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Tried to change CS%d end address to 0x%" - HWADDR_PRIx "\n", s->ctrl->name, cs, seg.addr + seg.size); - seg.size = s->ctrl->segments[cs].addr + s->ctrl->segments[cs].size - + if ((asc->segments == aspeed_2500_spi1_segments || + asc->segments == aspeed_2500_spi2_segments) && + cs == asc->max_peripherals && + seg.addr + seg.size != asc->segments[cs].addr + + asc->segments[cs].size) { + aspeed_smc_error("Tried to change CS%d end address to 0x%" + HWADDR_PRIx, cs, seg.addr + seg.size); + seg.size = asc->segments[cs].addr + asc->segments[cs].size - seg.addr; - new = s->ctrl->segment_to_reg(s, &seg); + new = asc->segment_to_reg(s, &seg); } /* Keep the segment in the overall flash window */ if (seg.size && - (seg.addr + seg.size <= s->ctrl->flash_window_base || - seg.addr > s->ctrl->flash_window_base + s->ctrl->flash_window_size)) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: new segment for CS%d is invalid : " - "[ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]\n", - s->ctrl->name, cs, seg.addr, seg.addr + seg.size); + (seg.addr + seg.size <= asc->flash_window_base || + seg.addr > asc->flash_window_base + asc->flash_window_size)) { + aspeed_smc_error("new segment for CS%d is invalid : " + "[ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]", + cs, seg.addr, seg.addr + seg.size); return; } /* Check start address vs. alignment */ if (seg.size && !QEMU_IS_ALIGNED(seg.addr, seg.size)) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: new segment for CS%d is not " - "aligned : [ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]\n", - s->ctrl->name, cs, seg.addr, seg.addr + seg.size); + aspeed_smc_error("new segment for CS%d is not " + "aligned : [ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]", + cs, seg.addr, seg.addr + seg.size); } /* And segments should not overlap (in the specs) */ @@ -585,16 +323,15 @@ static void aspeed_smc_flash_set_segment(AspeedSMCState *s, int cs, static uint64_t aspeed_smc_flash_default_read(void *opaque, hwaddr addr, unsigned size) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: To 0x%" HWADDR_PRIx " of size %u" - PRIx64 "\n", __func__, addr, size); + aspeed_smc_error("To 0x%" HWADDR_PRIx " of size %u" PRIx64, addr, size); return 0; } static void aspeed_smc_flash_default_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: To 0x%" HWADDR_PRIx " of size %u: 0x%" - PRIx64 "\n", __func__, addr, size, data); + aspeed_smc_error("To 0x%" HWADDR_PRIx " of size %u: 0x%" PRIx64, + addr, size, data); } static const MemoryRegionOps aspeed_smc_flash_default_ops = { @@ -611,20 +348,20 @@ static inline int aspeed_smc_flash_mode(const AspeedSMCFlash *fl) { const AspeedSMCState *s = fl->controller; - return s->regs[s->r_ctrl0 + fl->id] & CTRL_CMD_MODE_MASK; + return s->regs[s->r_ctrl0 + fl->cs] & CTRL_CMD_MODE_MASK; } static inline bool aspeed_smc_is_writable(const AspeedSMCFlash *fl) { const AspeedSMCState *s = fl->controller; - return s->regs[s->r_conf] & (1 << (s->conf_enable_w0 + fl->id)); + return s->regs[s->r_conf] & (1 << (s->conf_enable_w0 + fl->cs)); } static inline int aspeed_smc_flash_cmd(const AspeedSMCFlash *fl) { const AspeedSMCState *s = fl->controller; - int cmd = (s->regs[s->r_ctrl0 + fl->id] >> CTRL_CMD_SHIFT) & CTRL_CMD_MASK; + int cmd = (s->regs[s->r_ctrl0 + fl->cs] >> CTRL_CMD_SHIFT) & CTRL_CMD_MASK; /* * 
In read mode, the default SPI command is READ (0x3). In other @@ -637,21 +374,22 @@ static inline int aspeed_smc_flash_cmd(const AspeedSMCFlash *fl) } if (!cmd) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: no command defined for mode %d\n", - __func__, aspeed_smc_flash_mode(fl)); + aspeed_smc_error("no command defined for mode %d", + aspeed_smc_flash_mode(fl)); } return cmd; } -static inline int aspeed_smc_flash_is_4byte(const AspeedSMCFlash *fl) +static inline int aspeed_smc_flash_addr_width(const AspeedSMCFlash *fl) { const AspeedSMCState *s = fl->controller; + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); - if (s->ctrl->segments == aspeed_segments_spi) { - return s->regs[s->r_ctrl0] & CTRL_AST2400_SPI_4BYTE; + if (asc->addr_width) { + return asc->addr_width(s); } else { - return s->regs[s->r_ce_ctrl] & (1 << (CTRL_EXTENDED0 + fl->id)); + return s->regs[s->r_ce_ctrl] & (1 << (CTRL_EXTENDED0 + fl->cs)) ? 4 : 3; } } @@ -659,9 +397,9 @@ static void aspeed_smc_flash_do_select(AspeedSMCFlash *fl, bool unselect) { AspeedSMCState *s = fl->controller; - trace_aspeed_smc_flash_select(fl->id, unselect ? "un" : ""); + trace_aspeed_smc_flash_select(fl->cs, unselect ? "un" : ""); - qemu_set_irq(s->cs_lines[fl->id], unselect); + qemu_set_irq(s->cs_lines[fl->cs], unselect); } static void aspeed_smc_flash_select(AspeedSMCFlash *fl) @@ -678,15 +416,14 @@ static uint32_t aspeed_smc_check_segment_addr(const AspeedSMCFlash *fl, uint32_t addr) { const AspeedSMCState *s = fl->controller; + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); AspeedSegments seg; - s->ctrl->reg_to_segment(s, s->regs[R_SEG_ADDR0 + fl->id], &seg); + asc->reg_to_segment(s, s->regs[R_SEG_ADDR0 + fl->cs], &seg); if ((addr % seg.size) != addr) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: invalid address 0x%08x for CS%d segment : " - "[ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]\n", - s->ctrl->name, addr, fl->id, seg.addr, - seg.addr + seg.size); + aspeed_smc_error("invalid address 0x%08x for CS%d segment : " + "[ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]", + addr, fl->cs, seg.addr, seg.addr + seg.size); addr %= seg.size; } @@ -696,7 +433,7 @@ static uint32_t aspeed_smc_check_segment_addr(const AspeedSMCFlash *fl, static int aspeed_smc_flash_dummies(const AspeedSMCFlash *fl) { const AspeedSMCState *s = fl->controller; - uint32_t r_ctrl0 = s->regs[s->r_ctrl0 + fl->id]; + uint32_t r_ctrl0 = s->regs[s->r_ctrl0 + fl->cs]; uint32_t dummy_high = (r_ctrl0 >> CTRL_DUMMY_HIGH_SHIFT) & 0x1; uint32_t dummy_low = (r_ctrl0 >> CTRL_DUMMY_LOW_SHIFT) & 0x3; uint32_t dummies = ((dummy_high << 2) | dummy_low) * 8; @@ -712,7 +449,7 @@ static void aspeed_smc_flash_setup(AspeedSMCFlash *fl, uint32_t addr) { const AspeedSMCState *s = fl->controller; uint8_t cmd = aspeed_smc_flash_cmd(fl); - int i = aspeed_smc_flash_is_4byte(fl) ? 
4 : 3; + int i = aspeed_smc_flash_addr_width(fl); /* Flash access can not exceed CS segment */ addr = aspeed_smc_check_segment_addr(fl, addr); @@ -762,11 +499,10 @@ static uint64_t aspeed_smc_flash_read(void *opaque, hwaddr addr, unsigned size) aspeed_smc_flash_unselect(fl); break; default: - qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid flash mode %d\n", - __func__, aspeed_smc_flash_mode(fl)); + aspeed_smc_error("invalid flash mode %d", aspeed_smc_flash_mode(fl)); } - trace_aspeed_smc_flash_read(fl->id, addr, size, ret, + trace_aspeed_smc_flash_read(fl->cs, addr, size, ret, aspeed_smc_flash_mode(fl)); return ret; } @@ -821,9 +557,9 @@ static bool aspeed_smc_do_snoop(AspeedSMCFlash *fl, uint64_t data, unsigned size) { AspeedSMCState *s = fl->controller; - uint8_t addr_width = aspeed_smc_flash_is_4byte(fl) ? 4 : 3; + uint8_t addr_width = aspeed_smc_flash_addr_width(fl); - trace_aspeed_smc_do_snoop(fl->id, s->snoop_index, s->snoop_dummies, + trace_aspeed_smc_do_snoop(fl->cs, s->snoop_index, s->snoop_dummies, (uint8_t) data & 0xff); if (s->snoop_index == SNOOP_OFF) { @@ -876,12 +612,11 @@ static void aspeed_smc_flash_write(void *opaque, hwaddr addr, uint64_t data, AspeedSMCState *s = fl->controller; int i; - trace_aspeed_smc_flash_write(fl->id, addr, size, data, + trace_aspeed_smc_flash_write(fl->cs, addr, size, data, aspeed_smc_flash_mode(fl)); if (!aspeed_smc_is_writable(fl)) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: flash is not writable at 0x%" - HWADDR_PRIx "\n", __func__, addr); + aspeed_smc_error("flash is not writable at 0x%" HWADDR_PRIx, addr); return; } @@ -906,8 +641,7 @@ static void aspeed_smc_flash_write(void *opaque, hwaddr addr, uint64_t data, aspeed_smc_flash_unselect(fl); break; default: - qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid flash mode %d\n", - __func__, aspeed_smc_flash_mode(fl)); + aspeed_smc_error("invalid flash mode %d", aspeed_smc_flash_mode(fl)); } } @@ -930,12 +664,12 @@ static void aspeed_smc_flash_update_ctrl(AspeedSMCFlash *fl, uint32_t value) unselect = (value & CTRL_CMD_MODE_MASK) != CTRL_USERMODE; /* A change of CTRL_CE_STOP_ACTIVE from 0 to 1, unselects the CS */ - if (!(s->regs[s->r_ctrl0 + fl->id] & CTRL_CE_STOP_ACTIVE) && + if (!(s->regs[s->r_ctrl0 + fl->cs] & CTRL_CE_STOP_ACTIVE) && value & CTRL_CE_STOP_ACTIVE) { unselect = true; } - s->regs[s->r_ctrl0 + fl->id] = value; + s->regs[s->r_ctrl0 + fl->cs] = value; s->snoop_index = unselect ? 
SNOOP_OFF : SNOOP_START; @@ -945,9 +679,14 @@ static void aspeed_smc_flash_update_ctrl(AspeedSMCFlash *fl, uint32_t value) static void aspeed_smc_reset(DeviceState *d) { AspeedSMCState *s = ASPEED_SMC(d); + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); int i; - memset(s->regs, 0, sizeof s->regs); + if (asc->resets) { + memcpy(s->regs, asc->resets, sizeof s->regs); + } else { + memset(s->regs, 0, sizeof s->regs); + } /* Unselect all peripherals */ for (i = 0; i < s->num_cs; ++i) { @@ -956,30 +695,9 @@ static void aspeed_smc_reset(DeviceState *d) } /* setup the default segment register values and regions for all */ - for (i = 0; i < s->ctrl->max_peripherals; ++i) { + for (i = 0; i < asc->max_peripherals; ++i) { aspeed_smc_flash_set_segment_region(s, i, - s->ctrl->segment_to_reg(s, &s->ctrl->segments[i])); - } - - /* HW strapping flash type for the AST2600 controllers */ - if (s->ctrl->segments == aspeed_segments_ast2600_fmc) { - /* flash type is fixed to SPI for all */ - s->regs[s->r_conf] |= (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE0); - s->regs[s->r_conf] |= (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE1); - s->regs[s->r_conf] |= (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE2); - } - - /* HW strapping flash type for FMC controllers */ - if (s->ctrl->segments == aspeed_segments_ast2500_fmc) { - /* flash type is fixed to SPI for CE0 and CE1 */ - s->regs[s->r_conf] |= (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE0); - s->regs[s->r_conf] |= (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE1); - } - - /* HW strapping for AST2400 FMC controllers (SCU70). Let's use the - * configuration of the palmetto-bmc machine */ - if (s->ctrl->segments == aspeed_segments_fmc) { - s->regs[s->r_conf] |= (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE0); + asc->segment_to_reg(s, &asc->segments[i])); } s->snoop_index = SNOOP_OFF; @@ -989,26 +707,28 @@ static void aspeed_smc_reset(DeviceState *d) static uint64_t aspeed_smc_read(void *opaque, hwaddr addr, unsigned int size) { AspeedSMCState *s = ASPEED_SMC(opaque); + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(opaque); addr >>= 2; if (addr == s->r_conf || (addr >= s->r_timings && - addr < s->r_timings + s->ctrl->nregs_timings) || + addr < s->r_timings + asc->nregs_timings) || addr == s->r_ce_ctrl || addr == R_CE_CMD_CTRL || addr == R_INTR_CTRL || addr == R_DUMMY_DATA || - (s->ctrl->has_dma && addr == R_DMA_CTRL) || - (s->ctrl->has_dma && addr == R_DMA_FLASH_ADDR) || - (s->ctrl->has_dma && addr == R_DMA_DRAM_ADDR) || - (s->ctrl->has_dma && addr == R_DMA_LEN) || - (s->ctrl->has_dma && addr == R_DMA_CHECKSUM) || + (aspeed_smc_has_wdt_control(asc) && addr == R_FMC_WDT2_CTRL) || + (aspeed_smc_has_dma(asc) && addr == R_DMA_CTRL) || + (aspeed_smc_has_dma(asc) && addr == R_DMA_FLASH_ADDR) || + (aspeed_smc_has_dma(asc) && addr == R_DMA_DRAM_ADDR) || + (aspeed_smc_has_dma(asc) && addr == R_DMA_LEN) || + (aspeed_smc_has_dma(asc) && addr == R_DMA_CHECKSUM) || (addr >= R_SEG_ADDR0 && - addr < R_SEG_ADDR0 + s->ctrl->max_peripherals) || - (addr >= s->r_ctrl0 && addr < s->r_ctrl0 + s->ctrl->max_peripherals)) { + addr < R_SEG_ADDR0 + asc->max_peripherals) || + (addr >= s->r_ctrl0 && addr < s->r_ctrl0 + asc->max_peripherals)) { - trace_aspeed_smc_read(addr, size, s->regs[addr]); + trace_aspeed_smc_read(addr << 2, size, s->regs[addr]); return s->regs[addr]; } else { @@ -1032,7 +752,7 @@ static uint8_t aspeed_smc_hclk_divisor(uint8_t hclk_mask) } } - qemu_log_mask(LOG_GUEST_ERROR, "invalid HCLK mask %x", hclk_mask); + aspeed_smc_error("invalid HCLK mask %x", hclk_mask); return 0; } @@ -1112,8 +832,7 @@ static void 
aspeed_smc_dma_checksum(AspeedSMCState *s) uint32_t data; if (s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: invalid direction for DMA checksum\n", __func__); + aspeed_smc_error("invalid direction for DMA checksum"); return; } @@ -1125,8 +844,8 @@ static void aspeed_smc_dma_checksum(AspeedSMCState *s) data = address_space_ldl_le(&s->flash_as, s->regs[R_DMA_FLASH_ADDR], MEMTXATTRS_UNSPECIFIED, &result); if (result != MEMTX_OK) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: Flash read failed @%08x\n", - __func__, s->regs[R_DMA_FLASH_ADDR]); + aspeed_smc_error("Flash read failed @%08x", + s->regs[R_DMA_FLASH_ADDR]); return; } trace_aspeed_smc_dma_checksum(s->regs[R_DMA_FLASH_ADDR], data); @@ -1161,32 +880,32 @@ static void aspeed_smc_dma_rw(AspeedSMCState *s) data = address_space_ldl_le(&s->dram_as, s->regs[R_DMA_DRAM_ADDR], MEMTXATTRS_UNSPECIFIED, &result); if (result != MEMTX_OK) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: DRAM read failed @%08x\n", - __func__, s->regs[R_DMA_DRAM_ADDR]); + aspeed_smc_error("DRAM read failed @%08x", + s->regs[R_DMA_DRAM_ADDR]); return; } address_space_stl_le(&s->flash_as, s->regs[R_DMA_FLASH_ADDR], data, MEMTXATTRS_UNSPECIFIED, &result); if (result != MEMTX_OK) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: Flash write failed @%08x\n", - __func__, s->regs[R_DMA_FLASH_ADDR]); + aspeed_smc_error("Flash write failed @%08x", + s->regs[R_DMA_FLASH_ADDR]); return; } } else { data = address_space_ldl_le(&s->flash_as, s->regs[R_DMA_FLASH_ADDR], MEMTXATTRS_UNSPECIFIED, &result); if (result != MEMTX_OK) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: Flash read failed @%08x\n", - __func__, s->regs[R_DMA_FLASH_ADDR]); + aspeed_smc_error("Flash read failed @%08x", + s->regs[R_DMA_FLASH_ADDR]); return; } address_space_stl_le(&s->dram_as, s->regs[R_DMA_DRAM_ADDR], data, MEMTXATTRS_UNSPECIFIED, &result); if (result != MEMTX_OK) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: DRAM write failed @%08x\n", - __func__, s->regs[R_DMA_DRAM_ADDR]); + aspeed_smc_error("DRAM write failed @%08x", + s->regs[R_DMA_DRAM_ADDR]); return; } } @@ -1236,7 +955,7 @@ static void aspeed_smc_dma_done(AspeedSMCState *s) } } -static void aspeed_smc_dma_ctrl(AspeedSMCState *s, uint64_t dma_ctrl) +static void aspeed_smc_dma_ctrl(AspeedSMCState *s, uint32_t dma_ctrl) { if (!(dma_ctrl & DMA_CTRL_ENABLE)) { s->regs[R_DMA_CTRL] = dma_ctrl; @@ -1246,7 +965,7 @@ static void aspeed_smc_dma_ctrl(AspeedSMCState *s, uint64_t dma_ctrl) } if (aspeed_smc_dma_in_progress(s)) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: DMA in progress\n", __func__); + aspeed_smc_error("DMA in progress !"); return; } @@ -1261,26 +980,69 @@ static void aspeed_smc_dma_ctrl(AspeedSMCState *s, uint64_t dma_ctrl) aspeed_smc_dma_done(s); } +static inline bool aspeed_smc_dma_granted(AspeedSMCState *s) +{ + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); + + if (!(asc->features & ASPEED_SMC_FEATURE_DMA_GRANT)) { + return true; + } + + if (!(s->regs[R_DMA_CTRL] & DMA_CTRL_GRANT)) { + aspeed_smc_error("DMA not granted"); + return false; + } + + return true; +} + +static void aspeed_2600_smc_dma_ctrl(AspeedSMCState *s, uint32_t dma_ctrl) +{ + /* Preserve DMA bits */ + dma_ctrl |= s->regs[R_DMA_CTRL] & (DMA_CTRL_REQUEST | DMA_CTRL_GRANT); + + if (dma_ctrl == 0xAEED0000) { + /* automatically grant request */ + s->regs[R_DMA_CTRL] |= (DMA_CTRL_REQUEST | DMA_CTRL_GRANT); + return; + } + + /* clear request */ + if (dma_ctrl == 0xDEEA0000) { + s->regs[R_DMA_CTRL] &= ~(DMA_CTRL_REQUEST | DMA_CTRL_GRANT); + return; + } + + if (!aspeed_smc_dma_granted(s)) { 
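+ /* Any other value is a regular DMA_CTRL setting and is only honoured once the request has been granted above */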
+ aspeed_smc_error("DMA not granted"); + return; + } + + aspeed_smc_dma_ctrl(s, dma_ctrl); + s->regs[R_DMA_CTRL] &= ~(DMA_CTRL_REQUEST | DMA_CTRL_GRANT); +} + static void aspeed_smc_write(void *opaque, hwaddr addr, uint64_t data, unsigned int size) { AspeedSMCState *s = ASPEED_SMC(opaque); + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); uint32_t value = data; - addr >>= 2; - trace_aspeed_smc_write(addr, size, data); + addr >>= 2; + if (addr == s->r_conf || (addr >= s->r_timings && - addr < s->r_timings + s->ctrl->nregs_timings) || + addr < s->r_timings + asc->nregs_timings) || addr == s->r_ce_ctrl) { s->regs[addr] = value; } else if (addr >= s->r_ctrl0 && addr < s->r_ctrl0 + s->num_cs) { int cs = addr - s->r_ctrl0; aspeed_smc_flash_update_ctrl(&s->flashes[cs], value); } else if (addr >= R_SEG_ADDR0 && - addr < R_SEG_ADDR0 + s->ctrl->max_peripherals) { + addr < R_SEG_ADDR0 + asc->max_peripherals) { int cs = addr - R_SEG_ADDR0; if (value != s->regs[R_SEG_ADDR0 + cs]) { @@ -1290,15 +1052,20 @@ static void aspeed_smc_write(void *opaque, hwaddr addr, uint64_t data, s->regs[addr] = value & 0xff; } else if (addr == R_DUMMY_DATA) { s->regs[addr] = value & 0xff; + } else if (aspeed_smc_has_wdt_control(asc) && addr == R_FMC_WDT2_CTRL) { + s->regs[addr] = value & FMC_WDT2_CTRL_EN; } else if (addr == R_INTR_CTRL) { s->regs[addr] = value; - } else if (s->ctrl->has_dma && addr == R_DMA_CTRL) { - aspeed_smc_dma_ctrl(s, value); - } else if (s->ctrl->has_dma && addr == R_DMA_DRAM_ADDR) { - s->regs[addr] = DMA_DRAM_ADDR(s, value); - } else if (s->ctrl->has_dma && addr == R_DMA_FLASH_ADDR) { - s->regs[addr] = DMA_FLASH_ADDR(s, value); - } else if (s->ctrl->has_dma && addr == R_DMA_LEN) { + } else if (aspeed_smc_has_dma(asc) && addr == R_DMA_CTRL) { + asc->dma_ctrl(s, value); + } else if (aspeed_smc_has_dma(asc) && addr == R_DMA_DRAM_ADDR && + aspeed_smc_dma_granted(s)) { + s->regs[addr] = DMA_DRAM_ADDR(asc, value); + } else if (aspeed_smc_has_dma(asc) && addr == R_DMA_FLASH_ADDR && + aspeed_smc_dma_granted(s)) { + s->regs[addr] = DMA_FLASH_ADDR(asc, value); + } else if (aspeed_smc_has_dma(asc) && addr == R_DMA_LEN && + aspeed_smc_dma_granted(s)) { s->regs[addr] = DMA_LENGTH(value); } else { qemu_log_mask(LOG_UNIMP, "%s: not implemented: 0x%" HWADDR_PRIx "\n", @@ -1313,50 +1080,53 @@ static const MemoryRegionOps aspeed_smc_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; +static void aspeed_smc_instance_init(Object *obj) +{ + AspeedSMCState *s = ASPEED_SMC(obj); + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); + int i; + + for (i = 0; i < asc->max_peripherals; i++) { + object_initialize_child(obj, "flash[*]", &s->flashes[i], + TYPE_ASPEED_SMC_FLASH); + } +} + /* * Initialize the custom address spaces for DMAs */ static void aspeed_smc_dma_setup(AspeedSMCState *s, Error **errp) { - char *name; - if (!s->dram_mr) { error_setg(errp, TYPE_ASPEED_SMC ": 'dram' link not set"); return; } - name = g_strdup_printf("%s-dma-flash", s->ctrl->name); - address_space_init(&s->flash_as, &s->mmio_flash, name); - g_free(name); - - name = g_strdup_printf("%s-dma-dram", s->ctrl->name); - address_space_init(&s->dram_as, s->dram_mr, name); - g_free(name); + address_space_init(&s->flash_as, &s->mmio_flash, + TYPE_ASPEED_SMC ".dma-flash"); + address_space_init(&s->dram_as, s->dram_mr, + TYPE_ASPEED_SMC ".dma-dram"); } static void aspeed_smc_realize(DeviceState *dev, Error **errp) { SysBusDevice *sbd = SYS_BUS_DEVICE(dev); AspeedSMCState *s = ASPEED_SMC(dev); - AspeedSMCClass *mc = ASPEED_SMC_GET_CLASS(s); + AspeedSMCClass *asc = 
ASPEED_SMC_GET_CLASS(s); int i; - char name[32]; hwaddr offset = 0; - s->ctrl = mc->ctrl; - /* keep a copy under AspeedSMCState to speed up accesses */ - s->r_conf = s->ctrl->r_conf; - s->r_ce_ctrl = s->ctrl->r_ce_ctrl; - s->r_ctrl0 = s->ctrl->r_ctrl0; - s->r_timings = s->ctrl->r_timings; - s->conf_enable_w0 = s->ctrl->conf_enable_w0; + s->r_conf = asc->r_conf; + s->r_ce_ctrl = asc->r_ce_ctrl; + s->r_ctrl0 = asc->r_ctrl0; + s->r_timings = asc->r_timings; + s->conf_enable_w0 = asc->conf_enable_w0; /* Enforce some real HW limits */ - if (s->num_cs > s->ctrl->max_peripherals) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: num_cs cannot exceed: %d\n", - __func__, s->ctrl->max_peripherals); - s->num_cs = s->ctrl->max_peripherals; + if (s->num_cs > asc->max_peripherals) { + aspeed_smc_error("num_cs cannot exceed: %d", asc->max_peripherals); + s->num_cs = asc->max_peripherals; } /* DMA irq. Keep it first for the initialization in the SoC */ @@ -1373,7 +1143,7 @@ static void aspeed_smc_realize(DeviceState *dev, Error **errp) /* The memory region for the controller registers */ memory_region_init_io(&s->mmio, OBJECT(s), &aspeed_smc_ops, s, - s->ctrl->name, s->ctrl->nregs * 4); + TYPE_ASPEED_SMC, asc->nregs * 4); sysbus_init_mmio(sbd, &s->mmio); /* @@ -1381,14 +1151,17 @@ static void aspeed_smc_realize(DeviceState *dev, Error **errp) * window in which the flash modules are mapped. The size and * address depends on the SoC model and controller type. */ - snprintf(name, sizeof(name), "%s.flash", s->ctrl->name); + memory_region_init(&s->mmio_flash_container, OBJECT(s), + TYPE_ASPEED_SMC ".container", + asc->flash_window_size); + sysbus_init_mmio(sbd, &s->mmio_flash_container); memory_region_init_io(&s->mmio_flash, OBJECT(s), - &aspeed_smc_flash_default_ops, s, name, - s->ctrl->flash_window_size); - sysbus_init_mmio(sbd, &s->mmio_flash); - - s->flashes = g_new0(AspeedSMCFlash, s->ctrl->max_peripherals); + &aspeed_smc_flash_default_ops, s, + TYPE_ASPEED_SMC ".flash", + asc->flash_window_size); + memory_region_add_subregion(&s->mmio_flash_container, 0x0, + &s->mmio_flash); /* * Let's create a sub memory region for each possible peripheral. All @@ -1397,22 +1170,26 @@ static void aspeed_smc_realize(DeviceState *dev, Error **errp) * module behind to handle the memory accesses. This depends on * the board configuration. 
*/ - for (i = 0; i < s->ctrl->max_peripherals; ++i) { + for (i = 0; i < asc->max_peripherals; ++i) { AspeedSMCFlash *fl = &s->flashes[i]; - snprintf(name, sizeof(name), "%s.%d", s->ctrl->name, i); + if (!object_property_set_link(OBJECT(fl), "controller", OBJECT(s), + errp)) { + return; + } + if (!object_property_set_uint(OBJECT(fl), "cs", i, errp)) { + return; + } + if (!sysbus_realize(SYS_BUS_DEVICE(fl), errp)) { + return; + } - fl->id = i; - fl->controller = s; - fl->size = s->ctrl->segments[i].size; - memory_region_init_io(&fl->mmio, OBJECT(s), &aspeed_smc_flash_ops, - fl, name, fl->size); memory_region_add_subregion(&s->mmio_flash, offset, &fl->mmio); - offset += fl->size; + offset += asc->segments[i].size; } /* DMA support */ - if (s->ctrl->has_dma) { + if (aspeed_smc_has_dma(asc)) { aspeed_smc_dma_setup(s, errp); } } @@ -1432,7 +1209,6 @@ static const VMStateDescription vmstate_aspeed_smc = { static Property aspeed_smc_properties[] = { DEFINE_PROP_UINT32("num-cs", AspeedSMCState, num_cs, 1), DEFINE_PROP_BOOL("inject-failure", AspeedSMCState, inject_failure, false), - DEFINE_PROP_UINT64("sdram-base", AspeedSMCState, sdram_base, 0), DEFINE_PROP_LINK("dram", AspeedSMCState, dram_mr, TYPE_MEMORY_REGION, MemoryRegion *), DEFINE_PROP_END_OF_LIST(), @@ -1441,37 +1217,494 @@ static Property aspeed_smc_properties[] = { static void aspeed_smc_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - AspeedSMCClass *mc = ASPEED_SMC_CLASS(klass); dc->realize = aspeed_smc_realize; dc->reset = aspeed_smc_reset; device_class_set_props(dc, aspeed_smc_properties); dc->vmsd = &vmstate_aspeed_smc; - mc->ctrl = data; } static const TypeInfo aspeed_smc_info = { .name = TYPE_ASPEED_SMC, .parent = TYPE_SYS_BUS_DEVICE, + .instance_init = aspeed_smc_instance_init, .instance_size = sizeof(AspeedSMCState), .class_size = sizeof(AspeedSMCClass), + .class_init = aspeed_smc_class_init, .abstract = true, }; -static void aspeed_smc_register_types(void) +static void aspeed_smc_flash_realize(DeviceState *dev, Error **errp) { - int i; + AspeedSMCFlash *s = ASPEED_SMC_FLASH(dev); + AspeedSMCClass *asc; + g_autofree char *name = g_strdup_printf(TYPE_ASPEED_SMC_FLASH ".%d", s->cs); - type_register_static(&aspeed_smc_info); - for (i = 0; i < ARRAY_SIZE(controllers); ++i) { - TypeInfo ti = { - .name = controllers[i].name, - .parent = TYPE_ASPEED_SMC, - .class_init = aspeed_smc_class_init, - .class_data = (void *)&controllers[i], - }; - type_register(&ti); + if (!s->controller) { + error_setg(errp, TYPE_ASPEED_SMC_FLASH ": 'controller' link not set"); + return; + } + + asc = ASPEED_SMC_GET_CLASS(s->controller); + + /* + * Use the default segment value to size the memory region. This + * can be changed by FW at runtime. 
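+ * (the guest can later resize it by reprogramming the controller's Segment Address Register).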
+ */ + memory_region_init_io(&s->mmio, OBJECT(s), &aspeed_smc_flash_ops, + s, name, asc->segments[s->cs].size); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio); +} + +static Property aspeed_smc_flash_properties[] = { + DEFINE_PROP_UINT8("cs", AspeedSMCFlash, cs, 0), + DEFINE_PROP_LINK("controller", AspeedSMCFlash, controller, TYPE_ASPEED_SMC, + AspeedSMCState *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void aspeed_smc_flash_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "Aspeed SMC Flash device region"; + dc->realize = aspeed_smc_flash_realize; + device_class_set_props(dc, aspeed_smc_flash_properties); +} + +static const TypeInfo aspeed_smc_flash_info = { + .name = TYPE_ASPEED_SMC_FLASH, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(AspeedSMCFlash), + .class_init = aspeed_smc_flash_class_init, +}; + +/* + * The Segment Registers of the AST2400 and AST2500 have a 8MB + * unit. The address range of a flash SPI peripheral is encoded with + * absolute addresses which should be part of the overall controller + * window. + */ +static uint32_t aspeed_smc_segment_to_reg(const AspeedSMCState *s, + const AspeedSegments *seg) +{ + uint32_t reg = 0; + reg |= ((seg->addr >> 23) & SEG_START_MASK) << SEG_START_SHIFT; + reg |= (((seg->addr + seg->size) >> 23) & SEG_END_MASK) << SEG_END_SHIFT; + return reg; +} + +static void aspeed_smc_reg_to_segment(const AspeedSMCState *s, + uint32_t reg, AspeedSegments *seg) +{ + seg->addr = ((reg >> SEG_START_SHIFT) & SEG_START_MASK) << 23; + seg->size = (((reg >> SEG_END_SHIFT) & SEG_END_MASK) << 23) - seg->addr; +} + +static const AspeedSegments aspeed_2400_smc_segments[] = { + { 0x10000000, 32 * MiB }, +}; + +static void aspeed_2400_smc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2400 SMC Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 1; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 1; + asc->segments = aspeed_2400_smc_segments; + asc->flash_window_base = 0x10000000; + asc->flash_window_size = 0x6000000; + asc->features = 0x0; + asc->nregs = ASPEED_SMC_R_SMC_MAX; + asc->segment_to_reg = aspeed_smc_segment_to_reg; + asc->reg_to_segment = aspeed_smc_reg_to_segment; + asc->dma_ctrl = aspeed_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2400_smc_info = { + .name = "aspeed.smc-ast2400", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2400_smc_class_init, +}; + +static const uint32_t aspeed_2400_fmc_resets[ASPEED_SMC_R_MAX] = { + /* + * CE0 and CE1 types are HW strapped in SCU70. Do it here to + * simplify the model. 
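+ * Only CE0 is set to the SPI flash type here, matching the palmetto-bmc configuration used by the previous implementation.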
+ */ + [R_CONF] = CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE0, +}; + +static const AspeedSegments aspeed_2400_fmc_segments[] = { + { 0x20000000, 64 * MiB }, /* start address is readonly */ + { 0x24000000, 32 * MiB }, + { 0x26000000, 32 * MiB }, + { 0x28000000, 32 * MiB }, + { 0x2A000000, 32 * MiB } +}; + +static void aspeed_2400_fmc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2400 FMC Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 1; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 5; + asc->segments = aspeed_2400_fmc_segments; + asc->resets = aspeed_2400_fmc_resets; + asc->flash_window_base = 0x20000000; + asc->flash_window_size = 0x10000000; + asc->features = ASPEED_SMC_FEATURE_DMA; + asc->dma_flash_mask = 0x0FFFFFFC; + asc->dma_dram_mask = 0x1FFFFFFC; + asc->nregs = ASPEED_SMC_R_MAX; + asc->segment_to_reg = aspeed_smc_segment_to_reg; + asc->reg_to_segment = aspeed_smc_reg_to_segment; + asc->dma_ctrl = aspeed_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2400_fmc_info = { + .name = "aspeed.fmc-ast2400", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2400_fmc_class_init, +}; + +static const AspeedSegments aspeed_2400_spi1_segments[] = { + { 0x30000000, 64 * MiB }, +}; + +static int aspeed_2400_spi1_addr_width(const AspeedSMCState *s) +{ + return s->regs[R_SPI_CTRL0] & CTRL_AST2400_SPI_4BYTE ? 4 : 3; +} + +static void aspeed_2400_spi1_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2400 SPI1 Controller"; + asc->r_conf = R_SPI_CONF; + asc->r_ce_ctrl = 0xff; + asc->r_ctrl0 = R_SPI_CTRL0; + asc->r_timings = R_SPI_TIMINGS; + asc->nregs_timings = 1; + asc->conf_enable_w0 = SPI_CONF_ENABLE_W0; + asc->max_peripherals = 1; + asc->segments = aspeed_2400_spi1_segments; + asc->flash_window_base = 0x30000000; + asc->flash_window_size = 0x10000000; + asc->features = 0x0; + asc->nregs = ASPEED_SMC_R_SPI_MAX; + asc->segment_to_reg = aspeed_smc_segment_to_reg; + asc->reg_to_segment = aspeed_smc_reg_to_segment; + asc->dma_ctrl = aspeed_smc_dma_ctrl; + asc->addr_width = aspeed_2400_spi1_addr_width; +} + +static const TypeInfo aspeed_2400_spi1_info = { + .name = "aspeed.spi1-ast2400", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2400_spi1_class_init, +}; + +static const uint32_t aspeed_2500_fmc_resets[ASPEED_SMC_R_MAX] = { + [R_CONF] = (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE0 | + CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE1), +}; + +static const AspeedSegments aspeed_2500_fmc_segments[] = { + { 0x20000000, 128 * MiB }, /* start address is readonly */ + { 0x28000000, 32 * MiB }, + { 0x2A000000, 32 * MiB }, +}; + +static void aspeed_2500_fmc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2500 FMC Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 1; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 3; + asc->segments = aspeed_2500_fmc_segments; + asc->resets = aspeed_2500_fmc_resets; + asc->flash_window_base = 0x20000000; + asc->flash_window_size = 0x10000000; + asc->features = ASPEED_SMC_FEATURE_DMA; + asc->dma_flash_mask = 0x0FFFFFFC;
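+ /* The AST2500 DMA can reach a 1GB DRAM space, hence a wider mask than on the AST2400 (0x1FFFFFFC) */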
+ asc->dma_dram_mask = 0x3FFFFFFC; + asc->nregs = ASPEED_SMC_R_MAX; + asc->segment_to_reg = aspeed_smc_segment_to_reg; + asc->reg_to_segment = aspeed_smc_reg_to_segment; + asc->dma_ctrl = aspeed_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2500_fmc_info = { + .name = "aspeed.fmc-ast2500", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2500_fmc_class_init, +}; + +static const AspeedSegments aspeed_2500_spi1_segments[] = { + { 0x30000000, 32 * MiB }, /* start address is readonly */ + { 0x32000000, 96 * MiB }, /* end address is readonly */ +}; + +static void aspeed_2500_spi1_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2500 SPI1 Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 1; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 2; + asc->segments = aspeed_2500_spi1_segments; + asc->flash_window_base = 0x30000000; + asc->flash_window_size = 0x8000000; + asc->features = 0x0; + asc->nregs = ASPEED_SMC_R_MAX; + asc->segment_to_reg = aspeed_smc_segment_to_reg; + asc->reg_to_segment = aspeed_smc_reg_to_segment; + asc->dma_ctrl = aspeed_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2500_spi1_info = { + .name = "aspeed.spi1-ast2500", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2500_spi1_class_init, +}; + +static const AspeedSegments aspeed_2500_spi2_segments[] = { + { 0x38000000, 32 * MiB }, /* start address is readonly */ + { 0x3A000000, 96 * MiB }, /* end address is readonly */ +}; + +static void aspeed_2500_spi2_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2500 SPI2 Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 1; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 2; + asc->segments = aspeed_2500_spi2_segments; + asc->flash_window_base = 0x38000000; + asc->flash_window_size = 0x8000000; + asc->features = 0x0; + asc->nregs = ASPEED_SMC_R_MAX; + asc->segment_to_reg = aspeed_smc_segment_to_reg; + asc->reg_to_segment = aspeed_smc_reg_to_segment; + asc->dma_ctrl = aspeed_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2500_spi2_info = { + .name = "aspeed.spi2-ast2500", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2500_spi2_class_init, +}; + +/* + * The Segment Registers of the AST2600 have a 1MB unit. The address + * range of a flash SPI peripheral is encoded with offsets in the overall + * controller window. The previous SoC AST2400 and AST2500 used + * absolute addresses. Only bits [27:20] are relevant and the end + * address is an upper bound limit.
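+ * For example, the default 128MB mapping of the FMC CE0 (0x20000000 - 0x27FFFFFF)
+ * is encoded as 0x07F00000: start offset 0x00 in reg[11:4], end offset 0x7F in reg[27:20].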
+ */ +#define AST2600_SEG_ADDR_MASK 0x0ff00000 + +static uint32_t aspeed_2600_smc_segment_to_reg(const AspeedSMCState *s, + const AspeedSegments *seg) +{ + uint32_t reg = 0; + + /* Disabled segments have a nil register */ + if (!seg->size) { + return 0; + } + + reg |= (seg->addr & AST2600_SEG_ADDR_MASK) >> 16; /* start offset */ + reg |= (seg->addr + seg->size - 1) & AST2600_SEG_ADDR_MASK; /* end offset */ + return reg; +} + +static void aspeed_2600_smc_reg_to_segment(const AspeedSMCState *s, + uint32_t reg, AspeedSegments *seg) +{ + uint32_t start_offset = (reg << 16) & AST2600_SEG_ADDR_MASK; + uint32_t end_offset = reg & AST2600_SEG_ADDR_MASK; + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); + + if (reg) { + seg->addr = asc->flash_window_base + start_offset; + seg->size = end_offset + MiB - start_offset; + } else { + seg->addr = asc->flash_window_base; + seg->size = 0; } } +static const uint32_t aspeed_2600_fmc_resets[ASPEED_SMC_R_MAX] = { + [R_CONF] = (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE0 | + CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE1 | + CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE2), +}; + +static const AspeedSegments aspeed_2600_fmc_segments[] = { + { 0x0, 128 * MiB }, /* start address is readonly */ + { 128 * MiB, 128 * MiB }, /* default is disabled but needed for -kernel */ + { 0x0, 0 }, /* disabled */ +}; + +static void aspeed_2600_fmc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2600 FMC Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 1; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 3; + asc->segments = aspeed_2600_fmc_segments; + asc->resets = aspeed_2600_fmc_resets; + asc->flash_window_base = 0x20000000; + asc->flash_window_size = 0x10000000; + asc->features = ASPEED_SMC_FEATURE_DMA | + ASPEED_SMC_FEATURE_WDT_CONTROL; + asc->dma_flash_mask = 0x0FFFFFFC; + asc->dma_dram_mask = 0x3FFFFFFC; + asc->nregs = ASPEED_SMC_R_MAX; + asc->segment_to_reg = aspeed_2600_smc_segment_to_reg; + asc->reg_to_segment = aspeed_2600_smc_reg_to_segment; + asc->dma_ctrl = aspeed_2600_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2600_fmc_info = { + .name = "aspeed.fmc-ast2600", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2600_fmc_class_init, +}; + +static const AspeedSegments aspeed_2600_spi1_segments[] = { + { 0x0, 128 * MiB }, /* start address is readonly */ + { 0x0, 0 }, /* disabled */ +}; + +static void aspeed_2600_spi1_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2600 SPI1 Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 2; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 2; + asc->segments = aspeed_2600_spi1_segments; + asc->flash_window_base = 0x30000000; + asc->flash_window_size = 0x10000000; + asc->features = ASPEED_SMC_FEATURE_DMA | + ASPEED_SMC_FEATURE_DMA_GRANT; + asc->dma_flash_mask = 0x0FFFFFFC; + asc->dma_dram_mask = 0x3FFFFFFC; + asc->nregs = ASPEED_SMC_R_MAX; + asc->segment_to_reg = aspeed_2600_smc_segment_to_reg; + asc->reg_to_segment = aspeed_2600_smc_reg_to_segment; + asc->dma_ctrl = aspeed_2600_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2600_spi1_info = { + .name = "aspeed.spi1-ast2600", + .parent = TYPE_ASPEED_SMC, + 
.class_init = aspeed_2600_spi1_class_init, +}; + +static const AspeedSegments aspeed_2600_spi2_segments[] = { + { 0x0, 128 * MiB }, /* start address is readonly */ + { 0x0, 0 }, /* disabled */ + { 0x0, 0 }, /* disabled */ +}; + +static void aspeed_2600_spi2_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2600 SPI2 Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 3; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 3; + asc->segments = aspeed_2600_spi2_segments; + asc->flash_window_base = 0x50000000; + asc->flash_window_size = 0x10000000; + asc->features = ASPEED_SMC_FEATURE_DMA | + ASPEED_SMC_FEATURE_DMA_GRANT; + asc->dma_flash_mask = 0x0FFFFFFC; + asc->dma_dram_mask = 0x3FFFFFFC; + asc->nregs = ASPEED_SMC_R_MAX; + asc->segment_to_reg = aspeed_2600_smc_segment_to_reg; + asc->reg_to_segment = aspeed_2600_smc_reg_to_segment; + asc->dma_ctrl = aspeed_2600_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2600_spi2_info = { + .name = "aspeed.spi2-ast2600", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2600_spi2_class_init, +}; + +static void aspeed_smc_register_types(void) +{ + type_register_static(&aspeed_smc_flash_info); + type_register_static(&aspeed_smc_info); + type_register_static(&aspeed_2400_smc_info); + type_register_static(&aspeed_2400_fmc_info); + type_register_static(&aspeed_2400_spi1_info); + type_register_static(&aspeed_2500_fmc_info); + type_register_static(&aspeed_2500_spi1_info); + type_register_static(&aspeed_2500_spi2_info); + type_register_static(&aspeed_2600_fmc_info); + type_register_static(&aspeed_2600_spi1_info); + type_register_static(&aspeed_2600_spi2_info); +} + type_init(aspeed_smc_register_types) diff --git a/hw/ssi/sifive_spi.c b/hw/ssi/sifive_spi.c index 0c9ebca3c86..03540cf5ca6 100644 --- a/hw/ssi/sifive_spi.c +++ b/hw/ssi/sifive_spi.c @@ -24,7 +24,6 @@ #include "hw/qdev-properties.h" #include "hw/sysbus.h" #include "hw/ssi/ssi.h" -#include "sysemu/sysemu.h" #include "qemu/fifo8.h" #include "qemu/log.h" #include "qemu/module.h" diff --git a/hw/ssi/ssi.c b/hw/ssi/ssi.c index e5d7ce95237..003931fb509 100644 --- a/hw/ssi/ssi.c +++ b/hw/ssi/ssi.c @@ -107,7 +107,7 @@ DeviceState *ssi_create_peripheral(SSIBus *bus, const char *name) SSIBus *ssi_create_bus(DeviceState *parent, const char *name) { BusState *bus; - bus = qbus_create(TYPE_SSI_BUS, parent, name); + bus = qbus_new(TYPE_SSI_BUS, parent, name); return SSI_BUS(bus); } diff --git a/hw/ssi/xilinx_spi.c b/hw/ssi/xilinx_spi.c index 49ff2755935..b2819a7ff09 100644 --- a/hw/ssi/xilinx_spi.c +++ b/hw/ssi/xilinx_spi.c @@ -27,7 +27,6 @@ #include "qemu/osdep.h" #include "hw/sysbus.h" #include "migration/vmstate.h" -#include "qemu/log.h" #include "qemu/module.h" #include "qemu/fifo8.h" diff --git a/hw/timer/Kconfig b/hw/timer/Kconfig index bac25117155..010be7ed1f5 100644 --- a/hw/timer/Kconfig +++ b/hw/timer/Kconfig @@ -25,6 +25,9 @@ config ALLWINNER_A10_PIT bool select PTIMER +config SIFIVE_PWM + bool + config STM32F2XX_TIMER bool @@ -52,5 +55,8 @@ config SSE_COUNTER config SSE_TIMER bool +config STELLARIS_GPTM + bool + config AVR_TIMER16 bool diff --git a/hw/timer/armv7m_systick.c b/hw/timer/armv7m_systick.c index 2f192011eb0..3bd951dd044 100644 --- a/hw/timer/armv7m_systick.c +++ b/hw/timer/armv7m_systick.c @@ -14,28 +14,32 @@ #include "migration/vmstate.h" #include "hw/irq.h" #include 
"hw/sysbus.h" +#include "hw/qdev-clock.h" #include "qemu/timer.h" #include "qemu/log.h" #include "qemu/module.h" +#include "qapi/error.h" #include "trace.h" -/* qemu timers run at 1GHz. We want something closer to 1MHz. */ -#define SYSTICK_SCALE 1000ULL - #define SYSTICK_ENABLE (1 << 0) #define SYSTICK_TICKINT (1 << 1) #define SYSTICK_CLKSOURCE (1 << 2) #define SYSTICK_COUNTFLAG (1 << 16) -int system_clock_scale; +#define SYSCALIB_NOREF (1U << 31) +#define SYSCALIB_SKEW (1U << 30) +#define SYSCALIB_TENMS ((1U << 24) - 1) -/* Conversion factor from qemu timer to SysTick frequencies. */ -static inline int64_t systick_scale(SysTickState *s) +static void systick_set_period_from_clock(SysTickState *s) { + /* + * Set the ptimer period from whichever clock is selected. + * Must be called from within a ptimer transaction block. + */ if (s->control & SYSTICK_CLKSOURCE) { - return system_clock_scale; + ptimer_set_period_from_clock(s->ptimer, s->cpuclk, 1); } else { - return 1000; + ptimer_set_period_from_clock(s->ptimer, s->refclk, 1); } } @@ -82,7 +86,28 @@ static MemTxResult systick_read(void *opaque, hwaddr addr, uint64_t *data, val = ptimer_get_count(s->ptimer); break; case 0xc: /* SysTick Calibration Value. */ - val = 10000; + /* + * In real hardware it is possible to make this register report + * a different value from what the reference clock is actually + * running at. We don't model that (which usually happens due + * to integration errors in the real hardware) and instead always + * report the theoretical correct value as described in the + * knowledgebase article at + * https://developer.arm.com/documentation/ka001325/latest + * If necessary, we could implement an extra QOM property on this + * device to force the STCALIB value to something different from + * the "correct" value. + */ + if (!clock_has_source(s->refclk)) { + val = SYSCALIB_NOREF; + break; + } + val = clock_ns_to_ticks(s->refclk, 10 * SCALE_MS) - 1; + val &= SYSCALIB_TENMS; + if (clock_ticks_to_ns(s->refclk, val + 1) != 10 * SCALE_MS) { + /* report that tick count does not yield exactly 10ms */ + val |= SYSCALIB_SKEW; + } break; default: val = 0; @@ -114,6 +139,11 @@ static MemTxResult systick_write(void *opaque, hwaddr addr, { uint32_t oldval; + if (!clock_has_source(s->refclk)) { + /* This bit is always 1 if there is no external refclk */ + value |= SYSTICK_CLKSOURCE; + } + ptimer_transaction_begin(s->ptimer); oldval = s->control; s->control &= 0xfffffff8; @@ -121,19 +151,14 @@ static MemTxResult systick_write(void *opaque, hwaddr addr, if ((oldval ^ value) & SYSTICK_ENABLE) { if (value & SYSTICK_ENABLE) { - /* - * Always reload the period in case board code has - * changed system_clock_scale. If we ever replace that - * global with a more sensible API then we might be able - * to set the period only when it actually changes. - */ - ptimer_set_period(s->ptimer, systick_scale(s)); ptimer_run(s->ptimer, 0); } else { ptimer_stop(s->ptimer); } - } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) { - ptimer_set_period(s->ptimer, systick_scale(s)); + } + + if ((oldval ^ value) & SYSTICK_CLKSOURCE) { + systick_set_period_from_clock(s); } ptimer_transaction_commit(s->ptimer); break; @@ -176,20 +201,42 @@ static void systick_reset(DeviceState *dev) { SysTickState *s = SYSTICK(dev); - /* - * Forgetting to set system_clock_scale is always a board code - * bug. We can't check this earlier because for some boards - * (like stellaris) it is not yet configured at the point where - * the systick device is realized. 
- */ - assert(system_clock_scale != 0); - ptimer_transaction_begin(s->ptimer); s->control = 0; + if (!clock_has_source(s->refclk)) { + /* This bit is always 1 if there is no external refclk */ + s->control |= SYSTICK_CLKSOURCE; + } ptimer_stop(s->ptimer); ptimer_set_count(s->ptimer, 0); ptimer_set_limit(s->ptimer, 0, 0); - ptimer_set_period(s->ptimer, systick_scale(s)); + systick_set_period_from_clock(s); + ptimer_transaction_commit(s->ptimer); +} + +static void systick_cpuclk_update(void *opaque, ClockEvent event) +{ + SysTickState *s = SYSTICK(opaque); + + if (!(s->control & SYSTICK_CLKSOURCE)) { + /* currently using refclk, we can ignore cpuclk changes */ + return; + } + + ptimer_transaction_begin(s->ptimer); + ptimer_set_period_from_clock(s->ptimer, s->cpuclk, 1); + ptimer_transaction_commit(s->ptimer); +} + +static void systick_refclk_update(void *opaque, ClockEvent event) +{ + SysTickState *s = SYSTICK(opaque); + + if (s->control & SYSTICK_CLKSOURCE) { + /* currently using cpuclk, we can ignore refclk changes */ + return; + } + + ptimer_transaction_begin(s->ptimer); + ptimer_set_period_from_clock(s->ptimer, s->refclk, 1); ptimer_transaction_commit(s->ptimer); } @@ -201,6 +248,11 @@ static void systick_instance_init(Object *obj) memory_region_init_io(&s->iomem, obj, &systick_ops, s, "systick", 0xe0); sysbus_init_mmio(sbd, &s->iomem); sysbus_init_irq(sbd, &s->irq); + + s->refclk = qdev_init_clock_in(DEVICE(obj), "refclk", + systick_refclk_update, s, ClockUpdate); + s->cpuclk = qdev_init_clock_in(DEVICE(obj), "cpuclk", + systick_cpuclk_update, s, ClockUpdate); } static void systick_realize(DeviceState *dev, Error **errp) @@ -211,13 +263,21 @@ static void systick_realize(DeviceState *dev, Error **errp) PTIMER_POLICY_NO_COUNTER_ROUND_DOWN | PTIMER_POLICY_NO_IMMEDIATE_RELOAD | PTIMER_POLICY_TRIGGER_ONLY_ON_DECREMENT); + + if (!clock_has_source(s->cpuclk)) { + error_setg(errp, "systick: cpuclk must be connected"); + return; + } + /* It's OK not to connect the refclk */ } static const VMStateDescription vmstate_systick = { .name = "armv7m_systick", - .version_id = 2, - .minimum_version_id = 2, + .version_id = 3, + .minimum_version_id = 3, .fields = (VMStateField[]) { + VMSTATE_CLOCK(refclk, SysTickState), + VMSTATE_CLOCK(cpuclk, SysTickState), VMSTATE_UINT32(control, SysTickState), VMSTATE_INT64(tick, SysTickState), VMSTATE_PTIMER(ptimer, SysTickState), diff --git a/hw/timer/etraxfs_timer.c b/hw/timer/etraxfs_timer.c index 5379006086f..4ba662190de 100644 --- a/hw/timer/etraxfs_timer.c +++ b/hw/timer/etraxfs_timer.c @@ -309,9 +309,9 @@ static const MemoryRegionOps timer_ops = { } }; -static void etraxfs_timer_reset(void *opaque) +static void etraxfs_timer_reset_enter(Object *obj, ResetType type) { - ETRAXTimerState *t = opaque; + ETRAXTimerState *t = ETRAX_TIMER(obj); ptimer_transaction_begin(t->ptimer_t0); ptimer_stop(t->ptimer_t0); @@ -325,6 +325,12 @@ static void etraxfs_timer_reset(void *opaque) t->rw_wd_ctrl = 0; t->r_intr = 0; t->rw_intr_mask = 0; +} + +static void etraxfs_timer_reset_hold(Object *obj) +{ + ETRAXTimerState *t = ETRAX_TIMER(obj); + qemu_irq_lower(t->irq); } @@ -343,14 +349,16 @@ static void etraxfs_timer_realize(DeviceState *dev, Error **errp) memory_region_init_io(&t->mmio, OBJECT(t), &timer_ops, t, "etraxfs-timer", 0x5c); sysbus_init_mmio(sbd, &t->mmio); - qemu_register_reset(etraxfs_timer_reset, t); } static void etraxfs_timer_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); dc->realize = 
etraxfs_timer_realize; + rc->phases.enter = etraxfs_timer_reset_enter; + rc->phases.hold = etraxfs_timer_reset_hold; } static const TypeInfo etraxfs_timer_info = { diff --git a/hw/timer/ibex_timer.c b/hw/timer/ibex_timer.c new file mode 100644 index 00000000000..66e1f8e48cb --- /dev/null +++ b/hw/timer/ibex_timer.c @@ -0,0 +1,312 @@ +/* + * QEMU lowRISC Ibex Timer device + * + * Copyright (c) 2021 Western Digital + * + * For details check the documentation here: + * https://docs.opentitan.org/hw/ip/rv_timer/doc/ + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/timer.h" +#include "hw/timer/ibex_timer.h" +#include "hw/irq.h" +#include "hw/qdev-properties.h" +#include "target/riscv/cpu.h" +#include "migration/vmstate.h" + +REG32(CTRL, 0x00) + FIELD(CTRL, ACTIVE, 0, 1) +REG32(CFG0, 0x100) + FIELD(CFG0, PRESCALE, 0, 12) + FIELD(CFG0, STEP, 16, 8) +REG32(LOWER0, 0x104) +REG32(UPPER0, 0x108) +REG32(COMPARE_LOWER0, 0x10C) +REG32(COMPARE_UPPER0, 0x110) +REG32(INTR_ENABLE, 0x114) + FIELD(INTR_ENABLE, IE_0, 0, 1) +REG32(INTR_STATE, 0x118) + FIELD(INTR_STATE, IS_0, 0, 1) +REG32(INTR_TEST, 0x11C) + FIELD(INTR_TEST, T_0, 0, 1) + +static uint64_t cpu_riscv_read_rtc(uint32_t timebase_freq) +{ + return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + timebase_freq, NANOSECONDS_PER_SECOND); +} + +static void ibex_timer_update_irqs(IbexTimerState *s) +{ + CPUState *cs = qemu_get_cpu(0); + RISCVCPU *cpu = RISCV_CPU(cs); + uint64_t value = s->timer_compare_lower0 | + ((uint64_t)s->timer_compare_upper0 << 32); + uint64_t next, diff; + uint64_t now = cpu_riscv_read_rtc(s->timebase_freq); + + if (!(s->timer_ctrl & R_CTRL_ACTIVE_MASK)) { + /* Timer isn't active */ + return; + } + + /* Update the CPUs mtimecmp */ + cpu->env.timecmp = value; + + if (cpu->env.timecmp <= now) { + /* + * If the mtimecmp was in the past raise the interrupt now. 
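+ * No additional QEMU timer needs to be armed in this case.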
+ */ + qemu_irq_raise(s->m_timer_irq); + if (s->timer_intr_enable & R_INTR_ENABLE_IE_0_MASK) { + s->timer_intr_state |= R_INTR_STATE_IS_0_MASK; + qemu_set_irq(s->irq, true); + } + return; + } + + /* Setup a timer to trigger the interrupt in the future */ + qemu_irq_lower(s->m_timer_irq); + qemu_set_irq(s->irq, false); + + diff = cpu->env.timecmp - now; + next = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + muldiv64(diff, + NANOSECONDS_PER_SECOND, + s->timebase_freq); + + if (next < qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)) { + /* We overflowed the timer, just set it as large as we can */ + timer_mod(cpu->env.timer, 0x7FFFFFFFFFFFFFFF); + } else { + timer_mod(cpu->env.timer, next); + } +} + +static void ibex_timer_cb(void *opaque) +{ + IbexTimerState *s = opaque; + + qemu_irq_raise(s->m_timer_irq); + if (s->timer_intr_enable & R_INTR_ENABLE_IE_0_MASK) { + s->timer_intr_state |= R_INTR_STATE_IS_0_MASK; + qemu_set_irq(s->irq, true); + } +} + +static void ibex_timer_reset(DeviceState *dev) +{ + IbexTimerState *s = IBEX_TIMER(dev); + + CPUState *cpu = qemu_get_cpu(0); + CPURISCVState *env = cpu->env_ptr; + env->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + &ibex_timer_cb, s); + env->timecmp = 0; + + s->timer_ctrl = 0x00000000; + s->timer_cfg0 = 0x00010000; + s->timer_compare_lower0 = 0xFFFFFFFF; + s->timer_compare_upper0 = 0xFFFFFFFF; + s->timer_intr_enable = 0x00000000; + s->timer_intr_state = 0x00000000; + s->timer_intr_test = 0x00000000; + + ibex_timer_update_irqs(s); +} + +static uint64_t ibex_timer_read(void *opaque, hwaddr addr, + unsigned int size) +{ + IbexTimerState *s = opaque; + uint64_t now = cpu_riscv_read_rtc(s->timebase_freq); + uint64_t retvalue = 0; + + switch (addr >> 2) { + case R_CTRL: + retvalue = s->timer_ctrl; + break; + case R_CFG0: + retvalue = s->timer_cfg0; + break; + case R_LOWER0: + retvalue = now; + break; + case R_UPPER0: + retvalue = now >> 32; + break; + case R_COMPARE_LOWER0: + retvalue = s->timer_compare_lower0; + break; + case R_COMPARE_UPPER0: + retvalue = s->timer_compare_upper0; + break; + case R_INTR_ENABLE: + retvalue = s->timer_intr_enable; + break; + case R_INTR_STATE: + retvalue = s->timer_intr_state; + break; + case R_INTR_TEST: + retvalue = s->timer_intr_test; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%"HWADDR_PRIx"\n", __func__, addr); + return 0; + } + + return retvalue; +} + +static void ibex_timer_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + IbexTimerState *s = opaque; + uint32_t val = val64; + + switch (addr >> 2) { + case R_CTRL: + s->timer_ctrl = val; + break; + case R_CFG0: + qemu_log_mask(LOG_UNIMP, "Changing prescale or step not supported"); + s->timer_cfg0 = val; + break; + case R_LOWER0: + qemu_log_mask(LOG_UNIMP, "Changing timer value is not supported"); + break; + case R_UPPER0: + qemu_log_mask(LOG_UNIMP, "Changing timer value is not supported"); + break; + case R_COMPARE_LOWER0: + s->timer_compare_lower0 = val; + ibex_timer_update_irqs(s); + break; + case R_COMPARE_UPPER0: + s->timer_compare_upper0 = val; + ibex_timer_update_irqs(s); + break; + case R_INTR_ENABLE: + s->timer_intr_enable = val; + break; + case R_INTR_STATE: + /* Write 1 to clear */ + s->timer_intr_state &= ~val; + break; + case R_INTR_TEST: + s->timer_intr_test = val; + if (s->timer_intr_enable & + s->timer_intr_test & + R_INTR_ENABLE_IE_0_MASK) { + s->timer_intr_state |= R_INTR_STATE_IS_0_MASK; + qemu_set_irq(s->irq, true); + } + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%"HWADDR_PRIx"\n", 
__func__, addr); + } +} + +static const MemoryRegionOps ibex_timer_ops = { + .read = ibex_timer_read, + .write = ibex_timer_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl.min_access_size = 4, + .impl.max_access_size = 4, +}; + +static int ibex_timer_post_load(void *opaque, int version_id) +{ + IbexTimerState *s = opaque; + + ibex_timer_update_irqs(s); + return 0; +} + +static const VMStateDescription vmstate_ibex_timer = { + .name = TYPE_IBEX_TIMER, + .version_id = 1, + .minimum_version_id = 1, + .post_load = ibex_timer_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT32(timer_ctrl, IbexTimerState), + VMSTATE_UINT32(timer_cfg0, IbexTimerState), + VMSTATE_UINT32(timer_compare_lower0, IbexTimerState), + VMSTATE_UINT32(timer_compare_upper0, IbexTimerState), + VMSTATE_UINT32(timer_intr_enable, IbexTimerState), + VMSTATE_UINT32(timer_intr_state, IbexTimerState), + VMSTATE_UINT32(timer_intr_test, IbexTimerState), + VMSTATE_END_OF_LIST() + } +}; + +static Property ibex_timer_properties[] = { + DEFINE_PROP_UINT32("timebase-freq", IbexTimerState, timebase_freq, 10000), + DEFINE_PROP_END_OF_LIST(), +}; + +static void ibex_timer_init(Object *obj) +{ + IbexTimerState *s = IBEX_TIMER(obj); + + sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); + + memory_region_init_io(&s->mmio, obj, &ibex_timer_ops, s, + TYPE_IBEX_TIMER, 0x400); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); +} + +static void ibex_timer_realize(DeviceState *dev, Error **errp) +{ + IbexTimerState *s = IBEX_TIMER(dev); + + qdev_init_gpio_out(dev, &s->m_timer_irq, 1); +} + + +static void ibex_timer_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = ibex_timer_reset; + dc->vmsd = &vmstate_ibex_timer; + dc->realize = ibex_timer_realize; + device_class_set_props(dc, ibex_timer_properties); +} + +static const TypeInfo ibex_timer_info = { + .name = TYPE_IBEX_TIMER, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(IbexTimerState), + .instance_init = ibex_timer_init, + .class_init = ibex_timer_class_init, +}; + +static void ibex_timer_register_types(void) +{ + type_register_static(&ibex_timer_info); +} + +type_init(ibex_timer_register_types) diff --git a/hw/timer/lm32_timer.c b/hw/timer/lm32_timer.c deleted file mode 100644 index eeaf0ada5fa..00000000000 --- a/hw/timer/lm32_timer.c +++ /dev/null @@ -1,249 +0,0 @@ -/* - * QEMU model of the LatticeMico32 timer block. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- * - * - * Specification available at: - * http://www.latticesemi.com/documents/mico32timer.pdf - */ - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "qemu/timer.h" -#include "hw/ptimer.h" -#include "hw/qdev-properties.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -#define DEFAULT_FREQUENCY (50*1000000) - -enum { - R_SR = 0, - R_CR, - R_PERIOD, - R_SNAPSHOT, - R_MAX -}; - -enum { - SR_TO = (1 << 0), - SR_RUN = (1 << 1), -}; - -enum { - CR_ITO = (1 << 0), - CR_CONT = (1 << 1), - CR_START = (1 << 2), - CR_STOP = (1 << 3), -}; - -#define TYPE_LM32_TIMER "lm32-timer" -OBJECT_DECLARE_SIMPLE_TYPE(LM32TimerState, LM32_TIMER) - -struct LM32TimerState { - SysBusDevice parent_obj; - - MemoryRegion iomem; - - ptimer_state *ptimer; - - qemu_irq irq; - uint32_t freq_hz; - - uint32_t regs[R_MAX]; -}; - -static void timer_update_irq(LM32TimerState *s) -{ - int state = (s->regs[R_SR] & SR_TO) && (s->regs[R_CR] & CR_ITO); - - trace_lm32_timer_irq_state(state); - qemu_set_irq(s->irq, state); -} - -static uint64_t timer_read(void *opaque, hwaddr addr, unsigned size) -{ - LM32TimerState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_SR: - case R_CR: - case R_PERIOD: - r = s->regs[addr]; - break; - case R_SNAPSHOT: - r = (uint32_t)ptimer_get_count(s->ptimer); - break; - default: - error_report("lm32_timer: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_lm32_timer_memory_read(addr << 2, r); - return r; -} - -static void timer_write(void *opaque, hwaddr addr, - uint64_t value, unsigned size) -{ - LM32TimerState *s = opaque; - - trace_lm32_timer_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_SR: - s->regs[R_SR] &= ~SR_TO; - break; - case R_CR: - ptimer_transaction_begin(s->ptimer); - s->regs[R_CR] = value; - if (s->regs[R_CR] & CR_START) { - ptimer_run(s->ptimer, 1); - } - if (s->regs[R_CR] & CR_STOP) { - ptimer_stop(s->ptimer); - } - ptimer_transaction_commit(s->ptimer); - break; - case R_PERIOD: - s->regs[R_PERIOD] = value; - ptimer_transaction_begin(s->ptimer); - ptimer_set_count(s->ptimer, value); - ptimer_transaction_commit(s->ptimer); - break; - case R_SNAPSHOT: - error_report("lm32_timer: write access to read only register 0x" - TARGET_FMT_plx, addr << 2); - break; - default: - error_report("lm32_timer: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - timer_update_irq(s); -} - -static const MemoryRegionOps timer_ops = { - .read = timer_read, - .write = timer_write, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, -}; - -static void timer_hit(void *opaque) -{ - LM32TimerState *s = opaque; - - trace_lm32_timer_hit(); - - s->regs[R_SR] |= SR_TO; - - if (s->regs[R_CR] & CR_CONT) { - ptimer_set_count(s->ptimer, s->regs[R_PERIOD]); - ptimer_run(s->ptimer, 1); - } - timer_update_irq(s); -} - -static void timer_reset(DeviceState *d) -{ - LM32TimerState *s = LM32_TIMER(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - ptimer_transaction_begin(s->ptimer); - ptimer_stop(s->ptimer); - ptimer_transaction_commit(s->ptimer); -} - -static void lm32_timer_init(Object *obj) -{ - LM32TimerState *s = LM32_TIMER(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); - - sysbus_init_irq(dev, &s->irq); - - memory_region_init_io(&s->iomem, obj, &timer_ops, s, - "timer", R_MAX * 4); - sysbus_init_mmio(dev, 
&s->iomem); -} - -static void lm32_timer_realize(DeviceState *dev, Error **errp) -{ - LM32TimerState *s = LM32_TIMER(dev); - - s->ptimer = ptimer_init(timer_hit, s, PTIMER_POLICY_DEFAULT); - - ptimer_transaction_begin(s->ptimer); - ptimer_set_freq(s->ptimer, s->freq_hz); - ptimer_transaction_commit(s->ptimer); -} - -static const VMStateDescription vmstate_lm32_timer = { - .name = "lm32-timer", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_PTIMER(ptimer, LM32TimerState), - VMSTATE_UINT32(freq_hz, LM32TimerState), - VMSTATE_UINT32_ARRAY(regs, LM32TimerState, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static Property lm32_timer_properties[] = { - DEFINE_PROP_UINT32("frequency", LM32TimerState, freq_hz, DEFAULT_FREQUENCY), - DEFINE_PROP_END_OF_LIST(), -}; - -static void lm32_timer_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = lm32_timer_realize; - dc->reset = timer_reset; - dc->vmsd = &vmstate_lm32_timer; - device_class_set_props(dc, lm32_timer_properties); -} - -static const TypeInfo lm32_timer_info = { - .name = TYPE_LM32_TIMER, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(LM32TimerState), - .instance_init = lm32_timer_init, - .class_init = lm32_timer_class_init, -}; - -static void lm32_timer_register_types(void) -{ - type_register_static(&lm32_timer_info); -} - -type_init(lm32_timer_register_types) diff --git a/hw/timer/meson.build b/hw/timer/meson.build index 598d0585064..03092e2cebf 100644 --- a/hw/timer/meson.build +++ b/hw/timer/meson.build @@ -19,22 +19,22 @@ softmmu_ss.add(when: 'CONFIG_HPET', if_true: files('hpet.c')) softmmu_ss.add(when: 'CONFIG_I8254', if_true: files('i8254_common.c', 'i8254.c')) softmmu_ss.add(when: 'CONFIG_IMX', if_true: files('imx_epit.c')) softmmu_ss.add(when: 'CONFIG_IMX', if_true: files('imx_gpt.c')) -softmmu_ss.add(when: 'CONFIG_LM32_DEVICES', if_true: files('lm32_timer.c')) -softmmu_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-sysctl.c')) softmmu_ss.add(when: 'CONFIG_MIPS_CPS', if_true: files('mips_gictimer.c')) softmmu_ss.add(when: 'CONFIG_MSF2', if_true: files('mss-timer.c')) softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_timer.c')) softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_timer.c')) softmmu_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_gptimer.c')) softmmu_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_synctimer.c')) -softmmu_ss.add(when: 'CONFIG_PUV3', if_true: files('puv3_ost.c')) softmmu_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx_timer.c')) softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_systmr.c')) softmmu_ss.add(when: 'CONFIG_SH_TIMER', if_true: files('sh_timer.c')) softmmu_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_timer.c')) softmmu_ss.add(when: 'CONFIG_SSE_COUNTER', if_true: files('sse-counter.c')) softmmu_ss.add(when: 'CONFIG_SSE_TIMER', if_true: files('sse-timer.c')) +softmmu_ss.add(when: 'CONFIG_STELLARIS_GPTM', if_true: files('stellaris-gptm.c')) softmmu_ss.add(when: 'CONFIG_STM32F2XX_TIMER', if_true: files('stm32f2xx_timer.c')) softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_timer.c')) +specific_ss.add(when: 'CONFIG_IBEX', if_true: files('ibex_timer.c')) +softmmu_ss.add(when: 'CONFIG_SIFIVE_PWM', if_true: files('sifive_pwm.c')) specific_ss.add(when: 'CONFIG_AVR_TIMER16', if_true: files('avr_timer16.c')) diff --git a/hw/timer/milkymist-sysctl.c b/hw/timer/milkymist-sysctl.c deleted file mode 100644 index 9ecea63861c..00000000000 
--- a/hw/timer/milkymist-sysctl.c +++ /dev/null @@ -1,361 +0,0 @@ -/* - * QEMU model of the Milkymist System Controller. - * - * Copyright (c) 2010-2012 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/sysctl.pdf - */ - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "qemu/timer.h" -#include "sysemu/runstate.h" -#include "hw/ptimer.h" -#include "hw/qdev-properties.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -enum { - CTRL_ENABLE = (1<<0), - CTRL_AUTORESTART = (1<<1), -}; - -enum { - ICAP_READY = (1<<0), -}; - -enum { - R_GPIO_IN = 0, - R_GPIO_OUT, - R_GPIO_INTEN, - R_TIMER0_CONTROL = 4, - R_TIMER0_COMPARE, - R_TIMER0_COUNTER, - R_TIMER1_CONTROL = 8, - R_TIMER1_COMPARE, - R_TIMER1_COUNTER, - R_ICAP = 16, - R_DBG_SCRATCHPAD = 20, - R_DBG_WRITE_LOCK, - R_CLK_FREQUENCY = 29, - R_CAPABILITIES, - R_SYSTEM_ID, - R_MAX -}; - -#define TYPE_MILKYMIST_SYSCTL "milkymist-sysctl" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistSysctlState, MILKYMIST_SYSCTL) - -struct MilkymistSysctlState { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - - ptimer_state *ptimer0; - ptimer_state *ptimer1; - - uint32_t freq_hz; - uint32_t capabilities; - uint32_t systemid; - uint32_t strappings; - - uint32_t regs[R_MAX]; - - qemu_irq gpio_irq; - qemu_irq timer0_irq; - qemu_irq timer1_irq; -}; - -static void sysctl_icap_write(MilkymistSysctlState *s, uint32_t value) -{ - trace_milkymist_sysctl_icap_write(value); - switch (value & 0xffff) { - case 0x000e: - qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); - break; - } -} - -static uint64_t sysctl_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistSysctlState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_TIMER0_COUNTER: - r = (uint32_t)ptimer_get_count(s->ptimer0); - /* milkymist timer counts up */ - r = s->regs[R_TIMER0_COMPARE] - r; - break; - case R_TIMER1_COUNTER: - r = (uint32_t)ptimer_get_count(s->ptimer1); - /* milkymist timer counts up */ - r = s->regs[R_TIMER1_COMPARE] - r; - break; - case R_GPIO_IN: - case R_GPIO_OUT: - case R_GPIO_INTEN: - case R_TIMER0_CONTROL: - case R_TIMER0_COMPARE: - case R_TIMER1_CONTROL: - case R_TIMER1_COMPARE: - case R_ICAP: - case R_DBG_SCRATCHPAD: - case R_DBG_WRITE_LOCK: - case R_CLK_FREQUENCY: - case R_CAPABILITIES: - case R_SYSTEM_ID: - r = s->regs[addr]; - break; - - default: - error_report("milkymist_sysctl: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_sysctl_memory_read(addr << 2, r); - - return r; -} - -static void sysctl_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistSysctlState *s = opaque; - - trace_milkymist_sysctl_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case 
R_GPIO_OUT: - case R_GPIO_INTEN: - case R_TIMER0_COUNTER: - case R_TIMER1_COUNTER: - case R_DBG_SCRATCHPAD: - s->regs[addr] = value; - break; - case R_TIMER0_COMPARE: - ptimer_transaction_begin(s->ptimer0); - ptimer_set_limit(s->ptimer0, value, 0); - s->regs[addr] = value; - ptimer_transaction_commit(s->ptimer0); - break; - case R_TIMER1_COMPARE: - ptimer_transaction_begin(s->ptimer1); - ptimer_set_limit(s->ptimer1, value, 0); - s->regs[addr] = value; - ptimer_transaction_commit(s->ptimer1); - break; - case R_TIMER0_CONTROL: - ptimer_transaction_begin(s->ptimer0); - s->regs[addr] = value; - if (s->regs[R_TIMER0_CONTROL] & CTRL_ENABLE) { - trace_milkymist_sysctl_start_timer0(); - ptimer_set_count(s->ptimer0, - s->regs[R_TIMER0_COMPARE] - s->regs[R_TIMER0_COUNTER]); - ptimer_run(s->ptimer0, 0); - } else { - trace_milkymist_sysctl_stop_timer0(); - ptimer_stop(s->ptimer0); - } - ptimer_transaction_commit(s->ptimer0); - break; - case R_TIMER1_CONTROL: - ptimer_transaction_begin(s->ptimer1); - s->regs[addr] = value; - if (s->regs[R_TIMER1_CONTROL] & CTRL_ENABLE) { - trace_milkymist_sysctl_start_timer1(); - ptimer_set_count(s->ptimer1, - s->regs[R_TIMER1_COMPARE] - s->regs[R_TIMER1_COUNTER]); - ptimer_run(s->ptimer1, 0); - } else { - trace_milkymist_sysctl_stop_timer1(); - ptimer_stop(s->ptimer1); - } - ptimer_transaction_commit(s->ptimer1); - break; - case R_ICAP: - sysctl_icap_write(s, value); - break; - case R_DBG_WRITE_LOCK: - s->regs[addr] = 1; - break; - case R_SYSTEM_ID: - qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); - break; - - case R_GPIO_IN: - case R_CLK_FREQUENCY: - case R_CAPABILITIES: - error_report("milkymist_sysctl: write to read-only register 0x" - TARGET_FMT_plx, addr << 2); - break; - - default: - error_report("milkymist_sysctl: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } -} - -static const MemoryRegionOps sysctl_mmio_ops = { - .read = sysctl_read, - .write = sysctl_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void timer0_hit(void *opaque) -{ - MilkymistSysctlState *s = opaque; - - if (!(s->regs[R_TIMER0_CONTROL] & CTRL_AUTORESTART)) { - s->regs[R_TIMER0_CONTROL] &= ~CTRL_ENABLE; - trace_milkymist_sysctl_stop_timer0(); - ptimer_stop(s->ptimer0); - } - - trace_milkymist_sysctl_pulse_irq_timer0(); - qemu_irq_pulse(s->timer0_irq); -} - -static void timer1_hit(void *opaque) -{ - MilkymistSysctlState *s = opaque; - - if (!(s->regs[R_TIMER1_CONTROL] & CTRL_AUTORESTART)) { - s->regs[R_TIMER1_CONTROL] &= ~CTRL_ENABLE; - trace_milkymist_sysctl_stop_timer1(); - ptimer_stop(s->ptimer1); - } - - trace_milkymist_sysctl_pulse_irq_timer1(); - qemu_irq_pulse(s->timer1_irq); -} - -static void milkymist_sysctl_reset(DeviceState *d) -{ - MilkymistSysctlState *s = MILKYMIST_SYSCTL(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - - ptimer_transaction_begin(s->ptimer0); - ptimer_stop(s->ptimer0); - ptimer_transaction_commit(s->ptimer0); - ptimer_transaction_begin(s->ptimer1); - ptimer_stop(s->ptimer1); - ptimer_transaction_commit(s->ptimer1); - - /* defaults */ - s->regs[R_ICAP] = ICAP_READY; - s->regs[R_SYSTEM_ID] = s->systemid; - s->regs[R_CLK_FREQUENCY] = s->freq_hz; - s->regs[R_CAPABILITIES] = s->capabilities; - s->regs[R_GPIO_IN] = s->strappings; -} - -static void milkymist_sysctl_init(Object *obj) -{ - MilkymistSysctlState *s = MILKYMIST_SYSCTL(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); - - sysbus_init_irq(dev, &s->gpio_irq); - 
sysbus_init_irq(dev, &s->timer0_irq); - sysbus_init_irq(dev, &s->timer1_irq); - - memory_region_init_io(&s->regs_region, obj, &sysctl_mmio_ops, s, - "milkymist-sysctl", R_MAX * 4); - sysbus_init_mmio(dev, &s->regs_region); -} - -static void milkymist_sysctl_realize(DeviceState *dev, Error **errp) -{ - MilkymistSysctlState *s = MILKYMIST_SYSCTL(dev); - - s->ptimer0 = ptimer_init(timer0_hit, s, PTIMER_POLICY_DEFAULT); - s->ptimer1 = ptimer_init(timer1_hit, s, PTIMER_POLICY_DEFAULT); - - ptimer_transaction_begin(s->ptimer0); - ptimer_set_freq(s->ptimer0, s->freq_hz); - ptimer_transaction_commit(s->ptimer0); - ptimer_transaction_begin(s->ptimer1); - ptimer_set_freq(s->ptimer1, s->freq_hz); - ptimer_transaction_commit(s->ptimer1); -} - -static const VMStateDescription vmstate_milkymist_sysctl = { - .name = "milkymist-sysctl", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistSysctlState, R_MAX), - VMSTATE_PTIMER(ptimer0, MilkymistSysctlState), - VMSTATE_PTIMER(ptimer1, MilkymistSysctlState), - VMSTATE_END_OF_LIST() - } -}; - -static Property milkymist_sysctl_properties[] = { - DEFINE_PROP_UINT32("frequency", MilkymistSysctlState, - freq_hz, 80000000), - DEFINE_PROP_UINT32("capabilities", MilkymistSysctlState, - capabilities, 0x00000000), - DEFINE_PROP_UINT32("systemid", MilkymistSysctlState, - systemid, 0x10014d31), - DEFINE_PROP_UINT32("gpio_strappings", MilkymistSysctlState, - strappings, 0x00000001), - DEFINE_PROP_END_OF_LIST(), -}; - -static void milkymist_sysctl_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_sysctl_realize; - dc->reset = milkymist_sysctl_reset; - dc->vmsd = &vmstate_milkymist_sysctl; - device_class_set_props(dc, milkymist_sysctl_properties); -} - -static const TypeInfo milkymist_sysctl_info = { - .name = TYPE_MILKYMIST_SYSCTL, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistSysctlState), - .instance_init = milkymist_sysctl_init, - .class_init = milkymist_sysctl_class_init, -}; - -static void milkymist_sysctl_register_types(void) -{ - type_register_static(&milkymist_sysctl_info); -} - -type_init(milkymist_sysctl_register_types) diff --git a/hw/timer/mips_gictimer.c b/hw/timer/mips_gictimer.c index bc44cd934e8..2b0696d4acb 100644 --- a/hw/timer/mips_gictimer.c +++ b/hw/timer/mips_gictimer.c @@ -7,7 +7,6 @@ */ #include "qemu/osdep.h" -#include "hw/sysbus.h" #include "qemu/timer.h" #include "hw/timer/mips_gictimer.h" diff --git a/hw/timer/puv3_ost.c b/hw/timer/puv3_ost.c deleted file mode 100644 index d5bf26b56bc..00000000000 --- a/hw/timer/puv3_ost.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * OSTimer device simulation in PKUnity SoC - * - * Copyright (C) 2010-2012 Guan Xuetao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation, or any later version. - * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "hw/sysbus.h" -#include "hw/irq.h" -#include "hw/ptimer.h" -#include "qemu/module.h" -#include "qemu/log.h" -#include "qom/object.h" - -#undef DEBUG_PUV3 -#include "hw/unicore32/puv3.h" - -#define TYPE_PUV3_OST "puv3_ost" -OBJECT_DECLARE_SIMPLE_TYPE(PUV3OSTState, PUV3_OST) - -/* puv3 ostimer implementation. 
*/ -struct PUV3OSTState { - SysBusDevice parent_obj; - - MemoryRegion iomem; - qemu_irq irq; - ptimer_state *ptimer; - - uint32_t reg_OSMR0; - uint32_t reg_OSCR; - uint32_t reg_OSSR; - uint32_t reg_OIER; -}; - -static uint64_t puv3_ost_read(void *opaque, hwaddr offset, - unsigned size) -{ - PUV3OSTState *s = opaque; - uint32_t ret = 0; - - switch (offset) { - case 0x10: /* Counter Register */ - ret = s->reg_OSMR0 - (uint32_t)ptimer_get_count(s->ptimer); - break; - case 0x14: /* Status Register */ - ret = s->reg_OSSR; - break; - case 0x1c: /* Interrupt Enable Register */ - ret = s->reg_OIER; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad read offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } - DPRINTF("offset 0x%x, value 0x%x\n", offset, ret); - return ret; -} - -static void puv3_ost_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) -{ - PUV3OSTState *s = opaque; - - DPRINTF("offset 0x%x, value 0x%x\n", offset, value); - switch (offset) { - case 0x00: /* Match Register 0 */ - ptimer_transaction_begin(s->ptimer); - s->reg_OSMR0 = value; - if (s->reg_OSMR0 > s->reg_OSCR) { - ptimer_set_count(s->ptimer, s->reg_OSMR0 - s->reg_OSCR); - } else { - ptimer_set_count(s->ptimer, s->reg_OSMR0 + - (0xffffffff - s->reg_OSCR)); - } - ptimer_run(s->ptimer, 2); - ptimer_transaction_commit(s->ptimer); - break; - case 0x14: /* Status Register */ - assert(value == 0); - if (s->reg_OSSR) { - s->reg_OSSR = value; - qemu_irq_lower(s->irq); - } - break; - case 0x1c: /* Interrupt Enable Register */ - s->reg_OIER = value; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad write offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } -} - -static const MemoryRegionOps puv3_ost_ops = { - .read = puv3_ost_read, - .write = puv3_ost_write, - .impl = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void puv3_ost_tick(void *opaque) -{ - PUV3OSTState *s = opaque; - - DPRINTF("ost hit when ptimer counter from 0x%x to 0x%x!\n", - s->reg_OSCR, s->reg_OSMR0); - - s->reg_OSCR = s->reg_OSMR0; - if (s->reg_OIER) { - s->reg_OSSR = 1; - qemu_irq_raise(s->irq); - } -} - -static void puv3_ost_realize(DeviceState *dev, Error **errp) -{ - PUV3OSTState *s = PUV3_OST(dev); - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - - s->reg_OIER = 0; - s->reg_OSSR = 0; - s->reg_OSMR0 = 0; - s->reg_OSCR = 0; - - sysbus_init_irq(sbd, &s->irq); - - s->ptimer = ptimer_init(puv3_ost_tick, s, PTIMER_POLICY_DEFAULT); - ptimer_transaction_begin(s->ptimer); - ptimer_set_freq(s->ptimer, 50 * 1000 * 1000); - ptimer_transaction_commit(s->ptimer); - - memory_region_init_io(&s->iomem, OBJECT(s), &puv3_ost_ops, s, "puv3_ost", - PUV3_REGS_OFFSET); - sysbus_init_mmio(sbd, &s->iomem); -} - -static void puv3_ost_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = puv3_ost_realize; -} - -static const TypeInfo puv3_ost_info = { - .name = TYPE_PUV3_OST, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(PUV3OSTState), - .class_init = puv3_ost_class_init, -}; - -static void puv3_ost_register_type(void) -{ - type_register_static(&puv3_ost_info); -} - -type_init(puv3_ost_register_type) diff --git a/hw/timer/sh_timer.c b/hw/timer/sh_timer.c index 58af1a1edbd..c72c327bfaf 100644 --- a/hw/timer/sh_timer.c +++ b/hw/timer/sh_timer.c @@ -10,13 +10,12 @@ #include "qemu/osdep.h" #include "exec/memory.h" -#include "hw/hw.h" +#include "qemu/log.h" #include "hw/irq.h" #include "hw/sh4/sh.h" #include "hw/timer/tmu012.h" 
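The sh_timer.c hunk that follows replaces hw_error() aborts with qemu_log_mask(LOG_GUEST_ERROR, ...), so guest-reachable bad accesses are logged and emulation continues instead of killing QEMU. A minimal sketch of that pattern, with a hypothetical register layout:

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "exec/hwaddr.h"

static uint64_t demo_timer_read(void *opaque, hwaddr offset, unsigned size)
{
    switch (offset) {
    case 0x0: /* hypothetical status register */
        return 0;
    default:
        /* Log the bad access and keep running; the read returns 0. */
        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" HWADDR_PRIx "\n",
                      __func__, offset);
        return 0;
    }
}

The same log-and-continue conversion is applied below to the reserved TPSC/CKEG/ICPE cases and to tmu012_read()/tmu012_write().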
#include "hw/ptimer.h" - -//#define DEBUG_TIMER +#include "trace.h" #define TIMER_TCR_TPSC (7 << 0) #define TIMER_TCR_CKEG (3 << 3) @@ -46,24 +45,24 @@ typedef struct { int feat; int enabled; qemu_irq irq; -} sh_timer_state; +} SHTimerState; /* Check all active timers, and schedule the next timer interrupt. */ -static void sh_timer_update(sh_timer_state *s) +static void sh_timer_update(SHTimerState *s) { int new_level = s->int_level && (s->tcr & TIMER_TCR_UNIE); - if (new_level != s->old_level) - qemu_set_irq (s->irq, new_level); - + if (new_level != s->old_level) { + qemu_set_irq(s->irq, new_level); + } s->old_level = s->int_level; s->int_level = new_level; } static uint32_t sh_timer_read(void *opaque, hwaddr offset) { - sh_timer_state *s = (sh_timer_state *)opaque; + SHTimerState *s = opaque; switch (offset >> 2) { case OFFSET_TCOR: @@ -73,19 +72,18 @@ static uint32_t sh_timer_read(void *opaque, hwaddr offset) case OFFSET_TCR: return s->tcr | (s->int_level ? TIMER_TCR_UNF : 0); case OFFSET_TCPR: - if (s->feat & TIMER_FEAT_CAPT) + if (s->feat & TIMER_FEAT_CAPT) { return s->tcpr; - /* fall through */ - default: - hw_error("sh_timer_read: Bad offset %x\n", (int)offset); - return 0; + } } + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" HWADDR_PRIx "\n", + __func__, offset); + return 0; } -static void sh_timer_write(void *opaque, hwaddr offset, - uint32_t value) +static void sh_timer_write(void *opaque, hwaddr offset, uint32_t value) { - sh_timer_state *s = (sh_timer_state *)opaque; + SHTimerState *s = opaque; int freq; switch (offset >> 2) { @@ -104,19 +102,30 @@ static void sh_timer_write(void *opaque, hwaddr offset, case OFFSET_TCR: ptimer_transaction_begin(s->timer); if (s->enabled) { - /* Pause the timer if it is running. This may cause some - inaccuracy dure to rounding, but avoids a whole lot of other - messyness. */ + /* + * Pause the timer if it is running. This may cause some inaccuracy + * due to rounding, but avoids a whole lot of other messiness + */ ptimer_stop(s->timer); } freq = s->freq; /* ??? Need to recalculate expiry time after changing divisor. 
*/ switch (value & TIMER_TCR_TPSC) { - case 0: freq >>= 2; break; - case 1: freq >>= 4; break; - case 2: freq >>= 6; break; - case 3: freq >>= 8; break; - case 4: freq >>= 10; break; + case 0: + freq >>= 2; + break; + case 1: + freq >>= 4; + break; + case 2: + freq >>= 6; + break; + case 3: + freq >>= 8; + break; + case 4: + freq >>= 10; + break; case 6: case 7: if (s->feat & TIMER_FEAT_EXTCLK) { @@ -124,7 +133,8 @@ static void sh_timer_write(void *opaque, hwaddr offset, } /* fallthrough */ default: - hw_error("sh_timer_write: Reserved TPSC value\n"); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Reserved TPSC value\n", __func__); } switch ((value & TIMER_TCR_CKEG) >> 3) { case 0: @@ -137,7 +147,8 @@ static void sh_timer_write(void *opaque, hwaddr offset, } /* fallthrough */ default: - hw_error("sh_timer_write: Reserved CKEG value\n"); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Reserved CKEG value\n", __func__); } switch ((value & TIMER_TCR_ICPE) >> 6) { case 0: @@ -149,7 +160,8 @@ static void sh_timer_write(void *opaque, hwaddr offset, } /* fallthrough */ default: - hw_error("sh_timer_write: Reserved ICPE value\n"); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Reserved ICPE value\n", __func__); } if ((value & TIMER_TCR_UNF) == 0) { s->int_level = 0; @@ -158,13 +170,15 @@ static void sh_timer_write(void *opaque, hwaddr offset, value &= ~TIMER_TCR_UNF; if ((value & TIMER_TCR_ICPF) && (!(s->feat & TIMER_FEAT_CAPT))) { - hw_error("sh_timer_write: Reserved ICPF value\n"); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Reserved ICPF value\n", __func__); } value &= ~TIMER_TCR_ICPF; /* capture not supported */ if (value & TIMER_TCR_RESERVED) { - hw_error("sh_timer_write: Reserved TCR bits set\n"); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Reserved TCR bits set\n", __func__); } s->tcr = value; ptimer_set_limit(s->timer, s->tcor, 0); @@ -182,19 +196,17 @@ static void sh_timer_write(void *opaque, hwaddr offset, } /* fallthrough */ default: - hw_error("sh_timer_write: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%" HWADDR_PRIx "\n", __func__, offset); } sh_timer_update(s); } static void sh_timer_start_stop(void *opaque, int enable) { - sh_timer_state *s = (sh_timer_state *)opaque; - -#ifdef DEBUG_TIMER - printf("sh_timer_start_stop %d (%d)\n", enable, s->enabled); -#endif + SHTimerState *s = opaque; + trace_sh_timer_start_stop(enable, s->enabled); ptimer_transaction_begin(s->timer); if (s->enabled && !enable) { ptimer_stop(s->timer); @@ -204,24 +216,20 @@ static void sh_timer_start_stop(void *opaque, int enable) } ptimer_transaction_commit(s->timer); s->enabled = !!enable; - -#ifdef DEBUG_TIMER - printf("sh_timer_start_stop done %d\n", s->enabled); -#endif } static void sh_timer_tick(void *opaque) { - sh_timer_state *s = (sh_timer_state *)opaque; + SHTimerState *s = opaque; s->int_level = s->enabled; sh_timer_update(s); } static void *sh_timer_init(uint32_t freq, int feat, qemu_irq irq) { - sh_timer_state *s; + SHTimerState *s; - s = (sh_timer_state *)g_malloc0(sizeof(sh_timer_state)); + s = g_malloc0(sizeof(*s)); s->freq = freq; s->feat = feat; s->tcor = 0xffffffff; @@ -252,50 +260,49 @@ typedef struct { int feat; } tmu012_state; -static uint64_t tmu012_read(void *opaque, hwaddr offset, - unsigned size) +static uint64_t tmu012_read(void *opaque, hwaddr offset, unsigned size) { - tmu012_state *s = (tmu012_state *)opaque; - -#ifdef DEBUG_TIMER - printf("tmu012_read 0x%lx\n", (unsigned long) offset); -#endif + tmu012_state *s = opaque; + trace_sh_timer_read(offset); if (offset >= 0x20) 
{ if (!(s->feat & TMU012_FEAT_3CHAN)) { - hw_error("tmu012_write: Bad channel offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad channel offset 0x%" HWADDR_PRIx "\n", + __func__, offset); } return sh_timer_read(s->timer[2], offset - 0x20); } - if (offset >= 0x14) + if (offset >= 0x14) { return sh_timer_read(s->timer[1], offset - 0x14); - - if (offset >= 0x08) + } + if (offset >= 0x08) { return sh_timer_read(s->timer[0], offset - 0x08); - - if (offset == 4) + } + if (offset == 4) { return s->tstr; - - if ((s->feat & TMU012_FEAT_TOCR) && offset == 0) + } + if ((s->feat & TMU012_FEAT_TOCR) && offset == 0) { return s->tocr; + } - hw_error("tmu012_write: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%" HWADDR_PRIx "\n", __func__, offset); return 0; } static void tmu012_write(void *opaque, hwaddr offset, uint64_t value, unsigned size) { - tmu012_state *s = (tmu012_state *)opaque; - -#ifdef DEBUG_TIMER - printf("tmu012_write 0x%lx 0x%08x\n", (unsigned long) offset, value); -#endif + tmu012_state *s = opaque; + trace_sh_timer_write(offset, value); if (offset >= 0x20) { if (!(s->feat & TMU012_FEAT_3CHAN)) { - hw_error("tmu012_write: Bad channel offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad channel offset 0x%" HWADDR_PRIx "\n", + __func__, offset); } sh_timer_write(s->timer[2], offset - 0x20, value); return; @@ -318,7 +325,7 @@ static void tmu012_write(void *opaque, hwaddr offset, sh_timer_start_stop(s->timer[2], value & (1 << 2)); } else { if (value & (1 << 2)) { - hw_error("tmu012_write: Bad channel\n"); + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad channel\n", __func__); } } @@ -337,15 +344,14 @@ static const MemoryRegionOps tmu012_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -void tmu012_init(MemoryRegion *sysmem, hwaddr base, - int feat, uint32_t freq, +void tmu012_init(MemoryRegion *sysmem, hwaddr base, int feat, uint32_t freq, qemu_irq ch0_irq, qemu_irq ch1_irq, qemu_irq ch2_irq0, qemu_irq ch2_irq1) { tmu012_state *s; int timer_feat = (feat & TMU012_FEAT_EXTCLK) ? TIMER_FEAT_EXTCLK : 0; - s = (tmu012_state *)g_malloc0(sizeof(tmu012_state)); + s = g_malloc0(sizeof(*s)); s->feat = feat; s->timer[0] = sh_timer_init(freq, timer_feat, ch0_irq); s->timer[1] = sh_timer_init(freq, timer_feat, ch1_irq); @@ -354,15 +360,14 @@ void tmu012_init(MemoryRegion *sysmem, hwaddr base, ch2_irq0); /* ch2_irq1 not supported */ } - memory_region_init_io(&s->iomem, NULL, &tmu012_ops, s, - "timer", 0x100000000ULL); + memory_region_init_io(&s->iomem, NULL, &tmu012_ops, s, "timer", 0x30); memory_region_init_alias(&s->iomem_p4, NULL, "timer-p4", - &s->iomem, 0, 0x1000); + &s->iomem, 0, memory_region_size(&s->iomem)); memory_region_add_subregion(sysmem, P4ADDR(base), &s->iomem_p4); memory_region_init_alias(&s->iomem_a7, NULL, "timer-a7", - &s->iomem, 0, 0x1000); + &s->iomem, 0, memory_region_size(&s->iomem)); memory_region_add_subregion(sysmem, A7ADDR(base), &s->iomem_a7); /* ??? Save/restore. 
*/ } diff --git a/hw/timer/sifive_pwm.c b/hw/timer/sifive_pwm.c new file mode 100644 index 00000000000..c664480ccf5 --- /dev/null +++ b/hw/timer/sifive_pwm.c @@ -0,0 +1,468 @@ +/* + * SiFive PWM + * + * Copyright (c) 2020 Western Digital + * + * Author: Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "trace.h" +#include "hw/irq.h" +#include "hw/timer/sifive_pwm.h" +#include "hw/qdev-properties.h" +#include "hw/registerfields.h" +#include "migration/vmstate.h" +#include "qemu/log.h" +#include "qemu/module.h" + +#define HAS_PWM_EN_BITS(cfg) ((cfg & R_CONFIG_ENONESHOT_MASK) || \ + (cfg & R_CONFIG_ENALWAYS_MASK)) + +#define PWMCMP_MASK 0xFFFF +#define PWMCOUNT_MASK 0x7FFFFFFF + +REG32(CONFIG, 0x00) + FIELD(CONFIG, SCALE, 0, 4) + FIELD(CONFIG, STICKY, 8, 1) + FIELD(CONFIG, ZEROCMP, 9, 1) + FIELD(CONFIG, DEGLITCH, 10, 1) + FIELD(CONFIG, ENALWAYS, 12, 1) + FIELD(CONFIG, ENONESHOT, 13, 1) + FIELD(CONFIG, CMP0CENTER, 16, 1) + FIELD(CONFIG, CMP1CENTER, 17, 1) + FIELD(CONFIG, CMP2CENTER, 18, 1) + FIELD(CONFIG, CMP3CENTER, 19, 1) + FIELD(CONFIG, CMP0GANG, 24, 1) + FIELD(CONFIG, CMP1GANG, 25, 1) + FIELD(CONFIG, CMP2GANG, 26, 1) + FIELD(CONFIG, CMP3GANG, 27, 1) + FIELD(CONFIG, CMP0IP, 28, 1) + FIELD(CONFIG, CMP1IP, 29, 1) + FIELD(CONFIG, CMP2IP, 30, 1) + FIELD(CONFIG, CMP3IP, 31, 1) +REG32(COUNT, 0x08) +REG32(PWMS, 0x10) +REG32(PWMCMP0, 0x20) +REG32(PWMCMP1, 0x24) +REG32(PWMCMP2, 0x28) +REG32(PWMCMP3, 0x2C) + +static inline uint64_t sifive_pwm_ns_to_ticks(SiFivePwmState *s, + uint64_t time) +{ + return muldiv64(time, s->freq_hz, NANOSECONDS_PER_SECOND); +} + +static inline uint64_t sifive_pwm_ticks_to_ns(SiFivePwmState *s, + uint64_t ticks) +{ + return muldiv64(ticks, NANOSECONDS_PER_SECOND, s->freq_hz); +} + +static inline uint64_t sifive_pwm_compute_scale(SiFivePwmState *s) +{ + return s->pwmcfg & R_CONFIG_SCALE_MASK; +} + +static void sifive_pwm_set_alarms(SiFivePwmState *s) +{ + uint64_t now_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + + if (HAS_PWM_EN_BITS(s->pwmcfg)) { + /* + * Subtract ticks from number of ticks when the timer was zero + * and mask to the register width. 
+ */ + uint64_t pwmcount = (sifive_pwm_ns_to_ticks(s, now_ns) - + s->tick_offset) & PWMCOUNT_MASK; + uint64_t scale = sifive_pwm_compute_scale(s); + /* PWMs only contains PWMCMP_MASK bits starting at scale */ + uint64_t pwms = (pwmcount & (PWMCMP_MASK << scale)) >> scale; + + for (int i = 0; i < SIFIVE_PWM_CHANS; i++) { + uint64_t pwmcmp = s->pwmcmp[i] & PWMCMP_MASK; + uint64_t pwmcmp_ticks = pwmcmp << scale; + + /* + * Per circuit diagram and spec, both cases raises corresponding + * IP bit one clock cycle after time expires. + */ + if (pwmcmp > pwms) { + uint64_t offset = pwmcmp_ticks - pwmcount + 1; + uint64_t when_to_fire = now_ns + + sifive_pwm_ticks_to_ns(s, offset); + + trace_sifive_pwm_set_alarm(when_to_fire, now_ns); + timer_mod(&s->timer[i], when_to_fire); + } else { + /* Schedule interrupt for next cycle */ + trace_sifive_pwm_set_alarm(now_ns + 1, now_ns); + timer_mod(&s->timer[i], now_ns + 1); + } + + } + } else { + /* + * If timer incrementing disabled, just do pwms > pwmcmp check since + * a write may have happened to PWMs. + */ + uint64_t pwmcount = (s->tick_offset) & PWMCOUNT_MASK; + uint64_t scale = sifive_pwm_compute_scale(s); + uint64_t pwms = (pwmcount & (PWMCMP_MASK << scale)) >> scale; + + for (int i = 0; i < SIFIVE_PWM_CHANS; i++) { + uint64_t pwmcmp = s->pwmcmp[i] & PWMCMP_MASK; + + if (pwms >= pwmcmp) { + trace_sifive_pwm_set_alarm(now_ns + 1, now_ns); + timer_mod(&s->timer[i], now_ns + 1); + } else { + /* Effectively disable timer by scheduling far in future. */ + trace_sifive_pwm_set_alarm(0xFFFFFFFFFFFFFF, now_ns); + timer_mod(&s->timer[i], 0xFFFFFFFFFFFFFF); + } + } + } +} + +static void sifive_pwm_interrupt(SiFivePwmState *s, int num) +{ + uint64_t now = sifive_pwm_ns_to_ticks(s, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + bool was_incrementing = HAS_PWM_EN_BITS(s->pwmcfg); + + trace_sifive_pwm_interrupt(num); + + s->pwmcfg |= R_CONFIG_CMP0IP_MASK << num; + qemu_irq_raise(s->irqs[num]); + + /* + * If the zerocmp is set and pwmcmp0 raised the interrupt + * reset the zero ticks. + */ + if ((s->pwmcfg & R_CONFIG_ZEROCMP_MASK) && (num == 0)) { + /* If reset signal conditions, disable ENONESHOT. */ + s->pwmcfg &= ~R_CONFIG_ENONESHOT_MASK; + + if (was_incrementing) { + /* If incrementing, time in ticks is when pwmcount is zero */ + s->tick_offset = now; + } else { + /* If not incrementing, pwmcount = 0 */ + s->tick_offset = 0; + } + } + + /* + * If carryout bit set, which we discern via looking for overflow, + * also reset ENONESHOT. 
+ */ + if (was_incrementing && + ((now & PWMCOUNT_MASK) < (s->tick_offset & PWMCOUNT_MASK))) { + s->pwmcfg &= ~R_CONFIG_ENONESHOT_MASK; + } + + /* Schedule or disable interrupts */ + sifive_pwm_set_alarms(s); + + /* If was enabled, and now not enabled, switch tick rep */ + if (was_incrementing && !HAS_PWM_EN_BITS(s->pwmcfg)) { + s->tick_offset = (now - s->tick_offset) & PWMCOUNT_MASK; + } +} + +static void sifive_pwm_interrupt_0(void *opaque) +{ + SiFivePwmState *s = opaque; + + sifive_pwm_interrupt(s, 0); +} + +static void sifive_pwm_interrupt_1(void *opaque) +{ + SiFivePwmState *s = opaque; + + sifive_pwm_interrupt(s, 1); +} + +static void sifive_pwm_interrupt_2(void *opaque) +{ + SiFivePwmState *s = opaque; + + sifive_pwm_interrupt(s, 2); +} + +static void sifive_pwm_interrupt_3(void *opaque) +{ + SiFivePwmState *s = opaque; + + sifive_pwm_interrupt(s, 3); +} + +static uint64_t sifive_pwm_read(void *opaque, hwaddr addr, + unsigned int size) +{ + SiFivePwmState *s = opaque; + uint64_t cur_time, scale; + uint64_t now = sifive_pwm_ns_to_ticks(s, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + + trace_sifive_pwm_read(addr); + + switch (addr) { + case A_CONFIG: + return s->pwmcfg; + case A_COUNT: + cur_time = s->tick_offset; + + if (HAS_PWM_EN_BITS(s->pwmcfg)) { + cur_time = now - cur_time; + } + + /* + * Return the value in the counter with bit 31 always 0 + * This is allowed to wrap around so we don't need to check that. + */ + return cur_time & PWMCOUNT_MASK; + case A_PWMS: + cur_time = s->tick_offset; + scale = sifive_pwm_compute_scale(s); + + if (HAS_PWM_EN_BITS(s->pwmcfg)) { + cur_time = now - cur_time; + } + + return ((cur_time & PWMCOUNT_MASK) >> scale) & PWMCMP_MASK; + case A_PWMCMP0: + return s->pwmcmp[0] & PWMCMP_MASK; + case A_PWMCMP1: + return s->pwmcmp[1] & PWMCMP_MASK; + case A_PWMCMP2: + return s->pwmcmp[2] & PWMCMP_MASK; + case A_PWMCMP3: + return s->pwmcmp[3] & PWMCMP_MASK; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%"HWADDR_PRIx"\n", __func__, addr); + return 0; + } + + return 0; +} + +static void sifive_pwm_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + SiFivePwmState *s = opaque; + uint32_t value = val64; + uint64_t new_offset, scale; + uint64_t now = sifive_pwm_ns_to_ticks(s, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + + trace_sifive_pwm_write(value, addr); + + switch (addr) { + case A_CONFIG: + if (value & (R_CONFIG_CMP0CENTER_MASK | R_CONFIG_CMP1CENTER_MASK | + R_CONFIG_CMP2CENTER_MASK | R_CONFIG_CMP3CENTER_MASK)) { + qemu_log_mask(LOG_UNIMP, "%s: CMPxCENTER is not supported\n", + __func__); + } + + if (value & (R_CONFIG_CMP0GANG_MASK | R_CONFIG_CMP1GANG_MASK | + R_CONFIG_CMP2GANG_MASK | R_CONFIG_CMP3GANG_MASK)) { + qemu_log_mask(LOG_UNIMP, "%s: CMPxGANG is not supported\n", + __func__); + } + + if (value & (R_CONFIG_CMP0IP_MASK | R_CONFIG_CMP1IP_MASK | + R_CONFIG_CMP2IP_MASK | R_CONFIG_CMP3IP_MASK)) { + qemu_log_mask(LOG_UNIMP, "%s: CMPxIP is not supported\n", + __func__); + } + + if (!(value & R_CONFIG_CMP0IP_MASK)) { + qemu_irq_lower(s->irqs[0]); + } + + if (!(value & R_CONFIG_CMP1IP_MASK)) { + qemu_irq_lower(s->irqs[1]); + } + + if (!(value & R_CONFIG_CMP2IP_MASK)) { + qemu_irq_lower(s->irqs[2]); + } + + if (!(value & R_CONFIG_CMP3IP_MASK)) { + qemu_irq_lower(s->irqs[3]); + } + + /* + * If this write enables the timer increment + * set the time when pwmcount was zero to be cur_time - pwmcount. + * If this write disables the timer increment + * convert back from pwmcount to the time in ticks + * when pwmcount was zero. 
+ */ + if ((!HAS_PWM_EN_BITS(s->pwmcfg) && HAS_PWM_EN_BITS(value)) || + (HAS_PWM_EN_BITS(s->pwmcfg) && !HAS_PWM_EN_BITS(value))) { + s->tick_offset = (now - s->tick_offset) & PWMCOUNT_MASK; + } + + s->pwmcfg = value; + break; + case A_COUNT: + /* The guest changed the counter, updated the offset value. */ + new_offset = value; + + if (HAS_PWM_EN_BITS(s->pwmcfg)) { + new_offset = now - new_offset; + } + + s->tick_offset = new_offset; + break; + case A_PWMS: + scale = sifive_pwm_compute_scale(s); + new_offset = (((value & PWMCMP_MASK) << scale) & PWMCOUNT_MASK); + + if (HAS_PWM_EN_BITS(s->pwmcfg)) { + new_offset = now - new_offset; + } + + s->tick_offset = new_offset; + break; + case A_PWMCMP0: + s->pwmcmp[0] = value & PWMCMP_MASK; + break; + case A_PWMCMP1: + s->pwmcmp[1] = value & PWMCMP_MASK; + break; + case A_PWMCMP2: + s->pwmcmp[2] = value & PWMCMP_MASK; + break; + case A_PWMCMP3: + s->pwmcmp[3] = value & PWMCMP_MASK; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%"HWADDR_PRIx"\n", __func__, addr); + } + + /* Update the alarms to reflect possible updated values */ + sifive_pwm_set_alarms(s); +} + +static void sifive_pwm_reset(DeviceState *dev) +{ + SiFivePwmState *s = SIFIVE_PWM(dev); + uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + + s->pwmcfg = 0x00000000; + s->pwmcmp[0] = 0x00000000; + s->pwmcmp[1] = 0x00000000; + s->pwmcmp[2] = 0x00000000; + s->pwmcmp[3] = 0x00000000; + + s->tick_offset = sifive_pwm_ns_to_ticks(s, now); +} + +static const MemoryRegionOps sifive_pwm_ops = { + .read = sifive_pwm_read, + .write = sifive_pwm_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const VMStateDescription vmstate_sifive_pwm = { + .name = TYPE_SIFIVE_PWM, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_TIMER_ARRAY(timer, SiFivePwmState, 4), + VMSTATE_UINT64(tick_offset, SiFivePwmState), + VMSTATE_UINT32(pwmcfg, SiFivePwmState), + VMSTATE_UINT32_ARRAY(pwmcmp, SiFivePwmState, 4), + VMSTATE_END_OF_LIST() + } +}; + +static Property sifive_pwm_properties[] = { + /* 0.5Ghz per spec after FSBL */ + DEFINE_PROP_UINT64("clock-frequency", struct SiFivePwmState, + freq_hz, 500000000ULL), + DEFINE_PROP_END_OF_LIST(), +}; + +static void sifive_pwm_init(Object *obj) +{ + SiFivePwmState *s = SIFIVE_PWM(obj); + int i; + + for (i = 0; i < SIFIVE_PWM_IRQS; i++) { + sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irqs[i]); + } + + memory_region_init_io(&s->mmio, obj, &sifive_pwm_ops, s, + TYPE_SIFIVE_PWM, 0x100); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); +} + +static void sifive_pwm_realize(DeviceState *dev, Error **errp) +{ + SiFivePwmState *s = SIFIVE_PWM(dev); + + timer_init_ns(&s->timer[0], QEMU_CLOCK_VIRTUAL, + sifive_pwm_interrupt_0, s); + + timer_init_ns(&s->timer[1], QEMU_CLOCK_VIRTUAL, + sifive_pwm_interrupt_1, s); + + timer_init_ns(&s->timer[2], QEMU_CLOCK_VIRTUAL, + sifive_pwm_interrupt_2, s); + + timer_init_ns(&s->timer[3], QEMU_CLOCK_VIRTUAL, + sifive_pwm_interrupt_3, s); +} + +static void sifive_pwm_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = sifive_pwm_reset; + device_class_set_props(dc, sifive_pwm_properties); + dc->vmsd = &vmstate_sifive_pwm; + dc->realize = sifive_pwm_realize; +} + +static const TypeInfo sifive_pwm_info = { + .name = TYPE_SIFIVE_PWM, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SiFivePwmState), + .instance_init = sifive_pwm_init, + .class_init = sifive_pwm_class_init, +}; + +static void sifive_pwm_register_types(void) +{ + 
type_register_static(&sifive_pwm_info); +} + +type_init(sifive_pwm_register_types) diff --git a/hw/timer/sse-counter.c b/hw/timer/sse-counter.c index 0384051f151..16c0e8ad15d 100644 --- a/hw/timer/sse-counter.c +++ b/hw/timer/sse-counter.c @@ -33,7 +33,6 @@ #include "trace.h" #include "hw/timer/sse-counter.h" #include "hw/sysbus.h" -#include "hw/irq.h" #include "hw/registerfields.h" #include "hw/clock.h" #include "hw/qdev-clock.h" diff --git a/hw/timer/stellaris-gptm.c b/hw/timer/stellaris-gptm.c new file mode 100644 index 00000000000..fd71c79be48 --- /dev/null +++ b/hw/timer/stellaris-gptm.c @@ -0,0 +1,332 @@ +/* + * Luminary Micro Stellaris General Purpose Timer Module + * + * Copyright (c) 2006 CodeSourcery. + * Written by Paul Brook + * + * This code is licensed under the GPL. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/timer.h" +#include "qapi/error.h" +#include "migration/vmstate.h" +#include "hw/qdev-clock.h" +#include "hw/timer/stellaris-gptm.h" + +static void gptm_update_irq(gptm_state *s) +{ + int level; + level = (s->state & s->mask) != 0; + qemu_set_irq(s->irq, level); +} + +static void gptm_stop(gptm_state *s, int n) +{ + timer_del(s->timer[n]); +} + +static void gptm_reload(gptm_state *s, int n, int reset) +{ + int64_t tick; + if (reset) { + tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + } else { + tick = s->tick[n]; + } + + if (s->config == 0) { + /* 32-bit CountDown. */ + uint32_t count; + count = s->load[0] | (s->load[1] << 16); + tick += clock_ticks_to_ns(s->clk, count); + } else if (s->config == 1) { + /* 32-bit RTC. 1Hz tick. */ + tick += NANOSECONDS_PER_SECOND; + } else if (s->mode[n] == 0xa) { + /* PWM mode. Not implemented. */ + } else { + qemu_log_mask(LOG_UNIMP, + "GPTM: 16-bit timer mode unimplemented: 0x%x\n", + s->mode[n]); + return; + } + s->tick[n] = tick; + timer_mod(s->timer[n], tick); +} + +static void gptm_tick(void *opaque) +{ + gptm_state **p = (gptm_state **)opaque; + gptm_state *s; + int n; + + s = *p; + n = p - s->opaque; + if (s->config == 0) { + s->state |= 1; + if ((s->control & 0x20)) { + /* Output trigger. */ + qemu_irq_pulse(s->trigger); + } + if (s->mode[0] & 1) { + /* One-shot. */ + s->control &= ~1; + } else { + /* Periodic. */ + gptm_reload(s, 0, 0); + } + } else if (s->config == 1) { + /* RTC. */ + uint32_t match; + s->rtc++; + match = s->match[0] | (s->match[1] << 16); + if (s->rtc > match) + s->rtc = 0; + if (s->rtc == 0) { + s->state |= 8; + } + gptm_reload(s, 0, 0); + } else if (s->mode[n] == 0xa) { + /* PWM mode. Not implemented. */ + } else { + qemu_log_mask(LOG_UNIMP, + "GPTM: 16-bit timer mode unimplemented: 0x%x\n", + s->mode[n]); + } + gptm_update_irq(s); +} + +static uint64_t gptm_read(void *opaque, hwaddr offset, + unsigned size) +{ + gptm_state *s = (gptm_state *)opaque; + + switch (offset) { + case 0x00: /* CFG */ + return s->config; + case 0x04: /* TAMR */ + return s->mode[0]; + case 0x08: /* TBMR */ + return s->mode[1]; + case 0x0c: /* CTL */ + return s->control; + case 0x18: /* IMR */ + return s->mask; + case 0x1c: /* RIS */ + return s->state; + case 0x20: /* MIS */ + return s->state & s->mask; + case 0x24: /* CR */ + return 0; + case 0x28: /* TAILR */ + return s->load[0] | ((s->config < 4) ? (s->load[1] << 16) : 0); + case 0x2c: /* TBILR */ + return s->load[1]; + case 0x30: /* TAMARCHR */ + return s->match[0] | ((s->config < 4) ? 
(s->match[1] << 16) : 0); + case 0x34: /* TBMATCHR */ + return s->match[1]; + case 0x38: /* TAPR */ + return s->prescale[0]; + case 0x3c: /* TBPR */ + return s->prescale[1]; + case 0x40: /* TAPMR */ + return s->match_prescale[0]; + case 0x44: /* TBPMR */ + return s->match_prescale[1]; + case 0x48: /* TAR */ + if (s->config == 1) { + return s->rtc; + } + qemu_log_mask(LOG_UNIMP, + "GPTM: read of TAR but timer read not supported\n"); + return 0; + case 0x4c: /* TBR */ + qemu_log_mask(LOG_UNIMP, + "GPTM: read of TBR but timer read not supported\n"); + return 0; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "GPTM: read at bad offset 0x02%" HWADDR_PRIx "\n", + offset); + return 0; + } +} + +static void gptm_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + gptm_state *s = (gptm_state *)opaque; + uint32_t oldval; + + /* + * The timers should be disabled before changing the configuration. + * We take advantage of this and defer everything until the timer + * is enabled. + */ + switch (offset) { + case 0x00: /* CFG */ + s->config = value; + break; + case 0x04: /* TAMR */ + s->mode[0] = value; + break; + case 0x08: /* TBMR */ + s->mode[1] = value; + break; + case 0x0c: /* CTL */ + oldval = s->control; + s->control = value; + /* TODO: Implement pause. */ + if ((oldval ^ value) & 1) { + if (value & 1) { + gptm_reload(s, 0, 1); + } else { + gptm_stop(s, 0); + } + } + if (((oldval ^ value) & 0x100) && s->config >= 4) { + if (value & 0x100) { + gptm_reload(s, 1, 1); + } else { + gptm_stop(s, 1); + } + } + break; + case 0x18: /* IMR */ + s->mask = value & 0x77; + gptm_update_irq(s); + break; + case 0x24: /* CR */ + s->state &= ~value; + break; + case 0x28: /* TAILR */ + s->load[0] = value & 0xffff; + if (s->config < 4) { + s->load[1] = value >> 16; + } + break; + case 0x2c: /* TBILR */ + s->load[1] = value & 0xffff; + break; + case 0x30: /* TAMARCHR */ + s->match[0] = value & 0xffff; + if (s->config < 4) { + s->match[1] = value >> 16; + } + break; + case 0x34: /* TBMATCHR */ + s->match[1] = value >> 16; + break; + case 0x38: /* TAPR */ + s->prescale[0] = value; + break; + case 0x3c: /* TBPR */ + s->prescale[1] = value; + break; + case 0x40: /* TAPMR */ + s->match_prescale[0] = value; + break; + case 0x44: /* TBPMR */ + s->match_prescale[0] = value; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "GPTM: write at bad offset 0x02%" HWADDR_PRIx "\n", + offset); + } + gptm_update_irq(s); +} + +static const MemoryRegionOps gptm_ops = { + .read = gptm_read, + .write = gptm_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const VMStateDescription vmstate_stellaris_gptm = { + .name = "stellaris_gptm", + .version_id = 2, + .minimum_version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_UINT32(config, gptm_state), + VMSTATE_UINT32_ARRAY(mode, gptm_state, 2), + VMSTATE_UINT32(control, gptm_state), + VMSTATE_UINT32(state, gptm_state), + VMSTATE_UINT32(mask, gptm_state), + VMSTATE_UNUSED(8), + VMSTATE_UINT32_ARRAY(load, gptm_state, 2), + VMSTATE_UINT32_ARRAY(match, gptm_state, 2), + VMSTATE_UINT32_ARRAY(prescale, gptm_state, 2), + VMSTATE_UINT32_ARRAY(match_prescale, gptm_state, 2), + VMSTATE_UINT32(rtc, gptm_state), + VMSTATE_INT64_ARRAY(tick, gptm_state, 2), + VMSTATE_TIMER_PTR_ARRAY(timer, gptm_state, 2), + VMSTATE_CLOCK(clk, gptm_state), + VMSTATE_END_OF_LIST() + } +}; + +static void stellaris_gptm_init(Object *obj) +{ + DeviceState *dev = DEVICE(obj); + gptm_state *s = STELLARIS_GPTM(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + sysbus_init_irq(sbd, &s->irq); + 
qdev_init_gpio_out(dev, &s->trigger, 1); + + memory_region_init_io(&s->iomem, obj, &gptm_ops, s, + "gptm", 0x1000); + sysbus_init_mmio(sbd, &s->iomem); + + s->opaque[0] = s->opaque[1] = s; + + /* + * TODO: in an ideal world we would model the effects of changing + * the input clock frequency while the countdown timer is active. + * The best way to do this would be to convert the device to use + * ptimer instead of hand-rolling its own timer. This would also + * make it easy to implement reading the current count from the + * TAR and TBR registers. + */ + s->clk = qdev_init_clock_in(dev, "clk", NULL, NULL, 0); +} + +static void stellaris_gptm_realize(DeviceState *dev, Error **errp) +{ + gptm_state *s = STELLARIS_GPTM(dev); + + if (!clock_has_source(s->clk)) { + error_setg(errp, "stellaris-gptm: clk must be connected"); + return; + } + + s->timer[0] = timer_new_ns(QEMU_CLOCK_VIRTUAL, gptm_tick, &s->opaque[0]); + s->timer[1] = timer_new_ns(QEMU_CLOCK_VIRTUAL, gptm_tick, &s->opaque[1]); +} + +static void stellaris_gptm_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_stellaris_gptm; + dc->realize = stellaris_gptm_realize; +} + +static const TypeInfo stellaris_gptm_info = { + .name = TYPE_STELLARIS_GPTM, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(gptm_state), + .instance_init = stellaris_gptm_init, + .class_init = stellaris_gptm_class_init, +}; + +static void stellaris_gptm_register_types(void) +{ + type_register_static(&stellaris_gptm_info); +} + +type_init(stellaris_gptm_register_types) diff --git a/hw/timer/trace-events b/hw/timer/trace-events index f8b9db25c27..3eccef83858 100644 --- a/hw/timer/trace-events +++ b/hw/timer/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
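Each declaration in this trace-events file expands at build time into a generated trace_<name>() helper in the per-directory trace.h. A hedged sketch of how the sh_timer point added further down in this hunk would be emitted from C:

#include "qemu/osdep.h"
#include "trace.h" /* generated from hw/timer/trace-events */

/*
 * Assumes the declaration added below:
 *   sh_timer_write(uint64_t offset, uint64_t value) "tmu012_write 0x%" PRIx64 " 0x%08" PRIx64
 */
static void demo_trace_write(uint64_t offset, uint64_t value)
{
    trace_sh_timer_write(offset, value);
}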
# slavio_timer.c slavio_timer_get_out(uint64_t limit, uint32_t counthigh, uint32_t count) "limit 0x%"PRIx64" count 0x%x0x%08x" @@ -24,23 +24,6 @@ grlib_gptimer_hit(int id) "timer:%d HIT" grlib_gptimer_readl(int id, uint64_t addr, uint32_t val) "timer:%d addr 0x%"PRIx64" 0x%x" grlib_gptimer_writel(int id, uint64_t addr, uint32_t val) "timer:%d addr 0x%"PRIx64" 0x%x" -# lm32_timer.c -lm32_timer_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -lm32_timer_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -lm32_timer_hit(void) "timer hit" -lm32_timer_irq_state(int level) "irq state %d" - -# milkymist-sysctl.c -milkymist_sysctl_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_sysctl_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_sysctl_icap_write(uint32_t value) "value 0x%08x" -milkymist_sysctl_start_timer0(void) "Start timer0" -milkymist_sysctl_stop_timer0(void) "Stop timer0" -milkymist_sysctl_start_timer1(void) "Start timer1" -milkymist_sysctl_stop_timer1(void) "Stop timer1" -milkymist_sysctl_pulse_irq_timer0(void) "Pulse IRQ Timer0" -milkymist_sysctl_pulse_irq_timer1(void) "Pulse IRQ Timer1" - # aspeed_timer.c aspeed_timer_ctrl_enable(uint8_t i, bool enable) "Timer %" PRIu8 ": %d" aspeed_timer_ctrl_external_clock(uint8_t i, bool enable) "Timer %" PRIu8 ": %d" @@ -105,3 +88,14 @@ sse_counter_reset(void) "SSE system counter: reset" sse_timer_read(uint64_t offset, uint64_t data, unsigned size) "SSE system timer read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" sse_timer_write(uint64_t offset, uint64_t data, unsigned size) "SSE system timer write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" sse_timer_reset(void) "SSE system timer: reset" + +# sifive_pwm.c +sifive_pwm_set_alarm(uint64_t alarm, uint64_t now) "Setting alarm to: 0x%" PRIx64 ", now: 0x%" PRIx64 +sifive_pwm_interrupt(int num) "Interrupt %d" +sifive_pwm_read(uint64_t offset) "Read at address: 0x%" PRIx64 +sifive_pwm_write(uint64_t data, uint64_t offset) "Write 0x%" PRIx64 " at address: 0x%" PRIx64 + +# sh_timer.c +sh_timer_start_stop(int enable, int current) "%d (%d)" +sh_timer_read(uint64_t offset) "tmu012_read 0x%" PRIx64 +sh_timer_write(uint64_t offset, uint64_t value) "tmu012_write 0x%" PRIx64 " 0x%08" PRIx64 diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c index aa9c00aad3a..58ebd1469c3 100644 --- a/hw/tpm/tpm_crb.c +++ b/hw/tpm/tpm_crb.c @@ -18,7 +18,6 @@ #include "qemu/module.h" #include "qapi/error.h" -#include "exec/address-spaces.h" #include "hw/qdev-properties.h" #include "hw/pci/pci_ids.h" #include "hw/acpi/tpm.h" diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c index 72d7a3d9260..274e9aa4b01 100644 --- a/hw/tpm/tpm_ppi.c +++ b/hw/tpm/tpm_ppi.c @@ -23,18 +23,21 @@ void tpm_ppi_reset(TPMPPI *tpmppi) { - if (tpmppi->buf[0x15a /* movv, docs/specs/tpm.txt */] & 0x1) { + if (tpmppi->buf[0x15a /* movv, docs/specs/tpm.rst */] & 0x1) { GuestPhysBlockList guest_phys_blocks; GuestPhysBlock *block; guest_phys_blocks_init(&guest_phys_blocks); guest_phys_blocks_append(&guest_phys_blocks); QTAILQ_FOREACH(block, &guest_phys_blocks.head, next) { + hwaddr mr_offs = block->host_addr - + (uint8_t *)memory_region_get_ram_ptr(block->mr); + trace_tpm_ppi_memset(block->host_addr, block->target_end - block->target_start); memset(block->host_addr, 0, block->target_end - block->target_start); - memory_region_set_dirty(block->mr, 0, + memory_region_set_dirty(block->mr, mr_offs, block->target_end - block->target_start); } 
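The tpm_ppi.c hunk above stops passing offset 0 to memory_region_set_dirty(): a guest-physical block may begin partway into its MemoryRegion, so the dirty range has to start at the block's offset within that region. A minimal sketch of the computation, assuming the GuestPhysBlock layout from sysemu/memory_mapping.h:

#include "qemu/osdep.h"
#include "exec/memory.h"
#include "sysemu/memory_mapping.h"

static void wipe_block_and_mark_dirty(GuestPhysBlock *block)
{
    uint8_t *mr_base = memory_region_get_ram_ptr(block->mr);
    hwaddr mr_offs = block->host_addr - mr_base;      /* block start within its region */
    hwaddr len = block->target_end - block->target_start;

    memset(block->host_addr, 0, len);                 /* clear the guest memory */
    memory_region_set_dirty(block->mr, mr_offs, len); /* mark exactly that range dirty */
}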
guest_phys_blocks_free(&guest_phys_blocks); diff --git a/hw/tpm/trace-events b/hw/tpm/trace-events index 6005ecb5dae..f17110458e6 100644 --- a/hw/tpm/trace-events +++ b/hw/tpm/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # tpm_crb.c tpm_crb_mmio_read(uint64_t addr, unsigned size, uint32_t val) "CRB read 0x%016" PRIx64 " len:%u val: 0x%" PRIx32 diff --git a/hw/tricore/Kconfig b/hw/tricore/Kconfig index 506e6183c17..33c1e852c33 100644 --- a/hw/tricore/Kconfig +++ b/hw/tricore/Kconfig @@ -1,9 +1,8 @@ -config TRICORE +config TRICORE_TESTBOARD bool config TRIBOARD bool - select TRICORE select TC27X_SOC config TC27X_SOC diff --git a/hw/tricore/meson.build b/hw/tricore/meson.build index 77ff6fd1371..7e3585daf8f 100644 --- a/hw/tricore/meson.build +++ b/hw/tricore/meson.build @@ -1,5 +1,6 @@ tricore_ss = ss.source_set() -tricore_ss.add(when: 'CONFIG_TRICORE', if_true: files('tricore_testboard.c')) +tricore_ss.add(when: 'CONFIG_TRICORE_TESTBOARD', if_true: files('tricore_testboard.c')) +tricore_ss.add(when: 'CONFIG_TRICORE_TESTBOARD', if_true: files('tricore_testdevice.c')) tricore_ss.add(when: 'CONFIG_TRIBOARD', if_true: files('triboard.c')) tricore_ss.add(when: 'CONFIG_TC27X_SOC', if_true: files('tc27x_soc.c')) diff --git a/hw/tricore/tc27x_soc.c b/hw/tricore/tc27x_soc.c index 8af079e6b25..ecd92717b50 100644 --- a/hw/tricore/tc27x_soc.c +++ b/hw/tricore/tc27x_soc.c @@ -21,13 +21,9 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "hw/sysbus.h" -#include "hw/boards.h" #include "hw/loader.h" #include "qemu/units.h" #include "hw/misc/unimp.h" -#include "exec/address-spaces.h" -#include "qemu/log.h" -#include "cpu.h" #include "hw/tricore/tc27x_soc.h" #include "hw/tricore/triboard.h" diff --git a/hw/tricore/triboard.c b/hw/tricore/triboard.c index 16e2fd7e27e..4dba0259cd3 100644 --- a/hw/tricore/triboard.c +++ b/hw/tricore/triboard.c @@ -22,11 +22,8 @@ #include "qemu/units.h" #include "qapi/error.h" #include "hw/qdev-properties.h" -#include "cpu.h" #include "net/net.h" -#include "hw/boards.h" #include "hw/loader.h" -#include "exec/address-spaces.h" #include "elf.h" #include "hw/tricore/tricore.h" #include "qemu/error-report.h" diff --git a/hw/tricore/tricore_testboard.c b/hw/tricore/tricore_testboard.c index 12ea1490fde..b6810e3be05 100644 --- a/hw/tricore/tricore_testboard.c +++ b/hw/tricore/tricore_testboard.c @@ -25,9 +25,9 @@ #include "net/net.h" #include "hw/boards.h" #include "hw/loader.h" -#include "exec/address-spaces.h" #include "elf.h" #include "hw/tricore/tricore.h" +#include "hw/tricore/tricore_testdevice.h" #include "qemu/error-report.h" @@ -57,6 +57,7 @@ static void tricore_testboard_init(MachineState *machine, int board_id) { TriCoreCPU *cpu; CPUTriCoreState *env; + TriCoreTestDeviceState *test_dev; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *ext_cram = g_new(MemoryRegion, 1); @@ -88,6 +89,12 @@ static void tricore_testboard_init(MachineState *machine, int board_id) memory_region_add_subregion(sysmem, 0xf0050000, pcp_data); memory_region_add_subregion(sysmem, 0xf0060000, pcp_text); + test_dev = g_new(TriCoreTestDeviceState, 1); + object_initialize(test_dev, sizeof(TriCoreTestDeviceState), + TYPE_TRICORE_TESTDEVICE); + memory_region_add_subregion(sysmem, 0xf0000000, &test_dev->iomem); + + tricoretb_binfo.ram_size = machine->ram_size; tricoretb_binfo.kernel_filename = machine->kernel_filename; diff --git a/hw/tricore/tricore_testdevice.c b/hw/tricore/tricore_testdevice.c 
new file mode 100644 index 00000000000..a1563aa5689 --- /dev/null +++ b/hw/tricore/tricore_testdevice.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2018-2021 Bastian Koppelmann Paderborn University + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/sysbus.h" +#include "hw/qdev-properties.h" +#include "hw/tricore/tricore_testdevice.h" + +static void tricore_testdevice_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + exit(value); +} + +static uint64_t tricore_testdevice_read(void *opaque, hwaddr offset, + unsigned size) +{ + return 0xdeadbeef; +} + +static void tricore_testdevice_reset(DeviceState *dev) +{ +} + +static const MemoryRegionOps tricore_testdevice_ops = { + .read = tricore_testdevice_read, + .write = tricore_testdevice_write, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void tricore_testdevice_init(Object *obj) +{ + TriCoreTestDeviceState *s = TRICORE_TESTDEVICE(obj); + /* map memory */ + memory_region_init_io(&s->iomem, OBJECT(s), &tricore_testdevice_ops, s, + "tricore_testdevice", 0x4); +} + +static Property tricore_testdevice_properties[] = { + DEFINE_PROP_END_OF_LIST() +}; + +static void tricore_testdevice_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, tricore_testdevice_properties); + dc->reset = tricore_testdevice_reset; +} + +static const TypeInfo tricore_testdevice_info = { + .name = TYPE_TRICORE_TESTDEVICE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(TriCoreTestDeviceState), + .instance_init = tricore_testdevice_init, + .class_init = tricore_testdevice_class_init, +}; + +static void tricore_testdevice_register_types(void) +{ + type_register_static(&tricore_testdevice_info); +} + +type_init(tricore_testdevice_register_types) diff --git a/hw/unicore32/Kconfig b/hw/unicore32/Kconfig deleted file mode 100644 index 4443a29dd29..00000000000 --- a/hw/unicore32/Kconfig +++ /dev/null @@ -1,5 +0,0 @@ -config PUV3 - bool - select ISA_BUS - select PCKBD - select PTIMER diff --git a/hw/unicore32/meson.build b/hw/unicore32/meson.build deleted file mode 100644 index fc26d6bcabe..00000000000 --- a/hw/unicore32/meson.build +++ /dev/null @@ -1,5 +0,0 @@ -unicore32_ss = ss.source_set() -# PKUnity-v3 SoC and board information -unicore32_ss.add(when: 'CONFIG_PUV3', if_true: files('puv3.c')) - -hw_arch += {'unicore32': unicore32_ss} diff --git a/hw/unicore32/puv3.c b/hw/unicore32/puv3.c deleted file mode 100644 index eacacb4249b..00000000000 --- a/hw/unicore32/puv3.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Generic PKUnity SoC machine and board descriptor - * - * Copyright (C) 2010-2012 Guan Xuetao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software 
Foundation, or any later version. - * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "cpu.h" -#include "ui/console.h" -#include "hw/boards.h" -#include "hw/loader.h" -#include "sysemu/qtest.h" -#include "hw/unicore32/puv3.h" -#include "hw/input/i8042.h" -#include "hw/irq.h" - -#define KERNEL_LOAD_ADDR 0x03000000 -#define KERNEL_MAX_SIZE 0x00800000 /* Just a guess */ - -/* PKUnity System bus (AHB): 0xc0000000 - 0xedffffff (640MB) */ -#define PUV3_DMA_BASE (0xc0200000) /* AHB-4 */ - -/* PKUnity Peripheral bus (APB): 0xee000000 - 0xefffffff (128MB) */ -#define PUV3_GPIO_BASE (0xee500000) /* APB-5 */ -#define PUV3_INTC_BASE (0xee600000) /* APB-6 */ -#define PUV3_OST_BASE (0xee800000) /* APB-8 */ -#define PUV3_PM_BASE (0xeea00000) /* APB-10 */ -#define PUV3_PS2_BASE (0xeeb00000) /* APB-11 */ - -static void puv3_intc_cpu_handler(void *opaque, int irq, int level) -{ - UniCore32CPU *cpu = opaque; - CPUState *cs = CPU(cpu); - - assert(irq == 0); - if (level) { - cpu_interrupt(cs, CPU_INTERRUPT_HARD); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } -} - -static void puv3_soc_init(CPUUniCore32State *env) -{ - qemu_irq cpu_intc, irqs[PUV3_IRQS_NR]; - DeviceState *dev; - MemoryRegion *i8042 = g_new(MemoryRegion, 1); - int i; - - /* Initialize interrupt controller */ - cpu_intc = qemu_allocate_irq(puv3_intc_cpu_handler, - env_archcpu(env), 0); - dev = sysbus_create_simple("puv3_intc", PUV3_INTC_BASE, cpu_intc); - for (i = 0; i < PUV3_IRQS_NR; i++) { - irqs[i] = qdev_get_gpio_in(dev, i); - } - - /* Initialize minimal necessary devices for kernel booting */ - sysbus_create_simple("puv3_pm", PUV3_PM_BASE, NULL); - sysbus_create_simple("puv3_dma", PUV3_DMA_BASE, NULL); - sysbus_create_simple("puv3_ost", PUV3_OST_BASE, irqs[PUV3_IRQS_OST0]); - sysbus_create_varargs("puv3_gpio", PUV3_GPIO_BASE, - irqs[PUV3_IRQS_GPIOLOW0], irqs[PUV3_IRQS_GPIOLOW1], - irqs[PUV3_IRQS_GPIOLOW2], irqs[PUV3_IRQS_GPIOLOW3], - irqs[PUV3_IRQS_GPIOLOW4], irqs[PUV3_IRQS_GPIOLOW5], - irqs[PUV3_IRQS_GPIOLOW6], irqs[PUV3_IRQS_GPIOLOW7], - irqs[PUV3_IRQS_GPIOHIGH], NULL); - - /* Keyboard (i8042), mouse disabled for nographic */ - i8042_mm_init(irqs[PUV3_IRQS_PS2_KBD], NULL, i8042, PUV3_REGS_OFFSET, 4); - memory_region_add_subregion(get_system_memory(), PUV3_PS2_BASE, i8042); -} - -static void puv3_board_init(CPUUniCore32State *env, ram_addr_t ram_size) -{ - MemoryRegion *ram_memory = g_new(MemoryRegion, 1); - - /* SDRAM at address zero. 
*/ - memory_region_init_ram(ram_memory, NULL, "puv3.ram", ram_size, - &error_fatal); - memory_region_add_subregion(get_system_memory(), 0, ram_memory); -} - -static const GraphicHwOps no_ops; - -static void puv3_load_kernel(const char *kernel_filename) -{ - int size; - - if (kernel_filename == NULL && qtest_enabled()) { - return; - } - if (kernel_filename == NULL) { - error_report("kernel parameter cannot be empty"); - exit(1); - } - - /* only zImage format supported */ - size = load_image_targphys(kernel_filename, KERNEL_LOAD_ADDR, - KERNEL_MAX_SIZE); - if (size < 0) { - error_report("Load kernel error: '%s'", kernel_filename); - exit(1); - } - - /* cheat curses that we have a graphic console, only under ocd console */ - graphic_console_init(NULL, 0, &no_ops, NULL); -} - -static void puv3_init(MachineState *machine) -{ - ram_addr_t ram_size = machine->ram_size; - const char *kernel_filename = machine->kernel_filename; - const char *initrd_filename = machine->initrd_filename; - CPUUniCore32State *env; - UniCore32CPU *cpu; - - if (initrd_filename) { - error_report("Please use kernel built-in initramdisk"); - exit(1); - } - - cpu = UNICORE32_CPU(cpu_create(machine->cpu_type)); - env = &cpu->env; - - puv3_soc_init(env); - puv3_board_init(env, ram_size); - puv3_load_kernel(kernel_filename); -} - -static void puv3_machine_init(MachineClass *mc) -{ - mc->desc = "PKUnity Version-3 based on UniCore32"; - mc->init = puv3_init; - mc->is_default = true; - mc->default_cpu_type = UNICORE32_CPU_TYPE_NAME("UniCore-II"); -} - -DEFINE_MACHINE("puv3", puv3_machine_init) diff --git a/hw/usb/bus.c b/hw/usb/bus.c index 07083349f51..92d6ed56261 100644 --- a/hw/usb/bus.c +++ b/hw/usb/bus.c @@ -2,6 +2,8 @@ #include "hw/qdev-properties.h" #include "hw/usb.h" #include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/type-helpers.h" #include "qemu/error-report.h" #include "qemu/module.h" #include "sysemu/sysemu.h" @@ -82,7 +84,7 @@ const VMStateDescription vmstate_usb_device = { void usb_bus_new(USBBus *bus, size_t bus_size, USBBusOps *ops, DeviceState *host) { - qbus_create_inplace(bus, bus_size, TYPE_USB_BUS, host, NULL); + qbus_init(bus, bus_size, TYPE_USB_BUS, host, NULL); qbus_set_bus_hotplug_handler(BUS(bus)); bus->ops = ops; bus->busnr = next_usb_bus++; @@ -631,15 +633,16 @@ static char *usb_get_fw_dev_path(DeviceState *qdev) return fw_path; } -void hmp_info_usb(Monitor *mon, const QDict *qdict) +HumanReadableText *qmp_x_query_usb(Error **errp) { + g_autoptr(GString) buf = g_string_new(""); USBBus *bus; USBDevice *dev; USBPort *port; if (QTAILQ_EMPTY(&busses)) { - monitor_printf(mon, "USB support not enabled\n"); - return; + error_setg(errp, "USB support not enabled"); + return NULL; } QTAILQ_FOREACH(bus, &busses, next) { @@ -647,14 +650,17 @@ void hmp_info_usb(Monitor *mon, const QDict *qdict) dev = port->dev; if (!dev) continue; - monitor_printf(mon, " Device %d.%d, Port %s, Speed %s Mb/s, " - "Product %s%s%s\n", - bus->busnr, dev->addr, port->path, - usb_speed(dev->speed), dev->product_desc, - dev->qdev.id ? ", ID: " : "", - dev->qdev.id ?: ""); + g_string_append_printf(buf, + " Device %d.%d, Port %s, Speed %s Mb/s, " + "Product %s%s%s\n", + bus->busnr, dev->addr, port->path, + usb_speed(dev->speed), dev->product_desc, + dev->qdev.id ? 
", ID: " : "", + dev->qdev.id ?: ""); } } + + return human_readable_text_from_str(buf); } /* handle legacy -usbdevice cmd line option */ diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c index 5c76bed77aa..6c8c0355e09 100644 --- a/hw/usb/ccid-card-emulated.c +++ b/hw/usb/ccid-card-emulated.c @@ -612,6 +612,7 @@ static const TypeInfo emulated_card_info = { .instance_size = sizeof(EmulatedState), .class_init = emulated_class_initfn, }; +module_obj(TYPE_EMULATED_CCID); static void ccid_card_emulated_register_types(void) { diff --git a/hw/usb/ccid-card-passthru.c b/hw/usb/ccid-card-passthru.c index c1a90fcc7a5..fa3040fb715 100644 --- a/hw/usb/ccid-card-passthru.c +++ b/hw/usb/ccid-card-passthru.c @@ -374,7 +374,7 @@ static void passthru_realize(CCIDCardState *base, Error **errp) card->atr_length = sizeof(DEFAULT_ATR); } -static VMStateDescription passthru_vmstate = { +static const VMStateDescription passthru_vmstate = { .name = "ccid-card-passthru", .version_id = 1, .minimum_version_id = 1, @@ -414,6 +414,7 @@ static const TypeInfo passthru_card_info = { .instance_size = sizeof(PassthruState), .class_init = passthru_class_initfn, }; +module_obj(TYPE_CCID_PASSTHRU); static void ccid_card_passthru_register_types(void) { diff --git a/hw/usb/chipidea.c b/hw/usb/chipidea.c index 3dcd22ccba8..b1c85404d6f 100644 --- a/hw/usb/chipidea.c +++ b/hw/usb/chipidea.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "hw/usb/hcd-ehci.h" #include "hw/usb/chipidea.h" -#include "qemu/log.h" #include "qemu/module.h" enum { diff --git a/hw/usb/combined-packet.c b/hw/usb/combined-packet.c index 5d57e883dcb..e56802f89a3 100644 --- a/hw/usb/combined-packet.c +++ b/hw/usb/combined-packet.c @@ -171,7 +171,9 @@ void usb_ep_combine_input_packets(USBEndpoint *ep) if ((p->iov.size % ep->max_packet_size) != 0 || !p->short_not_ok || next == NULL || /* Work around for Linux usbfs bulk splitting + migration */ - (totalsize == (16 * KiB - 36) && p->int_req)) { + (totalsize == (16 * KiB - 36) && p->int_req) || + /* Next package may grow combined package over 1MiB */ + totalsize > 1 * MiB - ep->max_packet_size) { usb_device_handle_data(ep->dev, first); assert(first->status == USB_RET_ASYNC); if (first->combined) { diff --git a/hw/usb/desc-msos.c b/hw/usb/desc-msos.c index 3a5ad7c8d0f..c72c65b650c 100644 --- a/hw/usb/desc-msos.c +++ b/hw/usb/desc-msos.c @@ -5,12 +5,12 @@ /* * Microsoft OS Descriptors * - * Windows tries to fetch some special descriptors with informations + * Windows tries to fetch some special descriptors with information * specifically for windows. Presence is indicated using a special * string @ index 0xee. There are two kinds of descriptors: * * compatid descriptor - * Used to bind drivers, if usb class isn't specific enougth. + * Used to bind drivers, if usb class isn't specific enough. * Used for PTP/MTP for example (both share the same usb class). * * properties descriptor @@ -23,7 +23,7 @@ * HLM\SYSTEM\CurrentControlSet\Control\usbflags * HLM\SYSTEM\CurrentControlSet\Enum\USB * Windows will complain it can't delete entries on the second one. - * It has deleted everything it had permissions too, which is enouth + * It has deleted everything it had permissions too, which is enough * as this includes "Device Parameters". * * http://msdn.microsoft.com/en-us/library/windows/hardware/ff537430.aspx @@ -181,7 +181,7 @@ static int usb_desc_msos_prop(const USBDesc *desc, uint8_t *dest) if (desc->msos->Label) { /* - * Given as example in the specs. 
Havn't figured yet where + * Given as example in the specs. Haven't figured yet where * this label shows up in the windows gui. */ length += usb_desc_msos_prop_str(dest+length, MSOS_REG_SZ, @@ -192,8 +192,8 @@ static int usb_desc_msos_prop(const USBDesc *desc, uint8_t *dest) if (desc->msos->SelectiveSuspendEnabled) { /* * Signaling remote wakeup capability in the standard usb - * descriptors isn't enouth to make windows actually use it. - * This is the "Yes, we really mean it" registy entry to flip + * descriptors isn't enough to make windows actually use it. + * This is the "Yes, we really mean it" registry entry to flip * the switch in the windows drivers. */ length += usb_desc_msos_prop_dword(dest+length, diff --git a/hw/usb/desc.h b/hw/usb/desc.h index 4d81c68e0ef..3ac604ecfa1 100644 --- a/hw/usb/desc.h +++ b/hw/usb/desc.h @@ -133,7 +133,7 @@ struct USBDescConfig { const USBDescIface *ifs; }; -/* conceptually an Interface Association Descriptor, and releated interfaces */ +/* conceptually an Interface Association Descriptor, and related interfaces */ struct USBDescIfaceAssoc { uint8_t bFirstInterface; uint8_t bInterfaceCount; diff --git a/hw/usb/dev-audio.c b/hw/usb/dev-audio.c index f5cb2467929..8748c1ba040 100644 --- a/hw/usb/dev-audio.c +++ b/hw/usb/dev-audio.c @@ -168,7 +168,7 @@ static const USBDescIface desc_iface[] = { STRING_FEATURE_UNIT, /* u8 iFeature */ } },{ - /* Headphone Ouptut Terminal ID3 Descriptor */ + /* Headphone Output Terminal ID3 Descriptor */ .data = (uint8_t[]) { 0x09, /* u8 bLength */ USB_DT_CS_INTERFACE, /* u8 bDescriptorType */ @@ -332,7 +332,7 @@ static const USBDescIface desc_iface_multi[] = { STRING_FEATURE_UNIT, /* u8 iFeature */ } },{ - /* Headphone Ouptut Terminal ID3 Descriptor */ + /* Headphone Output Terminal ID3 Descriptor */ .data = (uint8_t[]) { 0x09, /* u8 bLength */ USB_DT_CS_INTERFACE, /* u8 bDescriptorType */ diff --git a/hw/usb/dev-hid.c b/hw/usb/dev-hid.c index fc39bab79f9..1c7ae97c303 100644 --- a/hw/usb/dev-hid.c +++ b/hw/usb/dev-hid.c @@ -656,7 +656,7 @@ static void usb_hid_handle_data(USBDevice *dev, USBPacket *p) { USBHIDState *us = USB_HID(dev); HIDState *hs = &us->hid; - uint8_t buf[p->iov.size]; + g_autofree uint8_t *buf = g_malloc(p->iov.size); int len = 0; switch (p->pid) { diff --git a/hw/usb/dev-mtp.c b/hw/usb/dev-mtp.c index bbb82743448..c1d1694fd0b 100644 --- a/hw/usb/dev-mtp.c +++ b/hw/usb/dev-mtp.c @@ -772,12 +772,9 @@ static void usb_mtp_add_str(MTPData *data, const char *str) static void usb_mtp_add_time(MTPData *data, time_t time) { - char buf[16]; - struct tm tm; - - gmtime_r(&time, &tm); - strftime(buf, sizeof(buf), "%Y%m%dT%H%M%S", &tm); - usb_mtp_add_str(data, buf); + g_autoptr(GDateTime) then = g_date_time_new_from_unix_utc(time); + g_autofree char *thenstr = g_date_time_format(then, "%Y%m%dT%H%M%S"); + usb_mtp_add_str(data, thenstr); } /* ----------------------------------------------------------------------- */ @@ -907,7 +904,8 @@ static MTPData *usb_mtp_get_object_handles(MTPState *s, MTPControl *c, MTPObject *o) { MTPData *d = usb_mtp_data_alloc(c); - uint32_t i = 0, handles[o->nchildren]; + uint32_t i = 0; + g_autofree uint32_t *handles = g_new(uint32_t, o->nchildren); MTPObject *iter; trace_usb_mtp_op_get_object_handles(s->dev.addr, o->handle, o->path); diff --git a/hw/usb/dev-smartcard-reader.c b/hw/usb/dev-smartcard-reader.c index bc3d94092a2..91ffd9f8ae8 100644 --- a/hw/usb/dev-smartcard-reader.c +++ b/hw/usb/dev-smartcard-reader.c @@ -1320,8 +1320,7 @@ static void ccid_realize(USBDevice *dev, Error **errp) 
usb_desc_create_serial(dev); usb_desc_init(dev); - qbus_create_inplace(&s->bus, sizeof(s->bus), TYPE_CCID_BUS, DEVICE(dev), - NULL); + qbus_init(&s->bus, sizeof(s->bus), TYPE_CCID_BUS, DEVICE(dev), NULL); qbus_set_hotplug_handler(BUS(&s->bus), OBJECT(dev)); s->intr = usb_ep_get(dev, USB_TOKEN_IN, CCID_INT_IN_EP); s->bulk = usb_ep_get(dev, USB_TOKEN_IN, CCID_BULK_IN_EP); @@ -1365,7 +1364,7 @@ static int ccid_pre_save(void *opaque) return 0; } -static VMStateDescription bulk_in_vmstate = { +static const VMStateDescription bulk_in_vmstate = { .name = "CCID BulkIn state", .version_id = 1, .minimum_version_id = 1, @@ -1377,7 +1376,7 @@ static VMStateDescription bulk_in_vmstate = { } }; -static VMStateDescription answer_vmstate = { +static const VMStateDescription answer_vmstate = { .name = "CCID Answer state", .version_id = 1, .minimum_version_id = 1, @@ -1388,7 +1387,7 @@ static VMStateDescription answer_vmstate = { } }; -static VMStateDescription usb_device_vmstate = { +static const VMStateDescription usb_device_vmstate = { .name = "usb_device", .version_id = 1, .minimum_version_id = 1, @@ -1400,7 +1399,7 @@ static VMStateDescription usb_device_vmstate = { } }; -static VMStateDescription ccid_vmstate = { +static const VMStateDescription ccid_vmstate = { .name = "usb-ccid", .version_id = 1, .minimum_version_id = 1, diff --git a/hw/usb/dev-storage-bot.c b/hw/usb/dev-storage-bot.c index 6aad026d113..b24b3148c28 100644 --- a/hw/usb/dev-storage-bot.c +++ b/hw/usb/dev-storage-bot.c @@ -32,12 +32,12 @@ static void usb_msd_bot_realize(USBDevice *dev, Error **errp) usb_desc_create_serial(dev); usb_desc_init(dev); + dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); if (d->hotplugged) { s->dev.auto_attach = 0; } - scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(dev), - &usb_msd_scsi_info_bot, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(dev), &usb_msd_scsi_info_bot); usb_msd_handle_reset(dev); } diff --git a/hw/usb/dev-storage-classic.c b/hw/usb/dev-storage-classic.c index 00cb34b22f0..00f25bade28 100644 --- a/hw/usb/dev-storage-classic.c +++ b/hw/usb/dev-storage-classic.c @@ -64,8 +64,9 @@ static void usb_msd_storage_realize(USBDevice *dev, Error **errp) usb_desc_create_serial(dev); usb_desc_init(dev); - scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(dev), - &usb_msd_scsi_info_storage, NULL); + dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); + scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(dev), + &usb_msd_scsi_info_storage); scsi_dev = scsi_bus_legacy_add_drive(&s->bus, blk, 0, !!s->removable, s->conf.bootindex, s->conf.share_rw, s->conf.rerror, s->conf.werror, diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c index d2bd85d3f6b..599d6b52a01 100644 --- a/hw/usb/dev-uas.c +++ b/hw/usb/dev-uas.c @@ -840,6 +840,9 @@ static void usb_uas_handle_data(USBDevice *dev, USBPacket *p) } break; case UAS_PIPE_ID_STATUS: + if (p->stream > UAS_MAX_STREAMS) { + goto err_stream; + } if (p->stream) { QTAILQ_FOREACH(st, &uas->results, next) { if (st->stream == p->stream) { @@ -867,6 +870,9 @@ static void usb_uas_handle_data(USBDevice *dev, USBPacket *p) break; case UAS_PIPE_ID_DATA_IN: case UAS_PIPE_ID_DATA_OUT: + if (p->stream > UAS_MAX_STREAMS) { + goto err_stream; + } if (p->stream) { req = usb_uas_find_request(uas, p->stream); } else { @@ -902,6 +908,11 @@ static void usb_uas_handle_data(USBDevice *dev, USBPacket *p) p->status = USB_RET_STALL; break; } + +err_stream: + error_report("%s: invalid stream %d", __func__, p->stream); + p->status = USB_RET_STALL; + return; } static void usb_uas_unrealize(USBDevice 
*dev) @@ -926,8 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp) QTAILQ_INIT(&uas->requests); uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas); - scsi_bus_new(&uas->bus, sizeof(uas->bus), DEVICE(dev), - &usb_uas_scsi_info, NULL); + dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); + scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info); } static const VMStateDescription vmstate_usb_uas = { diff --git a/hw/usb/dev-wacom.c b/hw/usb/dev-wacom.c index b5950486350..ed687bc9f1e 100644 --- a/hw/usb/dev-wacom.c +++ b/hw/usb/dev-wacom.c @@ -301,7 +301,7 @@ static void usb_wacom_handle_control(USBDevice *dev, USBPacket *p, static void usb_wacom_handle_data(USBDevice *dev, USBPacket *p) { USBWacomState *s = (USBWacomState *) dev; - uint8_t buf[p->iov.size]; + g_autofree uint8_t *buf = g_malloc(p->iov.size); int len = 0; switch (p->pid) { diff --git a/hw/usb/hcd-dwc3.c b/hw/usb/hcd-dwc3.c index d547d0538dd..279263489e4 100644 --- a/hw/usb/hcd-dwc3.c +++ b/hw/usb/hcd-dwc3.c @@ -31,7 +31,6 @@ #include "hw/sysbus.h" #include "hw/register.h" #include "qemu/bitops.h" -#include "qemu/log.h" #include "qom/object.h" #include "migration/vmstate.h" #include "hw/qdev-properties.h" diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c index 0cb02a64321..d1b5657d722 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -31,6 +31,7 @@ #include "hw/usb/uhci-regs.h" #include "migration/vmstate.h" #include "hw/pci/pci.h" +#include "hw/irq.h" #include "hw/qdev-properties.h" #include "qapi/error.h" #include "qemu/timer.h" @@ -290,7 +291,7 @@ static UHCIAsync *uhci_async_find_td(UHCIState *s, uint32_t td_addr) static void uhci_update_irq(UHCIState *s) { - int level; + int level = 0; if (((s->status2 & 1) && (s->intr & (1 << 2))) || ((s->status2 & 2) && (s->intr & (1 << 3))) || ((s->status & UHCI_STS_USBERR) && (s->intr & (1 << 0))) || @@ -298,10 +299,8 @@ static void uhci_update_irq(UHCIState *s) (s->status & UHCI_STS_HSERR) || (s->status & UHCI_STS_HCPERR)) { level = 1; - } else { - level = 0; } - pci_set_irq(&s->dev, level); + qemu_set_irq(s->irq, level); } static void uhci_reset(DeviceState *dev) @@ -1170,9 +1169,9 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) pci_conf[PCI_CLASS_PROG] = 0x00; /* TODO: reset value should be 0. */ - pci_conf[USB_SBRN] = USB_RELEASE_1; // release number - + pci_conf[USB_SBRN] = USB_RELEASE_1; /* release number */ pci_config_set_interrupt_pin(pci_conf, u->info.irq_pin + 1); + s->irq = pci_allocate_irq(dev); if (s->masterbus) { USBPort *ports[NB_PORTS]; @@ -1285,6 +1284,9 @@ void uhci_data_class_init(ObjectClass *klass, void *data) } else { device_class_set_props(dc, uhci_properties_standalone); } + if (info->notuser) { + dc->user_creatable = false; + } u->info = *info; } diff --git a/hw/usb/hcd-uhci.h b/hw/usb/hcd-uhci.h index e61d8fcb192..c85ab7868ee 100644 --- a/hw/usb/hcd-uhci.h +++ b/hw/usb/hcd-uhci.h @@ -60,7 +60,7 @@ typedef struct UHCIState { uint32_t frame_bandwidth; bool completions_only; UHCIPort ports[NB_PORTS]; - + qemu_irq irq; /* Interrupts that should be raised at the end of the current frame. 
*/ uint32_t pending_int_mask; @@ -85,6 +85,7 @@ typedef struct UHCIInfo { uint8_t irq_pin; void (*realize)(PCIDevice *dev, Error **errp); bool unplug; + bool notuser; /* disallow user_creatable */ } UHCIInfo; void uhci_data_class_init(ObjectClass *klass, void *data); diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c index 9421734d0fe..e934b1a5b1f 100644 --- a/hw/usb/hcd-xhci-pci.c +++ b/hw/usb/hcd-xhci-pci.c @@ -57,7 +57,7 @@ static void xhci_pci_intr_update(XHCIState *xhci, int n, bool enable) } } -static void xhci_pci_intr_raise(XHCIState *xhci, int n, bool level) +static bool xhci_pci_intr_raise(XHCIState *xhci, int n, bool level) { XHCIPciState *s = container_of(xhci, XHCIPciState, xhci); PCIDevice *pci_dev = PCI_DEVICE(s); @@ -67,15 +67,18 @@ static void xhci_pci_intr_raise(XHCIState *xhci, int n, bool level) msi_enabled(pci_dev))) { pci_set_irq(pci_dev, level); } - if (msix_enabled(pci_dev)) { + + if (msix_enabled(pci_dev) && level) { msix_notify(pci_dev, n); - return; + return true; } - if (msi_enabled(pci_dev)) { + if (msi_enabled(pci_dev) && level) { msi_notify(pci_dev, n); - return; + return true; } + + return false; } static void xhci_pci_reset(DeviceState *dev) diff --git a/hw/usb/hcd-xhci-sysbus.c b/hw/usb/hcd-xhci-sysbus.c index 42e2574c829..a14e4381960 100644 --- a/hw/usb/hcd-xhci-sysbus.c +++ b/hw/usb/hcd-xhci-sysbus.c @@ -16,11 +16,13 @@ #include "hw/acpi/aml-build.h" #include "hw/irq.h" -static void xhci_sysbus_intr_raise(XHCIState *xhci, int n, bool level) +static bool xhci_sysbus_intr_raise(XHCIState *xhci, int n, bool level) { XHCISysbusState *s = container_of(xhci, XHCISysbusState, xhci); qemu_set_irq(s->irq[n], level); + + return false; } void xhci_sysbus_reset(DeviceState *dev) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index 46212b1e695..e01700039b1 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -551,7 +551,9 @@ static void xhci_intr_update(XHCIState *xhci, int v) level = 1; } if (xhci->intr_raise) { - xhci->intr_raise(xhci, 0, level); + if (xhci->intr_raise(xhci, 0, level)) { + xhci->intr[0].iman &= ~IMAN_IP; + } } } if (xhci->intr_update) { @@ -579,7 +581,9 @@ static void xhci_intr_raise(XHCIState *xhci, int v) return; } if (xhci->intr_raise) { - xhci->intr_raise(xhci, v, true); + if (xhci->intr_raise(xhci, v, true)) { + xhci->intr[v].iman &= ~IMAN_IP; + } } } diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h index 7bba361f3bb..98f598382ad 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h @@ -194,7 +194,7 @@ typedef struct XHCIState { uint32_t flags; uint32_t max_pstreams_mask; void (*intr_update)(XHCIState *s, int n, bool enable); - void (*intr_raise)(XHCIState *s, int n, bool level); + bool (*intr_raise)(XHCIState *s, int n, bool level); DeviceState *hostOpaque; /* Operational Registers */ diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c index 2518306f527..d0d46dd0a4a 100644 --- a/hw/usb/host-libusb.c +++ b/hw/usb/host-libusb.c @@ -254,6 +254,29 @@ static void usb_host_del_fd(int fd, void *user_data) qemu_set_fd_handler(fd, NULL, NULL, NULL); } +#else + +static QEMUTimer *poll_timer; +static uint32_t request_count; + +static void usb_host_timer_kick(void) +{ + int64_t delay_ns; + + delay_ns = request_count + ? 
(NANOSECONDS_PER_SECOND / 100) /* 10 ms interval with active req */ + : (NANOSECONDS_PER_SECOND); /* 1 sec interval otherwise */ + timer_mod(poll_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + delay_ns); +} + +static void usb_host_timer(void *opaque) +{ + struct timeval tv = { 0, 0 }; + + libusb_handle_events_timeout(ctx, &tv); + usb_host_timer_kick(); +} + #endif /* !CONFIG_WIN32 */ static int usb_host_init(void) @@ -276,7 +299,8 @@ static int usb_host_init(void) libusb_set_debug(ctx, loglevel); #endif #ifdef CONFIG_WIN32 - /* FIXME: add support for Windows. */ + poll_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, usb_host_timer, NULL); + usb_host_timer_kick(); #else libusb_set_pollfd_notifiers(ctx, usb_host_add_fd, usb_host_del_fd, @@ -364,11 +388,18 @@ static USBHostRequest *usb_host_req_alloc(USBHostDevice *s, USBPacket *p, r->buffer = g_malloc(bufsize); } QTAILQ_INSERT_TAIL(&s->requests, r, next); +#ifdef CONFIG_WIN32 + request_count++; + usb_host_timer_kick(); +#endif return r; } static void usb_host_req_free(USBHostRequest *r) { +#ifdef CONFIG_WIN32 + request_count--; +#endif QTAILQ_REMOVE(&r->host->requests, r, next); libusb_free_transfer(r->xfer); g_free(r->buffer); @@ -770,6 +801,13 @@ static void usb_host_speed_compat(USBHostDevice *s) for (i = 0; i < conf->bNumInterfaces; i++) { for (a = 0; a < conf->interface[i].num_altsetting; a++) { intf = &conf->interface[i].altsetting[a]; + + if (intf->bInterfaceClass == LIBUSB_CLASS_MASS_STORAGE && + intf->bInterfaceSubClass == 6) { /* SCSI */ + udev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); + break; + } + for (e = 0; e < intf->bNumEndpoints; e++) { endp = &intf->endpoint[e]; type = endp->bmAttributes & 0x3; @@ -1668,7 +1706,7 @@ static void usb_host_free_streams(USBDevice *udev, USBEndpoint **eps, /* * This is *NOT* about restoring state. We have absolutely no idea * what state the host device is in at the moment and whenever it is - * still present in the first place. Attemping to contine where we + * still present in the first place. Attempting to continue where we * left off is impossible. 
* * What we are going to do here is emulate a surprise removal of @@ -1770,10 +1808,12 @@ static TypeInfo usb_host_dev_info = { .class_init = usb_host_class_initfn, .instance_init = usb_host_instance_init, }; +module_obj(TYPE_USB_HOST_DEVICE); static void usb_host_register_types(void) { type_register_static(&usb_host_dev_info); + monitor_register_hmp("usbhost", true, hmp_info_usbhost); } type_init(usb_host_register_types) @@ -1893,35 +1933,6 @@ static void usb_host_auto_check(void *unused) timer_mod(usb_auto_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 2000); } -/** - * Check whether USB host device has a USB mass storage SCSI interface - */ -bool usb_host_dev_is_scsi_storage(USBDevice *ud) -{ - USBHostDevice *uhd = USB_HOST_DEVICE(ud); - struct libusb_config_descriptor *conf; - const struct libusb_interface_descriptor *intf; - bool is_scsi_storage = false; - int i; - - if (!uhd || libusb_get_active_config_descriptor(uhd->dev, &conf) != 0) { - return false; - } - - for (i = 0; i < conf->bNumInterfaces; i++) { - intf = &conf->interface[i].altsetting[ud->altsetting[i]]; - if (intf->bInterfaceClass == LIBUSB_CLASS_MASS_STORAGE && - intf->bInterfaceSubClass == 6) { /* 6 means SCSI */ - is_scsi_storage = true; - break; - } - } - - libusb_free_config_descriptor(conf); - - return is_scsi_storage; -} - void hmp_info_usbhost(Monitor *mon, const QDict *qdict) { libusb_device **devs = NULL; diff --git a/hw/usb/host-stub.c b/hw/usb/host-stub.c deleted file mode 100644 index 538ed29684c..00000000000 --- a/hw/usb/host-stub.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Stub host USB redirector - * - * Copyright (c) 2005 Fabrice Bellard - * - * Copyright (c) 2008 Max Krasnyansky - * Support for host device auto connect & disconnect - * Major rewrite to support fully async operation - * - * Copyright 2008 TJ - * Added flexible support for /dev/bus/usb /sys/bus/usb/devices in addition - * to the legacy /proc/bus/usb USB device discovery and handling - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#include "qemu/osdep.h" -#include "ui/console.h" -#include "hw/usb.h" -#include "monitor/monitor.h" - -void hmp_info_usbhost(Monitor *mon, const QDict *qdict) -{ - monitor_printf(mon, "USB host devices not supported\n"); -} - -bool usb_host_dev_is_scsi_storage(USBDevice *ud) -{ - return false; -} diff --git a/hw/usb/imx-usb-phy.c b/hw/usb/imx-usb-phy.c index e705a03a1fc..5d7a549e34d 100644 --- a/hw/usb/imx-usb-phy.c +++ b/hw/usb/imx-usb-phy.c @@ -13,7 +13,6 @@ #include "qemu/osdep.h" #include "hw/usb/imx-usb-phy.h" #include "migration/vmstate.h" -#include "qemu/log.h" #include "qemu/module.h" static const VMStateDescription vmstate_imx_usbphy = { diff --git a/hw/usb/meson.build b/hw/usb/meson.build index fb7a74e73ae..de853d780dd 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build @@ -1,17 +1,14 @@ hw_usb_modules = {} # usb subsystem core -softmmu_ss.add(files( +softmmu_ss.add(when: 'CONFIG_USB', if_true: files( 'bus.c', 'combined-packet.c', 'core.c', - 'pcap.c', - 'libhw.c' -)) - -softmmu_ss.add(when: 'CONFIG_USB', if_true: files( 'desc.c', 'desc-msos.c', + 'libhw.c', + 'pcap.c', )) # usb host adapters @@ -52,7 +49,7 @@ softmmu_ss.add(when: ['CONFIG_POSIX', 'CONFIG_USB_STORAGE_MTP'], if_true: files( # smartcard softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reader.c')) -if config_host.has_key('CONFIG_SMARTCARD') +if cacard.found() usbsmartcard_ss = ss.source_set() usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: [cacard, files('ccid-card-emulated.c', 'ccid-card-passthru.c')]) @@ -67,7 +64,7 @@ if u2f.found() endif # usb redirect -if config_host.has_key('CONFIG_USB_REDIR') +if usbredir.found() usbredir_ss = ss.source_set() usbredir_ss.add(when: 'CONFIG_USB', if_true: [usbredir, files('redirect.c', 'quirks.c')]) @@ -75,10 +72,12 @@ if config_host.has_key('CONFIG_USB_REDIR') endif # usb pass-through -softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_USB_LIBUSB', libusb], - if_true: files('host-libusb.c'), - if_false: files('host-stub.c')) -softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('host-stub.c')) +if libusb.found() + usbhost_ss = ss.source_set() + usbhost_ss.add(when: ['CONFIG_USB', libusb], + if_true: files('host-libusb.c')) + hw_usb_modules += {'host': usbhost_ss} +endif softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN', libusb], if_true: files('xen-usb.c')) diff --git a/hw/usb/quirks-ftdi-ids.h b/hw/usb/quirks-ftdi-ids.h index 57c12ef6625..f3cb157d6fa 100644 --- a/hw/usb/quirks-ftdi-ids.h +++ b/hw/usb/quirks-ftdi-ids.h @@ -625,9 +625,9 @@ * Definitions for Icom Inc. 
devices */ #define ICOM_VID 0x0C26 /* Icom vendor ID */ -/* Note: ID-1 is a communications tranceiver for HAM-radio operators */ +/* Note: ID-1 is a communications transceiver for HAM-radio operators */ #define ICOM_ID_1_PID 0x0004 /* ID-1 USB to RS-232 */ -/* Note: OPC is an Optional cable to connect an Icom Tranceiver */ +/* Note: OPC is an Optional cable to connect an Icom Transceiver */ #define ICOM_OPC_U_UC_PID 0x0018 /* OPC-478UC, OPC-1122U cloning cable */ /* Note: ID-RP* devices are Icom Repeater Devices for HAM-radio */ #define ICOM_ID_RP2C1_PID 0x0009 /* ID-RP2C Asset 1 to RS-232 */ @@ -1221,12 +1221,6 @@ #define FTDI_SCIENCESCOPE_LS_LOGBOOK_PID 0xFF1C #define FTDI_SCIENCESCOPE_HS_LOGBOOK_PID 0xFF1D -/* - * Milkymist One JTAG/Serial - */ -#define QIHARDWARE_VID 0x20B7 -#define MILKYMISTONE_JTAGSERIAL_PID 0x0713 - /* * CTI GmbH RS485 Converter http://www.cti-lean.com/ */ diff --git a/hw/usb/quirks.h b/hw/usb/quirks.h index 50ef2f9c2eb..c3e595f40b7 100644 --- a/hw/usb/quirks.h +++ b/hw/usb/quirks.h @@ -904,7 +904,6 @@ static const struct usb_device_id usbredir_ftdi_serial_ids[] = { { USB_DEVICE(FTDI_VID, FTDI_SCIENCESCOPE_HS_LOGBOOK_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CINTERION_MC55I_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DOTEC_PID) }, - { USB_DEVICE(QIHARDWARE_VID, MILKYMISTONE_JTAGSERIAL_PID) }, { USB_DEVICE(ST_VID, ST_STMCLT1030_PID) }, { USB_DEVICE(FTDI_VID, FTDI_RF_R106) }, { USB_DEVICE(FTDI_VID, FTDI_DISTORTEC_JTAG_LOCK_PICK_PID) }, diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index 17f06f34179..5f0ef9cb3b0 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -270,7 +270,7 @@ static int usbredir_read(void *priv, uint8_t *data, int count) return count; } -static gboolean usbredir_write_unblocked(GIOChannel *chan, GIOCondition cond, +static gboolean usbredir_write_unblocked(void *do_not_use, GIOCondition cond, void *opaque) { USBRedirDevice *dev = opaque; @@ -476,7 +476,7 @@ static int bufp_alloc(USBRedirDevice *dev, uint8_t *data, uint16_t len, if (dev->endpoint[EP2I(ep)].bufpq_dropping_packets) { if (dev->endpoint[EP2I(ep)].bufpq_size > dev->endpoint[EP2I(ep)].bufpq_target_size) { - free(data); + free(free_on_destroy); return -1; } dev->endpoint[EP2I(ep)].bufpq_dropping_packets = 0; @@ -620,7 +620,7 @@ static void usbredir_handle_iso_data(USBRedirDevice *dev, USBPacket *p, .endpoint = ep, .length = p->iov.size }; - uint8_t buf[p->iov.size]; + g_autofree uint8_t *buf = g_malloc(p->iov.size); /* No id, we look at the ep when receiving a status back */ usb_packet_copy(p, buf, p->iov.size); usbredirparser_send_iso_packet(dev->parser, 0, &iso_packet, @@ -818,7 +818,7 @@ static void usbredir_handle_bulk_data(USBRedirDevice *dev, USBPacket *p, usbredirparser_send_bulk_packet(dev->parser, p->id, &bulk_packet, NULL, 0); } else { - uint8_t buf[size]; + g_autofree uint8_t *buf = g_malloc(size); usb_packet_copy(p, buf, size); usbredir_log_data(dev, "bulk data out:", buf, size); usbredirparser_send_bulk_packet(dev->parser, p->id, @@ -923,7 +923,7 @@ static void usbredir_handle_interrupt_out_data(USBRedirDevice *dev, USBPacket *p, uint8_t ep) { struct usb_redir_interrupt_packet_header interrupt_packet; - uint8_t buf[p->iov.size]; + g_autofree uint8_t *buf = g_malloc(p->iov.size); DPRINTF("interrupt-out ep %02X len %zd id %"PRIu64"\n", ep, p->iov.size, p->id); @@ -2608,6 +2608,7 @@ static const TypeInfo usbredir_dev_info = { .class_init = usbredir_class_initfn, .instance_init = usbredir_instance_init, }; +module_obj(TYPE_USB_REDIR); static void usbredir_register_types(void) { diff 
--git a/hw/usb/trace-events b/hw/usb/trace-events index 38e05fc7f4d..b8287b63f15 100644 --- a/hw/usb/trace-events +++ b/hw/usb/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # core.c usb_packet_state_change(int bus, const char *port, int ep, void *p, const char *o, const char *n) "bus %d, port %s, ep %d, packet %p, state %s -> %s" diff --git a/hw/usb/u2f-emulated.c b/hw/usb/u2f-emulated.c index 9151feb63d4..63cceaa5fc8 100644 --- a/hw/usb/u2f-emulated.c +++ b/hw/usb/u2f-emulated.c @@ -307,7 +307,7 @@ static void u2f_emulated_realize(U2FKeyState *base, Error **errp) rc = u2f_emulated_setup_vdev_manualy(key); } else { error_setg(errp, "%s: cert, priv, entropy and counter " - "parameters must be provided to manualy configure " + "parameters must be provided to manually configure " "the emulated device", TYPE_U2F_EMULATED); return; } diff --git a/hw/usb/vt82c686-uhci-pci.c b/hw/usb/vt82c686-uhci-pci.c index b109c216033..0bf2b72ff08 100644 --- a/hw/usb/vt82c686-uhci-pci.c +++ b/hw/usb/vt82c686-uhci-pci.c @@ -1,6 +1,17 @@ #include "qemu/osdep.h" +#include "hw/irq.h" +#include "hw/isa/vt82c686.h" #include "hcd-uhci.h" +static void uhci_isa_set_irq(void *opaque, int irq_num, int level) +{ + UHCIState *s = opaque; + uint8_t irq = pci_get_byte(s->dev.config + PCI_INTERRUPT_LINE); + if (irq > 0 && irq < 15) { + via_isa_set_irq(pci_get_function_0(&s->dev), irq, level); + } +} + static void usb_uhci_vt82c686b_realize(PCIDevice *dev, Error **errp) { UHCIState *s = UHCI(dev); @@ -14,6 +25,8 @@ static void usb_uhci_vt82c686b_realize(PCIDevice *dev, Error **errp) pci_set_long(pci_conf + 0xc0, 0x00002000); usb_uhci_common_realize(dev, errp); + object_unref(s->irq); + s->irq = qemu_allocate_irq(uhci_isa_set_irq, s, 0); } static UHCIInfo uhci_info[] = { @@ -25,6 +38,8 @@ static UHCIInfo uhci_info[] = { .irq_pin = 3, .realize = usb_uhci_vt82c686b_realize, .unplug = true, + /* Reason: only works as USB function of VT82xx superio chips */ + .notuser = true, } }; diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c index 4d266d7bb45..0f7369e7ed6 100644 --- a/hw/usb/xen-usb.c +++ b/hw/usb/xen-usb.c @@ -26,7 +26,6 @@ #include "qemu/config-file.h" #include "qemu/main-loop.h" #include "qemu/option.h" -#include "hw/sysbus.h" #include "hw/usb.h" #include "hw/xen/xen-legacy-backend.h" #include "monitor/qdev.h" diff --git a/hw/usb/xlnx-usb-subsystem.c b/hw/usb/xlnx-usb-subsystem.c index 568257370cb..d8deeb6ced5 100644 --- a/hw/usb/xlnx-usb-subsystem.c +++ b/hw/usb/xlnx-usb-subsystem.c @@ -24,10 +24,8 @@ #include "qemu/osdep.h" #include "hw/sysbus.h" -#include "hw/irq.h" #include "hw/register.h" #include "qemu/bitops.h" -#include "qemu/log.h" #include "qom/object.h" #include "qapi/error.h" #include "hw/qdev-properties.h" diff --git a/hw/usb/xlnx-versal-usb2-ctrl-regs.c b/hw/usb/xlnx-versal-usb2-ctrl-regs.c index 9eaa59ebb8b..1c094aa1a63 100644 --- a/hw/usb/xlnx-versal-usb2-ctrl-regs.c +++ b/hw/usb/xlnx-versal-usb2-ctrl-regs.c @@ -32,7 +32,6 @@ #include "hw/irq.h" #include "hw/register.h" #include "qemu/bitops.h" -#include "qemu/log.h" #include "qom/object.h" #include "migration/vmstate.h" #include "hw/usb/xlnx-versal-usb2-ctrl-regs.h" diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 9571c2f91fd..e0dd561e85a 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -14,7 +14,6 @@ #include #include #include "qapi/error.h" -#include "hw/sysbus.h" #include "hw/vfio/vfio.h" #include "hw/vfio/vfio-common.h" #include "hw/s390x/ap-device.h" @@ -22,8 
+21,7 @@ #include "qemu/module.h" #include "qemu/option.h" #include "qemu/config-file.h" -#include "cpu.h" -#include "kvm_s390x.h" +#include "kvm/kvm_s390x.h" #include "migration/vmstate.h" #include "hw/qdev-properties.h" #include "hw/s390x/ap-bridge.h" diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index b2df708e4b0..0354737666a 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -20,7 +20,6 @@ #include #include "qapi/error.h" -#include "hw/sysbus.h" #include "hw/vfio/vfio.h" #include "hw/vfio/vfio-common.h" #include "hw/s390x/s390-ccw.h" @@ -200,7 +199,7 @@ static int vfio_ccw_handle_clear(SubchDev *sch) case 0: case -ENODEV: case -EACCES: - return 0; + return ret; case -EFAULT: default: sch_gen_unit_exception(sch); @@ -241,7 +240,7 @@ static int vfio_ccw_handle_halt(SubchDev *sch) case -EBUSY: case -ENODEV: case -EACCES: - return 0; + return ret; case -EFAULT: default: sch_gen_unit_exception(sch); @@ -322,6 +321,7 @@ static void vfio_ccw_io_notifier_handler(void *opaque) SCHIB *schib = &sch->curr_status; SCSW s; IRB irb; + ESW esw; int size; if (!event_notifier_test_and_clear(&vcdev->io_notifier)) { @@ -372,6 +372,9 @@ static void vfio_ccw_io_notifier_handler(void *opaque) copy_scsw_to_guest(&s, &irb.scsw); schib->scsw = s; + copy_esw_to_guest(&esw, &irb.esw); + sch->esw = esw; + /* If a uint check is pending, copy sense data. */ if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { @@ -412,8 +415,8 @@ static void vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev, } if (vdev->num_irqs < irq + 1) { - error_setg(errp, "vfio: unexpected number of irqs %u", - vdev->num_irqs); + error_setg(errp, "vfio: IRQ %u not available (number of irqs %u)", + irq, vdev->num_irqs); return; } @@ -470,7 +473,7 @@ static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev, if (vfio_set_irq_signaling(&vcdev->vdev, irq, 0, VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { - error_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name); + warn_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name); } qemu_set_fd_handler(event_notifier_get_fd(notifier), @@ -690,20 +693,24 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) if (vcdev->crw_region) { vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX, &err); if (err) { - goto out_crw_notifier_err; + goto out_irq_notifier_err; } } vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_REQ_IRQ_INDEX, &err); if (err) { - goto out_req_notifier_err; + /* + * Report this error, but do not make it a failing condition. + * Lack of this IRQ in the host does not prevent normal operation. 
+ */ + error_report_err(err); } return; -out_req_notifier_err: +out_irq_notifier_err: + vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_REQ_IRQ_INDEX); vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX); -out_crw_notifier_err: vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); out_io_notifier_err: vfio_ccw_put_region(vcdev); diff --git a/hw/vfio/common.c b/hw/vfio/common.c index ae5654fcdb8..080046e3f51 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -36,6 +36,7 @@ #include "qemu/range.h" #include "sysemu/kvm.h" #include "sysemu/reset.h" +#include "sysemu/runstate.h" #include "trace.h" #include "qapi/error.h" #include "migration/migration.h" @@ -134,6 +135,29 @@ static const char *index_to_str(VFIODevice *vbasedev, int index) } } +static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state) +{ + switch (container->iommu_type) { + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_IOMMU: + /* + * We support coordinated discarding of RAM via the RamDiscardManager. + */ + return ram_block_uncoordinated_discard_disable(state); + default: + /* + * VFIO_SPAPR_TCE_IOMMU most probably works just fine with + * RamDiscardManager, however, it is completely untested. + * + * VFIO_SPAPR_TCE_v2_IOMMU with "DMA memory preregistering" does + * completely the opposite of managing mapping/pinning dynamically as + * required by RamDiscardManager. We would have to special-case sections + * with a RamDiscardManager. + */ + return ram_block_discard_disable(state); + } +} + int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex, int action, int fd, Error **errp) { @@ -527,6 +551,7 @@ static int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova, QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); return 0; } } @@ -538,6 +563,7 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section) { return (!memory_region_is_ram(section->mr) && !memory_region_is_iommu(section->mr)) || + memory_region_is_protected(section->mr) || /* * Sizing an enabled 64-bit BAR can cause spurious mappings to * addresses in the upper part of the 64-bit address space. These @@ -569,6 +595,44 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, error_report("iommu map to non memory area %"HWADDR_PRIx"", xlat); return false; + } else if (memory_region_has_ram_discard_manager(mr)) { + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr); + MemoryRegionSection tmp = { + .mr = mr, + .offset_within_region = xlat, + .size = int128_make64(len), + }; + + /* + * Malicious VMs can map memory into the IOMMU, which is expected + * to remain discarded. vfio will pin all pages, populating memory. + * Disallow that. vmstate priorities make sure any RamDiscardManager + * were already restored before IOMMUs are restored. + */ + if (!ram_discard_manager_is_populated(rdm, &tmp)) { + error_report("iommu map to discarded memory (e.g., unplugged via" + " virtio-mem): %"HWADDR_PRIx"", + iotlb->translated_addr); + return false; + } + + /* + * Malicious VMs might trigger discarding of IOMMU-mapped memory. The + * pages will remain pinned inside vfio until unmapped, resulting in a + * higher memory consumption than expected. If memory would get + * populated again later, there would be an inconsistency between pages + * pinned by vfio and pages seen by QEMU. 
This is the case until + * unmapped from the IOMMU (e.g., during device reset). + * + * With malicious guests, we really only care about pinning more memory + * than expected. RLIMIT_MEMLOCK set for the user/process can never be + * exceeded and can be used to mitigate this problem. + */ + warn_report_once("Using vfio with vIOMMUs and coordinated discarding of" + " RAM (e.g., virtio-mem) works, however, malicious" + " guests can trigger pinning of more memory than" + " intended via an IOMMU. It's possible to mitigate " + " by setting/adjusting RLIMIT_MEMLOCK."); } /* @@ -649,6 +713,154 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) rcu_read_unlock(); } +static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, + MemoryRegionSection *section) +{ + VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, + listener); + const hwaddr size = int128_get64(section->size); + const hwaddr iova = section->offset_within_address_space; + int ret; + + /* Unmap with a single call. */ + ret = vfio_dma_unmap(vrdl->container, iova, size , NULL); + if (ret) { + error_report("%s: vfio_dma_unmap() failed: %s", __func__, + strerror(-ret)); + } +} + +static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + MemoryRegionSection *section) +{ + VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, + listener); + const hwaddr end = section->offset_within_region + + int128_get64(section->size); + hwaddr start, next, iova; + void *vaddr; + int ret; + + /* + * Map in (aligned within memory region) minimum granularity, so we can + * unmap in minimum granularity later. + */ + for (start = section->offset_within_region; start < end; start = next) { + next = ROUND_UP(start + 1, vrdl->granularity); + next = MIN(next, end); + + iova = start - section->offset_within_region + + section->offset_within_address_space; + vaddr = memory_region_get_ram_ptr(section->mr) + start; + + ret = vfio_dma_map(vrdl->container, iova, next - start, + vaddr, section->readonly); + if (ret) { + /* Rollback */ + vfio_ram_discard_notify_discard(rdl, section); + return ret; + } + } + return 0; +} + +static void vfio_register_ram_discard_listener(VFIOContainer *container, + MemoryRegionSection *section) +{ + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl; + + /* Ignore some corner cases not relevant in practice. 
*/ + g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE)); + g_assert(QEMU_IS_ALIGNED(section->offset_within_address_space, + TARGET_PAGE_SIZE)); + g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE)); + + vrdl = g_new0(VFIORamDiscardListener, 1); + vrdl->container = container; + vrdl->mr = section->mr; + vrdl->offset_within_address_space = section->offset_within_address_space; + vrdl->size = int128_get64(section->size); + vrdl->granularity = ram_discard_manager_get_min_granularity(rdm, + section->mr); + + g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity)); + g_assert(container->pgsizes && + vrdl->granularity >= 1ULL << ctz64(container->pgsizes)); + + ram_discard_listener_init(&vrdl->listener, + vfio_ram_discard_notify_populate, + vfio_ram_discard_notify_discard, true); + ram_discard_manager_register_listener(rdm, &vrdl->listener, section); + QLIST_INSERT_HEAD(&container->vrdl_list, vrdl, next); + + /* + * Sanity-check if we have a theoretically problematic setup where we could + * exceed the maximum number of possible DMA mappings over time. We assume + * that each mapped section in the same address space as a RamDiscardManager + * section consumes exactly one DMA mapping, with the exception of + * RamDiscardManager sections; i.e., we don't expect to have gIOMMU sections + * in the same address space as RamDiscardManager sections. + * + * We assume that each section in the address space consumes one memslot. + * We take the number of KVM memory slots as a best guess for the maximum + * number of sections in the address space we could have over time, + * also consuming DMA mappings. + */ + if (container->dma_max_mappings) { + unsigned int vrdl_count = 0, vrdl_mappings = 0, max_memslots = 512; + +#ifdef CONFIG_KVM + if (kvm_enabled()) { + max_memslots = kvm_get_max_memslots(); + } +#endif + + QLIST_FOREACH(vrdl, &container->vrdl_list, next) { + hwaddr start, end; + + start = QEMU_ALIGN_DOWN(vrdl->offset_within_address_space, + vrdl->granularity); + end = ROUND_UP(vrdl->offset_within_address_space + vrdl->size, + vrdl->granularity); + vrdl_mappings += (end - start) / vrdl->granularity; + vrdl_count++; + } + + if (vrdl_mappings + max_memslots - vrdl_count > + container->dma_max_mappings) { + warn_report("%s: possibly running out of DMA mappings. E.g., try" + " increasing the 'block-size' of virtio-mem devies." 
+ " Maximum possible DMA mappings: %d, Maximum possible" + " memslots: %d", __func__, container->dma_max_mappings, + max_memslots); + } + } +} + +static void vfio_unregister_ram_discard_listener(VFIOContainer *container, + MemoryRegionSection *section) +{ + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl = NULL; + + QLIST_FOREACH(vrdl, &container->vrdl_list, next) { + if (vrdl->mr == section->mr && + vrdl->offset_within_address_space == + section->offset_within_address_space) { + break; + } + } + + if (!vrdl) { + hw_error("vfio: Trying to unregister missing RAM discard listener"); + } + + ram_discard_manager_unregister_listener(rdm, &vrdl->listener); + QLIST_REMOVE(vrdl, next); + g_free(vrdl); +} + static void vfio_listener_region_add(MemoryListener *listener, MemoryRegionSection *section) { @@ -682,6 +894,13 @@ static void vfio_listener_region_add(MemoryListener *listener, llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask)); if (int128_ge(int128_make64(iova), llend)) { + if (memory_region_is_ram_device(section->mr)) { + trace_vfio_listener_region_add_no_dma_map( + memory_region_name(section->mr), + section->offset_within_address_space, + int128_getlo(section->size), + qemu_real_host_page_size); + } return; } end = int128_get64(int128_sub(llend, int128_one())); @@ -810,6 +1029,16 @@ static void vfio_listener_region_add(MemoryListener *listener, /* Here we assume that memory_region_is_ram(section->mr)==true */ + /* + * For RAM memory regions with a RamDiscardManager, we only want to map the + * actually populated parts - and update the mapping whenever we're notified + * about changes. + */ + if (memory_region_has_ram_discard_manager(section->mr)) { + vfio_register_ram_discard_listener(container, section); + return; + } + vaddr = memory_region_get_ram_ptr(section->mr) + section->offset_within_region + (iova - section->offset_within_address_space); @@ -947,6 +1176,10 @@ static void vfio_listener_region_del(MemoryListener *listener, pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); + } else if (memory_region_has_ram_discard_manager(section->mr)) { + vfio_unregister_ram_discard_listener(container, section); + /* Unregistering will trigger an unmap. */ + try_unmap = false; } if (try_unmap) { @@ -1108,6 +1341,49 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) rcu_read_unlock(); } +static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section, + void *opaque) +{ + const hwaddr size = int128_get64(section->size); + const hwaddr iova = section->offset_within_address_space; + const ram_addr_t ram_addr = memory_region_get_ram_addr(section->mr) + + section->offset_within_region; + VFIORamDiscardListener *vrdl = opaque; + + /* + * Sync the whole mapped region (spanning multiple individual mappings) + * in one go. 
+ */ + return vfio_get_dirty_bitmap(vrdl->container, iova, size, ram_addr); +} + +static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, + MemoryRegionSection *section) +{ + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl = NULL; + + QLIST_FOREACH(vrdl, &container->vrdl_list, next) { + if (vrdl->mr == section->mr && + vrdl->offset_within_address_space == + section->offset_within_address_space) { + break; + } + } + + if (!vrdl) { + hw_error("vfio: Trying to sync missing RAM discard listener"); + } + + /* + * We only want/can synchronize the bitmap for actually mapped parts - + * which correspond to populated parts. Replay all populated parts. + */ + return ram_discard_manager_replay_populated(rdm, section, + vfio_ram_discard_get_dirty_bitmap, + &vrdl); +} + static int vfio_sync_dirty_bitmap(VFIOContainer *container, MemoryRegionSection *section) { @@ -1139,6 +1415,8 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, } } return 0; + } else if (memory_region_has_ram_discard_manager(section->mr)) { + return vfio_sync_ram_discard_listener_dirty_bitmap(container, section); } ram_addr = memory_region_get_ram_addr(section->mr) + @@ -1165,6 +1443,7 @@ static void vfio_listener_log_sync(MemoryListener *listener, } static const MemoryListener vfio_memory_listener = { + .name = "vfio", .region_add = vfio_listener_region_add, .region_del = vfio_listener_region_del, .log_global_start = vfio_listener_log_global_start, @@ -1732,15 +2011,25 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, * new memory, it will not yet set ram_block_discard_set_required() and * therefore, neither stops us here or deals with the sudden memory * consumption of inflated memory. + * + * We do support discarding of memory coordinated via the RamDiscardManager + * with some IOMMU types. vfio_ram_block_discard_disable() handles the + * details once we know which type of IOMMU we are using. 
*/ - ret = ram_block_discard_disable(true); - if (ret) { - error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); - return ret; - } QLIST_FOREACH(container, &space->containers, next) { if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + ret = vfio_ram_block_discard_disable(container, true); + if (ret) { + error_setg_errno(errp, -ret, + "Cannot set discarding of RAM broken"); + if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, + &container->fd)) { + error_report("vfio: error disconnecting group %d from" + " container", group->groupid); + } + return ret; + } group->container = container; QLIST_INSERT_HEAD(&container->group_list, group, container_next); vfio_kvm_device_add_group(group); @@ -1768,14 +2057,22 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, container->fd = fd; container->error = NULL; container->dirty_pages_supported = false; + container->dma_max_mappings = 0; QLIST_INIT(&container->giommu_list); QLIST_INIT(&container->hostwin_list); + QLIST_INIT(&container->vrdl_list); ret = vfio_init_container(container, group->fd, errp); if (ret) { goto free_container_exit; } + ret = vfio_ram_block_discard_disable(container, true); + if (ret) { + error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); + goto free_container_exit; + } + switch (container->iommu_type) { case VFIO_TYPE1v2_IOMMU: case VFIO_TYPE1_IOMMU: @@ -1798,7 +2095,10 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes); container->pgsizes = info->iova_pgsizes; + /* The default in the kernel ("dma_entry_limit") is 65535. */ + container->dma_max_mappings = 65535; if (!ret) { + vfio_get_info_dma_avail(info, &container->dma_max_mappings); vfio_get_iommu_info_migration(container, info); } g_free(info); @@ -1820,7 +2120,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, if (ret) { error_setg_errno(errp, errno, "failed to enable container"); ret = -errno; - goto free_container_exit; + goto enable_discards_exit; } } else { container->prereg_listener = vfio_prereg_listener; @@ -1832,7 +2132,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, ret = -1; error_propagate_prepend(errp, container->error, "RAM memory listener initialization failed: "); - goto free_container_exit; + goto enable_discards_exit; } } @@ -1845,7 +2145,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, if (v2) { memory_listener_unregister(&container->prereg_listener); } - goto free_container_exit; + goto enable_discards_exit; } if (v2) { @@ -1860,7 +2160,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, if (ret) { error_setg_errno(errp, -ret, "failed to remove existing window"); - goto free_container_exit; + goto enable_discards_exit; } } else { /* The default table uses 4K pages */ @@ -1901,6 +2201,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, vfio_kvm_device_del_group(group); vfio_listener_release(container); +enable_discards_exit: + vfio_ram_block_discard_disable(container, false); + free_container_exit: g_free(container); @@ -1908,7 +2211,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, close(fd); put_space_exit: - ram_block_discard_disable(false); vfio_put_address_space(space); return ret; @@ -1938,6 +2240,7 @@ static void vfio_disconnect_container(VFIOGroup *group) if (QLIST_EMPTY(&container->group_list)) { VFIOAddressSpace *space = container->space; 
VFIOGuestIOMMU *giommu, *tmp; + VFIOHostDMAWindow *hostwin, *next; QLIST_REMOVE(container, next); @@ -1948,6 +2251,12 @@ static void vfio_disconnect_container(VFIOGroup *group) g_free(giommu); } + QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, + next) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); + } + trace_vfio_disconnect_container(container->fd); close(container->fd); g_free(container); @@ -2030,7 +2339,7 @@ void vfio_put_group(VFIOGroup *group) } if (!group->ram_block_discard_allowed) { - ram_block_discard_disable(false); + vfio_ram_block_discard_disable(group->container, false); } vfio_kvm_device_del_group(group); vfio_disconnect_container(group); @@ -2084,7 +2393,7 @@ int vfio_get_device(VFIOGroup *group, const char *name, if (!group->ram_block_discard_allowed) { group->ram_block_discard_allowed = true; - ram_block_discard_disable(false); + vfio_ram_block_discard_disable(group->container, false); } } diff --git a/hw/vfio/display.c b/hw/vfio/display.c index f04473e3cec..89bc90508fb 100644 --- a/hw/vfio/display.c +++ b/hw/vfio/display.c @@ -14,7 +14,6 @@ #include #include -#include "sysemu/sysemu.h" #include "hw/display/edid.h" #include "ui/console.h" #include "qapi/error.h" diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index 470205f487e..d4685709a3b 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -557,7 +557,7 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) * must allocate a 1MB aligned reserved memory region below 4GB with * the requested size (in bytes) for use by the Intel PCI class VGA * device at VM address 00:02.0. The base address of this reserved - * memory region must be written to the device BDSM regsiter at PCI + * memory region must be written to the device BDSM register at PCI * config offset 0x5C. */ bdsm_size = g_malloc(sizeof(*bdsm_size)); diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 384576cfc05..ff6b45de6b5 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -15,7 +15,6 @@ #include "sysemu/runstate.h" #include "hw/vfio/vfio-common.h" -#include "cpu.h" #include "migration/migration.h" #include "migration/vmstate.h" #include "migration/qemu-file.h" @@ -725,7 +724,16 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state) * _RUNNING bit */ mask = ~VFIO_DEVICE_STATE_RUNNING; - value = 0; + + /* + * When VM state transition to stop for savevm command, device should + * start saving data. 
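A minimal sketch of the state selection this hunk adds to vfio_vmstate_change(): on a stop transition the RUNNING bit is always cleared, and when the stop was triggered by savevm the SAVING bit is requested as well. The bit values here are invented for the example; the real ones come from the VFIO UAPI headers.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define DEV_STATE_RUNNING (1u << 0) /* placeholder value */
#define DEV_STATE_SAVING  (1u << 1) /* placeholder value */

/* Clear RUNNING on stop; additionally request SAVING for a savevm stop. */
static uint32_t next_device_state(uint32_t cur, int stopping_for_savevm)
{
    uint32_t mask  = ~DEV_STATE_RUNNING;
    uint32_t value = stopping_for_savevm ? DEV_STATE_SAVING : 0;

    return (cur & mask) | value;
}

int main(void)
{
    /* RUNNING device stopped by savevm ends up with only SAVING set. */
    printf("0x%" PRIx32 "\n", next_device_state(DEV_STATE_RUNNING, 1));
    return 0;
}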
+ */ + if (state == RUN_STATE_SAVE_VM) { + value = VFIO_DEVICE_STATE_SAVING; + } else { + value = 0; + } } ret = vfio_migration_set_state(vbasedev, mask, value); @@ -850,7 +858,6 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) { VFIOContainer *container = vbasedev->group->container; struct vfio_region_info *info = NULL; - Error *local_err = NULL; int ret = -ENOTSUP; if (!vbasedev->enable_migration || !container->dirty_pages_supported) { @@ -877,9 +884,8 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) "VFIO device doesn't support migration"); g_free(info); - ret = migrate_add_blocker(vbasedev->migration_blocker, &local_err); - if (local_err) { - error_propagate(errp, local_err); + ret = migrate_add_blocker(vbasedev->migration_blocker, errp); + if (ret < 0) { error_free(vbasedev->migration_blocker); vbasedev->migration_blocker = NULL; } @@ -893,6 +899,7 @@ void vfio_migration_finalize(VFIODevice *vbasedev) remove_migration_state_change_notifier(&migration->migration_state); qemu_del_vm_change_state_handler(migration->vm_state); + unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); vfio_migration_exit(vbasedev); } diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index b90cf3d37c3..0cf69a8c6d6 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -22,7 +22,6 @@ #include "qapi/error.h" #include "qapi/visitor.h" #include -#include "hw/hw.h" #include "hw/nvram/fw_cfg.h" #include "hw/qdev-properties.h" #include "pci.h" @@ -1357,7 +1356,7 @@ static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev) /* * The scope of a config reset is controlled by a mode bit in the misc register * and a fuse, exposed as a bit in another register. The fuse is the default - * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the forumula + * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the formula * scope = !(misc ^ fuse), where the resulting scope is defined the same as * the fuse. A truth table therefore tells us that if misc == fuse, we need * to flip the value of the bit in the misc register. diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 5c65aa0a98e..7b45353ce27 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -29,15 +29,14 @@ #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" #include "migration/vmstate.h" +#include "qapi/qmp/qdict.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "qemu/module.h" -#include "qemu/option.h" #include "qemu/range.h" #include "qemu/units.h" #include "sysemu/kvm.h" #include "sysemu/runstate.h" -#include "sysemu/sysemu.h" #include "pci.h" #include "trace.h" #include "qapi/error.h" @@ -942,7 +941,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) } if (vfio_opt_rom_in_denylist(vdev)) { - if (dev->opts && qemu_opt_get(dev->opts, "rombar")) { + if (dev->opts && qdict_haskey(dev->opts, "rombar")) { warn_report("Device at %s is known to cause system instability" " issues during option rom execution", vdev->vbasedev.name); @@ -1365,7 +1364,7 @@ static void vfio_pci_relocate_msix(VFIOPCIDevice *vdev, Error **errp) * TODO: Lookup table for known devices. * * Logically we might use an algorithm here to select the BAR adding - * the least additional MMIO space, but we cannot programatically + * the least additional MMIO space, but we cannot programmatically * predict the driver dependency on BAR ordering or sizing, therefore * 'auto' becomes a lookup for combinations reported to work. 
*/ @@ -1500,6 +1499,14 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) if (vdev->vendor_id == PCI_VENDOR_ID_CHELSIO && (vdev->device_id & 0xff00) == 0x5800) { msix->pba_offset = 0x1000; + /* + * BAIDU KUNLUN Virtual Function devices for KUNLUN AI processor + * return an incorrect value of 0x460000 for the VF PBA offset while + * the BAR itself is only 0x10000. The correct value is 0xb400. + */ + } else if (vfio_pci_is(vdev, PCI_VENDOR_ID_BAIDU, + PCI_DEVICE_ID_KUNLUN_VF)) { + msix->pba_offset = 0xb400; } else if (vdev->msix_relo == OFF_AUTOPCIBAR_OFF) { error_setg(errp, "hardware reports invalid configuration, " "MSIX PBA outside of specified BAR"); @@ -2151,7 +2158,7 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) } /* - * Stop any ongoing DMA by disconecting I/O, MMIO, and bus master. + * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master. * Also put INTx Disable in known state. */ cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2); @@ -2377,7 +2384,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) } /* - * We want to differentiate hot reset of mulitple in-use devices vs hot reset + * We want to differentiate hot reset of multiple in-use devices vs hot reset * of a single in-use device. VFIO_DEVICE_RESET will already handle the case * of doing hot resets when there is only a single device per bus. The in-use * here refers to how many VFIODevices are affected. A hot reset that affects @@ -2446,7 +2453,12 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) { VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); PCIDevice *pdev = &vdev->pdev; - int ret; + pcibus_t old_addr[PCI_NUM_REGIONS - 1]; + int bar, ret; + + for (bar = 0; bar < PCI_ROM_SLOT; bar++) { + old_addr[bar] = pdev->io_regions[bar].addr; + } ret = vmstate_load_state(f, &vmstate_vfio_pci_config, vdev, 1); if (ret) { @@ -2456,6 +2468,18 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) vfio_pci_write_config(pdev, PCI_COMMAND, pci_get_word(pdev->config + PCI_COMMAND), 2); + for (bar = 0; bar < PCI_ROM_SLOT; bar++) { + /* + * The address may not be changed in some scenarios + * (e.g. the VF driver isn't loaded in VM). 
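To make the post-load loop in vfio_pci_load_config() easier to follow, here is a simplified, self-contained version of the same check (made-up data, plain arrays instead of the PCI/VFIO structures): only BARs that actually moved and are smaller than a host page need their sub-page mapping refreshed.

#include <stdint.h>
#include <stdio.h>

#define NUM_BARS  6
#define PAGE_SIZE 4096u /* stand-in for qemu_real_host_page_size */

/* Re-map only BARs that both changed address and are sub-page sized. */
static void remap_sub_page_bars(const uint64_t *old_addr,
                                const uint64_t *new_addr,
                                const uint64_t *size)
{
    for (int bar = 0; bar < NUM_BARS; bar++) {
        if (old_addr[bar] != new_addr[bar] &&
            size[bar] > 0 && size[bar] < PAGE_SIZE) {
            printf("BAR%d moved, sub-page mapping needs refresh\n", bar);
        }
    }
}

int main(void)
{
    uint64_t old_addr[NUM_BARS] = { 0xfe000000 };
    uint64_t new_addr[NUM_BARS] = { 0xfe001000 };
    uint64_t size[NUM_BARS]     = { 0x800 };       /* 2 KiB BAR0 */

    remap_sub_page_bars(old_addr, new_addr, size);
    return 0;
}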
+ */ + if (old_addr[bar] != pdev->io_regions[bar].addr && + vdev->bars[bar].region.size > 0 && + vdev->bars[bar].region.size < qemu_real_host_page_size) { + vfio_sub_page_bar_update_mapping(pdev, bar); + } + } + if (msi_enabled(pdev)) { vfio_msi_enable(vdev); } else if (msix_enabled(pdev)) { @@ -3059,14 +3083,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) } } - if (vdev->vendor_id == PCI_VENDOR_ID_NVIDIA) { + if (vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) { ret = vfio_pci_nvidia_v100_ram_init(vdev, errp); if (ret && ret != -ENODEV) { error_report("Failed to setup NVIDIA V100 GPU RAM"); } } - if (vdev->vendor_id == PCI_VENDOR_ID_IBM) { + if (vfio_pci_is(vdev, PCI_VENDOR_ID_IBM, PCI_ANY_ID)) { ret = vfio_pci_nvlink2_init(vdev, errp); if (ret && ret != -ENODEV) { error_report("Failed to setup NVlink2 bridge"); diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index cc3f66f7e44..f8f08a0f362 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -156,7 +156,7 @@ static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled) * if there is no more active IRQ * @opaque: actually points to the VFIO platform device * - * Called on mmap timer timout, this function checks whether the + * Called on mmap timer timeout, this function checks whether the * IRQ is still active and if not, restores the fast path. * by construction a single eventfd is handled at a time. * if the IRQ is still active, the timer is re-programmed. diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 2900bd19417..04c6e67f8fb 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -9,7 +9,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include #include @@ -137,6 +136,7 @@ static void vfio_prereg_listener_region_del(MemoryListener *listener, } const MemoryListener vfio_prereg_listener = { + .name = "vfio-pre-reg", .region_add = vfio_prereg_listener_region_add, .region_del = vfio_prereg_listener_region_del, }; diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 079f53acf28..0ef1b5f4a65 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # pci.c vfio_intx_interrupt(const char *name, char line) " (%s) Pin %c" diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index 0eda25c4e1b..c144d42f9bd 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -58,3 +58,13 @@ config VIRTIO_MEM depends on LINUX depends on VIRTIO_MEM_SUPPORTED select MEM_DEVICE + +config VHOST_USER_I2C + bool + default y + depends on VIRTIO && VHOST_USER + +config VHOST_USER_RNG + bool + default y + depends on VIRTIO && VHOST_USER diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index fbff9bc9d4d..521f7d64a86 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -25,6 +25,10 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock. 
virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) +virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) +virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], if_true: files('vhost-user-i2c-pci.c')) +virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) +virtio_ss.add(when: ['CONFIG_VHOST_USER_RNG', 'CONFIG_VIRTIO_PCI'], if_true: files('vhost-user-rng-pci.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index c62727f8793..650e521e351 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # vhost.c vhost_commit(bool started, bool changed) "Started: %d Changed: %d" @@ -52,6 +52,7 @@ vhost_vdpa_set_vring_call(void *dev, unsigned int index, int fd) "dev: %p index: vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRIx64 vhost_vdpa_set_owner(void *dev) "dev: %p" vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64 +vhost_vdpa_get_iova_range(void *dev, uint64_t first, uint64_t last) "dev: %p first: 0x%"PRIx64" last: 0x%"PRIx64 # virtio.c virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 31b33bde37b..b65f8f7e97b 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -24,13 +24,15 @@ static int vhost_kernel_call(struct vhost_dev *dev, unsigned long int request, void *arg) { int fd = (uintptr_t) dev->opaque; + int ret; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL); - return ioctl(fd, request, arg); + ret = ioctl(fd, request, arg); + return ret < 0 ? 
-errno : ret; } -static int vhost_kernel_init(struct vhost_dev *dev, void *opaque) +static int vhost_kernel_init(struct vhost_dev *dev, void *opaque, Error **errp) { assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL); @@ -291,7 +293,7 @@ static void vhost_kernel_set_iotlb_callback(struct vhost_dev *dev, qemu_set_fd_handler((uintptr_t)dev->opaque, NULL, NULL, NULL); } -static const VhostOps kernel_ops = { +const VhostOps kernel_ops = { .backend_type = VHOST_BACKEND_TYPE_KERNEL, .vhost_backend_init = vhost_kernel_init, .vhost_backend_cleanup = vhost_kernel_cleanup, @@ -326,34 +328,6 @@ static const VhostOps kernel_ops = { }; #endif -int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) -{ - int r = 0; - - switch (backend_type) { -#ifdef CONFIG_VHOST_KERNEL - case VHOST_BACKEND_TYPE_KERNEL: - dev->vhost_ops = &kernel_ops; - break; -#endif -#ifdef CONFIG_VHOST_USER - case VHOST_BACKEND_TYPE_USER: - dev->vhost_ops = &user_ops; - break; -#endif -#ifdef CONFIG_VHOST_VDPA - case VHOST_BACKEND_TYPE_VDPA: - dev->vhost_ops = &vdpa_ops; - break; -#endif - default: - error_report("Unknown vhost backend type"); - r = -1; - } - - return r; -} - int vhost_backend_update_device_iotlb(struct vhost_dev *dev, uint64_t iova, uint64_t uaddr, uint64_t len, diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c index 6f7f91533d2..c5959579839 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c @@ -235,9 +235,8 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) fs->vhost_dev.nvqs = 1 + fs->conf.num_request_queues; fs->vhost_dev.vqs = g_new0(struct vhost_virtqueue, fs->vhost_dev.nvqs); ret = vhost_dev_init(&fs->vhost_dev, &fs->vhost_user, - VHOST_BACKEND_TYPE_USER, 0); + VHOST_BACKEND_TYPE_USER, 0, errp); if (ret < 0) { - error_setg_errno(errp, -ret, "vhost_dev_init failed"); goto err_virtio; } diff --git a/hw/virtio/vhost-user-i2c-pci.c b/hw/virtio/vhost-user-i2c-pci.c new file mode 100644 index 00000000000..70b7b65fd97 --- /dev/null +++ b/hw/virtio/vhost-user-i2c-pci.c @@ -0,0 +1,69 @@ +/* + * Vhost-user i2c virtio device PCI glue + * + * Copyright (c) 2021 Viresh Kumar + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-i2c.h" +#include "virtio-pci.h" + +struct VHostUserI2CPCI { + VirtIOPCIProxy parent_obj; + VHostUserI2C vdev; +}; + +typedef struct VHostUserI2CPCI VHostUserI2CPCI; + +#define TYPE_VHOST_USER_I2C_PCI "vhost-user-i2c-pci-base" + +DECLARE_INSTANCE_CHECKER(VHostUserI2CPCI, VHOST_USER_I2C_PCI, + TYPE_VHOST_USER_I2C_PCI) + +static void vhost_user_i2c_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserI2CPCI *dev = VHOST_USER_I2C_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + vpci_dev->nvectors = 1; + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); +} + +static void vhost_user_i2c_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_i2c_pci_realize; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = 0; /* Set by virtio-pci based on virtio id */ + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER; +} + +static void vhost_user_i2c_pci_instance_init(Object *obj) +{ + VHostUserI2CPCI *dev = VHOST_USER_I2C_PCI(obj); + + 
virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_I2C); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_i2c_pci_info = { + .base_name = TYPE_VHOST_USER_I2C_PCI, + .non_transitional_name = "vhost-user-i2c-pci", + .instance_size = sizeof(VHostUserI2CPCI), + .instance_init = vhost_user_i2c_pci_instance_init, + .class_init = vhost_user_i2c_pci_class_init, +}; + +static void vhost_user_i2c_pci_register(void) +{ + virtio_pci_types_register(&vhost_user_i2c_pci_info); +} + +type_init(vhost_user_i2c_pci_register); diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c new file mode 100644 index 00000000000..d172632bb0c --- /dev/null +++ b/hw/virtio/vhost-user-i2c.c @@ -0,0 +1,288 @@ +/* + * Vhost-user i2c virtio device + * + * Copyright (c) 2021 Viresh Kumar + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/vhost-user-i2c.h" +#include "qemu/error-report.h" +#include "standard-headers/linux/virtio_ids.h" + +/* Remove this once the header is updated in Linux kernel */ +#ifndef VIRTIO_ID_I2C_ADAPTER +#define VIRTIO_ID_I2C_ADAPTER 34 +#endif + +static void vu_i2c_start(VirtIODevice *vdev) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + int ret, i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&i2c->vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return; + } + + ret = k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + i2c->vhost_dev.acked_features = vdev->guest_features; + + ret = vhost_dev_start(&i2c->vhost_dev, vdev); + if (ret < 0) { + error_report("Error starting vhost-user-i2c: %d", -ret); + goto err_guest_notifiers; + } + + /* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. 
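The new vu_i2c_start()/vu_i2c_stop() pair follows the usual vhost start ordering: enable host notifiers, bind guest notifiers, start the backend, then unmask the virtqueues, unwinding in reverse on failure. The stub program below shows only that ordering and the goto-based unwinding; the helper names are invented and do not correspond to real QEMU APIs.

#include <stdio.h>

/* Stubs standing in for the vhost helpers; ordering is the point here. */
static int  enable_host_notifiers(void)   { return 0; }
static int  set_guest_notifiers(int on)   { (void)on; return 0; }
static int  start_backend(void)           { return 0; }
static void disable_host_notifiers(void)  { }

static int device_start(void)
{
    int ret = enable_host_notifiers();
    if (ret < 0) {
        return ret;
    }
    ret = set_guest_notifiers(1);
    if (ret < 0) {
        goto err_host_notifiers;
    }
    ret = start_backend();
    if (ret < 0) {
        goto err_guest_notifiers;
    }
    return 0; /* success: the daemon now services the virtqueues */

err_guest_notifiers:
    set_guest_notifiers(0);
err_host_notifiers:
    disable_host_notifiers();
    return ret;
}

int main(void)
{
    printf("start: %d\n", device_start());
    return 0;
}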
+ */ + for (i = 0; i < i2c->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&i2c->vhost_dev, vdev, i, false); + } + + return; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&i2c->vhost_dev, vdev); +} + +static void vu_i2c_stop(VirtIODevice *vdev) +{ + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(&i2c->vhost_dev, vdev); + + ret = k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&i2c->vhost_dev, vdev); +} + +static void vu_i2c_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + + if (!vdev->vm_running) { + should_start = false; + } + + if (i2c->vhost_dev.started == should_start) { + return; + } + + if (should_start) { + vu_i2c_start(vdev); + } else { + vu_i2c_stop(vdev); + } +} + +static uint64_t vu_i2c_get_features(VirtIODevice *vdev, + uint64_t requested_features, Error **errp) +{ + /* No feature bits used yet */ + return requested_features; +} + +static void vu_i2c_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* + * Not normally called; it's the daemon that handles the queue; + * however virtio's cleanup path can call this. + */ +} + +static void vu_i2c_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) +{ + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + + vhost_virtqueue_mask(&i2c->vhost_dev, vdev, idx, mask); +} + +static bool vu_i2c_guest_notifier_pending(VirtIODevice *vdev, int idx) +{ + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + + return vhost_virtqueue_pending(&i2c->vhost_dev, idx); +} + +static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserI2C *i2c) +{ + vhost_user_cleanup(&i2c->vhost_user); + virtio_delete_queue(i2c->vq); + virtio_cleanup(vdev); + g_free(i2c->vhost_dev.vqs); + i2c->vhost_dev.vqs = NULL; +} + +static int vu_i2c_connect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + + if (i2c->connected) { + return 0; + } + i2c->connected = true; + + /* restore vhost state */ + if (virtio_device_started(vdev, vdev->status)) { + vu_i2c_start(vdev); + } + + return 0; +} + +static void vu_i2c_disconnect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + + if (!i2c->connected) { + return; + } + i2c->connected = false; + + if (i2c->vhost_dev.started) { + vu_i2c_stop(vdev); + } +} + +static void vu_i2c_event(void *opaque, QEMUChrEvent event) +{ + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + + switch (event) { + case CHR_EVENT_OPENED: + if (vu_i2c_connect(dev) < 0) { + qemu_chr_fe_disconnect(&i2c->chardev); + return; + } + break; + case CHR_EVENT_CLOSED: + vu_i2c_disconnect(dev); + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } +} + +static void vu_i2c_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserI2C *i2c = VHOST_USER_I2C(dev); + int ret; + + if (!i2c->chardev.chr) { + error_setg(errp, "vhost-user-i2c: missing chardev"); + return; + } + + 
if (!vhost_user_init(&i2c->vhost_user, &i2c->chardev, errp)) { + return; + } + + virtio_init(vdev, "vhost-user-i2c", VIRTIO_ID_I2C_ADAPTER, 0); + + i2c->vhost_dev.nvqs = 1; + i2c->vq = virtio_add_queue(vdev, 4, vu_i2c_handle_output); + i2c->vhost_dev.vqs = g_new0(struct vhost_virtqueue, i2c->vhost_dev.nvqs); + + ret = vhost_dev_init(&i2c->vhost_dev, &i2c->vhost_user, + VHOST_BACKEND_TYPE_USER, 0, errp); + if (ret < 0) { + do_vhost_user_cleanup(vdev, i2c); + } + + qemu_chr_fe_set_handlers(&i2c->chardev, NULL, NULL, vu_i2c_event, NULL, + dev, NULL, true); +} + +static void vu_i2c_device_unrealize(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserI2C *i2c = VHOST_USER_I2C(dev); + + /* This will stop vhost backend if appropriate. */ + vu_i2c_set_status(vdev, 0); + vhost_dev_cleanup(&i2c->vhost_dev); + do_vhost_user_cleanup(vdev, i2c); +} + +static const VMStateDescription vu_i2c_vmstate = { + .name = "vhost-user-i2c", + .unmigratable = 1, +}; + +static Property vu_i2c_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserI2C, chardev), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vu_i2c_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + device_class_set_props(dc, vu_i2c_properties); + dc->vmsd = &vu_i2c_vmstate; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + vdc->realize = vu_i2c_device_realize; + vdc->unrealize = vu_i2c_device_unrealize; + vdc->get_features = vu_i2c_get_features; + vdc->set_status = vu_i2c_set_status; + vdc->guest_notifier_mask = vu_i2c_guest_notifier_mask; + vdc->guest_notifier_pending = vu_i2c_guest_notifier_pending; +} + +static const TypeInfo vu_i2c_info = { + .name = TYPE_VHOST_USER_I2C, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostUserI2C), + .class_init = vu_i2c_class_init, +}; + +static void vu_i2c_register_types(void) +{ + type_register_static(&vu_i2c_info); +} + +type_init(vu_i2c_register_types) diff --git a/hw/virtio/vhost-user-rng-pci.c b/hw/virtio/vhost-user-rng-pci.c new file mode 100644 index 00000000000..c83dc868138 --- /dev/null +++ b/hw/virtio/vhost-user-rng-pci.c @@ -0,0 +1,79 @@ +/* + * Vhost-user RNG virtio device PCI glue + * + * Copyright (c) 2021 Mathieu Poirier + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-rng.h" +#include "virtio-pci.h" + +struct VHostUserRNGPCI { + VirtIOPCIProxy parent_obj; + VHostUserRNG vdev; +}; + +typedef struct VHostUserRNGPCI VHostUserRNGPCI; + +#define TYPE_VHOST_USER_RNG_PCI "vhost-user-rng-pci-base" + +DECLARE_INSTANCE_CHECKER(VHostUserRNGPCI, VHOST_USER_RNG_PCI, + TYPE_VHOST_USER_RNG_PCI) + +static Property vhost_user_rng_pci_properties[] = { + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, + DEV_NVECTORS_UNSPECIFIED), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_user_rng_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserRNGPCI *dev = VHOST_USER_RNG_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { + vpci_dev->nvectors = 1; + } + + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); +} + +static void vhost_user_rng_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_rng_pci_realize; + set_bit(DEVICE_CATEGORY_INPUT, 
dc->categories); + device_class_set_props(dc, vhost_user_rng_pci_properties); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = 0; /* Set by virtio-pci based on virtio id */ + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_OTHERS; +} + +static void vhost_user_rng_pci_instance_init(Object *obj) +{ + VHostUserRNGPCI *dev = VHOST_USER_RNG_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_RNG); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_rng_pci_info = { + .base_name = TYPE_VHOST_USER_RNG_PCI, + .non_transitional_name = "vhost-user-rng-pci", + .instance_size = sizeof(VHostUserRNGPCI), + .instance_init = vhost_user_rng_pci_instance_init, + .class_init = vhost_user_rng_pci_class_init, +}; + +static void vhost_user_rng_pci_register(void) +{ + virtio_pci_types_register(&vhost_user_rng_pci_info); +} + +type_init(vhost_user_rng_pci_register); diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c new file mode 100644 index 00000000000..209ee5bf9ac --- /dev/null +++ b/hw/virtio/vhost-user-rng.c @@ -0,0 +1,289 @@ +/* + * Vhost-user RNG virtio device + * + * Copyright (c) 2021 Mathieu Poirier + * + * Implementation seriously tailored on vhost-user-i2c.c + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/vhost-user-rng.h" +#include "qemu/error-report.h" +#include "standard-headers/linux/virtio_ids.h" + +static void vu_rng_start(VirtIODevice *vdev) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + int i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&rng->vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return; + } + + ret = k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + rng->vhost_dev.acked_features = vdev->guest_features; + ret = vhost_dev_start(&rng->vhost_dev, vdev); + if (ret < 0) { + error_report("Error starting vhost-user-rng: %d", -ret); + goto err_guest_notifiers; + } + + /* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. 
+ */ + for (i = 0; i < rng->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&rng->vhost_dev, vdev, i, false); + } + + return; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&rng->vhost_dev, vdev); +} + +static void vu_rng_stop(VirtIODevice *vdev) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(&rng->vhost_dev, vdev); + + ret = k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&rng->vhost_dev, vdev); +} + +static void vu_rng_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + + if (!vdev->vm_running) { + should_start = false; + } + + if (rng->vhost_dev.started == should_start) { + return; + } + + if (should_start) { + vu_rng_start(vdev); + } else { + vu_rng_stop(vdev); + } +} + +static uint64_t vu_rng_get_features(VirtIODevice *vdev, + uint64_t requested_features, Error **errp) +{ + /* No feature bits used yet */ + return requested_features; +} + +static void vu_rng_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* + * Not normally called; it's the daemon that handles the queue; + * however virtio's cleanup path can call this. + */ +} + +static void vu_rng_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + + vhost_virtqueue_mask(&rng->vhost_dev, vdev, idx, mask); +} + +static bool vu_rng_guest_notifier_pending(VirtIODevice *vdev, int idx) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + + return vhost_virtqueue_pending(&rng->vhost_dev, idx); +} + +static void vu_rng_connect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + + if (rng->connected) { + return; + } + + rng->connected = true; + + /* restore vhost state */ + if (virtio_device_started(vdev, vdev->status)) { + vu_rng_start(vdev); + } +} + +static void vu_rng_disconnect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + + if (!rng->connected) { + return; + } + + rng->connected = false; + + if (rng->vhost_dev.started) { + vu_rng_stop(vdev); + } +} + +static void vu_rng_event(void *opaque, QEMUChrEvent event) +{ + DeviceState *dev = opaque; + + switch (event) { + case CHR_EVENT_OPENED: + vu_rng_connect(dev); + break; + case CHR_EVENT_CLOSED: + vu_rng_disconnect(dev); + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } +} + +static void vu_rng_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserRNG *rng = VHOST_USER_RNG(dev); + int ret; + + if (!rng->chardev.chr) { + error_setg(errp, "missing chardev"); + return; + } + + if (!vhost_user_init(&rng->vhost_user, &rng->chardev, errp)) { + return; + } + + virtio_init(vdev, "vhost-user-rng", VIRTIO_ID_RNG, 0); + + rng->req_vq = virtio_add_queue(vdev, 4, vu_rng_handle_output); + if (!rng->req_vq) { + error_setg_errno(errp, -1, "virtio_add_queue() failed"); + goto virtio_add_queue_failed; + } + + rng->vhost_dev.nvqs = 1; + rng->vhost_dev.vqs = g_new0(struct vhost_virtqueue, 
rng->vhost_dev.nvqs); + ret = vhost_dev_init(&rng->vhost_dev, &rng->vhost_user, + VHOST_BACKEND_TYPE_USER, 0, errp); + if (ret < 0) { + error_setg_errno(errp, -ret, "vhost_dev_init() failed"); + goto vhost_dev_init_failed; + } + + qemu_chr_fe_set_handlers(&rng->chardev, NULL, NULL, vu_rng_event, NULL, + dev, NULL, true); + + return; + +vhost_dev_init_failed: + virtio_delete_queue(rng->req_vq); +virtio_add_queue_failed: + virtio_cleanup(vdev); + vhost_user_cleanup(&rng->vhost_user); +} + +static void vu_rng_device_unrealize(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserRNG *rng = VHOST_USER_RNG(dev); + + vu_rng_set_status(vdev, 0); + + vhost_dev_cleanup(&rng->vhost_dev); + g_free(rng->vhost_dev.vqs); + rng->vhost_dev.vqs = NULL; + virtio_delete_queue(rng->req_vq); + virtio_cleanup(vdev); + vhost_user_cleanup(&rng->vhost_user); +} + +static const VMStateDescription vu_rng_vmstate = { + .name = "vhost-user-rng", + .unmigratable = 1, +}; + +static Property vu_rng_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserRNG, chardev), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vu_rng_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + device_class_set_props(dc, vu_rng_properties); + dc->vmsd = &vu_rng_vmstate; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + + vdc->realize = vu_rng_device_realize; + vdc->unrealize = vu_rng_device_unrealize; + vdc->get_features = vu_rng_get_features; + vdc->set_status = vu_rng_set_status; + vdc->guest_notifier_mask = vu_rng_guest_notifier_mask; + vdc->guest_notifier_pending = vu_rng_guest_notifier_pending; +} + +static const TypeInfo vu_rng_info = { + .name = TYPE_VHOST_USER_RNG, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostUserRNG), + .class_init = vu_rng_class_init, +}; + +static void vu_rng_register_types(void) +{ + type_register_static(&vu_rng_info); +} + +type_init(vu_rng_register_types) diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c index a6f08c26b9a..52bd682c34d 100644 --- a/hw/virtio/vhost-user-vsock.c +++ b/hw/virtio/vhost-user-vsock.c @@ -34,10 +34,12 @@ static void vuv_get_config(VirtIODevice *vdev, uint8_t *config) static int vuv_handle_config_change(struct vhost_dev *dev) { VHostUserVSock *vsock = VHOST_USER_VSOCK(dev->vdev); + Error *local_err = NULL; int ret = vhost_dev_get_config(dev, (uint8_t *)&vsock->vsockcfg, - sizeof(struct virtio_vsock_config)); + sizeof(struct virtio_vsock_config), + &local_err); if (ret < 0) { - error_report("get config space failed"); + error_report_err(local_err); return -1; } @@ -79,7 +81,9 @@ static uint64_t vuv_get_features(VirtIODevice *vdev, { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); - return vhost_get_features(&vvc->vhost_dev, user_feature_bits, features); + features = vhost_get_features(&vvc->vhost_dev, user_feature_bits, features); + + return vhost_vsock_common_get_features(vdev, features, errp); } static const VMStateDescription vuv_vmstate = { @@ -108,16 +112,14 @@ static void vuv_device_realize(DeviceState *dev, Error **errp) vhost_dev_set_config_notifier(&vvc->vhost_dev, &vsock_ops); ret = vhost_dev_init(&vvc->vhost_dev, &vsock->vhost_user, - VHOST_BACKEND_TYPE_USER, 0); + VHOST_BACKEND_TYPE_USER, 0, errp); if (ret < 0) { - error_setg_errno(errp, -ret, "vhost_dev_init failed"); goto err_virtio; } ret = vhost_dev_get_config(&vvc->vhost_dev, (uint8_t *)&vsock->vsockcfg, - sizeof(struct virtio_vsock_config)); + sizeof(struct 
virtio_vsock_config), errp); if (ret < 0) { - error_setg_errno(errp, -ret, "get config space failed"); goto err_vhost_dev; } diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index ded0c104530..bf6e50223cb 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -303,7 +303,7 @@ struct vhost_user_read_cb_data { int ret; }; -static gboolean vhost_user_read_cb(GIOChannel *source, GIOCondition condition, +static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, gpointer opaque) { struct vhost_user_read_cb_data *data = opaque; @@ -429,7 +429,7 @@ static int process_message_reply(struct vhost_dev *dev, } if (msg_reply.hdr.request != msg->hdr.request) { - error_report("Received unexpected msg type." + error_report("Received unexpected msg type. " "Expected %d received %d", msg->hdr.request, msg_reply.hdr.request); return -1; @@ -1095,23 +1095,6 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, return 0; } -static int vhost_user_set_vring_addr(struct vhost_dev *dev, - struct vhost_vring_addr *addr) -{ - VhostUserMsg msg = { - .hdr.request = VHOST_USER_SET_VRING_ADDR, - .hdr.flags = VHOST_USER_VERSION, - .payload.addr = *addr, - .hdr.size = sizeof(msg.payload.addr), - }; - - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; - } - - return 0; -} - static int vhost_user_set_vring_endian(struct vhost_dev *dev, struct vhost_vring_state *ring) { @@ -1288,72 +1271,150 @@ static int vhost_user_set_vring_call(struct vhost_dev *dev, return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); } -static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) + +static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) { VhostUserMsg msg = { .hdr.request = request, .hdr.flags = VHOST_USER_VERSION, - .payload.u64 = u64, - .hdr.size = sizeof(msg.payload.u64), }; + if (vhost_user_one_time_request(request) && dev->vq_index != 0) { + return 0; + } + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { return -1; } + if (vhost_user_read(dev, &msg) < 0) { + return -1; + } + + if (msg.hdr.request != request) { + error_report("Received unexpected msg type. Expected %d received %d", + request, msg.hdr.request); + return -1; + } + + if (msg.hdr.size != sizeof(msg.payload.u64)) { + error_report("Received bad msg size."); + return -1; + } + + *u64 = msg.payload.u64; + return 0; } -static int vhost_user_set_features(struct vhost_dev *dev, - uint64_t features) +static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) { - return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); + if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { + return -EPROTO; + } + + return 0; } -static int vhost_user_set_protocol_features(struct vhost_dev *dev, - uint64_t features) +static int enforce_reply(struct vhost_dev *dev, + const VhostUserMsg *msg) { - return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); + uint64_t dummy; + + if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { + return process_message_reply(dev, msg); + } + + /* + * We need to wait for a reply but the backend does not + * support replies for the command we just sent. + * Send VHOST_USER_GET_FEATURES which makes all backends + * send a reply. 
+ */ + return vhost_user_get_features(dev, &dummy); } -static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) +static int vhost_user_set_vring_addr(struct vhost_dev *dev, + struct vhost_vring_addr *addr) { VhostUserMsg msg = { - .hdr.request = request, + .hdr.request = VHOST_USER_SET_VRING_ADDR, .hdr.flags = VHOST_USER_VERSION, + .payload.addr = *addr, + .hdr.size = sizeof(msg.payload.addr), }; - if (vhost_user_one_time_request(request) && dev->vq_index != 0) { - return 0; + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + + /* + * wait for a reply if logging is enabled to make sure + * backend is actually logging changes + */ + bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); + + if (reply_supported && wait_for_reply) { + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; } if (vhost_user_write(dev, &msg, NULL, 0) < 0) { return -1; } - if (vhost_user_read(dev, &msg) < 0) { - return -1; + if (wait_for_reply) { + return enforce_reply(dev, &msg); } - if (msg.hdr.request != request) { - error_report("Received unexpected msg type. Expected %d received %d", - request, msg.hdr.request); - return -1; + return 0; +} + +static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, + bool wait_for_reply) +{ + VhostUserMsg msg = { + .hdr.request = request, + .hdr.flags = VHOST_USER_VERSION, + .payload.u64 = u64, + .hdr.size = sizeof(msg.payload.u64), + }; + + if (wait_for_reply) { + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + if (reply_supported) { + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; + } } - if (msg.hdr.size != sizeof(msg.payload.u64)) { - error_report("Received bad msg size."); + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { return -1; } - *u64 = msg.payload.u64; + if (wait_for_reply) { + return enforce_reply(dev, &msg); + } return 0; } -static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) +static int vhost_user_set_features(struct vhost_dev *dev, + uint64_t features) +{ + /* + * wait for a reply if logging is enabled to make sure + * backend is actually logging changes + */ + bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); + + return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features, + log_enabled); +} + +static int vhost_user_set_protocol_features(struct vhost_dev *dev, + uint64_t features) { - return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); + return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features, + false); } static int vhost_user_set_owner(struct vhost_dev *dev) @@ -1364,7 +1425,7 @@ static int vhost_user_set_owner(struct vhost_dev *dev) }; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; + return -EPROTO; } return 0; @@ -1465,11 +1526,13 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", user, queue_idx); - memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, - page_size, addr); + if (!n->mr.ram) /* Don't init again after suspend. 
*/ + memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, + page_size, addr); g_free(name); if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { + object_unparent(OBJECT(&n->mr)); munmap(addr, page_size); return -1; } @@ -1856,7 +1919,8 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, return 0; } -static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) +static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, + Error **errp) { uint64_t features, protocol_features, ram_slots; struct vhost_user *u; @@ -1871,6 +1935,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) err = vhost_user_get_features(dev, &features); if (err < 0) { + error_setg_errno(errp, -err, "vhost_backend_init failed"); return err; } @@ -1880,7 +1945,8 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &protocol_features); if (err < 0) { - return err; + error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); + return -EPROTO; } dev->protocol_features = @@ -1891,14 +1957,15 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); } else if (!(protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) { - error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG " - "but backend does not support it."); - return -1; + error_setg(errp, "Device expects VHOST_USER_PROTOCOL_F_CONFIG " + "but backend does not support it."); + return -EINVAL; } err = vhost_user_set_protocol_features(dev, dev->protocol_features); if (err < 0) { - return err; + error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); + return -EPROTO; } /* query the max queues we support if backend supports Multiple Queue */ @@ -1906,8 +1973,17 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, &dev->max_queues); if (err < 0) { - return err; + error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); + return -EPROTO; } + } else { + dev->max_queues = 1; + } + + if (dev->num_queues && dev->max_queues < dev->num_queues) { + error_setg(errp, "The maximum number of queues supported by the " + "backend is %" PRIu64, dev->max_queues); + return -EINVAL; } if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && @@ -1915,9 +1991,9 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) VHOST_USER_PROTOCOL_F_SLAVE_REQ) && virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK))) { - error_report("IOMMU support requires reply-ack and " - "slave-req protocol features."); - return -1; + error_setg(errp, "IOMMU support requires reply-ack and " + "slave-req protocol features."); + return -EINVAL; } /* get max memory regions if backend supports configurable RAM slots */ @@ -1927,15 +2003,16 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) } else { err = vhost_user_get_max_memslots(dev, &ram_slots); if (err < 0) { - return err; + error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); + return -EPROTO; } if (ram_slots < u->user->memory_slots) { - error_report("The backend specified a max ram slots limit " - "of %" PRIu64", when the prior validated limit was %d. 
" - "This limit should never decrease.", ram_slots, - u->user->memory_slots); - return -1; + error_setg(errp, "The backend specified a max ram slots limit " + "of %" PRIu64", when the prior validated limit was " + "%d. This limit should never decrease.", ram_slots, + u->user->memory_slots); + return -EINVAL; } u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); @@ -1953,7 +2030,8 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) if (dev->vq_index == 0) { err = vhost_setup_slave_channel(dev); if (err < 0) { - return err; + error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); + return -EPROTO; } } @@ -2107,7 +2185,7 @@ static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) } static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, - uint32_t config_len) + uint32_t config_len, Error **errp) { VhostUserMsg msg = { .hdr.request = VHOST_USER_GET_CONFIG, @@ -2117,32 +2195,34 @@ static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, if (!virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_CONFIG)) { - return -1; + error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported"); + return -EINVAL; } - if (config_len > VHOST_USER_MAX_CONFIG_SIZE) { - return -1; - } + assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE); msg.payload.config.offset = 0; msg.payload.config.size = config_len; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; + error_setg_errno(errp, EPROTO, "vhost_get_config failed"); + return -EPROTO; } if (vhost_user_read(dev, &msg) < 0) { - return -1; + error_setg_errno(errp, EPROTO, "vhost_get_config failed"); + return -EPROTO; } if (msg.hdr.request != VHOST_USER_GET_CONFIG) { - error_report("Received unexpected msg type. Expected %d received %d", - VHOST_USER_GET_CONFIG, msg.hdr.request); - return -1; + error_setg(errp, + "Received unexpected msg type. Expected %d received %d", + VHOST_USER_GET_CONFIG, msg.hdr.request); + return -EINVAL; } if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { - error_report("Received bad msg size."); - return -1; + error_setg(errp, "Received bad msg size."); + return -EINVAL; } memcpy(config, msg.payload.config.region, config_len); @@ -2401,7 +2481,7 @@ void vhost_user_cleanup(VhostUserState *user) if (!user->chr) { return; } - + memory_region_transaction_begin(); for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { if (user->notifier[i].addr) { object_unparent(OBJECT(&user->notifier[i].mr)); @@ -2409,6 +2489,7 @@ void vhost_user_cleanup(VhostUserState *user) user->notifier[i].addr = NULL; } } + memory_region_transaction_commit(); user->chr = NULL; } diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 01d2101d097..bcaf00e09f3 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -18,22 +18,55 @@ #include "hw/virtio/vhost-backend.h" #include "hw/virtio/virtio-net.h" #include "hw/virtio/vhost-vdpa.h" +#include "exec/address-spaces.h" #include "qemu/main-loop.h" #include "cpu.h" #include "trace.h" #include "qemu-common.h" -static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section) +/* + * Return one past the end of the end of section. Be careful with uint64_t + * conversions! + */ +static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section) { - return (!memory_region_is_ram(section->mr) && - !memory_region_is_iommu(section->mr)) || - /* - * Sizing an enabled 64-bit BAR can cause spurious mappings to - * addresses in the upper part of the 64-bit address space. 
These - * are never accessed by the CPU and beyond the address width of - * some IOMMU hardware. TODO: VDPA should tell us the IOMMU width. - */ - section->offset_within_address_space & (1ULL << 63); + Int128 llend = int128_make64(section->offset_within_address_space); + llend = int128_add(llend, section->size); + llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); + + return llend; +} + +static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, + uint64_t iova_min, + uint64_t iova_max) +{ + Int128 llend; + + if ((!memory_region_is_ram(section->mr) && + !memory_region_is_iommu(section->mr)) || + memory_region_is_protected(section->mr) || + /* vhost-vDPA doesn't allow MMIO to be mapped */ + memory_region_is_ram_device(section->mr)) { + return true; + } + + if (section->offset_within_address_space < iova_min) { + error_report("RAM section out of device range (min=0x%" PRIx64 + ", addr=0x%" HWADDR_PRIx ")", + iova_min, section->offset_within_address_space); + return true; + } + + llend = vhost_vdpa_section_end(section); + if (int128_gt(llend, int128_make64(iova_max))) { + error_report("RAM section out of device range (max=0x%" PRIx64 + ", end addr=0x%" PRIx64 ")", + iova_max, int128_get64(llend)); + return true; + } + + return false; } static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, @@ -86,19 +119,13 @@ static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, return ret; } -static void vhost_vdpa_listener_begin(MemoryListener *listener) +static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v) { - struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); - struct vhost_dev *dev = v->dev; - struct vhost_msg_v2 msg = {}; int fd = v->device_fd; - - if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { - return; - } - - msg.type = v->msg_type; - msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN; + struct vhost_msg_v2 msg = { + .type = v->msg_type, + .iotlb.type = VHOST_IOTLB_BATCH_BEGIN, + }; if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { error_report("failed to write, fd=%d, errno=%d (%s)", @@ -106,6 +133,16 @@ static void vhost_vdpa_listener_begin(MemoryListener *listener) } } +static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v) +{ + if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) && + !v->iotlb_batch_begin_sent) { + vhost_vdpa_listener_begin_batch(v); + } + + v->iotlb_batch_begin_sent = true; +} + static void vhost_vdpa_listener_commit(MemoryListener *listener) { struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); @@ -117,6 +154,10 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) return; } + if (!v->iotlb_batch_begin_sent) { + return; + } + msg.type = v->msg_type; msg.iotlb.type = VHOST_IOTLB_BATCH_END; @@ -124,6 +165,8 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) error_report("failed to write, fd=%d, errno=%d (%s)", fd, errno, strerror(errno)); } + + v->iotlb_batch_begin_sent = false; } static void vhost_vdpa_listener_region_add(MemoryListener *listener, @@ -135,7 +178,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, void *vaddr; int ret; - if (vhost_vdpa_listener_skipped_section(section)) { + if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, + v->iova_range.last)) { return; } @@ -146,10 +190,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, } iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); - llend = 
int128_make64(section->offset_within_address_space); - llend = int128_add(llend, section->size); - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); - + llend = vhost_vdpa_section_end(section); if (int128_ge(int128_make64(iova), llend)) { return; } @@ -167,26 +208,17 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, llsize = int128_sub(llend, int128_make64(iova)); + vhost_vdpa_iotlb_batch_begin_once(v); ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize), vaddr, section->readonly); if (ret) { error_report("vhost vdpa map fail!"); - if (memory_region_is_ram_device(section->mr)) { - /* Allow unexpected mappings not to be fatal for RAM devices */ - error_report("map ram fail!"); - return ; - } goto fail; } return; fail: - if (memory_region_is_ram_device(section->mr)) { - error_report("failed to vdpa_dma_map. pci p2p may not work"); - return; - - } /* * On the initfn path, store the first error in the container so we * can gracefully fail. Runtime, there's not much we can do other @@ -205,7 +237,8 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, Int128 llend, llsize; int ret; - if (vhost_vdpa_listener_skipped_section(section)) { + if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, + v->iova_range.last)) { return; } @@ -216,9 +249,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, } iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); - llend = int128_make64(section->offset_within_address_space); - llend = int128_add(llend, section->size); - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); + llend = vhost_vdpa_section_end(section); trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend)); @@ -228,6 +259,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, llsize = int128_sub(llend, int128_make64(iova)); + vhost_vdpa_iotlb_batch_begin_once(v); ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); if (ret) { error_report("vhost_vdpa dma unmap error!"); @@ -241,7 +273,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, * depends on the addnop(). */ static const MemoryListener vhost_vdpa_memory_listener = { - .begin = vhost_vdpa_listener_begin, + .name = "vhost-vdpa", .commit = vhost_vdpa_listener_commit, .region_add = vhost_vdpa_listener_region_add, .region_del = vhost_vdpa_listener_region_del, @@ -252,10 +284,12 @@ static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, { struct vhost_vdpa *v = dev->opaque; int fd = v->device_fd; + int ret; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); - return ioctl(fd, request, arg); + ret = ioctl(fd, request, arg); + return ret < 0 ? 
-errno : ret; } static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) @@ -272,36 +306,155 @@ static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s); } -static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque) +static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) +{ + int ret = vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE, + &v->iova_range); + if (ret != 0) { + v->iova_range.first = 0; + v->iova_range.last = UINT64_MAX; + } + + trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first, + v->iova_range.last); +} + +static bool vhost_vdpa_one_time_request(struct vhost_dev *dev) +{ + struct vhost_vdpa *v = dev->opaque; + + return v->index != 0; +} + +static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) { struct vhost_vdpa *v; - uint64_t features; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); trace_vhost_vdpa_init(dev, opaque); + int ret; + + /* + * Similar to VFIO, we end up pinning all guest memory and have to + * disable discarding of RAM. + */ + ret = ram_block_discard_disable(true); + if (ret) { + error_report("Cannot set discarding of RAM broken"); + return ret; + } v = opaque; v->dev = dev; dev->opaque = opaque ; - vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features); - dev->backend_features = features; v->listener = vhost_vdpa_memory_listener; v->msg_type = VHOST_IOTLB_MSG_V2; + vhost_vdpa_get_iova_range(v); + + if (vhost_vdpa_one_time_request(dev)) { + return 0; + } + vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER); return 0; } +static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev, + int queue_index) +{ + size_t page_size = qemu_real_host_page_size; + struct vhost_vdpa *v = dev->opaque; + VirtIODevice *vdev = dev->vdev; + VhostVDPAHostNotifier *n; + + n = &v->notifier[queue_index]; + + if (n->addr) { + virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false); + object_unparent(OBJECT(&n->mr)); + munmap(n->addr, page_size); + n->addr = NULL; + } +} + +static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n) +{ + int i; + + for (i = 0; i < n; i++) { + vhost_vdpa_host_notifier_uninit(dev, i); + } +} + +static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index) +{ + size_t page_size = qemu_real_host_page_size; + struct vhost_vdpa *v = dev->opaque; + VirtIODevice *vdev = dev->vdev; + VhostVDPAHostNotifier *n; + int fd = v->device_fd; + void *addr; + char *name; + + vhost_vdpa_host_notifier_uninit(dev, queue_index); + + n = &v->notifier[queue_index]; + + addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd, + queue_index * page_size); + if (addr == MAP_FAILED) { + goto err; + } + + name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]", + v, queue_index); + memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, + page_size, addr); + g_free(name); + + if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) { + munmap(addr, page_size); + goto err; + } + n->addr = addr; + + return 0; + +err: + return -1; +} + +static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev) +{ + int i; + + for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) { + if (vhost_vdpa_host_notifier_init(dev, i)) { + goto err; + } + } + + return; + +err: + vhost_vdpa_host_notifiers_uninit(dev, i); + return; +} + static int vhost_vdpa_cleanup(struct vhost_dev *dev) { struct vhost_vdpa *v; assert(dev->vhost_ops->backend_type == 
VHOST_BACKEND_TYPE_VDPA); v = dev->opaque; trace_vhost_vdpa_cleanup(dev, v); + vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); memory_listener_unregister(&v->listener); dev->opaque = NULL; + ram_block_discard_disable(false); + return 0; } @@ -314,6 +467,10 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, struct vhost_memory *mem) { + if (vhost_vdpa_one_time_request(dev)) { + return 0; + } + trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding); if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) && trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) { @@ -337,6 +494,11 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, uint64_t features) { int ret; + + if (vhost_vdpa_one_time_request(dev)) { + return 0; + } + trace_vhost_vdpa_set_features(dev, features); ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features); uint8_t status = 0; @@ -357,13 +519,16 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) int r; if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { - return 0; + return -EFAULT; } features &= f; - r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); - if (r) { - return 0; + + if (vhost_vdpa_one_time_request(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; + } } dev->backend_cap = features; @@ -371,8 +536,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) return 0; } -int vhost_vdpa_get_device_id(struct vhost_dev *dev, - uint32_t *device_id) +static int vhost_vdpa_get_device_id(struct vhost_dev *dev, + uint32_t *device_id) { int ret; ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id); @@ -394,8 +559,8 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) { assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); - trace_vhost_vdpa_get_vq_index(dev, idx, idx - dev->vq_index); - return idx - dev->vq_index; + trace_vhost_vdpa_get_vq_index(dev, idx, idx); + return idx; } static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) @@ -448,7 +613,7 @@ static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data, } static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config, - uint32_t config_len) + uint32_t config_len, Error **errp) { struct vhost_vdpa_config *v_config; unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); @@ -472,10 +637,21 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) { struct vhost_vdpa *v = dev->opaque; trace_vhost_vdpa_dev_start(dev, started); + + if (started) { + vhost_vdpa_host_notifiers_init(dev); + vhost_vdpa_set_vring_ready(dev); + } else { + vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); + } + + if (dev->vq_index + dev->nvqs != dev->vq_index_end) { + return 0; + } + if (started) { uint8_t status = 0; memory_listener_register(&v->listener, &address_space_memory); - vhost_vdpa_set_vring_ready(dev); vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status); @@ -493,6 +669,10 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, struct vhost_log *log) { + if (vhost_vdpa_one_time_request(dev)) { + return 0; + } + trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd, log->log); return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base); @@ -558,6 +738,10 @@ static int 
vhost_vdpa_get_features(struct vhost_dev *dev, static int vhost_vdpa_set_owner(struct vhost_dev *dev) { + if (vhost_vdpa_one_time_request(dev)) { + return 0; + } + trace_vhost_vdpa_set_owner(dev); return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); } diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c index 4ad6e234adf..3f3771274e7 100644 --- a/hw/virtio/vhost-vsock-common.c +++ b/hw/virtio/vhost-vsock-common.c @@ -18,6 +18,30 @@ #include "qemu/iov.h" #include "monitor/monitor.h" +const int feature_bits[] = { + VIRTIO_VSOCK_F_SEQPACKET, + VHOST_INVALID_FEATURE_BIT +}; + +uint64_t vhost_vsock_common_get_features(VirtIODevice *vdev, uint64_t features, + Error **errp) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + if (vvc->seqpacket != ON_OFF_AUTO_OFF) { + virtio_add_feature(&features, VIRTIO_VSOCK_F_SEQPACKET); + } + + features = vhost_get_features(&vvc->vhost_dev, feature_bits, features); + + if (vvc->seqpacket == ON_OFF_AUTO_ON && + !virtio_has_feature(features, VIRTIO_VSOCK_F_SEQPACKET)) { + error_setg(errp, "vhost-vsock backend doesn't support seqpacket"); + } + + return features; +} + int vhost_vsock_common_start(VirtIODevice *vdev) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); @@ -231,11 +255,18 @@ void vhost_vsock_common_unrealize(VirtIODevice *vdev) virtio_cleanup(vdev); } +static Property vhost_vsock_common_properties[] = { + DEFINE_PROP_ON_OFF_AUTO("seqpacket", VHostVSockCommon, seqpacket, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_END_OF_LIST(), +}; + static void vhost_vsock_common_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + device_class_set_props(dc, vhost_vsock_common_properties); set_bit(DEVICE_CATEGORY_MISC, dc->categories); vdc->guest_notifier_mask = vhost_vsock_common_guest_notifier_mask; vdc->guest_notifier_pending = vhost_vsock_common_guest_notifier_pending; diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index 8ddfb9abfe1..478c0c9a878 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -108,8 +108,7 @@ static uint64_t vhost_vsock_get_features(VirtIODevice *vdev, uint64_t requested_features, Error **errp) { - /* No feature bits used yet */ - return requested_features; + return vhost_vsock_common_get_features(vdev, requested_features, errp); } static const VMStateDescription vmstate_virtio_vhost_vsock = { @@ -170,9 +169,8 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) vhost_vsock_common_realize(vdev, "vhost-vsock"); ret = vhost_dev_init(&vvc->vhost_dev, (void *)(uintptr_t)vhostfd, - VHOST_BACKEND_TYPE_KERNEL, 0); + VHOST_BACKEND_TYPE_KERNEL, 0, errp); if (ret < 0) { - error_setg_errno(errp, -ret, "vhost-vsock: vhost_dev_init failed"); goto err_virtio; } diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index e2163a0d63e..437347ad01c 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -21,7 +21,6 @@ #include "qemu/error-report.h" #include "qemu/memfd.h" #include "standard-headers/linux/vhost_types.h" -#include "exec/address-spaces.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" #include "migration/blocker.h" @@ -175,6 +174,35 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) return log_size; } +static int vhost_set_backend_type(struct vhost_dev *dev, + VhostBackendType backend_type) +{ + int r = 0; + + switch (backend_type) { +#ifdef CONFIG_VHOST_KERNEL + case VHOST_BACKEND_TYPE_KERNEL: + dev->vhost_ops = &kernel_ops; + break; +#endif +#ifdef 
CONFIG_VHOST_USER + case VHOST_BACKEND_TYPE_USER: + dev->vhost_ops = &user_ops; + break; +#endif +#ifdef CONFIG_VHOST_VDPA + case VHOST_BACKEND_TYPE_VDPA: + dev->vhost_ops = &vdpa_ops; + break; +#endif + default: + error_report("Unknown vhost backend type"); + r = -1; + } + + return r; +} + static struct vhost_log *vhost_log_alloc(uint64_t size, bool share) { Error *err = NULL; @@ -287,7 +315,7 @@ static int vhost_dev_has_iommu(struct vhost_dev *dev) * does not have IOMMU, there's no need to enable this feature * which may cause unnecessary IOTLB miss/update trnasactions. */ - return vdev->dma_as != &address_space_memory && + return virtio_bus_device_iommu_enabled(vdev) && virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); } @@ -1287,11 +1315,11 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq) } int vhost_dev_init(struct vhost_dev *hdev, void *opaque, - VhostBackendType backend_type, uint32_t busyloop_timeout) + VhostBackendType backend_type, uint32_t busyloop_timeout, + Error **errp) { uint64_t features; int i, r, n_initialized_vqs = 0; - Error *local_err = NULL; hdev->vdev = NULL; hdev->migration_blocker = NULL; @@ -1299,26 +1327,27 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, r = vhost_set_backend_type(hdev, backend_type); assert(r >= 0); - r = hdev->vhost_ops->vhost_backend_init(hdev, opaque); + r = hdev->vhost_ops->vhost_backend_init(hdev, opaque, errp); if (r < 0) { goto fail; } r = hdev->vhost_ops->vhost_set_owner(hdev); if (r < 0) { - VHOST_OPS_DEBUG("vhost_set_owner failed"); + error_setg_errno(errp, -r, "vhost_set_owner failed"); goto fail; } r = hdev->vhost_ops->vhost_get_features(hdev, &features); if (r < 0) { - VHOST_OPS_DEBUG("vhost_get_features failed"); + error_setg_errno(errp, -r, "vhost_get_features failed"); goto fail; } for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) { r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i); if (r < 0) { + error_setg_errno(errp, -r, "Failed to initialize virtqueue %d", i); goto fail; } } @@ -1328,6 +1357,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, r = vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i, busyloop_timeout); if (r < 0) { + error_setg_errno(errp, -r, "Failed to set busyloop timeout"); goto fail_busyloop; } } @@ -1336,6 +1366,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, hdev->features = features; hdev->memory_listener = (MemoryListener) { + .name = "vhost", .begin = vhost_begin, .commit = vhost_commit, .region_add = vhost_region_addnop, @@ -1351,6 +1382,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, }; hdev->iommu_listener = (MemoryListener) { + .name = "vhost-iommu", .region_add = vhost_iommu_region_add, .region_del = vhost_iommu_region_del, }; @@ -1366,9 +1398,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, } if (hdev->migration_blocker != NULL) { - r = migrate_add_blocker(hdev->migration_blocker, &local_err); - if (local_err) { - error_report_err(local_err); + r = migrate_add_blocker(hdev->migration_blocker, errp); + if (r < 0) { error_free(hdev->migration_blocker); goto fail_busyloop; } @@ -1385,9 +1416,9 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) { - error_report("vhost backend memory slots limit is less" - " than current number of present memory slots"); - r = -1; + error_setg(errp, "vhost backend memory slots limit is less" + " than current number of 
present memory slots"); + r = -EINVAL; goto fail_busyloop; } @@ -1558,15 +1589,17 @@ void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, } int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config, - uint32_t config_len) + uint32_t config_len, Error **errp) { assert(hdev->vhost_ops); if (hdev->vhost_ops->vhost_get_config) { - return hdev->vhost_ops->vhost_get_config(hdev, config, config_len); + return hdev->vhost_ops->vhost_get_config(hdev, config, config_len, + errp); } - return -1; + error_setg(errp, "vhost_get_config not implemented"); + return -ENOTSUP; } int vhost_dev_set_config(struct vhost_dev *hdev, const uint8_t *data, diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index d120bf8f438..9a4f491b54d 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -30,6 +30,7 @@ #include "trace.h" #include "qemu/error-report.h" #include "migration/misc.h" +#include "migration/migration.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" @@ -230,7 +231,7 @@ static void balloon_stats_poll_cb(void *opaque) return; } - virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset); + virtqueue_push(s->svq, s->stats_vq_elem, 0); virtio_notify(vdev, s->svq); g_free(s->stats_vq_elem); s->stats_vq_elem = NULL; @@ -437,7 +438,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) memory_region_unref(section.mr); } - virtqueue_push(vq, elem, offset); + virtqueue_push(vq, elem, 0); virtio_notify(vdev, vq); g_free(elem); virtio_balloon_pbp_free(&pbp); @@ -509,6 +510,7 @@ static bool get_free_page_hints(VirtIOBalloon *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtQueue *vq = dev->free_page_vq; bool ret = true; + int i; while (dev->block_iothread) { qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock); @@ -533,26 +535,24 @@ static bool get_free_page_hints(VirtIOBalloon *dev) if (dev->free_page_hint_status == FREE_PAGE_HINT_S_REQUESTED && id == dev->free_page_hint_cmd_id) { dev->free_page_hint_status = FREE_PAGE_HINT_S_START; - } else { + } else if (dev->free_page_hint_status == FREE_PAGE_HINT_S_START) { /* * Stop the optimization only when it has started. This * avoids a stale stop sign for the previous command. 
*/ - if (dev->free_page_hint_status == FREE_PAGE_HINT_S_START) { - dev->free_page_hint_status = FREE_PAGE_HINT_S_STOP; - } + dev->free_page_hint_status = FREE_PAGE_HINT_S_STOP; } } - if (elem->in_num) { - if (dev->free_page_hint_status == FREE_PAGE_HINT_S_START) { - qemu_guest_free_page_hint(elem->in_sg[0].iov_base, - elem->in_sg[0].iov_len); + if (elem->in_num && dev->free_page_hint_status == FREE_PAGE_HINT_S_START) { + for (i = 0; i < elem->in_num; i++) { + qemu_guest_free_page_hint(elem->in_sg[i].iov_base, + elem->in_sg[i].iov_len); } } out: - virtqueue_push(vq, elem, 1); + virtqueue_push(vq, elem, 0); g_free(elem); return ret; } @@ -591,16 +591,10 @@ static void virtio_balloon_free_page_start(VirtIOBalloon *s) { VirtIODevice *vdev = VIRTIO_DEVICE(s); - /* For the stop and copy phase, we don't need to start the optimization */ - if (!vdev->vm_running) { - return; - } - qemu_mutex_lock(&s->free_page_lock); if (s->free_page_hint_cmd_id == UINT_MAX) { - s->free_page_hint_cmd_id = - VIRTIO_BALLOON_FREE_PAGE_HINT_CMD_ID_MIN; + s->free_page_hint_cmd_id = VIRTIO_BALLOON_FREE_PAGE_HINT_CMD_ID_MIN; } else { s->free_page_hint_cmd_id++; } @@ -648,8 +642,7 @@ static void virtio_balloon_free_page_done(VirtIOBalloon *s) static int virtio_balloon_free_page_hint_notify(NotifierWithReturn *n, void *data) { - VirtIOBalloon *dev = container_of(n, VirtIOBalloon, - free_page_hint_notify); + VirtIOBalloon *dev = container_of(n, VirtIOBalloon, free_page_hint_notify); VirtIODevice *vdev = VIRTIO_DEVICE(dev); PrecopyNotifyData *pnd = data; @@ -662,10 +655,19 @@ virtio_balloon_free_page_hint_notify(NotifierWithReturn *n, void *data) return 0; } + /* + * Pages hinted via qemu_guest_free_page_hint() are cleared from the dirty + * bitmap and will not get migrated, especially also not when the postcopy + * destination starts using them and requests migration from the source; the + * faulting thread will stall until postcopy migration finishes and + * all threads are woken up. Let's not start free page hinting if postcopy + * is possible. 
+ */ + if (migrate_postcopy_ram()) { + return 0; + } + switch (pnd->reason) { - case PRECOPY_NOTIFY_SETUP: - precopy_enable_free_page_optimization(); - break; case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC: virtio_balloon_free_page_stop(dev); break; @@ -685,6 +687,7 @@ virtio_balloon_free_page_hint_notify(NotifierWithReturn *n, void *data) */ virtio_balloon_free_page_done(dev); break; + case PRECOPY_NOTIFY_SETUP: case PRECOPY_NOTIFY_COMPLETE: break; default: @@ -852,7 +855,7 @@ static const VMStateDescription vmstate_virtio_balloon_free_page_hint = { }; static const VMStateDescription vmstate_virtio_balloon_page_poison = { - .name = "vitio-balloon-device/page-poison", + .name = "virtio-balloon-device/page-poison", .version_id = 1, .minimum_version_id = 1, .needed = virtio_balloon_page_poison_support, @@ -908,8 +911,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output); s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats); - if (virtio_has_feature(s->host_features, - VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { + if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE, virtio_balloon_handle_free_page_vq); precopy_add_notifier(&s->free_page_hint_notify); diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index d6332d45c3b..d23db98c568 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -69,6 +69,11 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) return; } + if (has_iommu && !virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { + error_setg(errp, "iommu_platform=true is not supported by the device"); + return; + } + if (klass->device_plugged != NULL) { klass->device_plugged(qbus->parent, &local_err); } @@ -320,6 +325,20 @@ static char *virtio_bus_get_fw_dev_path(DeviceState *dev) return NULL; } +bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev) +{ + DeviceState *qdev = DEVICE(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(qdev)); + VirtioBusState *bus = VIRTIO_BUS(qbus); + VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus); + + if (!klass->iommu_enabled) { + return false; + } + + return klass->iommu_enabled(qbus->parent); +} + static void virtio_bus_class_init(ObjectClass *klass, void *data) { BusClass *bus_class = BUS_CLASS(klass); diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c index 770c286be73..a160ae6b413 100644 --- a/hw/virtio/virtio-iommu-pci.c +++ b/hw/virtio/virtio-iommu-pci.c @@ -98,9 +98,7 @@ static void virtio_iommu_pci_instance_init(Object *obj) } static const VirtioPCIDeviceTypeInfo virtio_iommu_pci_info = { - .base_name = TYPE_VIRTIO_IOMMU_PCI, - .generic_name = "virtio-iommu-pci", - .non_transitional_name = "virtio-iommu-pci-non-transitional", + .generic_name = TYPE_VIRTIO_IOMMU_PCI, .instance_size = sizeof(VirtIOIOMMUPCI), .instance_init = virtio_iommu_pci_instance_init, .class_init = virtio_iommu_pci_class_init, diff --git a/hw/virtio/virtio-mem-pci.c b/hw/virtio/virtio-mem-pci.c index fa5395cd885..be2383b0c52 100644 --- a/hw/virtio/virtio-mem-pci.c +++ b/hw/virtio/virtio-mem-pci.c @@ -87,14 +87,12 @@ static void virtio_mem_pci_size_change_notify(Notifier *notifier, void *data) VirtIOMEMPCI *pci_mem = container_of(notifier, VirtIOMEMPCI, size_change_notifier); DeviceState *dev = DEVICE(pci_mem); + char *qom_path = object_get_canonical_path(OBJECT(dev)); const uint64_t * const size_p = data; - const char *id = NULL; - if 
(dev->id) { - id = g_strdup(dev->id); - } - - qapi_event_send_memory_device_size_change(!!id, id, *size_p); + qapi_event_send_memory_device_size_change(!!dev->id, dev->id, *size_p, + qom_path); + g_free(qom_path); } static void virtio_mem_pci_class_init(ObjectClass *klass, void *data) diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 655824ff81a..d5a578142b7 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -145,7 +145,205 @@ static bool virtio_mem_is_busy(void) return migration_in_incoming_postcopy() || !migration_is_idle(); } -static bool virtio_mem_test_bitmap(VirtIOMEM *vmem, uint64_t start_gpa, +typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg, + uint64_t offset, uint64_t size); + +static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg, + virtio_mem_range_cb cb) +{ + unsigned long first_zero_bit, last_zero_bit; + uint64_t offset, size; + int ret = 0; + + first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size); + while (first_zero_bit < vmem->bitmap_size) { + offset = first_zero_bit * vmem->block_size; + last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, + first_zero_bit + 1) - 1; + size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size; + + ret = cb(vmem, arg, offset, size); + if (ret) { + break; + } + first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, + last_zero_bit + 2); + } + return ret; +} + +/* + * Adjust the memory section to cover the intersection with the given range. + * + * Returns false if the intersection is empty, otherwise returns true. + */ +static bool virito_mem_intersect_memory_section(MemoryRegionSection *s, + uint64_t offset, uint64_t size) +{ + uint64_t start = MAX(s->offset_within_region, offset); + uint64_t end = MIN(s->offset_within_region + int128_get64(s->size), + offset + size); + + if (end <= start) { + return false; + } + + s->offset_within_address_space += start - s->offset_within_region; + s->offset_within_region = start; + s->size = int128_make64(end - start); + return true; +} + +typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg); + +static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, + MemoryRegionSection *s, + void *arg, + virtio_mem_section_cb cb) +{ + unsigned long first_bit, last_bit; + uint64_t offset, size; + int ret = 0; + + first_bit = s->offset_within_region / vmem->bitmap_size; + first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit); + while (first_bit < vmem->bitmap_size) { + MemoryRegionSection tmp = *s; + + offset = first_bit * vmem->block_size; + last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, + first_bit + 1) - 1; + size = (last_bit - first_bit + 1) * vmem->block_size; + + if (!virito_mem_intersect_memory_section(&tmp, offset, size)) { + break; + } + ret = cb(&tmp, arg); + if (ret) { + break; + } + first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, + last_bit + 2); + } + return ret; +} + +static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem, + MemoryRegionSection *s, + void *arg, + virtio_mem_section_cb cb) +{ + unsigned long first_bit, last_bit; + uint64_t offset, size; + int ret = 0; + + first_bit = s->offset_within_region / vmem->bitmap_size; + first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit); + while (first_bit < vmem->bitmap_size) { + MemoryRegionSection tmp = *s; + + offset = first_bit * vmem->block_size; + last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, + first_bit + 1) 
- 1; + size = (last_bit - first_bit + 1) * vmem->block_size; + + if (!virito_mem_intersect_memory_section(&tmp, offset, size)) { + break; + } + ret = cb(&tmp, arg); + if (ret) { + break; + } + first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, + last_bit + 2); + } + return ret; +} + +static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg) +{ + RamDiscardListener *rdl = arg; + + return rdl->notify_populate(rdl, s); +} + +static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg) +{ + RamDiscardListener *rdl = arg; + + rdl->notify_discard(rdl, s); + return 0; +} + +static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset, + uint64_t size) +{ + RamDiscardListener *rdl; + + QLIST_FOREACH(rdl, &vmem->rdl_list, next) { + MemoryRegionSection tmp = *rdl->section; + + if (!virito_mem_intersect_memory_section(&tmp, offset, size)) { + continue; + } + rdl->notify_discard(rdl, &tmp); + } +} + +static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, + uint64_t size) +{ + RamDiscardListener *rdl, *rdl2; + int ret = 0; + + QLIST_FOREACH(rdl, &vmem->rdl_list, next) { + MemoryRegionSection tmp = *rdl->section; + + if (!virito_mem_intersect_memory_section(&tmp, offset, size)) { + continue; + } + ret = rdl->notify_populate(rdl, &tmp); + if (ret) { + break; + } + } + + if (ret) { + /* Notify all already-notified listeners. */ + QLIST_FOREACH(rdl2, &vmem->rdl_list, next) { + MemoryRegionSection tmp = *rdl->section; + + if (rdl2 == rdl) { + break; + } + if (!virito_mem_intersect_memory_section(&tmp, offset, size)) { + continue; + } + rdl2->notify_discard(rdl2, &tmp); + } + } + return ret; +} + +static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem) +{ + RamDiscardListener *rdl; + + if (!vmem->size) { + return; + } + + QLIST_FOREACH(rdl, &vmem->rdl_list, next) { + if (rdl->double_discard_supported) { + rdl->notify_discard(rdl, rdl->section); + } else { + virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, + virtio_mem_notify_discard_cb); + } + } +} + +static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa, uint64_t size, bool plugged) { const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size; @@ -198,7 +396,8 @@ static void virtio_mem_send_response_simple(VirtIOMEM *vmem, virtio_mem_send_response(vmem, elem, &resp); } -static bool virtio_mem_valid_range(VirtIOMEM *vmem, uint64_t gpa, uint64_t size) +static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa, + uint64_t size) { if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) { return false; @@ -219,19 +418,21 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, uint64_t size, bool plug) { const uint64_t offset = start_gpa - vmem->addr; - int ret; + RAMBlock *rb = vmem->memdev->mr.ram_block; if (virtio_mem_is_busy()) { return -EBUSY; } if (!plug) { - ret = ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size); - if (ret) { - error_report("Unexpected error discarding RAM: %s", - strerror(-ret)); + if (ram_block_discard_range(rb, offset, size)) { return -EBUSY; } + virtio_mem_notify_unplug(vmem, offset, size); + } else if (virtio_mem_notify_plug(vmem, offset, size)) { + /* Could be a mapping attempt resulted in memory getting populated. 
*/ + ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size); + return -EBUSY; } virtio_mem_set_bitmap(vmem, start_gpa, size, plug); return 0; @@ -318,17 +519,16 @@ static void virtio_mem_resize_usable_region(VirtIOMEM *vmem, static int virtio_mem_unplug_all(VirtIOMEM *vmem) { RAMBlock *rb = vmem->memdev->mr.ram_block; - int ret; if (virtio_mem_is_busy()) { return -EBUSY; } - ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); - if (ret) { - error_report("Unexpected error discarding RAM: %s", strerror(-ret)); + if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) { return -EBUSY; } + virtio_mem_notify_unplug_all(vmem); + bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size); if (vmem->size) { vmem->size = 0; @@ -551,7 +751,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) return; } - if (ram_block_discard_require(true)) { + if (ram_block_coordinated_discard_require(true)) { error_setg(errp, "Discarding RAM is disabled"); return; } @@ -559,7 +759,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); if (ret) { error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); - ram_block_discard_require(false); + ram_block_coordinated_discard_require(false); return; } @@ -576,7 +776,13 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) host_memory_backend_set_mapped(vmem->memdev, true); vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem)); qemu_register_reset(virtio_mem_system_reset, vmem); - precopy_add_notifier(&vmem->precopy_notifier); + + /* + * Set ourselves as RamDiscardManager before the plug handler maps the + * memory region and exposes it via an address space. + */ + memory_region_set_ram_discard_manager(&vmem->memdev->mr, + RAM_DISCARD_MANAGER(vmem)); } static void virtio_mem_device_unrealize(DeviceState *dev) @@ -584,50 +790,58 @@ static void virtio_mem_device_unrealize(DeviceState *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOMEM *vmem = VIRTIO_MEM(dev); - precopy_remove_notifier(&vmem->precopy_notifier); + /* + * The unplug handler unmapped the memory region, it cannot be + * found via an address space anymore. Unset ourselves. + */ + memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); qemu_unregister_reset(virtio_mem_system_reset, vmem); vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem)); host_memory_backend_set_mapped(vmem->memdev, false); virtio_del_queue(vdev, 0); virtio_cleanup(vdev); g_free(vmem->bitmap); - ram_block_discard_require(false); + ram_block_coordinated_discard_require(false); } -static int virtio_mem_restore_unplugged(VirtIOMEM *vmem) +static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg, + uint64_t offset, uint64_t size) { RAMBlock *rb = vmem->memdev->mr.ram_block; - unsigned long first_zero_bit, last_zero_bit; - uint64_t offset, length; - int ret; - /* Find consecutive unplugged blocks and discard the consecutive range. */ - first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size); - while (first_zero_bit < vmem->bitmap_size) { - offset = first_zero_bit * vmem->block_size; - last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, - first_zero_bit + 1) - 1; - length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size; + return ram_block_discard_range(rb, offset, size) ? 
-EINVAL : 0; +} - ret = ram_block_discard_range(rb, offset, length); - if (ret) { - error_report("Unexpected error discarding RAM: %s", - strerror(-ret)); - return -EINVAL; - } - first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, - last_zero_bit + 2); - } - return 0; +static int virtio_mem_restore_unplugged(VirtIOMEM *vmem) +{ + /* Make sure all memory is really discarded after migration. */ + return virtio_mem_for_each_unplugged_range(vmem, NULL, + virtio_mem_discard_range_cb); } static int virtio_mem_post_load(void *opaque, int version_id) { + VirtIOMEM *vmem = VIRTIO_MEM(opaque); + RamDiscardListener *rdl; + int ret; + + /* + * We started out with all memory discarded and our memory region is mapped + * into an address space. Replay, now that we updated the bitmap. + */ + QLIST_FOREACH(rdl, &vmem->rdl_list, next) { + ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, + virtio_mem_notify_populate_cb); + if (ret) { + return ret; + } + } + if (migration_in_incoming_postcopy()) { return 0; } - return virtio_mem_restore_unplugged(VIRTIO_MEM(opaque)); + return virtio_mem_restore_unplugged(vmem); } typedef struct VirtIOMEMMigSanityChecks { @@ -702,6 +916,7 @@ static const VMStateDescription vmstate_virtio_mem_device = { .name = "virtio-mem-device", .minimum_version_id = 1, .version_id = 1, + .priority = MIG_PRI_VIRTIO_MEM, .post_load = virtio_mem_post_load, .fields = (VMStateField[]) { VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks, @@ -872,55 +1087,12 @@ static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name, vmem->block_size = value; } -static void virtio_mem_precopy_exclude_unplugged(VirtIOMEM *vmem) -{ - void * const host = qemu_ram_get_host_addr(vmem->memdev->mr.ram_block); - unsigned long first_zero_bit, last_zero_bit; - uint64_t offset, length; - - /* - * Find consecutive unplugged blocks and exclude them from migration. - * - * Note: Blocks cannot get (un)plugged during precopy, no locking needed. 
- */ - first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size); - while (first_zero_bit < vmem->bitmap_size) { - offset = first_zero_bit * vmem->block_size; - last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, - first_zero_bit + 1) - 1; - length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size; - - qemu_guest_free_page_hint(host + offset, length); - first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, - last_zero_bit + 2); - } -} - -static int virtio_mem_precopy_notify(NotifierWithReturn *n, void *data) -{ - VirtIOMEM *vmem = container_of(n, VirtIOMEM, precopy_notifier); - PrecopyNotifyData *pnd = data; - - switch (pnd->reason) { - case PRECOPY_NOTIFY_SETUP: - precopy_enable_free_page_optimization(); - break; - case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC: - virtio_mem_precopy_exclude_unplugged(vmem); - break; - default: - break; - } - - return 0; -} - static void virtio_mem_instance_init(Object *obj) { VirtIOMEM *vmem = VIRTIO_MEM(obj); notifier_list_init(&vmem->size_change_notifiers); - vmem->precopy_notifier.notify = virtio_mem_precopy_notify; + QLIST_INIT(&vmem->rdl_list); object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size, NULL, NULL, NULL); @@ -940,11 +1112,132 @@ static Property virtio_mem_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm, + const MemoryRegion *mr) +{ + const VirtIOMEM *vmem = VIRTIO_MEM(rdm); + + g_assert(mr == &vmem->memdev->mr); + return vmem->block_size; +} + +static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm, + const MemoryRegionSection *s) +{ + const VirtIOMEM *vmem = VIRTIO_MEM(rdm); + uint64_t start_gpa = vmem->addr + s->offset_within_region; + uint64_t end_gpa = start_gpa + int128_get64(s->size); + + g_assert(s->mr == &vmem->memdev->mr); + + start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size); + end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size); + + if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) { + return false; + } + + return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true); +} + +struct VirtIOMEMReplayData { + void *fn; + void *opaque; +}; + +static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg) +{ + struct VirtIOMEMReplayData *data = arg; + + return ((ReplayRamPopulate)data->fn)(s, data->opaque); +} + +static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm, + MemoryRegionSection *s, + ReplayRamPopulate replay_fn, + void *opaque) +{ + const VirtIOMEM *vmem = VIRTIO_MEM(rdm); + struct VirtIOMEMReplayData data = { + .fn = replay_fn, + .opaque = opaque, + }; + + g_assert(s->mr == &vmem->memdev->mr); + return virtio_mem_for_each_plugged_section(vmem, s, &data, + virtio_mem_rdm_replay_populated_cb); +} + +static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s, + void *arg) +{ + struct VirtIOMEMReplayData *data = arg; + + ((ReplayRamDiscard)data->fn)(s, data->opaque); + return 0; +} + +static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *s, + ReplayRamDiscard replay_fn, + void *opaque) +{ + const VirtIOMEM *vmem = VIRTIO_MEM(rdm); + struct VirtIOMEMReplayData data = { + .fn = replay_fn, + .opaque = opaque, + }; + + g_assert(s->mr == &vmem->memdev->mr); + virtio_mem_for_each_unplugged_section(vmem, s, &data, + virtio_mem_rdm_replay_discarded_cb); +} + +static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm, + RamDiscardListener *rdl, + 
MemoryRegionSection *s) +{ + VirtIOMEM *vmem = VIRTIO_MEM(rdm); + int ret; + + g_assert(s->mr == &vmem->memdev->mr); + rdl->section = memory_region_section_new_copy(s); + + QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next); + ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, + virtio_mem_notify_populate_cb); + if (ret) { + error_report("%s: Replaying plugged ranges failed: %s", __func__, + strerror(-ret)); + } +} + +static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm, + RamDiscardListener *rdl) +{ + VirtIOMEM *vmem = VIRTIO_MEM(rdm); + + g_assert(rdl->section->mr == &vmem->memdev->mr); + if (vmem->size) { + if (rdl->double_discard_supported) { + rdl->notify_discard(rdl, rdl->section); + } else { + virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, + virtio_mem_notify_discard_cb); + } + } + + memory_region_section_free_copy(rdl->section); + rdl->section = NULL; + QLIST_REMOVE(rdl, next); +} + static void virtio_mem_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass); + RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass); device_class_set_props(dc, virtio_mem_properties); dc->vmsd = &vmstate_virtio_mem; @@ -960,6 +1253,13 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data) vmc->get_memory_region = virtio_mem_get_memory_region; vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier; vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier; + + rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity; + rdmc->is_populated = virtio_mem_rdm_is_populated; + rdmc->replay_populated = virtio_mem_rdm_replay_populated; + rdmc->replay_discarded = virtio_mem_rdm_replay_discarded; + rdmc->register_listener = virtio_mem_rdm_register_listener; + rdmc->unregister_listener = virtio_mem_rdm_unregister_listener; } static const TypeInfo virtio_mem_info = { @@ -969,6 +1269,10 @@ static const TypeInfo virtio_mem_info = { .instance_init = virtio_mem_instance_init, .class_init = virtio_mem_class_init, .class_size = sizeof(VirtIOMEMClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_RAM_DISCARD_MANAGER }, + { } + }, }; static void virtio_register_types(void) diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 342c918ea7b..72da12fea59 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -29,6 +29,7 @@ #include "qemu/host-utils.h" #include "qemu/module.h" #include "sysemu/kvm.h" +#include "sysemu/replay.h" #include "hw/virtio/virtio-mmio.h" #include "qemu/error-report.h" #include "qemu/log.h" @@ -36,7 +37,9 @@ static bool virtio_mmio_ioeventfd_enabled(DeviceState *d) { - return kvm_eventfds_enabled(); + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); + + return (proxy->flags & VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD) != 0; } static int virtio_mmio_ioeventfd_assign(DeviceState *d, @@ -720,6 +723,8 @@ static Property virtio_mmio_properties[] = { DEFINE_PROP_BOOL("format_transport_address", VirtIOMMIOProxy, format_transport_address, true), DEFINE_PROP_BOOL("force-legacy", VirtIOMMIOProxy, legacy, true), + DEFINE_PROP_BIT("ioeventfd", VirtIOMMIOProxy, flags, + VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_END_OF_LIST(), }; @@ -728,9 +733,18 @@ static void virtio_mmio_realizefn(DeviceState *d, Error **errp) VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); SysBusDevice *sbd = SYS_BUS_DEVICE(d); - qbus_create_inplace(&proxy->bus, sizeof(proxy->bus), 
TYPE_VIRTIO_MMIO_BUS, - d, NULL); + qbus_init(&proxy->bus, sizeof(proxy->bus), TYPE_VIRTIO_MMIO_BUS, d, NULL); sysbus_init_irq(sbd, &proxy->irq); + + if (!kvm_eventfds_enabled()) { + proxy->flags &= ~VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD; + } + + /* fd-based ioevents can't be synchronized in record/replay */ + if (replay_mode != REPLAY_MODE_NONE) { + proxy->flags &= ~VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD; + } + if (proxy->legacy) { memory_region_init_io(&proxy->iomem, OBJECT(d), &virtio_legacy_mem_ops, proxy, @@ -803,6 +817,17 @@ static char *virtio_mmio_bus_get_dev_path(DeviceState *dev) return path; } +static void virtio_mmio_vmstate_change(DeviceState *d, bool running) +{ + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); + + if (running) { + virtio_mmio_start_ioeventfd(proxy); + } else { + virtio_mmio_stop_ioeventfd(proxy); + } +} + static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data) { BusClass *bus_class = BUS_CLASS(klass); @@ -818,6 +843,7 @@ static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data) k->ioeventfd_enabled = virtio_mmio_ioeventfd_enabled; k->ioeventfd_assign = virtio_mmio_ioeventfd_assign; k->pre_plugged = virtio_mmio_pre_plugged; + k->vmstate_change = virtio_mmio_vmstate_change; k->has_variable_vring_alignment = true; bus_class->max_dev = 1; bus_class->get_dev_path = virtio_mmio_bus_get_dev_path; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index b321604d9b3..750aa47ec14 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -37,6 +37,7 @@ #include "qemu/range.h" #include "hw/virtio/virtio-bus.h" #include "qapi/visitor.h" +#include "sysemu/replay.h" #define VIRTIO_PCI_REGION_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_present(dev)) @@ -423,6 +424,11 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev); uint64_t val = 0; + + if (vdev == NULL) { + return UINT64_MAX; + } + if (addr < config) { return virtio_ioport_read(proxy, addr); } @@ -454,6 +460,11 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, VirtIOPCIProxy *proxy = opaque; uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev); VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + + if (vdev == NULL) { + return; + } + if (addr < config) { virtio_ioport_write(proxy, addr, val); return; @@ -1110,6 +1121,19 @@ static AddressSpace *virtio_pci_get_dma_as(DeviceState *d) return pci_get_address_space(dev); } +static bool virtio_pci_iommu_enabled(DeviceState *d) +{ + VirtIOPCIProxy *proxy = VIRTIO_PCI(d); + PCIDevice *dev = &proxy->pci_dev; + AddressSpace *dma_as = pci_device_iommu_address_space(dev); + + if (dma_as == &address_space_memory) { + return false; + } + + return true; +} + static bool virtio_pci_queue_enabled(DeviceState *d, int n) { VirtIOPCIProxy *proxy = VIRTIO_PCI(d); @@ -1146,6 +1170,10 @@ static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr, uint32_t val = 0; int i; + if (vdev == NULL) { + return UINT64_MAX; + } + switch (addr) { case VIRTIO_PCI_COMMON_DFSELECT: val = proxy->dfselect; @@ -1229,6 +1257,10 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, VirtIOPCIProxy *proxy = opaque; VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + if (vdev == NULL) { + return; + } + switch (addr) { case VIRTIO_PCI_COMMON_DFSELECT: proxy->dfselect = val; @@ -1330,6 +1362,11 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, static uint64_t virtio_pci_notify_read(void *opaque, 
hwaddr addr, unsigned size) { + VirtIOPCIProxy *proxy = opaque; + if (virtio_bus_get_device(&proxy->bus) == NULL) { + return UINT64_MAX; + } + return 0; } @@ -1367,7 +1404,7 @@ static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr, uint64_t val; if (vdev == NULL) { - return 0; + return UINT64_MAX; } val = qatomic_xchg(&vdev->isr, 0); @@ -1388,7 +1425,7 @@ static uint64_t virtio_pci_device_read(void *opaque, hwaddr addr, uint64_t val; if (vdev == NULL) { - return 0; + return UINT64_MAX; } switch (size) { @@ -1760,6 +1797,11 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD; } + /* fd-based ioevents can't be synchronized in record/replay */ + if (replay_mode != REPLAY_MODE_NONE) { + proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD; + } + /* * virtio pci bar layout used by default. * subclasses can re-arrange things if needed. @@ -2145,8 +2187,7 @@ static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, DeviceState *qdev = DEVICE(dev); char virtio_bus_name[] = "virtio-bus"; - qbus_create_inplace(bus, bus_size, TYPE_VIRTIO_PCI_BUS, qdev, - virtio_bus_name); + qbus_init(bus, bus_size, TYPE_VIRTIO_PCI_BUS, qdev, virtio_bus_name); } static void virtio_pci_bus_class_init(ObjectClass *klass, void *data) @@ -2173,6 +2214,7 @@ static void virtio_pci_bus_class_init(ObjectClass *klass, void *data) k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled; k->ioeventfd_assign = virtio_pci_ioeventfd_assign; k->get_dma_as = virtio_pci_get_dma_as; + k->iommu_enabled = virtio_pci_iommu_enabled; k->queue_enabled = virtio_pci_queue_enabled; } diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 07f4e60b309..ea7c079fb04 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -15,7 +15,6 @@ #include "qapi/error.h" #include "cpu.h" #include "trace.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "qemu/log.h" #include "qemu/main-loop.h" @@ -134,12 +133,10 @@ struct VirtQueue QLIST_ENTRY(VirtQueue) node; }; +/* Called within call_rcu(). 
*/ static void virtio_free_region_cache(VRingMemoryRegionCaches *caches) { - if (!caches) { - return; - } - + assert(caches != NULL); address_space_cache_destroy(&caches->desc); address_space_cache_destroy(&caches->avail); address_space_cache_destroy(&caches->used); @@ -250,13 +247,10 @@ static void vring_packed_event_read(VirtIODevice *vdev, hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap); hwaddr off_flags = offsetof(VRingPackedDescEvent, flags); - address_space_read_cached(cache, off_flags, &e->flags, - sizeof(e->flags)); + e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags); /* Make sure flags is seen before off_wrap */ smp_rmb(); - address_space_read_cached(cache, off_off, &e->off_wrap, - sizeof(e->off_wrap)); - virtio_tswap16s(vdev, &e->off_wrap); + e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off); virtio_tswap16s(vdev, &e->flags); } @@ -266,8 +260,7 @@ static void vring_packed_off_wrap_write(VirtIODevice *vdev, { hwaddr off = offsetof(VRingPackedDescEvent, off_wrap); - virtio_tswap16s(vdev, &off_wrap); - address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap)); + virtio_stw_phys_cached(vdev, cache, off, off_wrap); address_space_cache_invalidate(cache, off, sizeof(off_wrap)); } @@ -276,8 +269,7 @@ static void vring_packed_flags_write(VirtIODevice *vdev, { hwaddr off = offsetof(VRingPackedDescEvent, flags); - virtio_tswap16s(vdev, &flags); - address_space_write_cached(cache, off, &flags, sizeof(flags)); + virtio_stw_phys_cached(vdev, cache, off, flags); address_space_cache_invalidate(cache, off, sizeof(flags)); } @@ -510,11 +502,9 @@ static void vring_packed_desc_read_flags(VirtIODevice *vdev, MemoryRegionCache *cache, int i) { - address_space_read_cached(cache, - i * sizeof(VRingPackedDesc) + - offsetof(VRingPackedDesc, flags), - flags, sizeof(*flags)); - virtio_tswap16s(vdev, flags); + hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags); + + *flags = virtio_lduw_phys_cached(vdev, cache, off); } static void vring_packed_desc_read(VirtIODevice *vdev, @@ -567,8 +557,7 @@ static void vring_packed_desc_write_flags(VirtIODevice *vdev, { hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags); - virtio_tswap16s(vdev, &desc->flags); - address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags)); + virtio_stw_phys_cached(vdev, cache, off, desc->flags); address_space_cache_invalidate(cache, off, sizeof(desc->flags)); } @@ -635,6 +624,7 @@ static int virtio_queue_split_empty(VirtQueue *vq) return empty; } +/* Called within rcu_read_lock(). */ static int virtio_queue_packed_empty_rcu(VirtQueue *vq) { struct VRingPackedDesc desc; @@ -986,28 +976,23 @@ static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, return VIRTQUEUE_READ_DESC_MORE; } +/* Called within rcu_read_lock(). 
*/ static void virtqueue_split_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes) + unsigned max_in_bytes, unsigned max_out_bytes, + VRingMemoryRegionCaches *caches) { VirtIODevice *vdev = vq->vdev; unsigned int max, idx; unsigned int total_bufs, in_total, out_total; - VRingMemoryRegionCaches *caches; MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; int64_t len = 0; int rc; - RCU_READ_LOCK_GUARD(); - idx = vq->last_avail_idx; total_bufs = in_total = out_total = 0; max = vq->vring.num; - caches = vring_get_region_caches(vq); - if (!caches) { - goto err; - } while ((rc = virtqueue_num_heads(vq, idx)) > 0) { MemoryRegionCache *desc_cache = &caches->desc; @@ -1126,32 +1111,28 @@ static int virtqueue_packed_read_next_desc(VirtQueue *vq, return VIRTQUEUE_READ_DESC_MORE; } +/* Called within rcu_read_lock(). */ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, unsigned int *out_bytes, unsigned max_in_bytes, - unsigned max_out_bytes) + unsigned max_out_bytes, + VRingMemoryRegionCaches *caches) { VirtIODevice *vdev = vq->vdev; unsigned int max, idx; unsigned int total_bufs, in_total, out_total; MemoryRegionCache *desc_cache; - VRingMemoryRegionCaches *caches; MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; int64_t len = 0; VRingPackedDesc desc; bool wrap_counter; - RCU_READ_LOCK_GUARD(); idx = vq->last_avail_idx; wrap_counter = vq->last_avail_wrap_counter; total_bufs = in_total = out_total = 0; max = vq->vring.num; - caches = vring_get_region_caches(vq); - if (!caches) { - goto err; - } for (;;) { unsigned int num_bufs = total_bufs; @@ -1252,6 +1233,8 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, uint16_t desc_size; VRingMemoryRegionCaches *caches; + RCU_READ_LOCK_GUARD(); + if (unlikely(!vq->vring.desc)) { goto err; } @@ -1270,10 +1253,12 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes, - max_in_bytes, max_out_bytes); + max_in_bytes, max_out_bytes, + caches); } else { virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes, - max_in_bytes, max_out_bytes); + max_in_bytes, max_out_bytes, + caches); } return; @@ -1705,6 +1690,8 @@ static unsigned int virtqueue_packed_drop_all(VirtQueue *vq) VirtIODevice *vdev = vq->vdev; VRingPackedDesc desc; + RCU_READ_LOCK_GUARD(); + caches = vring_get_region_caches(vq); if (!caches) { return 0; @@ -1973,9 +1960,7 @@ static enum virtio_device_endian virtio_default_endian(void) static enum virtio_device_endian virtio_current_cpu_endian(void) { - CPUClass *cc = CPU_GET_CLASS(current_cpu); - - if (cc->virtio_is_big_endian(current_cpu)) { + if (cpu_virtio_is_big_endian(current_cpu)) { return VIRTIO_DEVICE_ENDIAN_BIG; } else { return VIRTIO_DEVICE_ENDIAN_LITTLE; @@ -2450,6 +2435,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value) } } +/* Called within rcu_read_lock(). */ static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq) { uint16_t old, new; @@ -2486,6 +2472,7 @@ static bool vring_packed_need_event(VirtQueue *vq, bool wrap, return vring_need_event(off, new, old); } +/* Called within rcu_read_lock(). 
*/ static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq) { VRingPackedDescEvent e; @@ -2982,7 +2969,7 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) return ret; } -size_t virtio_feature_get_config_size(VirtIOFeature *feature_sizes, +size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes, uint64_t host_features) { size_t config_size = 0; @@ -3672,6 +3659,7 @@ static void virtio_device_realize(DeviceState *dev, Error **errp) } vdev->listener.commit = virtio_memory_listener_commit; + vdev->listener.name = "virtio"; memory_listener_register(&vdev->listener, vdev->dma_as); } @@ -3731,6 +3719,10 @@ static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev) VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); int i, n, r, err; + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ memory_region_transaction_begin(); for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { VirtQueue *vq = &vdev->vq[n]; @@ -3769,6 +3761,10 @@ static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev) r = virtio_bus_set_host_notifier(qbus, n, false); assert(r >= 0); } + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ memory_region_transaction_commit(); while (--i >= 0) { @@ -3793,6 +3789,10 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); int n, r; + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ memory_region_transaction_begin(); for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { VirtQueue *vq = &vdev->vq[n]; @@ -3804,6 +3804,10 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) r = virtio_bus_set_host_notifier(qbus, n, false); assert(r >= 0); } + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ memory_region_transaction_commit(); for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { diff --git a/hw/watchdog/sbsa_gwdt.c b/hw/watchdog/sbsa_gwdt.c index d0998f8489c..e49cacd0e20 100644 --- a/hw/watchdog/sbsa_gwdt.c +++ b/hw/watchdog/sbsa_gwdt.c @@ -273,8 +273,9 @@ static void wdt_sbsa_gwdt_class_init(ObjectClass *klass, void *data) dc->realize = wdt_sbsa_gwdt_realize; dc->reset = wdt_sbsa_gwdt_reset; dc->hotpluggable = false; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); + set_bit(DEVICE_CATEGORY_WATCHDOG, dc->categories); dc->vmsd = &vmstate_sbsa_gwdt; + dc->desc = "SBSA-compliant generic watchdog device"; } static const TypeInfo wdt_sbsa_gwdt_info = { diff --git a/hw/watchdog/trace-events b/hw/watchdog/trace-events index 3124ca1f1b6..e7523e22aaf 100644 --- a/hw/watchdog/trace-events +++ b/hw/watchdog/trace-events @@ -1,7 +1,11 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
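Each entry in this trace-events file pairs a C-like prototype with a printf-style format string, and the build's tracetool turns it into a trace_<name>() helper that the device model calls with matching arguments (as the wdt_aspeed.c hunk further below does with trace_aspeed_wdt_read(offset, size)). A minimal hedged sketch of that calling pattern, using a hand-written stand-in rather than the generated helper:

/*
 * Sketch only: trace_aspeed_wdt_read() here is a hypothetical stand-in for
 * the helper generated from "aspeed_wdt_read(uint64_t addr, uint32_t size)";
 * it is not QEMU's tracetool output.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void trace_aspeed_wdt_read(uint64_t addr, uint32_t size)
{
    printf("aspeed_wdt_read @0x%" PRIx64 " size=%" PRIu32 "\n", addr, size);
}

int main(void)
{
    /* A device model calls the helper with arguments matching the declared
     * prototype; wdt_aspeed.c passes (offset, size) the same way. */
    trace_aspeed_wdt_read(0x04, 4);
    return 0;
}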
# cmsdk-apb-watchdog.c cmsdk_apb_watchdog_read(uint64_t offset, uint64_t data, unsigned size) "CMSDK APB watchdog read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" cmsdk_apb_watchdog_write(uint64_t offset, uint64_t data, unsigned size) "CMSDK APB watchdog write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" cmsdk_apb_watchdog_reset(void) "CMSDK APB watchdog: reset" cmsdk_apb_watchdog_lock(uint32_t lock) "CMSDK APB watchdog: lock %" PRIu32 + +# wdt-aspeed.c +aspeed_wdt_read(uint64_t addr, uint32_t size) "@0x%" PRIx64 " size=%d" +aspeed_wdt_write(uint64_t addr, uint32_t size, uint64_t data) "@0x%" PRIx64 " size=%d value=0x%"PRIx64 diff --git a/hw/watchdog/watchdog.c b/hw/watchdog/watchdog.c index 0e98ffb73fb..1437e6c5b62 100644 --- a/hw/watchdog/watchdog.c +++ b/hw/watchdog/watchdog.c @@ -76,20 +76,6 @@ int select_watchdog(const char *p) return 1; } -int select_watchdog_action(const char *p) -{ - int action; - char *qapi_value; - - qapi_value = g_ascii_strdown(p, -1); - action = qapi_enum_parse(&WatchdogAction_lookup, qapi_value, -1, NULL); - g_free(qapi_value); - if (action < 0) - return -1; - qmp_watchdog_set_action(action, &error_abort); - return 0; -} - WatchdogAction get_watchdog_action(void) { return watchdog_action; diff --git a/hw/watchdog/wdt_aspeed.c b/hw/watchdog/wdt_aspeed.c index 6352ba1b0e5..6aa6f90b664 100644 --- a/hw/watchdog/wdt_aspeed.c +++ b/hw/watchdog/wdt_aspeed.c @@ -19,6 +19,7 @@ #include "hw/sysbus.h" #include "hw/watchdog/wdt_aspeed.h" #include "migration/vmstate.h" +#include "trace.h" #define WDT_STATUS (0x00 / 4) #define WDT_RELOAD_VALUE (0x04 / 4) @@ -60,6 +61,8 @@ static uint64_t aspeed_wdt_read(void *opaque, hwaddr offset, unsigned size) { AspeedWDTState *s = ASPEED_WDT(opaque); + trace_aspeed_wdt_read(offset, size); + offset >>= 2; switch (offset) { @@ -118,13 +121,29 @@ static void aspeed_wdt_reload_1mhz(AspeedWDTState *s) } } +static uint64_t aspeed_2400_sanitize_ctrl(uint64_t data) +{ + return data & 0xffff; +} + +static uint64_t aspeed_2500_sanitize_ctrl(uint64_t data) +{ + return (data & ~(0xfUL << 8)) | WDT_CTRL_1MHZ_CLK; +} + +static uint64_t aspeed_2600_sanitize_ctrl(uint64_t data) +{ + return data & ~(0x7UL << 7); +} static void aspeed_wdt_write(void *opaque, hwaddr offset, uint64_t data, unsigned size) { AspeedWDTState *s = ASPEED_WDT(opaque); AspeedWDTClass *awc = ASPEED_WDT_GET_CLASS(s); - bool enable = data & WDT_CTRL_ENABLE; + bool enable; + + trace_aspeed_wdt_write(offset, size, data); offset >>= 2; @@ -144,12 +163,16 @@ static void aspeed_wdt_write(void *opaque, hwaddr offset, uint64_t data, } break; case WDT_CTRL: + data = awc->sanitize_ctrl(data); + enable = data & WDT_CTRL_ENABLE; if (enable && !aspeed_wdt_is_enabled(s)) { s->regs[WDT_CTRL] = data; awc->wdt_reload(s); } else if (!enable && aspeed_wdt_is_enabled(s)) { s->regs[WDT_CTRL] = data; timer_del(s->timer); + } else { + s->regs[WDT_CTRL] = data; } break; case WDT_RESET_WIDTH: @@ -207,11 +230,12 @@ static const MemoryRegionOps aspeed_wdt_ops = { static void aspeed_wdt_reset(DeviceState *dev) { AspeedWDTState *s = ASPEED_WDT(dev); + AspeedWDTClass *awc = ASPEED_WDT_GET_CLASS(s); s->regs[WDT_STATUS] = 0x3EF1480; s->regs[WDT_RELOAD_VALUE] = 0x03EF1480; s->regs[WDT_RESTART] = 0; - s->regs[WDT_CTRL] = 0; + s->regs[WDT_CTRL] = awc->sanitize_ctrl(0); s->regs[WDT_RESET_WIDTH] = 0xFF; timer_del(s->timer); @@ -269,9 +293,10 @@ static void aspeed_wdt_class_init(ObjectClass *klass, void *data) dc->desc = "ASPEED Watchdog Controller"; dc->realize = aspeed_wdt_realize; dc->reset = 
aspeed_wdt_reset; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); + set_bit(DEVICE_CATEGORY_WATCHDOG, dc->categories); dc->vmsd = &vmstate_aspeed_wdt; device_class_set_props(dc, aspeed_wdt_properties); + dc->desc = "Aspeed watchdog device"; } static const TypeInfo aspeed_wdt_info = { @@ -293,6 +318,7 @@ static void aspeed_2400_wdt_class_init(ObjectClass *klass, void *data) awc->ext_pulse_width_mask = 0xff; awc->reset_ctrl_reg = SCU_RESET_CONTROL1; awc->wdt_reload = aspeed_wdt_reload; + awc->sanitize_ctrl = aspeed_2400_sanitize_ctrl; } static const TypeInfo aspeed_2400_wdt_info = { @@ -328,6 +354,7 @@ static void aspeed_2500_wdt_class_init(ObjectClass *klass, void *data) awc->reset_ctrl_reg = SCU_RESET_CONTROL1; awc->reset_pulse = aspeed_2500_wdt_reset_pulse; awc->wdt_reload = aspeed_wdt_reload_1mhz; + awc->sanitize_ctrl = aspeed_2500_sanitize_ctrl; } static const TypeInfo aspeed_2500_wdt_info = { @@ -348,6 +375,7 @@ static void aspeed_2600_wdt_class_init(ObjectClass *klass, void *data) awc->reset_ctrl_reg = AST2600_SCU_RESET_CONTROL1; awc->reset_pulse = aspeed_2500_wdt_reset_pulse; awc->wdt_reload = aspeed_wdt_reload_1mhz; + awc->sanitize_ctrl = aspeed_2600_sanitize_ctrl; } static const TypeInfo aspeed_2600_wdt_info = { diff --git a/hw/watchdog/wdt_diag288.c b/hw/watchdog/wdt_diag288.c index e135a4de8b2..9e8882a11cf 100644 --- a/hw/watchdog/wdt_diag288.c +++ b/hw/watchdog/wdt_diag288.c @@ -122,9 +122,10 @@ static void wdt_diag288_class_init(ObjectClass *klass, void *data) dc->unrealize = wdt_diag288_unrealize; dc->reset = wdt_diag288_reset; dc->hotpluggable = false; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); + set_bit(DEVICE_CATEGORY_WATCHDOG, dc->categories); dc->vmsd = &vmstate_diag288; diag288->handle_timer = wdt_diag288_handle_timer; + dc->desc = "diag288 device for s390x platform"; } static const TypeInfo wdt_diag288_info = { diff --git a/hw/watchdog/wdt_i6300esb.c b/hw/watchdog/wdt_i6300esb.c index 4c52e3bb9e1..f99a1c9d294 100644 --- a/hw/watchdog/wdt_i6300esb.c +++ b/hw/watchdog/wdt_i6300esb.c @@ -476,7 +476,8 @@ static void i6300esb_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_SYSTEM_OTHER; dc->reset = i6300esb_reset; dc->vmsd = &vmstate_i6300esb; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); + set_bit(DEVICE_CATEGORY_WATCHDOG, dc->categories); + dc->desc = "Intel 6300ESB"; } static const TypeInfo i6300esb_info = { diff --git a/hw/watchdog/wdt_ib700.c b/hw/watchdog/wdt_ib700.c index 177aaa503f9..91d1bdc0da1 100644 --- a/hw/watchdog/wdt_ib700.c +++ b/hw/watchdog/wdt_ib700.c @@ -140,7 +140,8 @@ static void wdt_ib700_class_init(ObjectClass *klass, void *data) dc->realize = wdt_ib700_realize; dc->reset = wdt_ib700_reset; dc->vmsd = &vmstate_ib700; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); + set_bit(DEVICE_CATEGORY_WATCHDOG, dc->categories); + dc->desc = "iBASE 700"; } static const TypeInfo wdt_ib700_info = { diff --git a/hw/watchdog/wdt_imx2.c b/hw/watchdog/wdt_imx2.c index a5fb76308f5..c3128370b5b 100644 --- a/hw/watchdog/wdt_imx2.c +++ b/hw/watchdog/wdt_imx2.c @@ -280,8 +280,8 @@ static void imx2_wdt_class_init(ObjectClass *klass, void *data) dc->realize = imx2_wdt_realize; dc->reset = imx2_wdt_reset; dc->vmsd = &vmstate_imx2_wdt; - dc->desc = "i.MX watchdog timer"; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->desc = "i.MX2 watchdog timer"; + set_bit(DEVICE_CATEGORY_WATCHDOG, dc->categories); } static const TypeInfo imx2_wdt_info = { diff --git a/hw/xen/trace-events b/hw/xen/trace-events index e6885bc751a..3da3fd83483 100644 --- 
a/hw/xen/trace-events +++ b/hw/xen/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # ../../include/hw/xen/xen_common.h xen_default_ioreq_server(void) "" diff --git a/hw/xen/xen-bus-helper.c b/hw/xen/xen-bus-helper.c index b459bb93968..5a1e12b374e 100644 --- a/hw/xen/xen-bus-helper.c +++ b/hw/xen/xen-bus-helper.c @@ -6,7 +6,6 @@ */ #include "qemu/osdep.h" -#include "hw/sysbus.h" #include "hw/xen/xen.h" #include "hw/xen/xen-bus.h" #include "hw/xen/xen-bus-helper.h" diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c index 8c588920d9f..416583f130b 100644 --- a/hw/xen/xen-bus.c +++ b/hw/xen/xen-bus.c @@ -1398,7 +1398,7 @@ type_init(xen_register_types) void xen_bus_init(void) { DeviceState *dev = qdev_new(TYPE_XEN_BRIDGE); - BusState *bus = qbus_create(TYPE_XEN_BUS, dev, NULL); + BusState *bus = qbus_new(TYPE_XEN_BUS, dev, NULL); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); qbus_set_bus_hotplug_handler(bus); diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c index b61a4855b7b..085fd31ef7a 100644 --- a/hw/xen/xen-legacy-backend.c +++ b/hw/xen/xen-legacy-backend.c @@ -27,7 +27,6 @@ #include "hw/sysbus.h" #include "hw/boards.h" #include "hw/qdev-properties.h" -#include "qemu/log.h" #include "qemu/main-loop.h" #include "qapi/error.h" #include "hw/xen/xen-legacy-backend.h" @@ -277,7 +276,8 @@ static struct XenLegacyDevice *xen_be_get_xendev(const char *type, int dom, xendev = g_malloc0(ops->size); object_initialize(&xendev->qdev, ops->size, TYPE_XENBACKEND); OBJECT(xendev)->free = g_free; - qdev_set_id(DEVICE(xendev), g_strdup_printf("xen-%s-%d", type, dev)); + qdev_set_id(DEVICE(xendev), g_strdup_printf("xen-%s-%d", type, dev), + &error_fatal); qdev_realize(DEVICE(xendev), xen_sysbus, &error_fatal); object_unref(OBJECT(xendev)); @@ -703,7 +703,7 @@ int xen_be_init(void) xen_sysdev = qdev_new(TYPE_XENSYSDEV); sysbus_realize_and_unref(SYS_BUS_DEVICE(xen_sysdev), &error_fatal); - xen_sysbus = qbus_create(TYPE_XENSYSBUS, xen_sysdev, "xen-sysbus"); + xen_sysbus = qbus_new(TYPE_XENSYSBUS, xen_sysdev, "xen-sysbus"); qbus_set_bus_hotplug_handler(xen_sysbus); return 0; diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index a513fdd62d1..027190fa447 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -64,7 +64,6 @@ #include "hw/xen/xen-legacy-backend.h" #include "xen_pt.h" #include "qemu/range.h" -#include "exec/address-spaces.h" static bool has_igd_gfx_passthru; @@ -616,8 +615,8 @@ static void xen_pt_region_update(XenPCIPassthroughState *s, } args.type = d->io_regions[bar].type; - pci_for_each_device(pci_get_bus(d), pci_dev_bus_num(d), - xen_pt_check_bar_overlap, &args); + pci_for_each_device_under_bus(pci_get_bus(d), + xen_pt_check_bar_overlap, &args); if (args.rc) { XEN_PT_WARN(d, "Region: %d (addr: 0x%"FMT_PCIBUS ", len: 0x%"FMT_PCIBUS") is overlapped.\n", @@ -690,12 +689,14 @@ static void xen_pt_io_region_del(MemoryListener *l, MemoryRegionSection *sec) } static const MemoryListener xen_pt_memory_listener = { + .name = "xen-pt-mem", .region_add = xen_pt_region_add, .region_del = xen_pt_region_del, .priority = 10, }; static const MemoryListener xen_pt_io_listener = { + .name = "xen-pt-io", .region_add = xen_pt_io_region_add, .region_del = xen_pt_io_region_del, .priority = 10, diff --git a/hw/xtensa/sim.c b/hw/xtensa/sim.c index cbac50db2de..2028fe793d9 100644 --- a/hw/xtensa/sim.c +++ b/hw/xtensa/sim.c @@ -27,14 +27,12 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include 
"cpu.h" #include "sysemu/reset.h" #include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/loader.h" #include "elf.h" #include "exec/memory.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "xtensa_memory.h" #include "xtensa_sim.h" diff --git a/hw/xtensa/virt.c b/hw/xtensa/virt.c index e47e1de6767..a18e3fc910e 100644 --- a/hw/xtensa/virt.c +++ b/hw/xtensa/virt.c @@ -27,16 +27,13 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "sysemu/reset.h" -#include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/loader.h" #include "hw/pci-host/gpex.h" #include "net/net.h" #include "elf.h" #include "exec/memory.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "xtensa_memory.h" #include "xtensa_sim.h" diff --git a/hw/xtensa/xtensa_memory.c b/hw/xtensa/xtensa_memory.c index 1c5f62b0146..2c1095f0170 100644 --- a/hw/xtensa/xtensa_memory.c +++ b/hw/xtensa/xtensa_memory.c @@ -27,7 +27,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "exec/memory.h" #include "qemu/error-report.h" #include "xtensa_memory.h" diff --git a/hw/xtensa/xtfpga.c b/hw/xtensa/xtfpga.c index 7be53f1895b..17f087b3951 100644 --- a/hw/xtensa/xtfpga.c +++ b/hw/xtensa/xtfpga.c @@ -35,7 +35,6 @@ #include "hw/qdev-properties.h" #include "elf.h" #include "exec/memory.h" -#include "exec/address-spaces.h" #include "hw/char/serial.h" #include "net/net.h" #include "hw/sysbus.h" diff --git a/include/block/aio.h b/include/block/aio.h index 5f342267d5c..47fbe9d81f2 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -232,6 +232,9 @@ struct AioContext { int64_t poll_grow; /* polling time growth factor */ int64_t poll_shrink; /* polling time shrink factor */ + /* AIO engine parameters */ + int64_t aio_max_batch; /* maximum number of requests in a batch */ + /* * List of handlers participating in userspace polling. Protected by * ctx->list_lock. Iterated and modified mostly by the event loop thread @@ -291,20 +294,45 @@ void aio_context_acquire(AioContext *ctx); /* Relinquish ownership of the AioContext. */ void aio_context_release(AioContext *ctx); +/** + * aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will + * run only once and as soon as possible. + * + * @name: A human-readable identifier for debugging purposes. + */ +void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, + const char *name); + /** * aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run * only once and as soon as possible. + * + * A convenience wrapper for aio_bh_schedule_oneshot_full() that uses cb as the + * name string. */ -void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); +#define aio_bh_schedule_oneshot(ctx, cb, opaque) \ + aio_bh_schedule_oneshot_full((ctx), (cb), (opaque), (stringify(cb))) /** - * aio_bh_new: Allocate a new bottom half structure. + * aio_bh_new_full: Allocate a new bottom half structure. * * Bottom halves are lightweight callbacks whose invocation is guaranteed * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure * is opaque and must be allocated prior to its use. + * + * @name: A human-readable identifier for debugging purposes. + */ +QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, + const char *name); + +/** + * aio_bh_new: Allocate a new bottom half structure + * + * A convenience wrapper for aio_bh_new_full() that uses the cb as the name + * string. 
*/ -QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque); +#define aio_bh_new(ctx, cb, opaque) \ + aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb))) /** * aio_notify: Force processing of pending events. @@ -691,10 +719,13 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co); * Return the AioContext whose event loop runs in the current thread. * * If called from an IOThread this will be the IOThread's AioContext. If - * called from another thread it will be the main loop AioContext. + * called from the main thread or with the "big QEMU lock" taken it + * will be the main loop AioContext. */ AioContext *qemu_get_current_aio_context(void); +void qemu_set_current_aio_context(AioContext *ctx); + /** * aio_context_setup: * @ctx: the aio context @@ -727,4 +758,13 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, int64_t grow, int64_t shrink, Error **errp); +/** + * aio_context_set_aio_params: + * @ctx: the aio context + * @max_batch: maximum number of requests in a batch, 0 means that the + * engine will use its default + */ +void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch, + Error **errp); + #endif diff --git a/include/block/block-copy.h b/include/block/block-copy.h index 338f2ea7fdf..99370fa38be 100644 --- a/include/block/block-copy.h +++ b/include/block/block-copy.h @@ -18,15 +18,18 @@ #include "block/block.h" #include "qemu/co-shared-resource.h" +/* All APIs are thread-safe */ + typedef void (*BlockCopyAsyncCallbackFunc)(void *opaque); typedef struct BlockCopyState BlockCopyState; typedef struct BlockCopyCallState BlockCopyCallState; BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, - int64_t cluster_size, bool use_copy_range, - BdrvRequestFlags write_flags, Error **errp); +/* Function should be called prior any actual copy request */ +void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range, + bool compress); void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm); void block_copy_state_free(BlockCopyState *s); @@ -87,6 +90,7 @@ void block_copy_kick(BlockCopyCallState *call_state); void block_copy_call_cancel(BlockCopyCallState *call_state); BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s); +int64_t block_copy_cluster_size(BlockCopyState *s); void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip); #endif /* BLOCK_COPY_H */ diff --git a/include/block/block.h b/include/block/block.h index b3f6e509d49..e5dd22b0343 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -9,6 +9,7 @@ #include "block/dirty-bitmap.h" #include "block/blockjob.h" #include "qemu/hbitmap.h" +#include "qemu/transactions.h" /* * generated_co_wrapper @@ -101,6 +102,7 @@ typedef struct HDGeometry { uint32_t cylinders; } HDGeometry; +#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */ #define BDRV_O_RDWR 0x0002 #define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */ #define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ @@ -206,9 +208,8 @@ typedef struct BDRVReopenState { int flags; BlockdevDetectZeroesOptions detect_zeroes; bool backing_missing; - bool replace_backing_bs; /* new_backing_bs is ignored if this is false */ - BlockDriverState *new_backing_bs; /* If NULL then detach the current bs */ - uint64_t perm, shared_perm; + BlockDriverState *old_backing_bs; /* keep pointer for permissions update */ + BlockDriverState *old_file_bs; /* keep pointer for permissions update */ QDict *options; QDict 
*explicit_options; void *opaque; @@ -360,8 +361,11 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, Error **errp); int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp); +int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, + Error **errp); BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options, int flags, Error **errp); +int bdrv_drop_filter(BlockDriverState *bs, Error **errp); int bdrv_parse_aio(const char *mode, int *flags); int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough); @@ -379,18 +383,21 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, const char *bdref_key, Error **errp); BlockDriverState *bdrv_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp); +BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, + const char *node_name, + QDict *options, int flags, + Error **errp); BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, int flags, Error **errp); BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs, QDict *options, bool keep_old_opts); +void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue); int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); +int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + Error **errp); int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, Error **errp); -int bdrv_reopen_prepare(BDRVReopenState *reopen_state, - BlockReopenQueue *queue, Error **errp); -void bdrv_reopen_commit(BDRVReopenState *reopen_state); -void bdrv_reopen_abort(BDRVReopenState *reopen_state); int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int64_t bytes, BdrvRequestFlags flags); int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); @@ -424,7 +431,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, BlockDriverState *in_bs, Error **errp); void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); -void bdrv_refresh_limits(BlockDriverState *bs, Error **errp); +void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp); int bdrv_commit(BlockDriverState *bs); int bdrv_make_empty(BdrvChild *c, Error **errp); int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, @@ -702,6 +709,9 @@ bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, GSList **ignore, Error **errp); bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, GSList **ignore, Error **errp); +AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c); +AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c); + int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); @@ -745,9 +755,7 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, * bdrv_drained_begin: * * Begin a quiesced section for exclusive access to the BDS, by disabling - * external request sources including NBD server and device model. Note that - * this doesn't block timers or coroutines from submitting more requests, which - * means block_job_pause is still necessary. + * external request sources including NBD server, block jobs, and device model. * * This function can be recursive. 
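For context on the reworded bdrv_drained_begin() comment, a drained section pairs it with bdrv_drained_end(), roughly as follows (sketch, not part of this patch; do_something_exclusive is a hypothetical operation):

    bdrv_drained_begin(bs);
    /* exclusive access: NBD server, block jobs and device models are
     * quiesced and submit no new requests until the section ends */
    do_something_exclusive(bs);
    bdrv_drained_end(bs);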
*/ diff --git a/include/block/block_int.h b/include/block/block_int.h index 88e41119398..f4c75e8ba95 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -34,6 +34,7 @@ #include "qemu/hbitmap.h" #include "block/snapshot.h" #include "qemu/throttle.h" +#include "qemu/rcu.h" #define BLOCK_FLAG_LAZY_REFCOUNTS 8 @@ -93,6 +94,9 @@ typedef struct BdrvTrackedRequest { struct BdrvTrackedRequest *waiting_for; } BdrvTrackedRequest; +int bdrv_check_qiov_request(int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + Error **errp); int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp); struct BlockDriver { @@ -231,11 +235,11 @@ struct BlockDriver { /* aio */ BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags, - BlockCompletionFunc *cb, void *opaque); + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags, - BlockCompletionFunc *cb, void *opaque); + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs, @@ -261,10 +265,11 @@ struct BlockDriver { * The buffer in @qiov may point directly to guest memory. */ int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, int flags); + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags); /** @@ -283,10 +288,11 @@ struct BlockDriver { * The buffer in @qiov may point directly to guest memory. */ int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, int flags); + int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); /* * Efficiently zero a region of the disk image. Typically an image format @@ -295,9 +301,9 @@ struct BlockDriver { * will be called instead. 
*/ int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs, - int64_t offset, int bytes, BdrvRequestFlags flags); + int64_t offset, int64_t bytes, BdrvRequestFlags flags); int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs, - int64_t offset, int bytes); + int64_t offset, int64_t bytes); /* Map [offset, offset + nbytes) range onto a child of @bs to copy from, * and invoke bdrv_co_copy_range_from(child, ...), or invoke @@ -308,10 +314,10 @@ struct BlockDriver { */ int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs, BdrvChild *src, - uint64_t offset, + int64_t offset, BdrvChild *dst, - uint64_t dst_offset, - uint64_t bytes, + int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags); @@ -325,10 +331,10 @@ struct BlockDriver { */ int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs, BdrvChild *src, - uint64_t src_offset, + int64_t src_offset, BdrvChild *dst, - uint64_t dst_offset, - uint64_t bytes, + int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags); @@ -347,6 +353,15 @@ struct BlockDriver { * clamped to bdrv_getlength() and aligned to request_alignment, * as well as non-NULL pnum, map, and file; in turn, the driver * must return an error or set pnum to an aligned non-zero value. + * + * Note that @bytes is just a hint on how big of a region the + * caller wants to inspect. It is not a limit on *pnum. + * Implementations are free to return larger values of *pnum if + * doing so does not incur a performance penalty. + * + * block/io.c's bdrv_co_block_status() will utilize an unclamped + * *pnum value for the block-status cache on protocol nodes, prior + * to clamping *pnum for return to its caller. */ int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, @@ -357,7 +372,7 @@ struct BlockDriver { * of in-flight requests, so don't waste the time if possible. * * One example usage is to avoid waiting for an nbd target node reconnect - * timeout during job-cancel. + * timeout during job-cancel with force=true. */ void (*bdrv_cancel_in_flight)(BlockDriverState *bs); @@ -424,10 +439,9 @@ struct BlockDriver { Error **errp); int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov); + int64_t offset, int64_t bytes, QEMUIOVector *qiov); int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, - size_t qiov_offset); + int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset); int (*bdrv_snapshot_create)(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); @@ -660,11 +674,12 @@ typedef struct BlockLimits { * otherwise. */ uint32_t request_alignment; - /* Maximum number of bytes that can be discarded at once (since it - * is signed, it must be < 2G, if set). Must be multiple of - * pdiscard_alignment, but need not be power of 2. May be 0 if no - * inherent 32-bit limit */ - int32_t max_pdiscard; + /* + * Maximum number of bytes that can be discarded at once. Must be multiple + * of pdiscard_alignment, but need not be power of 2. May be 0 if no + * inherent 64-bit limit. + */ + int64_t max_pdiscard; /* Optimal alignment for discard requests in bytes. A power of 2 * is best but not mandatory. Must be a multiple of @@ -672,10 +687,11 @@ typedef struct BlockLimits { * that is set. 
May be 0 if bl.request_alignment is good enough */ uint32_t pdiscard_alignment; - /* Maximum number of bytes that can zeroized at once (since it is - * signed, it must be < 2G, if set). Must be multiple of - * pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */ - int32_t max_pwrite_zeroes; + /* + * Maximum number of bytes that can zeroized at once. Must be multiple of + * pwrite_zeroes_alignment. 0 means no limit. + */ + int64_t max_pwrite_zeroes; /* Optimal alignment for write zeroes requests in bytes. A power * of 2 is best but not mandatory. Must be a multiple of @@ -695,6 +711,20 @@ typedef struct BlockLimits { * clamped down. */ uint32_t max_transfer; + /* Maximal hardware transfer length in bytes. Applies whenever + * transfers to the device bypass the kernel I/O scheduler, for + * example with SG_IO. If larger than max_transfer or if zero, + * blk_get_max_hw_transfer will fall back to max_transfer. + */ + uint64_t max_hw_transfer; + + /* Maximal number of scatter/gather elements allowed by the hardware. + * Applies whenever transfers to the device bypass the kernel I/O + * scheduler, for example with SG_IO. If larger than max_iov + * or if zero, blk_get_max_hw_iov will fall back to max_iov. + */ + int max_hw_iov; + /* memory alignment, in bytes so that no bounce buffer is needed */ size_t min_mem_alignment; @@ -789,6 +819,8 @@ struct BdrvChildClass { bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore, Error **errp); void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore); + + AioContext *(*get_parent_aio_context)(BdrvChild *child); }; extern const BdrvChildClass child_of_bds; @@ -811,11 +843,6 @@ struct BdrvChild { */ uint64_t shared_perm; - /* backup of permissions during permission update procedure */ - bool has_backup_perm; - uint64_t backup_perm; - uint64_t backup_shared_perm; - /* * This link is frozen: the child can neither be replaced nor * detached from the parent. @@ -836,17 +863,28 @@ struct BdrvChild { }; /* - * Note: the function bdrv_append() copies and swaps contents of - * BlockDriverStates, so if you add new fields to this struct, please - * inspect bdrv_append() to determine if the new fields need to be - * copied as well. + * Allows bdrv_co_block_status() to cache one data region for a + * protocol node. + * + * @valid: Whether the cache is valid (should be accessed with atomic + * functions so this can be reset by RCU readers) + * @data_start: Offset where we know (or strongly assume) is data + * @data_end: Offset where the data region ends (which is not necessarily + * the start of a zeroed region) */ +typedef struct BdrvBlockStatusCache { + struct rcu_head rcu; + + bool valid; + int64_t data_start; + int64_t data_end; +} BdrvBlockStatusCache; + struct BlockDriverState { /* Protected by big QEMU lock or read-only after opening. No special * locking needed during I/O... */ int open_flags; /* flags used to open the file, re-used for re-open */ - bool read_only; /* if true, the media is read only */ bool encrypted; /* if true, the media is encrypted */ bool sg; /* if true, the device is a /dev/sg* */ bool probed; /* if true, format was probed rather than specified */ @@ -957,12 +995,8 @@ struct BlockDriverState { */ int64_t total_sectors; - /* Callback before write request is processed */ - NotifierWithReturnList before_write_notifiers; - /* threshold limit for writes, in bytes. "High water mark". 
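The write_threshold_notifier removal here pairs with the write-threshold.h change further below: the write path is expected to call the new helper directly instead of going through a before-write notifier. A rough sketch (not part of this patch; offset and bytes are the request bounds):

    bdrv_write_threshold_check_write(bs, offset, bytes);
    /* per the new header comment: sends the corresponding event and
     * disables further threshold checking when the request exceeds it */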
*/ uint64_t write_threshold_offset; - NotifierWithReturn write_threshold_notifier; /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex. * Reading from the list can be done with either the BQL or the @@ -1011,11 +1045,15 @@ struct BlockDriverState { /* BdrvChild links to this node may never be frozen */ bool never_freeze; + + /* Lock for block-status cache RCU writers */ + CoMutex bsc_modify_lock; + /* Always non-NULL, but must only be dereferenced under an RCU read guard */ + BdrvBlockStatusCache *block_status_cache; }; struct BlockBackendRootState { int open_flags; - bool read_only; BlockdevDetectZeroesOptions detect_zeroes; }; @@ -1087,15 +1125,6 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, bool bdrv_backing_overridden(BlockDriverState *bs); -/** - * bdrv_add_before_write_notifier: - * - * Register a callback that is invoked before write requests are processed but - * after any throttling or waiting for overlapping requests. - */ -void bdrv_add_before_write_notifier(BlockDriverState *bs, - NotifierWithReturn *notifier); - /** * bdrv_add_aio_context_notifier: * @@ -1306,7 +1335,6 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildClass *child_class, BdrvChildRole child_role, - AioContext *ctx, uint64_t perm, uint64_t shared_perm, void *opaque, Error **errp); void bdrv_root_unref_child(BdrvChild *child); @@ -1447,4 +1475,30 @@ static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs) */ void bdrv_drain_all_end_quiesce(BlockDriverState *bs); +/** + * Check whether the given offset is in the cached block-status data + * region. + * + * If it is, and @pnum is not NULL, *pnum is set to + * `bsc.data_end - offset`, i.e. how many bytes, starting from + * @offset, are data (according to the cache). + * Otherwise, *pnum is not touched. + */ +bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum); + +/** + * If [offset, offset + bytes) overlaps with the currently cached + * block-status region, invalidate the cache. + * + * (To be used by I/O paths that cause data regions to be zero or + * holes.) + */ +void bdrv_bsc_invalidate_range(BlockDriverState *bs, + int64_t offset, int64_t bytes); + +/** + * Mark the range [offset, offset + bytes) as a data region. 
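A sketch of how a protocol driver's bdrv_co_block_status() might use the cache helpers declared above (illustrative only, not part of this patch; error handling and *map/*file reporting omitted, data_start/data_bytes are hypothetical probe results):

    int64_t cached_bytes;
    if (bdrv_bsc_is_data(bs, offset, &cached_bytes)) {
        /* cache hit: [offset, offset + cached_bytes) is known to be data */
        return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
    }
    /* otherwise probe the protocol (e.g. SEEK_DATA/SEEK_HOLE) and, once a
     * data region is found, remember it for subsequent queries: */
    bdrv_bsc_fill(bs, data_start, data_bytes);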
+ */ +void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); + #endif /* BLOCK_INT_H */ diff --git a/include/block/nbd.h b/include/block/nbd.h index 5f34d23bb03..78d101b7748 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -406,4 +406,22 @@ const char *nbd_info_lookup(uint16_t info); const char *nbd_cmd_lookup(uint16_t info); const char *nbd_err_lookup(int err); +/* nbd/client-connection.c */ +typedef struct NBDClientConnection NBDClientConnection; + +void nbd_client_connection_enable_retry(NBDClientConnection *conn); + +NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr, + bool do_negotiation, + const char *export_name, + const char *x_dirty_bitmap, + QCryptoTLSCreds *tlscreds); +void nbd_client_connection_release(NBDClientConnection *conn); + +QIOChannel *coroutine_fn +nbd_co_establish_connection(NBDClientConnection *conn, NBDExportInfo *info, + bool blocking, Error **errp); + +void coroutine_fn nbd_co_establish_connection_cancel(NBDClientConnection *conn); + #endif diff --git a/include/block/nvme.h b/include/block/nvme.h index 4ac926fbc68..e3bd47bf76a 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -7,9 +7,9 @@ typedef struct QEMU_PACKED NvmeBar { uint32_t intms; uint32_t intmc; uint32_t cc; - uint32_t rsvd1; + uint8_t rsvd24[4]; uint32_t csts; - uint32_t nssrc; + uint32_t nssr; uint32_t aqa; uint64_t asq; uint64_t acq; @@ -26,10 +26,38 @@ typedef struct QEMU_PACKED NvmeBar { uint32_t pmrsts; uint32_t pmrebs; uint32_t pmrswtp; - uint64_t pmrmsc; + uint32_t pmrmscl; + uint32_t pmrmscu; uint8_t css[484]; } NvmeBar; +enum NvmeBarRegs { + NVME_REG_CAP = offsetof(NvmeBar, cap), + NVME_REG_VS = offsetof(NvmeBar, vs), + NVME_REG_INTMS = offsetof(NvmeBar, intms), + NVME_REG_INTMC = offsetof(NvmeBar, intmc), + NVME_REG_CC = offsetof(NvmeBar, cc), + NVME_REG_CSTS = offsetof(NvmeBar, csts), + NVME_REG_NSSR = offsetof(NvmeBar, nssr), + NVME_REG_AQA = offsetof(NvmeBar, aqa), + NVME_REG_ASQ = offsetof(NvmeBar, asq), + NVME_REG_ACQ = offsetof(NvmeBar, acq), + NVME_REG_CMBLOC = offsetof(NvmeBar, cmbloc), + NVME_REG_CMBSZ = offsetof(NvmeBar, cmbsz), + NVME_REG_BPINFO = offsetof(NvmeBar, bpinfo), + NVME_REG_BPRSEL = offsetof(NvmeBar, bprsel), + NVME_REG_BPMBL = offsetof(NvmeBar, bpmbl), + NVME_REG_CMBMSC = offsetof(NvmeBar, cmbmsc), + NVME_REG_CMBSTS = offsetof(NvmeBar, cmbsts), + NVME_REG_PMRCAP = offsetof(NvmeBar, pmrcap), + NVME_REG_PMRCTL = offsetof(NvmeBar, pmrctl), + NVME_REG_PMRSTS = offsetof(NvmeBar, pmrsts), + NVME_REG_PMREBS = offsetof(NvmeBar, pmrebs), + NVME_REG_PMRSWTP = offsetof(NvmeBar, pmrswtp), + NVME_REG_PMRMSCL = offsetof(NvmeBar, pmrmscl), + NVME_REG_PMRMSCU = offsetof(NvmeBar, pmrmscu), +}; + enum NvmeCapShift { CAP_MQES_SHIFT = 0, CAP_CQR_SHIFT = 16, @@ -475,25 +503,25 @@ enum NvmePmrswtpMask { #define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val) \ (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << PMRSWTP_PMRSWTV_SHIFT) -enum NvmePmrmscShift { - PMRMSC_CMSE_SHIFT = 1, - PMRMSC_CBA_SHIFT = 12, +enum NvmePmrmsclShift { + PMRMSCL_CMSE_SHIFT = 1, + PMRMSCL_CBA_SHIFT = 12, }; -enum NvmePmrmscMask { - PMRMSC_CMSE_MASK = 0x1, - PMRMSC_CBA_MASK = 0xfffffffffffff, +enum NvmePmrmsclMask { + PMRMSCL_CMSE_MASK = 0x1, + PMRMSCL_CBA_MASK = 0xfffff, }; -#define NVME_PMRMSC_CMSE(pmrmsc) \ - ((pmrmsc >> PMRMSC_CMSE_SHIFT) & PMRMSC_CMSE_MASK) -#define NVME_PMRMSC_CBA(pmrmsc) \ - ((pmrmsc >> PMRMSC_CBA_SHIFT) & PMRMSC_CBA_MASK) +#define NVME_PMRMSCL_CMSE(pmrmscl) \ + ((pmrmscl >> PMRMSCL_CMSE_SHIFT) & PMRMSCL_CMSE_MASK) +#define 
NVME_PMRMSCL_CBA(pmrmscl) \ + ((pmrmscl >> PMRMSCL_CBA_SHIFT) & PMRMSCL_CBA_MASK) -#define NVME_PMRMSC_SET_CMSE(pmrmsc, val) \ - (pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT) -#define NVME_PMRMSC_SET_CBA(pmrmsc, val) \ - (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT) +#define NVME_PMRMSCL_SET_CMSE(pmrmscl, val) \ + (pmrmscl |= (uint32_t)(val & PMRMSCL_CMSE_MASK) << PMRMSCL_CMSE_SHIFT) +#define NVME_PMRMSCL_SET_CBA(pmrmscl, val) \ + (pmrmscl |= (uint32_t)(val & PMRMSCL_CBA_MASK) << PMRMSCL_CBA_SHIFT) enum NvmeSglDescriptorType { NVME_SGL_DESCR_TYPE_DATA_BLOCK = 0x0, @@ -708,6 +736,14 @@ enum { #define NVME_RW_PRINFO(control) ((control >> 10) & 0xf) +enum { + NVME_PRINFO_PRACT = 1 << 3, + NVME_PRINFO_PRCHK_GUARD = 1 << 2, + NVME_PRINFO_PRCHK_APP = 1 << 1, + NVME_PRINFO_PRCHK_REF = 1 << 0, + NVME_PRINFO_PRCHK_MASK = 7 << 0, +}; + typedef struct QEMU_PACKED NvmeDsmCmd { uint8_t opcode; uint8_t flags; @@ -848,8 +884,8 @@ enum NvmeStatusCodes { NVME_FW_REQ_SUSYSTEM_RESET = 0x0110, NVME_NS_ALREADY_ATTACHED = 0x0118, NVME_NS_PRIVATE = 0x0119, - NVME_NS_NOT_ATTACHED = 0x011A, - NVME_NS_CTRL_LIST_INVALID = 0x011C, + NVME_NS_NOT_ATTACHED = 0x011a, + NVME_NS_CTRL_LIST_INVALID = 0x011c, NVME_CONFLICTING_ATTRS = 0x0180, NVME_INVALID_PROT_INFO = 0x0181, NVME_WRITE_TO_RO = 0x0182, @@ -980,6 +1016,7 @@ enum NvmeIdCns { NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT = 0x11, NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12, + NVME_ID_CNS_CTRL_LIST = 0x13, NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a, NVME_ID_CNS_CS_NS_PRESENT = 0x1b, NVME_ID_CNS_IO_COMMAND_SET = 0x1c, @@ -1117,6 +1154,11 @@ enum NvmeIdCtrlCmic { NVME_CMIC_MULTI_CTRL = 1 << 1, }; +enum NvmeNsAttachmentOperation { + NVME_NS_ATTACHMENT_ATTACH = 0x0, + NVME_NS_ATTACHMENT_DETACH = 0x1, +}; + #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf) #define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf) #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf) @@ -1341,6 +1383,15 @@ enum NvmeIdNsDps { NVME_ID_NS_DPS_FIRST_EIGHT = 8, }; +enum NvmeIdNsFlbas { + NVME_ID_NS_FLBAS_EXTENDED = 1 << 4, +}; + +enum NvmeIdNsMc { + NVME_ID_NS_MC_EXTENDED = 1 << 0, + NVME_ID_NS_MC_SEPARATE = 1 << 1, +}; + #define NVME_ID_NS_DPS_TYPE(dps) (dps & NVME_ID_NS_DPS_TYPE_MASK) typedef struct NvmeDifTuple { @@ -1409,9 +1460,9 @@ typedef enum NvmeZoneState { NVME_ZONE_STATE_IMPLICITLY_OPEN = 0x02, NVME_ZONE_STATE_EXPLICITLY_OPEN = 0x03, NVME_ZONE_STATE_CLOSED = 0x04, - NVME_ZONE_STATE_READ_ONLY = 0x0D, - NVME_ZONE_STATE_FULL = 0x0E, - NVME_ZONE_STATE_OFFLINE = 0x0F, + NVME_ZONE_STATE_READ_ONLY = 0x0d, + NVME_ZONE_STATE_FULL = 0x0e, + NVME_ZONE_STATE_OFFLINE = 0x0f, } NvmeZoneState; static inline void _nvme_check_size(void) diff --git a/include/block/qdict.h b/include/block/qdict.h index d8cb502d7db..ced2acfb92a 100644 --- a/include/block/qdict.h +++ b/include/block/qdict.h @@ -20,8 +20,6 @@ void qdict_join(QDict *dest, QDict *src, bool overwrite); void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start); void qdict_array_split(QDict *src, QList **dst); int qdict_array_entries(QDict *src, const char *subqdict); -QObject *qdict_crumple(const QDict *src, Error **errp); -void qdict_flatten(QDict *qdict); typedef struct QDictRenames { const char *from; diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h index 251b10d2733..21fc10c4c96 100644 --- a/include/block/raw-aio.h +++ b/include/block/raw-aio.h @@ -51,11 +51,13 @@ typedef struct LinuxAioState LinuxAioState; LinuxAioState *laio_init(Error **errp); void 
laio_cleanup(LinuxAioState *s); int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, - uint64_t offset, QEMUIOVector *qiov, int type); + uint64_t offset, QEMUIOVector *qiov, int type, + uint64_t dev_max_batch); void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context); void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); void laio_io_plug(BlockDriverState *bs, LinuxAioState *s); -void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s); +void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, + uint64_t dev_max_batch); #endif /* io_uring.c - Linux io_uring implementation */ #ifdef CONFIG_LINUX_IO_URING diff --git a/replication.h b/include/block/replication.h similarity index 98% rename from replication.h rename to include/block/replication.h index d49fc22cb9f..21931b4f0ca 100644 --- a/replication.h +++ b/include/block/replication.h @@ -23,7 +23,7 @@ typedef struct ReplicationOps ReplicationOps; typedef struct ReplicationState ReplicationState; /** - * SECTION:replication.h + * SECTION:block/replication.h * @title:Base Replication System * @short_description: interfaces for handling replication * @@ -32,7 +32,7 @@ typedef struct ReplicationState ReplicationState; * * How to use replication interfaces * - * #include "replication.h" + * #include "block/replication.h" * * typedef struct BDRVReplicationState { * ReplicationState *rs; diff --git a/include/block/write-threshold.h b/include/block/write-threshold.h index c646f267a45..f50f923e7e1 100644 --- a/include/block/write-threshold.h +++ b/include/block/write-threshold.h @@ -13,7 +13,7 @@ #ifndef BLOCK_WRITE_THRESHOLD_H #define BLOCK_WRITE_THRESHOLD_H -#include "block/block_int.h" +#include "qemu/typedefs.h" /* * bdrv_write_threshold_set: @@ -36,27 +36,12 @@ void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes); uint64_t bdrv_write_threshold_get(const BlockDriverState *bs); /* - * bdrv_write_threshold_is_set + * bdrv_write_threshold_check_write * - * Tell if a write threshold is set for a given BDS. + * Check whether the specified request exceeds the write threshold. + * If so, send a corresponding event and disable write threshold checking. */ -bool bdrv_write_threshold_is_set(const BlockDriverState *bs); - -/* - * bdrv_write_threshold_exceeded - * - * Return the extent of a write request that exceeded the threshold, - * or zero if the request is below the threshold. - * Return zero also if the threshold was not set. - * - * NOTE: here we assume the following holds for each request this code - * deals with: - * - * assert((req->offset + req->bytes) <= UINT64_MAX) - * - * Please not there is *not* an actual C assert(). - */ -uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, - const BdrvTrackedRequest *req); +void bdrv_write_threshold_check_write(BlockDriverState *bs, int64_t offset, + int64_t bytes); #endif diff --git a/include/chardev/char-fe.h b/include/chardev/char-fe.h index a5538433649..867ef1b3b28 100644 --- a/include/chardev/char-fe.h +++ b/include/chardev/char-fe.h @@ -174,6 +174,9 @@ void qemu_chr_fe_set_open(CharBackend *be, int fe_open); void qemu_chr_fe_printf(CharBackend *be, const char *fmt, ...) GCC_FMT_ATTR(2, 3); + +typedef gboolean (*FEWatchFunc)(void *do_not_use, GIOCondition condition, void *data); + /** * qemu_chr_fe_add_watch: * @cond: the condition to poll for @@ -188,10 +191,13 @@ void qemu_chr_fe_printf(CharBackend *be, const char *fmt, ...) 
* Note that you are responsible to update the front-end sources if * you are switching the main context with qemu_chr_fe_set_handlers(). * + * Warning: DO NOT use the first callback argument (it may be either + * a GIOChannel or a QIOChannel, depending on the underlying chardev) + * * Returns: the source tag */ guint qemu_chr_fe_add_watch(CharBackend *be, GIOCondition cond, - GIOFunc func, void *user_data); + FEWatchFunc func, void *user_data); /** * qemu_chr_fe_write: diff --git a/include/chardev/char.h b/include/chardev/char.h index 7c0444f90db..a319b5fdff7 100644 --- a/include/chardev/char.h +++ b/include/chardev/char.h @@ -254,26 +254,58 @@ struct ChardevClass { bool internal; /* TODO: eventually use TYPE_USER_CREATABLE */ bool supports_yank; + + /* parse command line options and populate QAPI @backend */ void (*parse)(QemuOpts *opts, ChardevBackend *backend, Error **errp); + /* called after construction, open/starts the backend */ void (*open)(Chardev *chr, ChardevBackend *backend, bool *be_opened, Error **errp); + /* write buf to the backend */ int (*chr_write)(Chardev *s, const uint8_t *buf, int len); + + /* + * Read from the backend (blocking). A typical front-end will instead rely + * on chr_can_read/chr_read being called when polling/looping. + */ int (*chr_sync_read)(Chardev *s, const uint8_t *buf, int len); + + /* create a watch on the backend */ GSource *(*chr_add_watch)(Chardev *s, GIOCondition cond); + + /* update the backend internal sources */ void (*chr_update_read_handler)(Chardev *s); + + /* send an ioctl to the backend */ int (*chr_ioctl)(Chardev *s, int cmd, void *arg); + + /* get ancillary-received fds during last read */ int (*get_msgfds)(Chardev *s, int* fds, int num); + + /* set ancillary fds to be sent with next write */ int (*set_msgfds)(Chardev *s, int *fds, int num); + + /* accept the given fd */ int (*chr_add_client)(Chardev *chr, int fd); + + /* wait for a connection */ int (*chr_wait_connected)(Chardev *chr, Error **errp); + + /* disconnect a connection */ void (*chr_disconnect)(Chardev *chr); + + /* called by frontend when it can read */ void (*chr_accept_input)(Chardev *chr); + + /* set terminal echo */ void (*chr_set_echo)(Chardev *chr, bool echo); + + /* notify the backend of frontend open state */ void (*chr_set_fe_open)(Chardev *chr, int fe_open); + + /* handle various events */ void (*chr_be_event)(Chardev *s, QEMUChrEvent event); - void (*chr_options_parsed)(Chardev *chr); }; Chardev *qemu_chardev_new(const char *id, const char *typename, diff --git a/include/crypto/tls-cipher-suites.h b/include/crypto/tls-cipher-suites.h index bb9ee53e03a..7eb1b76122d 100644 --- a/include/crypto/tls-cipher-suites.h +++ b/include/crypto/tls-cipher-suites.h @@ -19,12 +19,6 @@ typedef struct QCryptoTLSCipherSuites QCryptoTLSCipherSuites; DECLARE_INSTANCE_CHECKER(QCryptoTLSCipherSuites, QCRYPTO_TLS_CIPHER_SUITES, TYPE_QCRYPTO_TLS_CIPHER_SUITES) -struct QCryptoTLSCipherSuites { - /* */ - QCryptoTLSCreds parent_obj; - /* */ -}; - /** * qcrypto_tls_cipher_suites_get_data: * @obj: pointer to a TLS cipher suites object diff --git a/include/crypto/tlscreds.h b/include/crypto/tlscreds.h index d0808e391e9..2a8a8570109 100644 --- a/include/crypto/tlscreds.h +++ b/include/crypto/tlscreds.h @@ -24,10 +24,6 @@ #include "qapi/qapi-types-crypto.h" #include "qom/object.h" -#ifdef CONFIG_GNUTLS -#include -#endif - #define TYPE_QCRYPTO_TLS_CREDS "tls-creds" typedef struct QCryptoTLSCreds QCryptoTLSCreds; typedef struct QCryptoTLSCredsClass QCryptoTLSCredsClass; @@ -48,22 +44,24 @@ 
typedef bool (*CryptoTLSCredsReload)(QCryptoTLSCreds *, Error **); * certificate credentials. */ -struct QCryptoTLSCreds { - Object parent_obj; - char *dir; - QCryptoTLSCredsEndpoint endpoint; -#ifdef CONFIG_GNUTLS - gnutls_dh_params_t dh_params; -#endif - bool verifyPeer; - char *priority; -}; - - struct QCryptoTLSCredsClass { ObjectClass parent_class; CryptoTLSCredsReload reload; }; +/** + * qcrypto_tls_creds_check_endpoint: + * @creds: pointer to a TLS credentials object + * @endpoint: type of network endpoint that will be using the credentials + * @errp: pointer to a NULL-initialized error object + * + * Check whether the credentials is setup according to + * the type of @endpoint argument. + * + * Returns true if the credentials is setup for the endpoint, false otherwise + */ +bool qcrypto_tls_creds_check_endpoint(QCryptoTLSCreds *creds, + QCryptoTLSCredsEndpoint endpoint, + Error **errp); #endif /* QCRYPTO_TLSCREDS_H */ diff --git a/include/crypto/tlscredsanon.h b/include/crypto/tlscredsanon.h index 3f464a38095..bd3023f9ea7 100644 --- a/include/crypto/tlscredsanon.h +++ b/include/crypto/tlscredsanon.h @@ -92,18 +92,6 @@ typedef struct QCryptoTLSCredsAnonClass QCryptoTLSCredsAnonClass; * */ - -struct QCryptoTLSCredsAnon { - QCryptoTLSCreds parent_obj; -#ifdef CONFIG_GNUTLS - union { - gnutls_anon_server_credentials_t server; - gnutls_anon_client_credentials_t client; - } data; -#endif -}; - - struct QCryptoTLSCredsAnonClass { QCryptoTLSCredsClass parent_class; }; diff --git a/include/crypto/tlscredspsk.h b/include/crypto/tlscredspsk.h index d7e6bdb5edf..bcd07dc4f62 100644 --- a/include/crypto/tlscredspsk.h +++ b/include/crypto/tlscredspsk.h @@ -87,18 +87,6 @@ typedef struct QCryptoTLSCredsPSKClass QCryptoTLSCredsPSKClass; * The PSK file can be created and managed using psktool. 
*/ -struct QCryptoTLSCredsPSK { - QCryptoTLSCreds parent_obj; - char *username; -#ifdef CONFIG_GNUTLS - union { - gnutls_psk_server_credentials_t server; - gnutls_psk_client_credentials_t client; - } data; -#endif -}; - - struct QCryptoTLSCredsPSKClass { QCryptoTLSCredsClass parent_class; }; diff --git a/include/crypto/tlscredsx509.h b/include/crypto/tlscredsx509.h index c6d89b78819..c4daba21a6b 100644 --- a/include/crypto/tlscredsx509.h +++ b/include/crypto/tlscredsx509.h @@ -96,16 +96,6 @@ typedef struct QCryptoTLSCredsX509Class QCryptoTLSCredsX509Class; * */ -struct QCryptoTLSCredsX509 { - QCryptoTLSCreds parent_obj; -#ifdef CONFIG_GNUTLS - gnutls_certificate_credentials_t data; -#endif - bool sanityCheck; - char *passwordid; -}; - - struct QCryptoTLSCredsX509Class { QCryptoTLSCredsClass parent_class; }; diff --git a/include/disas/dis-asm.h b/include/disas/dis-asm.h index 691e4687d96..446fe662c5b 100644 --- a/include/disas/dis-asm.h +++ b/include/disas/dis-asm.h @@ -9,6 +9,12 @@ #ifndef DISAS_DIS_ASM_H #define DISAS_DIS_ASM_H +#include "qemu/bswap.h" + +#ifdef __cplusplus +extern "C" { +#endif + typedef void *PTR; typedef uint64_t bfd_vma; typedef int64_t bfd_signed_vma; @@ -243,8 +249,6 @@ enum bfd_architecture #define bfd_mach_nios2 0 #define bfd_mach_nios2r1 1 #define bfd_mach_nios2r2 2 - bfd_arch_lm32, /* Lattice Mico32 */ -#define bfd_mach_lm32 1 bfd_arch_rx, /* Renesas RX */ #define bfd_mach_rx 0x75 #define bfd_mach_rx_v2 0x76 @@ -438,7 +442,6 @@ int print_insn_m32r (bfd_vma, disassemble_info*); int print_insn_m88k (bfd_vma, disassemble_info*); int print_insn_mn10200 (bfd_vma, disassemble_info*); int print_insn_mn10300 (bfd_vma, disassemble_info*); -int print_insn_moxie (bfd_vma, disassemble_info*); int print_insn_ns32k (bfd_vma, disassemble_info*); int print_insn_big_powerpc (bfd_vma, disassemble_info*); int print_insn_little_powerpc (bfd_vma, disassemble_info*); @@ -453,9 +456,7 @@ int print_insn_crisv32 (bfd_vma, disassemble_info*); int print_insn_crisv10 (bfd_vma, disassemble_info*); int print_insn_microblaze (bfd_vma, disassemble_info*); int print_insn_ia64 (bfd_vma, disassemble_info*); -int print_insn_lm32 (bfd_vma, disassemble_info*); -int print_insn_big_nios2 (bfd_vma, disassemble_info*); -int print_insn_little_nios2 (bfd_vma, disassemble_info*); +int print_insn_nios2(bfd_vma, disassemble_info*); int print_insn_xtensa (bfd_vma, disassemble_info*); int print_insn_riscv32 (bfd_vma, disassemble_info*); int print_insn_riscv64 (bfd_vma, disassemble_info*); @@ -482,8 +483,6 @@ bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size); /* from libbfd */ -#include "qemu/bswap.h" - static inline bfd_vma bfd_getl64(const bfd_byte *addr) { return ldq_le_p(addr); @@ -511,4 +510,8 @@ static inline bfd_vma bfd_getb16(const bfd_byte *addr) typedef bool bfd_boolean; +#ifdef __cplusplus +} +#endif + #endif /* DISAS_DIS_ASM_H */ diff --git a/include/elf.h b/include/elf.h index 78237c9a871..811bf4a1cb5 100644 --- a/include/elf.h +++ b/include/elf.h @@ -174,9 +174,8 @@ typedef struct mips_elf_abiflags_v0 { #define EM_OPENRISC 92 /* OpenCores OpenRISC */ -#define EM_UNICORE32 110 /* UniCore32 */ - #define EM_HEXAGON 164 /* Qualcomm Hexagon */ + #define EM_RX 173 /* Renesas RX family */ #define EM_RISCV 243 /* RISC-V */ @@ -206,9 +205,6 @@ typedef struct mips_elf_abiflags_v0 { #define EM_AARCH64 183 -#define EM_MOXIE 223 /* Moxie processor family */ -#define EM_MOXIE_OLD 0xFEED - #define EF_AVR_MACH 0x7F /* Mask for AVR e_flags to get core type */ /* This is the info that is 
needed to parse the dynamic section of the file */ @@ -609,6 +605,13 @@ typedef struct { #define HWCAP_S390_HIGH_GPRS 512 #define HWCAP_S390_TE 1024 #define HWCAP_S390_VXRS 2048 +#define HWCAP_S390_VXRS_BCD 4096 +#define HWCAP_S390_VXRS_EXT 8192 +#define HWCAP_S390_GS 16384 +#define HWCAP_S390_VXRS_EXT2 32768 +#define HWCAP_S390_VXRS_PDE 65536 +#define HWCAP_S390_SORT 131072 +#define HWCAP_S390_DFLT 262144 /* M68K specific definitions. */ /* We use the top 24 bits to encode information about the diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 2062f418473..65ea00e0654 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -442,10 +442,10 @@ static inline bool tlb_hit(target_ulong tlb_addr, target_ulong addr) #ifdef CONFIG_TCG /* accel/tcg/cpu-exec.c */ -void dump_drift_info(void); +void dump_drift_info(GString *buf); /* accel/tcg/translate-all.c */ -void dump_exec_info(void); -void dump_opcount_info(void); +void dump_exec_info(GString *buf); +void dump_opcount_info(GString *buf); #endif /* CONFIG_TCG */ #endif /* !CONFIG_USER_ONLY */ diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 5a0a2d93e06..039d422bf4c 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -57,7 +57,9 @@ const char *qemu_ram_get_idstr(RAMBlock *rb); void *qemu_ram_get_host_addr(RAMBlock *rb); ram_addr_t qemu_ram_get_offset(RAMBlock *rb); ram_addr_t qemu_ram_get_used_length(RAMBlock *rb); +ram_addr_t qemu_ram_get_max_length(RAMBlock *rb); bool qemu_ram_is_shared(RAMBlock *rb); +bool qemu_ram_is_noreserve(RAMBlock *rb); bool qemu_ram_is_uf_zeroable(RAMBlock *rb); void qemu_ram_set_uf_zeroable(RAMBlock *rb); bool qemu_ram_is_migratable(RAMBlock *rb); diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index 60d1fa5fffc..06b84804c86 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -28,10 +28,12 @@ * load: cpu_ld{sign}{size}{end}_{mmusuffix}(env, ptr) * cpu_ld{sign}{size}{end}_{mmusuffix}_ra(env, ptr, retaddr) * cpu_ld{sign}{size}{end}_mmuidx_ra(env, ptr, mmu_idx, retaddr) + * cpu_ld{sign}{size}{end}_mmu(env, ptr, oi, retaddr) * * store: cpu_st{size}{end}_{mmusuffix}(env, ptr, val) * cpu_st{size}{end}_{mmusuffix}_ra(env, ptr, val, retaddr) * cpu_st{size}{end}_mmuidx_ra(env, ptr, val, mmu_idx, retaddr) + * cpu_st{size}{end}_mmu(env, ptr, val, oi, retaddr) * * sign is: * (empty): for 32 and 64 bit sizes @@ -53,10 +55,16 @@ * The "mmuidx" suffix carries an extra mmu_idx argument that specifies * the index to use; the "data" and "code" suffixes take the index from * cpu_mmu_index(). + * + * The "mmu" suffix carries the full MemOpIdx, with both mmu_idx and the + * MemOp including alignment requirements. The alignment will be enforced. 
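A minimal sketch of the "_mmu" accessor flavour described above, as it might appear in a target helper (illustrative only, not part of this patch; make_memop_idx() and cpu_mmu_index() are assumed from elsewhere in the tree):

    MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, cpu_mmu_index(env, false));
    uint32_t val = cpu_ldl_mmu(env, addr, oi, GETPC());   /* alignment enforced */
    cpu_stl_mmu(env, addr, val, oi, GETPC());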
*/ #ifndef CPU_LDST_H #define CPU_LDST_H +#include "exec/memopidx.h" +#include "qemu/int128.h" + #if defined(CONFIG_USER_ONLY) /* sparc32plus has 64bit long but 32bit space address * this can make bad result with g2h() and h2g() @@ -118,12 +126,10 @@ typedef target_ulong abi_ptr; uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr ptr); int cpu_ldsb_data(CPUArchState *env, abi_ptr ptr); - uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr ptr); int cpu_ldsw_be_data(CPUArchState *env, abi_ptr ptr); uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr); uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr); - uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr ptr); int cpu_ldsw_le_data(CPUArchState *env, abi_ptr ptr); uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr); @@ -131,37 +137,31 @@ uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr ptr); uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); - uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); - uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val); - void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val); void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val); void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val); - void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val); void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val); void cpu_stq_le_data(CPUArchState *env, abi_ptr ptr, uint64_t val); void cpu_stb_data_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, uintptr_t ra); - void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, uintptr_t ra); void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, uintptr_t ra); void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr ptr, uint64_t val, uintptr_t ra); - void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, uintptr_t ra); void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr ptr, @@ -176,6 +176,157 @@ void cpu_st_cap_word_ra(CPUArchState *env, target_ulong ptr, target_ulong val, uintptr_t retaddr); #endif +uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +uint64_t 
cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); + +void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, + int mmu_idx, uintptr_t ra); +void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, + int mmu_idx, uintptr_t ra); +void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, + int mmu_idx, uintptr_t ra); +void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint64_t val, + int mmu_idx, uintptr_t ra); +void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, + int mmu_idx, uintptr_t ra); +void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, + int mmu_idx, uintptr_t ra); +void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint64_t val, + int mmu_idx, uintptr_t ra); + +uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra); +uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr ptr, + MemOpIdx oi, uintptr_t ra); +uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr ptr, + MemOpIdx oi, uintptr_t ra); +uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr ptr, + MemOpIdx oi, uintptr_t ra); +uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr ptr, + MemOpIdx oi, uintptr_t ra); +uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr ptr, + MemOpIdx oi, uintptr_t ra); +uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr ptr, + MemOpIdx oi, uintptr_t ra); + +void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val, + MemOpIdx oi, uintptr_t ra); +void cpu_stw_be_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val, + MemOpIdx oi, uintptr_t ra); +void cpu_stl_be_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val, + MemOpIdx oi, uintptr_t ra); +void cpu_stq_be_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val, + MemOpIdx oi, uintptr_t ra); +void cpu_stw_le_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val, + MemOpIdx oi, uintptr_t ra); +void cpu_stl_le_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val, + MemOpIdx oi, uintptr_t ra); +void cpu_stq_le_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val, + MemOpIdx oi, uintptr_t ra); + +uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + MemOpIdx oi, uintptr_t retaddr); +uint32_t cpu_atomic_cmpxchgw_le_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + MemOpIdx oi, uintptr_t retaddr); +uint32_t cpu_atomic_cmpxchgl_le_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + MemOpIdx oi, uintptr_t retaddr); +uint64_t cpu_atomic_cmpxchgq_le_mmu(CPUArchState *env, target_ulong addr, + uint64_t cmpv, uint64_t newv, + MemOpIdx oi, uintptr_t retaddr); +uint32_t cpu_atomic_cmpxchgw_be_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + MemOpIdx oi, uintptr_t retaddr); +uint32_t cpu_atomic_cmpxchgl_be_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + MemOpIdx oi, uintptr_t retaddr); +uint64_t cpu_atomic_cmpxchgq_be_mmu(CPUArchState *env, target_ulong addr, + uint64_t cmpv, uint64_t newv, + MemOpIdx oi, uintptr_t retaddr); + +#define GEN_ATOMIC_HELPER(NAME, TYPE, SUFFIX) \ +TYPE cpu_atomic_ ## NAME ## SUFFIX ## _mmu \ + (CPUArchState *env, target_ulong addr, TYPE val, \ + MemOpIdx oi, uintptr_t retaddr); + +#ifdef CONFIG_ATOMIC64 +#define GEN_ATOMIC_HELPER_ALL(NAME) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, b) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) \ + 
GEN_ATOMIC_HELPER(NAME, uint64_t, q_le) \ + GEN_ATOMIC_HELPER(NAME, uint64_t, q_be) +#else +#define GEN_ATOMIC_HELPER_ALL(NAME) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, b) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) +#endif + +GEN_ATOMIC_HELPER_ALL(fetch_add) +GEN_ATOMIC_HELPER_ALL(fetch_sub) +GEN_ATOMIC_HELPER_ALL(fetch_and) +GEN_ATOMIC_HELPER_ALL(fetch_or) +GEN_ATOMIC_HELPER_ALL(fetch_xor) +GEN_ATOMIC_HELPER_ALL(fetch_smin) +GEN_ATOMIC_HELPER_ALL(fetch_umin) +GEN_ATOMIC_HELPER_ALL(fetch_smax) +GEN_ATOMIC_HELPER_ALL(fetch_umax) + +GEN_ATOMIC_HELPER_ALL(add_fetch) +GEN_ATOMIC_HELPER_ALL(sub_fetch) +GEN_ATOMIC_HELPER_ALL(and_fetch) +GEN_ATOMIC_HELPER_ALL(or_fetch) +GEN_ATOMIC_HELPER_ALL(xor_fetch) +GEN_ATOMIC_HELPER_ALL(smin_fetch) +GEN_ATOMIC_HELPER_ALL(umin_fetch) +GEN_ATOMIC_HELPER_ALL(smax_fetch) +GEN_ATOMIC_HELPER_ALL(umax_fetch) + +GEN_ATOMIC_HELPER_ALL(xchg) + +#undef GEN_ATOMIC_HELPER_ALL +#undef GEN_ATOMIC_HELPER + +Int128 cpu_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + MemOpIdx oi, uintptr_t retaddr); +Int128 cpu_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + MemOpIdx oi, uintptr_t retaddr); + +Int128 cpu_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +Int128 cpu_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +void cpu_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, + MemOpIdx oi, uintptr_t retaddr); +void cpu_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, + MemOpIdx oi, uintptr_t retaddr); + #if defined(CONFIG_USER_ONLY) extern __thread uintptr_t helper_retaddr; @@ -200,119 +351,6 @@ static inline void clear_helper_retaddr(void) helper_retaddr = 0; } -/* - * Provide the same *_mmuidx_ra interface as for softmmu. - * The mmu_idx argument is ignored. 
- */ - -static inline uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldub_data_ra(env, addr, ra); -} - -static inline int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldsb_data_ra(env, addr, ra); -} - -static inline uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_lduw_be_data_ra(env, addr, ra); -} - -static inline int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldsw_be_data_ra(env, addr, ra); -} - -static inline uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldl_be_data_ra(env, addr, ra); -} - -static inline uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldq_be_data_ra(env, addr, ra); -} - -static inline uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_lduw_le_data_ra(env, addr, ra); -} - -static inline int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldsw_le_data_ra(env, addr, ra); -} - -static inline uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldl_le_data_ra(env, addr, ra); -} - -static inline uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldq_le_data_ra(env, addr, ra); -} - -static inline void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, - uint32_t val, int mmu_idx, uintptr_t ra) -{ - cpu_stb_data_ra(env, addr, val, ra); -} - -static inline void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - uint32_t val, int mmu_idx, - uintptr_t ra) -{ - cpu_stw_be_data_ra(env, addr, val, ra); -} - -static inline void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - uint32_t val, int mmu_idx, - uintptr_t ra) -{ - cpu_stl_be_data_ra(env, addr, val, ra); -} - -static inline void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - uint64_t val, int mmu_idx, - uintptr_t ra) -{ - cpu_stq_be_data_ra(env, addr, val, ra); -} - -static inline void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - uint32_t val, int mmu_idx, - uintptr_t ra) -{ - cpu_stw_le_data_ra(env, addr, val, ra); -} - -static inline void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - uint32_t val, int mmu_idx, - uintptr_t ra) -{ - cpu_stl_le_data_ra(env, addr, val, ra); -} - -static inline void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - uint64_t val, int mmu_idx, - uintptr_t ra) -{ - cpu_stq_le_data_ra(env, addr, val, ra); -} - #else /* Needed for TCG_OVERSIZED_GUEST */ @@ -343,46 +381,6 @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, return &env_tlb(env)->f[mmu_idx].table[tlb_index(env, mmu_idx, addr)]; } -uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); - -uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); - -uint32_t 
cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); - -void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); - -void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); -void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); -void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, - int mmu_idx, uintptr_t retaddr); - -void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); -void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); -void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, - int mmu_idx, uintptr_t retaddr); - #endif /* defined(CONFIG_USER_ONLY) */ #ifdef TARGET_WORDS_BIGENDIAN @@ -398,6 +396,9 @@ void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, # define cpu_ldsw_mmuidx_ra cpu_ldsw_be_mmuidx_ra # define cpu_ldl_mmuidx_ra cpu_ldl_be_mmuidx_ra # define cpu_ldq_mmuidx_ra cpu_ldq_be_mmuidx_ra +# define cpu_ldw_mmu cpu_ldw_be_mmu +# define cpu_ldl_mmu cpu_ldl_be_mmu +# define cpu_ldq_mmu cpu_ldq_be_mmu # define cpu_stw_data cpu_stw_be_data # define cpu_stl_data cpu_stl_be_data # define cpu_stq_data cpu_stq_be_data @@ -407,6 +408,9 @@ void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, # define cpu_stw_mmuidx_ra cpu_stw_be_mmuidx_ra # define cpu_stl_mmuidx_ra cpu_stl_be_mmuidx_ra # define cpu_stq_mmuidx_ra cpu_stq_be_mmuidx_ra +# define cpu_stw_mmu cpu_stw_be_mmu +# define cpu_stl_mmu cpu_stl_be_mmu +# define cpu_stq_mmu cpu_stq_be_mmu #else # define cpu_lduw_data cpu_lduw_le_data # define cpu_ldsw_data cpu_ldsw_le_data @@ -420,6 +424,9 @@ void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, # define cpu_ldsw_mmuidx_ra cpu_ldsw_le_mmuidx_ra # define cpu_ldl_mmuidx_ra cpu_ldl_le_mmuidx_ra # define cpu_ldq_mmuidx_ra cpu_ldq_le_mmuidx_ra +# define cpu_ldw_mmu cpu_ldw_le_mmu +# define cpu_ldl_mmu cpu_ldl_le_mmu +# define cpu_ldq_mmu cpu_ldq_le_mmu # define cpu_stw_data cpu_stw_le_data # define cpu_stl_data cpu_stl_le_data # define cpu_stq_data cpu_stq_le_data @@ -429,6 +436,9 @@ void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, # define cpu_stw_mmuidx_ra cpu_stw_le_mmuidx_ra # define cpu_stl_mmuidx_ra cpu_stl_le_mmuidx_ra # define cpu_stq_mmuidx_ra cpu_stq_le_mmuidx_ra +# define cpu_stw_mmu cpu_stw_le_mmu +# define cpu_stl_mmu cpu_stl_le_mmu +# define cpu_stq_mmu cpu_stq_le_mmu #endif uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 4376e654517..57dd90a76ad 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -21,7 +21,6 @@ #define EXEC_ALL_H #include "cpu.h" -#include "exec/tb-context.h" #ifdef CONFIG_TCG #include "exec/cpu_ldst.h" #endif @@ -262,6 +261,31 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr, void tlb_flush_page_bits_by_mmuidx_all_cpus_synced (CPUState *cpu, target_ulong addr, uint16_t idxmap, unsigned bits); +/** + * tlb_flush_range_by_mmuidx + * @cpu: CPU whose TLB should 
be flushed + * @addr: virtual address of the start of the range to be flushed + * @len: length of range to be flushed + * @idxmap: bitmap of mmu indexes to flush + * @bits: number of significant bits in address + * + * For each mmuidx in @idxmap, flush all pages within [@addr,@addr+@len), + * comparing only the low @bits worth of each virtual page. + */ +void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr, + target_ulong len, uint16_t idxmap, + unsigned bits); + +/* Similarly, with broadcast and syncing. */ +void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr, + target_ulong len, uint16_t idxmap, + unsigned bits); +void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu, + target_ulong addr, + target_ulong len, + uint16_t idxmap, + unsigned bits); + /** * tlb_set_page_with_attrs: * @cpu: CPU to add this TLB entry for @@ -365,6 +389,25 @@ tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr, uint16_t idxmap, unsigned bits) { } +static inline void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr, + target_ulong len, uint16_t idxmap, + unsigned bits) +{ +} +static inline void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu, + target_ulong addr, + target_ulong len, + uint16_t idxmap, + unsigned bits) +{ +} +static inline void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu, + target_ulong addr, + target_long len, + uint16_t idxmap, + unsigned bits) +{ +} #endif /** * probe_access: @@ -446,21 +489,28 @@ struct tb_tc { struct TranslationBlock { target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */ target_ulong cs_base; /* CS base for this block */ - target_ulong cs_top; /* End (exclusive) of code segment for this block */ + target_ulong pcc_base; /* CHERI: Base of program counter for this block */ + target_ulong pcc_top; /* CHERI: End of program counter for this block */ uint32_t cheri_flags; /* Extra flags for CHERI. We need more bits than are available in flags (at least for MIPS) and this will allow us to avoid continuously changing the bits that we use when merging from upstream. */ uint32_t flags; /* flags defining in which context the code was generated */ uint32_t cflags; /* compile flags */ -#define CF_COUNT_MASK 0x00007fff -#define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */ -#define CF_MEMI_ONLY 0x00010000 /* Only instrument memory ops */ -#define CF_USE_ICOUNT 0x00020000 -#define CF_INVALID 0x00040000 /* TB is stale. Set with @jmp_lock held */ -#define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */ -#define CF_LOG_INSTR 0x00100000 /* Generate calls to instruction tracing */ -#define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */ + +/* Note that TCG_MAX_INSNS is 512; we validate this match elsewhere. */ +#define CF_COUNT_MASK 0x000001ff +#define CF_NO_GOTO_TB 0x00000200 /* Do not chain with goto_tb */ +#define CF_NO_GOTO_PTR 0x00000400 /* Do not chain with goto_ptr */ +#define CF_SINGLE_STEP 0x00000800 /* gdbstub single-step in effect */ +#define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */ +#define CF_MEMI_ONLY 0x00010000 /* Only instrument memory ops */ +#define CF_USE_ICOUNT 0x00020000 +#define CF_INVALID 0x00040000 /* TB is stale. 
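/*
 * Illustrative sketch, not part of the patch: a target that previously
 * looped over single-page flushes can now flush a contiguous range with
 * tlb_flush_range_by_mmuidx() declared above. The mmu-index mask and bit
 * count below are example values only.
 */
static void flush_two_pages_example(CPUState *cs, target_ulong va)
{
    /* Flush [va, va + 2 pages) for mmu index 0, comparing all address bits. */
    tlb_flush_range_by_mmuidx(cs, va, 2 * TARGET_PAGE_SIZE,
                              1u << 0, TARGET_LONG_BITS);
}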
Set with @jmp_lock held */ +#define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */ +#define CF_NOIRQ 0x00100000 /* Generate an uninterruptible TB */ +#define CF_LOG_INSTR 0x00200000 /* Generate calls to instruction tracing */ +#define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */ #define CF_CLUSTER_SHIFT 24 /* Per-vCPU dynamic tracing state used to generate this TB */ @@ -518,15 +568,17 @@ struct TranslationBlock { uintptr_t jmp_dest[2]; }; -// Reduce diff to upstream for CHERI (since we addd cs_top/ds_base/ds_top) -#if !defined(cpu_get_tb_cpu_state_6) -static inline void cpu_get_tb_cpu_state_6(CPUArchState *env, target_ulong *pc, +/* Reduce diff to upstream for CHERI since we add pcc_{base,top},cheri_flags. */ +#if !defined(cpu_get_tb_cpu_state_ext) +static inline void cpu_get_tb_cpu_state_ext(CPUArchState *env, target_ulong *pc, target_ulong *cs_base, - target_ulong *cs_top, + target_ulong *pcc_base, + target_ulong *pcc_top, uint32_t *cheri_flags, uint32_t *flags) { - (void)cs_top; + (void)pcc_base; + (void)pcc_top; (void)cheri_flags; cpu_get_tb_cpu_state(env, pc, cs_base, flags); } @@ -539,10 +591,7 @@ static inline uint32_t tb_cflags(const TranslationBlock *tb) } /* current cflags for hashing/comparison */ -static inline uint32_t curr_cflags(CPUState *cpu) -{ - return cpu->tcg_cflags; -} +uint32_t curr_cflags(CPUState *cpu); /* TranslationBlock invalidate API */ #if defined(CONFIG_USER_ONLY) @@ -554,9 +603,9 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs); void tb_flush(CPUState *cpu); void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr); TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, - target_ulong cs_base, target_ulong cs_top, - uint32_t cheri_flags, uint32_t flags, - uint32_t cflags); + target_ulong cs_base, target_ulong pcc_base, + target_ulong pcc_top, uint32_t cheri_flags, + uint32_t flags, uint32_t cflags); void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr); /* GETPC is the true target of the return instruction that we'll execute. */ @@ -637,6 +686,58 @@ static inline tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, } return addr; } + +/** + * adjust_signal_pc: + * @pc: raw pc from the host signal ucontext_t. + * @is_write: host memory operation was write, or read-modify-write. + * + * Alter @pc as required for unwinding. Return the type of the + * guest memory access -- host reads may be for guest execution. + */ +MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write); + +/** + * handle_sigsegv_accerr_write: + * @cpu: the cpu context + * @old_set: the sigset_t from the signal ucontext_t + * @host_pc: the host pc, adjusted for the signal + * @host_addr: the host address of the fault + * + * Return true if the write fault has been handled, and should be re-tried. + */ +bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set, + uintptr_t host_pc, abi_ptr guest_addr); + +/** + * cpu_loop_exit_sigsegv: + * @cpu: the cpu context + * @addr: the guest address of the fault + * @access_type: access was read/write/execute + * @maperr: true for invalid page, false for permission fault + * @ra: host pc for unwinding + * + * Use the TCGCPUOps hook to record cpu state, do guest operating system + * specific things to raise SIGSEGV, and jump to the main cpu loop. 
+ */ +void QEMU_NORETURN cpu_loop_exit_sigsegv(CPUState *cpu, target_ulong addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra); + +/** + * cpu_loop_exit_sigbus: + * @cpu: the cpu context + * @addr: the guest address of the alignment fault + * @access_type: access was read/write/execute + * @ra: host pc for unwinding + * + * Use the TCGCPUOps hook to record cpu state, do guest operating system + * specific things to raise SIGBUS, and jump to the main cpu loop. + */ +void QEMU_NORETURN cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr, + MMUAccessType access_type, + uintptr_t ra); + #else static inline void mmap_lock(void) {} static inline void mmap_unlock(void) {} diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h index 298e01eef49..c57204ddad9 100644 --- a/include/exec/gen-icount.h +++ b/include/exec/gen-icount.h @@ -1,6 +1,7 @@ #ifndef GEN_ICOUNT_H #define GEN_ICOUNT_H +#include "exec/exec-all.h" #include "qemu/timer.h" /* Helpers for instruction counting code generation. */ @@ -16,27 +17,10 @@ static inline void gen_io_start(void) tcg_temp_free_i32(tmp); } -/* - * cpu->can_do_io is cleared automatically at the beginning of - * each translation block. The cost is minimal and only paid - * for -icount, plus it would be very easy to forget doing it - * in the translator. Therefore, backends only need to call - * gen_io_start. - */ -static inline void gen_io_end(void) -{ - TCGv_i32 tmp = tcg_const_i32(0); - tcg_gen_st_i32(tmp, cpu_env, - offsetof(ArchCPU, parent_obj.can_do_io) - - offsetof(ArchCPU, env)); - tcg_temp_free_i32(tmp); -} - static inline void gen_tb_start(const TranslationBlock *tb) { TCGv_i32 count; - tcg_ctx->exitreq_label = gen_new_label(); if (tb_cflags(tb) & CF_USE_ICOUNT) { count = tcg_temp_local_new_i32(); } else { @@ -57,13 +41,34 @@ static inline void gen_tb_start(const TranslationBlock *tb) icount_start_insn = tcg_last_op(); } - tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label); + /* + * Emit the check against icount_decr.u32 to see if we should exit + * unless we suppress the check with CF_NOIRQ. If we are using + * icount and have suppressed interruption the higher level code + * should have ensured we don't run more instructions than the + * budget. + */ + if (tb_cflags(tb) & CF_NOIRQ) { + tcg_ctx->exitreq_label = NULL; + } else { + tcg_ctx->exitreq_label = gen_new_label(); + tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label); + } if (tb_cflags(tb) & CF_USE_ICOUNT) { tcg_gen_st16_i32(count, cpu_env, offsetof(ArchCPU, neg.icount_decr.u16.low) - offsetof(ArchCPU, env)); - gen_io_end(); + /* + * cpu->can_do_io is cleared automatically here at the beginning of + * each translation block. The cost is minimal and only paid for + * -icount, plus it would be very easy to forget doing it in the + * translator. Doing it here means we don't need a gen_io_end() to + * go with gen_io_start(). 
+ */ + tcg_gen_st_i32(tcg_constant_i32(0), cpu_env, + offsetof(ArchCPU, parent_obj.can_do_io) - + offsetof(ArchCPU, env)); } tcg_temp_free_i32(count); @@ -80,8 +85,10 @@ static inline void gen_tb_end(const TranslationBlock *tb, int num_insns) tcgv_i32_arg(tcg_constant_i32(num_insns))); } - gen_set_label(tcg_ctx->exitreq_label); - tcg_gen_exit_tb(tb, TB_EXIT_REQUESTED); + if (tcg_ctx->exitreq_label) { + gen_set_label(tcg_ctx->exitreq_label); + tcg_gen_exit_tb(tb, TB_EXIT_REQUESTED); + } } #endif diff --git a/include/exec/helper-gen.h b/include/exec/helper-gen.h index 29c02f85dcc..1c2e7a8ed39 100644 --- a/include/exec/helper-gen.h +++ b/include/exec/helper-gen.h @@ -81,8 +81,8 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ #include "helper.h" #include "trace/generated-helpers.h" #include "trace/generated-helpers-wrappers.h" -#include "tcg-runtime.h" -#include "plugin-helpers.h" +#include "accel/tcg/tcg-runtime.h" +#include "accel/tcg/plugin-helpers.h" #undef DEF_HELPER_FLAGS_0 #undef DEF_HELPER_FLAGS_1 diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h index bf171853b73..da1a8bc9c5e 100644 --- a/include/exec/helper-head.h +++ b/include/exec/helper-head.h @@ -49,8 +49,8 @@ #define dh_ctype_cptr const void * #define dh_ctype_void void #define dh_ctype_memop MemOp -/* Can't use TCGMemOpIdx here due to include ordering. */ -#define dh_ctype_memop_idx uint32_t /* TCGMemOpIdx */ +/* Can't use MemOpIdx here due to include ordering. */ +#define dh_ctype_memop_idx uint32_t /* MemOpIdx */ #define dh_ctype_noreturn void QEMU_NORETURN #define dh_ctype(t) dh_ctype_##t @@ -95,38 +95,15 @@ #define dh_retvar_cap_checked_ptr tcgv_cap_checked_ptr_temp(retval) #define dh_retvar(t) glue(dh_retvar_, dh_alias(t)) -#define dh_is_64bit_void 0 -#define dh_is_64bit_memop 0 -#define dh_is_64bit_memop_idx 0 -#define dh_is_64bit_noreturn 0 -#define dh_is_64bit_i32 0 -#define dh_is_64bit_i64 1 -#define dh_is_64bit_ptr (sizeof(void *) == 8) -#define dh_is_64bit_cptr dh_is_64bit_ptr -#define dh_is_64bit_cap_checked_ptr (sizeof(target_ulong) == 8) -#define dh_is_64bit(t) glue(dh_is_64bit_, dh_alias(t)) - -#define dh_is_signed_void 0 -#define dh_is_signed_memop 0 -#define dh_is_signed_memop_idx 0 -#define dh_is_signed_noreturn 0 -#define dh_is_signed_i32 0 -#define dh_is_signed_s32 1 -#define dh_is_signed_i64 0 -#define dh_is_signed_s64 1 -#define dh_is_signed_f16 0 -#define dh_is_signed_f32 0 -#define dh_is_signed_f64 0 -#define dh_is_signed_tl 0 -#define dh_is_signed_cap_checked_ptr 0 -#define dh_is_signed_int 1 -/* ??? This is highly specific to the host cpu. There are even special - extension instructions that may be required, e.g. ia64's addp4. But - for now we don't support any 64-bit targets with 32-bit pointers. 
*/ -#define dh_is_signed_ptr 0 -#define dh_is_signed_cptr dh_is_signed_ptr -#define dh_is_signed_env dh_is_signed_ptr -#define dh_is_signed(t) dh_is_signed_##t +#define dh_typecode_void 0 +#define dh_typecode_noreturn 0 +#define dh_typecode_i32 2 +#define dh_typecode_s32 3 +#define dh_typecode_i64 4 +#define dh_typecode_s64 5 +#define dh_typecode_ptr 6 +#define dh_typecode_cap_checked_ptr 6 +#define dh_typecode(t) glue(dh_typecode_, dh_alias(t)) #define dh_callflag_i32 0 #define dh_callflag_s32 0 @@ -145,8 +122,7 @@ #define dh_callflag_noreturn TCG_CALL_NO_RETURN #define dh_callflag(t) glue(dh_callflag_, dh_alias(t)) -#define dh_sizemask(t, n) \ - ((dh_is_64bit(t) << (n*2)) | (dh_is_signed(t) << (n*2+1))) +#define dh_typemask(t, n) (dh_typecode(t) << (n * 3)) #define dh_arg(t, n) \ glue(glue(tcgv_, dh_alias(t)), _temp)(glue(arg, n)) diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h index 659f9298e8f..ba100793a7d 100644 --- a/include/exec/helper-proto.h +++ b/include/exec/helper-proto.h @@ -39,8 +39,8 @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ #include "helper.h" #include "trace/generated-helpers.h" -#include "tcg-runtime.h" -#include "plugin-helpers.h" +#include "accel/tcg/tcg-runtime.h" +#include "accel/tcg/plugin-helpers.h" #undef IN_HELPER_PROTO diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h index 27870509a20..16cd318b836 100644 --- a/include/exec/helper-tcg.h +++ b/include/exec/helper-tcg.h @@ -13,55 +13,55 @@ #define DEF_HELPER_FLAGS_0(NAME, FLAGS, ret) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) }, + .typemask = dh_typemask(ret, 0) }, #define DEF_HELPER_FLAGS_1(NAME, FLAGS, ret, t1) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) }, #define DEF_HELPER_FLAGS_2(NAME, FLAGS, ret, t1, t2) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) }, #define DEF_HELPER_FLAGS_3(NAME, FLAGS, ret, t1, t2, t3) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) | dh_typemask(t3, 3) }, #define DEF_HELPER_FLAGS_4(NAME, FLAGS, ret, t1, t2, t3, t4) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) }, #define DEF_HELPER_FLAGS_5(NAME, FLAGS, ret, t1, t2, t3, t4, t5) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ - | dh_sizemask(t5, 5) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) \ + | dh_typemask(t5, 5) }, #define DEF_HELPER_FLAGS_6(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6) \ { .func = HELPER(NAME), 
.name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ - | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) \ + | dh_typemask(t5, 5) | dh_typemask(t6, 6) }, #define DEF_HELPER_FLAGS_7(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6, t7) \ { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ - | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) | dh_sizemask(t7, 7) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) \ + | dh_typemask(t5, 5) | dh_typemask(t6, 6) | dh_typemask(t7, 7) }, #include "helper.h" #include "trace/generated-helpers.h" -#include "tcg-runtime.h" -#include "plugin-helpers.h" +#include "accel/tcg/tcg-runtime.h" +#include "accel/tcg/plugin-helpers.h" #undef str #undef DEF_HELPER_FLAGS_0 diff --git a/include/exec/memop.h b/include/exec/memop.h index 529d07b02dd..04264ffd6b5 100644 --- a/include/exec/memop.h +++ b/include/exec/memop.h @@ -19,11 +19,15 @@ typedef enum MemOp { MO_16 = 1, MO_32 = 2, MO_64 = 3, - MO_SIZE = 3, /* Mask for the above. */ + MO_128 = 4, + MO_256 = 5, + MO_512 = 6, + MO_1024 = 7, + MO_SIZE = 0x07, /* Mask for the above. */ - MO_SIGN = 4, /* Sign-extended, otherwise zero-extended. */ + MO_SIGN = 0x08, /* Sign-extended, otherwise zero-extended. */ - MO_BSWAP = 8, /* Host reverse endian. */ + MO_BSWAP = 0x10, /* Host reverse endian. */ #ifdef HOST_WORDS_BIGENDIAN MO_LE = MO_BSWAP, MO_BE = 0, @@ -59,8 +63,8 @@ typedef enum MemOp { * - an alignment to a specified size, which may be more or less than * the access size (MO_ALIGN_x where 'x' is a size in bytes); */ - MO_ASHIFT = 4, - MO_AMASK = 7 << MO_ASHIFT, + MO_ASHIFT = 5, + MO_AMASK = 0x7 << MO_ASHIFT, #ifdef NEED_CPU_H #ifdef TARGET_ALIGNED_ONLY MO_ALIGN = 0, diff --git a/include/exec/memopidx.h b/include/exec/memopidx.h new file mode 100644 index 00000000000..83bce97874d --- /dev/null +++ b/include/exec/memopidx.h @@ -0,0 +1,55 @@ +/* + * Combine the MemOp and mmu_idx parameters into a single value. + * + * Authors: + * Richard Henderson + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef EXEC_MEMOPIDX_H +#define EXEC_MEMOPIDX_H 1 + +#include "exec/memop.h" + +typedef uint32_t MemOpIdx; + +/** + * make_memop_idx + * @op: memory operation + * @idx: mmu index + * + * Encode these values into a single parameter. + */ +static inline MemOpIdx make_memop_idx(MemOp op, unsigned idx) +{ +#ifdef CONFIG_DEBUG_TCG + assert(idx <= 15); +#endif + return (op << 4) | idx; +} + +/** + * get_memop + * @oi: combined op/idx parameter + * + * Extract the memory operation from the combined value. + */ +static inline MemOp get_memop(MemOpIdx oi) +{ + return oi >> 4; +} + +/** + * get_mmuidx + * @oi: combined op/idx parameter + * + * Extract the mmu index from the combined value. 
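/*
 * Illustrative sketch, not part of the patch: composing and decomposing the
 * new MemOpIdx from exec/memopidx.h, and passing it to one of the
 * cpu_*_mmu accessors declared earlier. MO_LEUL and mmu index 1 are
 * example values only.
 */
static uint32_t memopidx_example(CPUArchState *env, abi_ptr addr, uintptr_t ra)
{
    MemOpIdx oi = make_memop_idx(MO_LEUL, 1);   /* 32-bit LE access, mmu idx 1 */

    g_assert(get_memop(oi) == MO_LEUL);
    g_assert(get_mmuidx(oi) == 1);

    return cpu_ldl_le_mmu(env, addr, oi, ra);
}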
+ */ +static inline unsigned get_mmuidx(MemOpIdx oi) +{ + return oi & 15; +} + +#endif diff --git a/include/exec/memory.h b/include/exec/memory.h index 5728a681b27..20f1b27377e 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -42,6 +42,12 @@ typedef struct IOMMUMemoryRegionClass IOMMUMemoryRegionClass; DECLARE_OBJ_CHECKERS(IOMMUMemoryRegion, IOMMUMemoryRegionClass, IOMMU_MEMORY_REGION, TYPE_IOMMU_MEMORY_REGION) +#define TYPE_RAM_DISCARD_MANAGER "qemu:ram-discard-manager" +typedef struct RamDiscardManagerClass RamDiscardManagerClass; +typedef struct RamDiscardManager RamDiscardManager; +DECLARE_OBJ_CHECKERS(RamDiscardManager, RamDiscardManagerClass, + RAM_DISCARD_MANAGER, TYPE_RAM_DISCARD_MANAGER); + #ifdef CONFIG_FUZZ void fuzz_dma_read_cb(size_t addr, size_t len, @@ -55,7 +61,17 @@ static inline void fuzz_dma_read_cb(size_t addr, } #endif -extern bool global_dirty_log; +/* Possible bits for global_dirty_log_{start|stop} */ + +/* Dirty tracking enabled because migration is running */ +#define GLOBAL_DIRTY_MIGRATION (1U << 0) + +/* Dirty tracking enabled because measuring dirty rate */ +#define GLOBAL_DIRTY_DIRTY_RATE (1U << 1) + +#define GLOBAL_DIRTY_MASK (0x3) + +extern unsigned int global_dirty_tracking; typedef struct MemoryRegionOps MemoryRegionOps; @@ -65,6 +81,28 @@ struct ReservedRegion { unsigned type; }; +/** + * struct MemoryRegionSection: describes a fragment of a #MemoryRegion + * + * @mr: the region, or %NULL if empty + * @fv: the flat view of the address space the region is mapped in + * @offset_within_region: the beginning of the section, relative to @mr's start + * @size: the size of the section; will not exceed @mr's boundaries + * @offset_within_address_space: the address of the first byte of the section + * relative to the region's address space + * @readonly: writes to this section are ignored + * @nonvolatile: this section is non-volatile + */ +struct MemoryRegionSection { + Int128 size; + MemoryRegion *mr; + FlatView *fv; + hwaddr offset_within_region; + hwaddr offset_within_address_space; + bool readonly; + bool nonvolatile; +}; + typedef struct IOMMUTLBEntry IOMMUTLBEntry; /* See address_space_translate: bit 0 is read, bit 1 is write. */ @@ -131,7 +169,7 @@ typedef struct IOMMUTLBEvent { #define RAM_SHARED (1 << 1) /* Only a portion of RAM (used_length) is actually used, and migrated. - * This used_length size can change across reboots. + * Resizing RAM while migrating can result in the migration being canceled. */ #define RAM_RESIZEABLE (1 << 2) @@ -155,6 +193,16 @@ typedef struct IOMMUTLBEvent { */ #define RAM_UF_WRITEPROTECT (1 << 6) +/* + * RAM is mmap-ed with MAP_NORESERVE. When set, reserving swap space (or huge + * pages if applicable) is skipped: will bail out if not supported. When not + * set, the OS will do the reservation, if supported for the memory type. + */ +#define RAM_NORESERVE (1 << 7) + +/* RAM that isn't accessible through normal means. 
*/ +#define RAM_PROTECTED (1 << 8) + static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, IOMMUNotifierFlag flags, hwaddr start, hwaddr end, @@ -441,6 +489,227 @@ struct IOMMUMemoryRegionClass { Error **errp); }; +typedef struct RamDiscardListener RamDiscardListener; +typedef int (*NotifyRamPopulate)(RamDiscardListener *rdl, + MemoryRegionSection *section); +typedef void (*NotifyRamDiscard)(RamDiscardListener *rdl, + MemoryRegionSection *section); + +struct RamDiscardListener { + /* + * @notify_populate: + * + * Notification that previously discarded memory is about to get populated. + * Listeners are able to object. If any listener objects, already + * successfully notified listeners are notified about a discard again. + * + * @rdl: the #RamDiscardListener getting notified + * @section: the #MemoryRegionSection to get populated. The section + * is aligned within the memory region to the minimum granularity + * unless it would exceed the registered section. + * + * Returns 0 on success. If the notification is rejected by the listener, + * an error is returned. + */ + NotifyRamPopulate notify_populate; + + /* + * @notify_discard: + * + * Notification that previously populated memory was discarded successfully + * and listeners should drop all references to such memory and prevent + * new population (e.g., unmap). + * + * @rdl: the #RamDiscardListener getting notified + * @section: the #MemoryRegionSection to get populated. The section + * is aligned within the memory region to the minimum granularity + * unless it would exceed the registered section. + */ + NotifyRamDiscard notify_discard; + + /* + * @double_discard_supported: + * + * The listener suppors getting @notify_discard notifications that span + * already discarded parts. + */ + bool double_discard_supported; + + MemoryRegionSection *section; + QLIST_ENTRY(RamDiscardListener) next; +}; + +static inline void ram_discard_listener_init(RamDiscardListener *rdl, + NotifyRamPopulate populate_fn, + NotifyRamDiscard discard_fn, + bool double_discard_supported) +{ + rdl->notify_populate = populate_fn; + rdl->notify_discard = discard_fn; + rdl->double_discard_supported = double_discard_supported; +} + +typedef int (*ReplayRamPopulate)(MemoryRegionSection *section, void *opaque); +typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque); + +/* + * RamDiscardManagerClass: + * + * A #RamDiscardManager coordinates which parts of specific RAM #MemoryRegion + * regions are currently populated to be used/accessed by the VM, notifying + * after parts were discarded (freeing up memory) and before parts will be + * populated (consuming memory), to be used/acessed by the VM. + * + * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the + * #MemoryRegion isn't mapped yet; it cannot change while the #MemoryRegion is + * mapped. + * + * The #RamDiscardManager is intended to be used by technologies that are + * incompatible with discarding of RAM (e.g., VFIO, which may pin all + * memory inside a #MemoryRegion), and require proper coordination to only + * map the currently populated parts, to hinder parts that are expected to + * remain discarded from silently getting populated and consuming memory. + * Technologies that support discarding of RAM don't have to bother and can + * simply map the whole #MemoryRegion. + * + * An example #RamDiscardManager is virtio-mem, which logically (un)plugs + * memory within an assigned RAM #MemoryRegion, coordinated with the VM. 
+ * Logically unplugging memory consists of discarding RAM. The VM agreed to not + * access unplugged (discarded) memory - especially via DMA. virtio-mem will + * properly coordinate with listeners before memory is plugged (populated), + * and after memory is unplugged (discarded). + * + * Listeners are called in multiples of the minimum granularity (unless it + * would exceed the registered range) and changes are aligned to the minimum + * granularity within the #MemoryRegion. Listeners have to prepare for memory + * becomming discarded in a different granularity than it was populated and the + * other way around. + */ +struct RamDiscardManagerClass { + /* private */ + InterfaceClass parent_class; + + /* public */ + + /** + * @get_min_granularity: + * + * Get the minimum granularity in which listeners will get notified + * about changes within the #MemoryRegion via the #RamDiscardManager. + * + * @rdm: the #RamDiscardManager + * @mr: the #MemoryRegion + * + * Returns the minimum granularity. + */ + uint64_t (*get_min_granularity)(const RamDiscardManager *rdm, + const MemoryRegion *mr); + + /** + * @is_populated: + * + * Check whether the given #MemoryRegionSection is completely populated + * (i.e., no parts are currently discarded) via the #RamDiscardManager. + * There are no alignment requirements. + * + * @rdm: the #RamDiscardManager + * @section: the #MemoryRegionSection + * + * Returns whether the given range is completely populated. + */ + bool (*is_populated)(const RamDiscardManager *rdm, + const MemoryRegionSection *section); + + /** + * @replay_populated: + * + * Call the #ReplayRamPopulate callback for all populated parts within the + * #MemoryRegionSection via the #RamDiscardManager. + * + * In case any call fails, no further calls are made. + * + * @rdm: the #RamDiscardManager + * @section: the #MemoryRegionSection + * @replay_fn: the #ReplayRamPopulate callback + * @opaque: pointer to forward to the callback + * + * Returns 0 on success, or a negative error if any notification failed. + */ + int (*replay_populated)(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamPopulate replay_fn, void *opaque); + + /** + * @replay_discarded: + * + * Call the #ReplayRamDiscard callback for all discarded parts within the + * #MemoryRegionSection via the #RamDiscardManager. + * + * @rdm: the #RamDiscardManager + * @section: the #MemoryRegionSection + * @replay_fn: the #ReplayRamDiscard callback + * @opaque: pointer to forward to the callback + */ + void (*replay_discarded)(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscard replay_fn, void *opaque); + + /** + * @register_listener: + * + * Register a #RamDiscardListener for the given #MemoryRegionSection and + * immediately notify the #RamDiscardListener about all populated parts + * within the #MemoryRegionSection via the #RamDiscardManager. + * + * In case any notification fails, no further notifications are triggered + * and an error is logged. + * + * @rdm: the #RamDiscardManager + * @rdl: the #RamDiscardListener + * @section: the #MemoryRegionSection + */ + void (*register_listener)(RamDiscardManager *rdm, + RamDiscardListener *rdl, + MemoryRegionSection *section); + + /** + * @unregister_listener: + * + * Unregister a previously registered #RamDiscardListener via the + * #RamDiscardManager after notifying the #RamDiscardListener about all + * populated parts becoming unpopulated within the registered + * #MemoryRegionSection. 
+ * + * @rdm: the #RamDiscardManager + * @rdl: the #RamDiscardListener + */ + void (*unregister_listener)(RamDiscardManager *rdm, + RamDiscardListener *rdl); +}; + +uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm, + const MemoryRegion *mr); + +bool ram_discard_manager_is_populated(const RamDiscardManager *rdm, + const MemoryRegionSection *section); + +int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamPopulate replay_fn, + void *opaque); + +void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscard replay_fn, + void *opaque); + +void ram_discard_manager_register_listener(RamDiscardManager *rdm, + RamDiscardListener *rdl, + MemoryRegionSection *section); + +void ram_discard_manager_unregister_listener(RamDiscardManager *rdm, + RamDiscardListener *rdl); + typedef struct CoalescedMemoryRange CoalescedMemoryRange; typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd; @@ -487,6 +756,7 @@ struct MemoryRegion { const char *name; unsigned ioeventfd_nb; MemoryRegionIoeventfd *ioeventfds; + RamDiscardManager *rdm; /* Only for RAM */ }; struct IOMMUMemoryRegion { @@ -571,7 +841,7 @@ struct MemoryListener { * @log_start: * * Called during an address space update transaction, after - * one of #MemoryListener.region_add(),#MemoryListener.region_del() or + * one of #MemoryListener.region_add(), #MemoryListener.region_del() or * #MemoryListener.region_nop(), if dirty memory logging clients have * become active since the last transaction. * @@ -616,6 +886,18 @@ struct MemoryListener { */ void (*log_sync)(MemoryListener *listener, MemoryRegionSection *section); + /** + * @log_sync_global: + * + * This is the global version of @log_sync when the listener does + * not have a way to synchronize the log with finer granularity. + * When the listener registers with @log_sync_global defined, then + * its @log_sync must be NULL. Vice versa. + * + * @listener: The #MemoryListener. + */ + void (*log_sync_global)(MemoryListener *listener); + /** * @log_clear: * @@ -731,6 +1013,14 @@ struct MemoryListener { */ unsigned priority; + /** + * @name: + * + * Name of the listener. It can be used in contexts where we'd like to + * identify one memory listener with the rest. 
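/*
 * Illustrative sketch, not part of the patch: how a backend that pins guest
 * RAM (a VFIO-like consumer) might wire itself up to the RamDiscardManager
 * interface declared above. The example_* names are hypothetical.
 */
static int example_notify_populate(RamDiscardListener *rdl,
                                   MemoryRegionSection *section)
{
    /* Pin/map the newly populated part; a non-zero return rejects it. */
    return 0;
}

static void example_notify_discard(RamDiscardListener *rdl,
                                   MemoryRegionSection *section)
{
    /* Unpin/unmap the discarded part. */
}

static void example_register(RamDiscardManager *rdm, RamDiscardListener *rdl,
                             MemoryRegionSection *section)
{
    ram_discard_listener_init(rdl, example_notify_populate,
                              example_notify_discard, false);
    /* Registration immediately replays notify_populate for populated parts. */
    ram_discard_manager_register_listener(rdm, rdl, section);
}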
+ */ + const char *name; + /* private: */ AddressSpace *address_space; QTAILQ_ENTRY(MemoryListener) link; @@ -806,28 +1096,6 @@ typedef bool (*flatview_cb)(Int128 start, */ void flatview_for_each_range(FlatView *fv, flatview_cb cb, void *opaque); -/** - * struct MemoryRegionSection: describes a fragment of a #MemoryRegion - * - * @mr: the region, or %NULL if empty - * @fv: the flat view of the address space the region is mapped in - * @offset_within_region: the beginning of the section, relative to @mr's start - * @size: the size of the section; will not exceed @mr's boundaries - * @offset_within_address_space: the address of the first byte of the section - * relative to the region's address space - * @readonly: writes to this section are ignored - * @nonvolatile: this section is non-volatile - */ -struct MemoryRegionSection { - Int128 size; - MemoryRegion *mr; - FlatView *fv; - hwaddr offset_within_region; - hwaddr offset_within_address_space; - bool readonly; - bool nonvolatile; -}; - static inline bool MemoryRegionSection_eq(MemoryRegionSection *a, MemoryRegionSection *b) { @@ -840,6 +1108,26 @@ static inline bool MemoryRegionSection_eq(MemoryRegionSection *a, a->nonvolatile == b->nonvolatile; } +/** + * memory_region_section_new_copy: Copy a memory region section + * + * Allocate memory for a new copy, copy the memory region section, and + * properly take a reference on all relevant members. + * + * @s: the #MemoryRegionSection to copy + */ +MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s); + +/** + * memory_region_section_new_copy: Free a copied memory region section + * + * Free a copy of a memory section created via memory_region_section_new_copy(). + * properly dropping references on all relevant members. + * + * @s: the #MemoryRegionSection to copy + */ +void memory_region_section_free_copy(MemoryRegionSection *s); + /** * memory_region_init: Initialize a memory region * @@ -928,34 +1216,36 @@ void memory_region_init_ram_nomigrate(MemoryRegion *mr, Error **errp); /** - * memory_region_init_ram_shared_nomigrate: Initialize RAM memory region. - * Accesses into the region will - * modify memory directly. + * memory_region_init_ram_flags_nomigrate: Initialize RAM memory region. + * Accesses into the region will + * modify memory directly. * * @mr: the #MemoryRegion to be initialized. * @owner: the object that tracks the region's reference count * @name: Region name, becomes part of RAMBlock name used in migration stream * must be unique within any device * @size: size of the region. - * @share: allow remapping RAM to different addresses + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE. * @errp: pointer to Error*, to store an error if it happens. * - * Note that this function is similar to memory_region_init_ram_nomigrate. - * The only difference is part of the RAM region can be remapped. + * Note that this function does not do anything to cause the data in the + * RAM memory region to be migrated; that is the responsibility of the caller. */ -void memory_region_init_ram_shared_nomigrate(MemoryRegion *mr, - Object *owner, - const char *name, - uint64_t size, - bool share, - Error **errp); +void memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + uint32_t ram_flags, + Error **errp); /** * memory_region_init_resizeable_ram: Initialize memory region with resizeable * RAM. Accesses into the region will * modify memory directly. 
Only an initial * portion of this RAM is actually used. - * The used size can change across reboots. + * Changing the size while migrating + * can result in the migration being + * canceled. * * @mr: the #MemoryRegion to be initialized. * @owner: the object that tracks the region's reference count @@ -991,10 +1281,8 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, * @size: size of the region. * @align: alignment of the region base address; if 0, the default alignment * (getpagesize()) will be used. - * @ram_flags: Memory region features: - * - RAM_SHARED: memory must be mmaped with the MAP_SHARED flag - * - RAM_PMEM: the memory is persistent memory - * Other bits are ignored now. + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, * @path: the path in which to allocate the RAM. * @readonly: true to open @path for reading, false for read/write. * @errp: pointer to Error*, to store an error if it happens. @@ -1020,7 +1308,8 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, * @owner: the object that tracks the region's reference count * @name: the name of the region. * @size: size of the region. - * @share: %true if memory must be mmaped with the MAP_SHARED flag + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, RAM_PROTECTED. * @fd: the fd to mmap. * @offset: offset within the file referenced by fd * @errp: pointer to Error*, to store an error if it happens. @@ -1032,7 +1321,7 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, - bool share, + uint32_t ram_flags, int fd, ram_addr_t offset, Error **errp); @@ -1321,6 +1610,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr) return mr->rom_device && mr->romd_mode; } +/** + * memory_region_is_protected: check whether a memory region is protected + * + * Returns %true if a memory region is protected RAM and cannot be accessed + * via standard mechanisms, e.g. DMA. + * + * @mr: the memory region being queried + */ +bool memory_region_is_protected(MemoryRegion *mr); + /** * memory_region_get_iommu: check whether a memory region is an iommu * @@ -1586,8 +1885,8 @@ void *memory_region_get_ram_ptr(MemoryRegion *mr); /* memory_region_ram_resize: Resize a RAM region. * - * Only legal before guest might have detected the memory size: e.g. on - * incoming migration, or right after reset. + * Resizing RAM while migrating can result in the migration being canceled. + * Care has to be taken if the guest might have already detected the memory. * * @mr: a memory region created with @memory_region_init_resizeable_ram. * @newsize: the new size the region @@ -2003,6 +2302,41 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr); */ bool memory_region_is_mapped(MemoryRegion *mr); +/** + * memory_region_get_ram_discard_manager: get the #RamDiscardManager for a + * #MemoryRegion + * + * The #RamDiscardManager cannot change while a memory region is mapped. 
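/*
 * Illustrative sketch, not part of the patch: with the old `bool share`
 * parameter replaced by a ram_flags bitmask, callers spell the desired
 * properties out explicitly. The region name and size below are placeholders.
 */
static void ram_flags_init_example(MemoryRegion *mr, Object *owner,
                                   Error **errp)
{
    memory_region_init_ram_flags_nomigrate(mr, owner, "example.ram",
                                           64 * MiB,
                                           RAM_SHARED | RAM_NORESERVE, errp);
}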
+ * + * @mr: the #MemoryRegion + */ +RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr); + +/** + * memory_region_has_ram_discard_manager: check whether a #MemoryRegion has a + * #RamDiscardManager assigned + * + * @mr: the #MemoryRegion + */ +static inline bool memory_region_has_ram_discard_manager(MemoryRegion *mr) +{ + return !!memory_region_get_ram_discard_manager(mr); +} + +/** + * memory_region_set_ram_discard_manager: set the #RamDiscardManager for a + * #MemoryRegion + * + * This function must not be called for a mapped #MemoryRegion, a #MemoryRegion + * that does not cover RAM, or a #MemoryRegion that already has a + * #RamDiscardManager assigned. + * + * @mr: the #MemoryRegion + * @rdm: #RamDiscardManager to set + */ +void memory_region_set_ram_discard_manager(MemoryRegion *mr, + RamDiscardManager *rdm); + /** * memory_region_find: translate an address/size relative to a * MemoryRegion into a #MemoryRegionSection. @@ -2085,13 +2419,17 @@ void memory_listener_unregister(MemoryListener *listener); /** * memory_global_dirty_log_start: begin dirty logging for all regions + * + * @flags: purpose of starting dirty log, migration or dirty rate */ -void memory_global_dirty_log_start(void); +void memory_global_dirty_log_start(unsigned int flags); /** * memory_global_dirty_log_stop: end dirty logging for all regions + * + * @flags: purpose of stopping dirty log, migration or dirty rate */ -void memory_global_dirty_log_stop(void); +void memory_global_dirty_log_stop(unsigned int flags); void mtree_info(bool flatview, bool dispatch_tree, bool owner, bool disabled); @@ -2303,7 +2641,7 @@ static inline uint8_t address_space_ldub_cached(MemoryRegionCache *cache, } static inline void address_space_stb_cached(MemoryRegionCache *cache, - hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) + hwaddr addr, uint8_t val, MemTxAttrs attrs, MemTxResult *result) { assert(addr < cache->len); if (likely(cache->ptr)) { @@ -2611,6 +2949,12 @@ static inline MemOp devend_memop(enum device_endian end) */ int ram_block_discard_disable(bool state); +/* + * See ram_block_discard_disable(): only disable uncoordinated discards, + * keeping coordinated discards (via the RamDiscardManager) enabled. + */ +int ram_block_uncoordinated_discard_disable(bool state); + /* * Inhibit technologies that disable discarding of pages in RAM blocks. * @@ -2620,12 +2964,20 @@ int ram_block_discard_disable(bool state); int ram_block_discard_require(bool state); /* - * Test if discarding of memory in ram blocks is disabled. + * See ram_block_discard_require(): only inhibit technologies that disable + * uncoordinated discarding of pages in RAM blocks, allowing co-existance with + * technologies that only inhibit uncoordinated discards (via the + * RamDiscardManager). + */ +int ram_block_coordinated_discard_require(bool state); + +/* + * Test if any discarding of memory in ram blocks is disabled. */ bool ram_block_discard_is_disabled(void); /* - * Test if discarding of memory in ram blocks is required to work reliably. + * Test if any discarding of memory in ram blocks is required to work reliably. 
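/*
 * Illustrative sketch, not part of the patch: global_dirty_log is replaced
 * by the global_dirty_tracking bitmask, so callers now say why logging is
 * enabled. A dirty-rate measurement would bracket its sampling roughly as
 * below (simplified; locking and the actual sampling are omitted).
 */
static void dirty_rate_sample_example(void)
{
    memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
    /* ... walk dirty bitmaps; total_dirty_pages accumulates the count ... */
    memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE);
}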
*/ bool ram_block_discard_is_required(void); diff --git a/include/exec/memory_ldst.h.inc b/include/exec/memory_ldst.h.inc index 46e6c220d35..7c3a641f7ed 100644 --- a/include/exec/memory_ldst.h.inc +++ b/include/exec/memory_ldst.h.inc @@ -20,7 +20,7 @@ */ #ifdef TARGET_ENDIANNESS -extern uint32_t glue(address_space_lduw, SUFFIX)(ARG1_DECL, +extern uint16_t glue(address_space_lduw, SUFFIX)(ARG1_DECL, hwaddr addr, MemTxAttrs attrs, MemTxResult *result); extern uint32_t glue(address_space_ldl, SUFFIX)(ARG1_DECL, hwaddr addr, MemTxAttrs attrs, MemTxResult *result); @@ -29,17 +29,17 @@ extern uint64_t glue(address_space_ldq, SUFFIX)(ARG1_DECL, extern void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stw, SUFFIX)(ARG1_DECL, - hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); + hwaddr addr, uint16_t val, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stl, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stq, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result); #else -extern uint32_t glue(address_space_ldub, SUFFIX)(ARG1_DECL, +extern uint8_t glue(address_space_ldub, SUFFIX)(ARG1_DECL, hwaddr addr, MemTxAttrs attrs, MemTxResult *result); -extern uint32_t glue(address_space_lduw_le, SUFFIX)(ARG1_DECL, +extern uint16_t glue(address_space_lduw_le, SUFFIX)(ARG1_DECL, hwaddr addr, MemTxAttrs attrs, MemTxResult *result); -extern uint32_t glue(address_space_lduw_be, SUFFIX)(ARG1_DECL, +extern uint16_t glue(address_space_lduw_be, SUFFIX)(ARG1_DECL, hwaddr addr, MemTxAttrs attrs, MemTxResult *result); extern uint32_t glue(address_space_ldl_le, SUFFIX)(ARG1_DECL, hwaddr addr, MemTxAttrs attrs, MemTxResult *result); @@ -50,11 +50,11 @@ extern uint64_t glue(address_space_ldq_le, SUFFIX)(ARG1_DECL, extern uint64_t glue(address_space_ldq_be, SUFFIX)(ARG1_DECL, hwaddr addr, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stb, SUFFIX)(ARG1_DECL, - hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); + hwaddr addr, uint8_t val, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stw_le, SUFFIX)(ARG1_DECL, - hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); + hwaddr addr, uint16_t val, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stw_be, SUFFIX)(ARG1_DECL, - hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); + hwaddr addr, uint16_t val, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stl_le, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stl_be, SUFFIX)(ARG1_DECL, diff --git a/include/exec/memory_ldst_cached.h.inc b/include/exec/memory_ldst_cached.h.inc index 7bc8790d346..d7834f852c4 100644 --- a/include/exec/memory_ldst_cached.h.inc +++ b/include/exec/memory_ldst_cached.h.inc @@ -24,6 +24,18 @@ #define LD_P(size) \ glue(glue(ld, size), glue(ENDIANNESS, _p)) +static inline uint16_t ADDRESS_SPACE_LD_CACHED(uw)(MemoryRegionCache *cache, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + assert(addr < cache->len && 2 <= cache->len - addr); + fuzz_dma_read_cb(cache->xlat + addr, 2, cache->mrs.mr); + if (likely(cache->ptr)) { + return LD_P(uw)(cache->ptr + addr); + } else { + return ADDRESS_SPACE_LD_CACHED_SLOW(uw)(cache, addr, attrs, result); + } +} + static inline 
uint32_t ADDRESS_SPACE_LD_CACHED(l)(MemoryRegionCache *cache, hwaddr addr, MemTxAttrs attrs, MemTxResult *result) { @@ -48,18 +60,6 @@ static inline uint64_t ADDRESS_SPACE_LD_CACHED(q)(MemoryRegionCache *cache, } } -static inline uint32_t ADDRESS_SPACE_LD_CACHED(uw)(MemoryRegionCache *cache, - hwaddr addr, MemTxAttrs attrs, MemTxResult *result) -{ - assert(addr < cache->len && 2 <= cache->len - addr); - fuzz_dma_read_cb(cache->xlat + addr, 2, cache->mrs.mr); - if (likely(cache->ptr)) { - return LD_P(uw)(cache->ptr + addr); - } else { - return ADDRESS_SPACE_LD_CACHED_SLOW(uw)(cache, addr, attrs, result); - } -} - #undef ADDRESS_SPACE_LD_CACHED #undef ADDRESS_SPACE_LD_CACHED_SLOW #undef LD_P @@ -71,25 +71,25 @@ static inline uint32_t ADDRESS_SPACE_LD_CACHED(uw)(MemoryRegionCache *cache, #define ST_P(size) \ glue(glue(st, size), glue(ENDIANNESS, _p)) -static inline void ADDRESS_SPACE_ST_CACHED(l)(MemoryRegionCache *cache, - hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) +static inline void ADDRESS_SPACE_ST_CACHED(w)(MemoryRegionCache *cache, + hwaddr addr, uint16_t val, MemTxAttrs attrs, MemTxResult *result) { - assert(addr < cache->len && 4 <= cache->len - addr); + assert(addr < cache->len && 2 <= cache->len - addr); if (likely(cache->ptr)) { - ST_P(l)(cache->ptr + addr, val); + ST_P(w)(cache->ptr + addr, val); } else { - ADDRESS_SPACE_ST_CACHED_SLOW(l)(cache, addr, val, attrs, result); + ADDRESS_SPACE_ST_CACHED_SLOW(w)(cache, addr, val, attrs, result); } } -static inline void ADDRESS_SPACE_ST_CACHED(w)(MemoryRegionCache *cache, +static inline void ADDRESS_SPACE_ST_CACHED(l)(MemoryRegionCache *cache, hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) { - assert(addr < cache->len && 2 <= cache->len - addr); + assert(addr < cache->len && 4 <= cache->len - addr); if (likely(cache->ptr)) { - ST_P(w)(cache->ptr + addr, val); + ST_P(l)(cache->ptr + addr, val); } else { - ADDRESS_SPACE_ST_CACHED_SLOW(w)(cache, addr, val, attrs, result); + ADDRESS_SPACE_ST_CACHED_SLOW(l)(cache, addr, val, attrs, result); } } diff --git a/include/exec/memory_ldst_phys.h.inc b/include/exec/memory_ldst_phys.h.inc index b9dd53c389d..ecd678610d1 100644 --- a/include/exec/memory_ldst_phys.h.inc +++ b/include/exec/memory_ldst_phys.h.inc @@ -20,6 +20,12 @@ */ #ifdef TARGET_ENDIANNESS +static inline uint16_t glue(lduw_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_lduw, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + static inline uint32_t glue(ldl_phys, SUFFIX)(ARG1_DECL, hwaddr addr) { return glue(address_space_ldl, SUFFIX)(ARG1, addr, @@ -32,10 +38,10 @@ static inline uint64_t glue(ldq_phys, SUFFIX)(ARG1_DECL, hwaddr addr) MEMTXATTRS_UNSPECIFIED, NULL); } -static inline uint32_t glue(lduw_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +static inline void glue(stw_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint16_t val) { - return glue(address_space_lduw, SUFFIX)(ARG1, addr, - MEMTXATTRS_UNSPECIFIED, NULL); + glue(address_space_stw, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); } static inline void glue(stl_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) @@ -44,18 +50,30 @@ static inline void glue(stl_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) MEMTXATTRS_UNSPECIFIED, NULL); } -static inline void glue(stw_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) -{ - glue(address_space_stw, SUFFIX)(ARG1, addr, val, - MEMTXATTRS_UNSPECIFIED, NULL); -} - static inline void glue(stq_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val) { 
glue(address_space_stq, SUFFIX)(ARG1, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); } #else +static inline uint8_t glue(ldub_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_ldub, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline uint16_t glue(lduw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_lduw_le, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline uint16_t glue(lduw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_lduw_be, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + static inline uint32_t glue(ldl_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr) { return glue(address_space_ldl_le, SUFFIX)(ARG1, addr, @@ -80,22 +98,22 @@ static inline uint64_t glue(ldq_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr) MEMTXATTRS_UNSPECIFIED, NULL); } -static inline uint32_t glue(ldub_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +static inline void glue(stb_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint8_t val) { - return glue(address_space_ldub, SUFFIX)(ARG1, addr, - MEMTXATTRS_UNSPECIFIED, NULL); + glue(address_space_stb, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); } -static inline uint32_t glue(lduw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +static inline void glue(stw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint16_t val) { - return glue(address_space_lduw_le, SUFFIX)(ARG1, addr, - MEMTXATTRS_UNSPECIFIED, NULL); + glue(address_space_stw_le, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); } -static inline uint32_t glue(lduw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +static inline void glue(stw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint16_t val) { - return glue(address_space_lduw_be, SUFFIX)(ARG1, addr, - MEMTXATTRS_UNSPECIFIED, NULL); + glue(address_space_stw_be, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); } static inline void glue(stl_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) @@ -110,24 +128,6 @@ static inline void glue(stl_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t va MEMTXATTRS_UNSPECIFIED, NULL); } -static inline void glue(stb_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) -{ - glue(address_space_stb, SUFFIX)(ARG1, addr, val, - MEMTXATTRS_UNSPECIFIED, NULL); -} - -static inline void glue(stw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) -{ - glue(address_space_stw_le, SUFFIX)(ARG1, addr, val, - MEMTXATTRS_UNSPECIFIED, NULL); -} - -static inline void glue(stw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) -{ - glue(address_space_stw_be, SUFFIX)(ARG1, addr, val, - MEMTXATTRS_UNSPECIFIED, NULL); -} - static inline void glue(stq_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val) { glue(address_space_stq_le, SUFFIX)(ARG1, addr, val, diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h index b1b72b5d90b..f92f1697398 100644 --- a/include/exec/plugin-gen.h +++ b/include/exec/plugin-gen.h @@ -27,13 +27,21 @@ void plugin_gen_insn_end(void); void plugin_gen_disable_mem_helpers(void); void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info); -static inline void plugin_insn_append(const void *from, size_t size) +static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size) { struct qemu_plugin_insn *insn = tcg_ctx->plugin_insn; + abi_ptr off; if (insn == NULL) { return; } + off = pc - insn->vaddr; + if (off < insn->data->len) { + g_byte_array_set_size(insn->data, off); + } else if (off > insn->data->len) { + /* we have an unexpected gap */ + g_assert_not_reached(); + } insn->data = 
g_byte_array_append(insn->data, from, size); } @@ -62,7 +70,7 @@ static inline void plugin_gen_disable_mem_helpers(void) static inline void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info) { } -static inline void plugin_insn_append(const void *from, size_t size) +static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size) { } #endif /* CONFIG_PLUGIN */ diff --git a/include/exec/poison.h b/include/exec/poison.h index 2d169d42148..43bdbed31d9 100644 --- a/include/exec/poison.h +++ b/include/exec/poison.h @@ -4,6 +4,8 @@ #ifndef HW_POISON_H #define HW_POISON_H +#include "config-poison.h" + #pragma GCC poison TARGET_I386 #pragma GCC poison TARGET_X86_64 #pragma GCC poison TARGET_AARCH64 @@ -12,7 +14,6 @@ #pragma GCC poison TARGET_CRIS #pragma GCC poison TARGET_HEXAGON #pragma GCC poison TARGET_HPPA -#pragma GCC poison TARGET_LM32 #pragma GCC poison TARGET_M68K #pragma GCC poison TARGET_MICROBLAZE #pragma GCC poison TARGET_MIPS @@ -21,7 +22,6 @@ #pragma GCC poison TARGET_MIPS64 #pragma GCC poison TARGET_CHERI #pragma GCC poison TARGET_ABI_MIPSN64 -#pragma GCC poison TARGET_MOXIE #pragma GCC poison TARGET_NIOS2 #pragma GCC poison TARGET_OPENRISC #pragma GCC poison TARGET_PPC @@ -36,7 +36,6 @@ #pragma GCC poison TARGET_SPARC #pragma GCC poison TARGET_SPARC64 #pragma GCC poison TARGET_TRICORE -#pragma GCC poison TARGET_UNICORE32 #pragma GCC poison TARGET_XTENSA #pragma GCC poison TARGET_ALIGNED_ONLY @@ -78,12 +77,10 @@ #pragma GCC poison CONFIG_HPPA_DIS #pragma GCC poison CONFIG_I386_DIS #pragma GCC poison CONFIG_HEXAGON_DIS -#pragma GCC poison CONFIG_LM32_DIS #pragma GCC poison CONFIG_M68K_DIS #pragma GCC poison CONFIG_MICROBLAZE_DIS #pragma GCC poison CONFIG_MIPS_DIS #pragma GCC poison CONFIG_NANOMIPS_DIS -#pragma GCC poison CONFIG_MOXIE_DIS #pragma GCC poison CONFIG_NIOS2_DIS #pragma GCC poison CONFIG_PPC_DIS #pragma GCC poison CONFIG_RISCV_DIS @@ -92,8 +89,12 @@ #pragma GCC poison CONFIG_SPARC_DIS #pragma GCC poison CONFIG_XTENSA_DIS +#pragma GCC poison CONFIG_HAX +#pragma GCC poison CONFIG_HVF #pragma GCC poison CONFIG_LINUX_USER #pragma GCC poison CONFIG_KVM #pragma GCC poison CONFIG_SOFTMMU +#pragma GCC poison CONFIG_WHPX +#pragma GCC poison CONFIG_XEN #endif diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 3cb9791df3b..64fb936c7c7 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -26,6 +26,8 @@ #include "exec/ramlist.h" #include "exec/ramblock.h" +extern uint64_t total_dirty_pages; + /** * clear_bmap_size: calculate clear bitmap size * @@ -104,11 +106,8 @@ long qemu_maxrampagesize(void); * Parameters: * @size: the size in bytes of the ram block * @mr: the memory region where the ram block is - * @ram_flags: specify the properties of the ram block, which can be one - * or bit-or of following values - * - RAM_SHARED: mmap the backing file or device with MAP_SHARED - * - RAM_PMEM: the backend @mem_path or @fd is persistent memory - * Other bits are ignored. + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE. * @mem_path or @fd: specify the backing file or device * @readonly: true to open @path for reading, false for read/write. 
* @errp: pointer to Error*, to store an error if it happens @@ -126,7 +125,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, MemoryRegion *mr, Error **errp); -RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr, +RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr, Error **errp); RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size, void (*resized)(const char*, @@ -372,10 +371,14 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp); - if (global_dirty_log) { + if (global_dirty_tracking) { qatomic_or( &blocks[DIRTY_MEMORY_MIGRATION][idx][offset], temp); + if (unlikely( + global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) { + total_dirty_pages += ctpopl(temp); + } } if (tcg_enabled()) { @@ -395,7 +398,7 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, } else { uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE; - if (!global_dirty_log) { + if (!global_dirty_tracking) { clients &= ~(1 << DIRTY_MEMORY_MIGRATION); } @@ -406,6 +409,9 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, for (i = 0; i < len; i++) { if (bitmap[i] != 0) { c = leul_to_cpu(bitmap[i]); + if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) { + total_dirty_pages += ctpopl(c); + } do { j = ctzl(c); c &= ~(1ul << j); diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h index 4fa47599693..069e421573f 100644 --- a/include/exec/ramblock.h +++ b/include/exec/ramblock.h @@ -64,6 +64,16 @@ struct RAMBlock { */ unsigned long *clear_bmap; uint8_t clear_bmap_shift; + + /* + * RAM block length that corresponds to the used_length on the migration + * source (after RAM block sizes were synchronized). Especially, after + * starting to run the guest, used_length and postcopy_length can differ. + * Used to register/unregister uffd handlers and as the size of the received + * bitmap. Receiving any page beyond this length will bail out, as it + * could not have been valid on the source. 
+ */ + ram_addr_t postcopy_length; }; #endif diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h index 26704aa3b0d..2ad2a81accf 100644 --- a/include/exec/ramlist.h +++ b/include/exec/ramlist.h @@ -65,16 +65,21 @@ void qemu_mutex_lock_ramlist(void); void qemu_mutex_unlock_ramlist(void); struct RAMBlockNotifier { - void (*ram_block_added)(RAMBlockNotifier *n, void *host, size_t size); - void (*ram_block_removed)(RAMBlockNotifier *n, void *host, size_t size); + void (*ram_block_added)(RAMBlockNotifier *n, void *host, size_t size, + size_t max_size); + void (*ram_block_removed)(RAMBlockNotifier *n, void *host, size_t size, + size_t max_size); + void (*ram_block_resized)(RAMBlockNotifier *n, void *host, size_t old_size, + size_t new_size); QLIST_ENTRY(RAMBlockNotifier) next; }; void ram_block_notifier_add(RAMBlockNotifier *n); void ram_block_notifier_remove(RAMBlockNotifier *n); -void ram_block_notify_add(void *host, size_t size); -void ram_block_notify_remove(void *host, size_t size); +void ram_block_notify_add(void *host, size_t size, size_t max_size); +void ram_block_notify_remove(void *host, size_t size, size_t max_size); +void ram_block_notify_resize(void *host, size_t old_size, size_t new_size); -void ram_block_dump(Monitor *mon); +GString *ram_block_format(void); #endif /* RAMLIST_H */ diff --git a/include/exec/tb-lookup.h b/include/exec/tb-lookup.h deleted file mode 100644 index 2f314eb6eb2..00000000000 --- a/include/exec/tb-lookup.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2017, Emilio G. Cota - * - * License: GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ -#ifndef EXEC_TB_LOOKUP_H -#define EXEC_TB_LOOKUP_H - -#ifdef NEED_CPU_H -#include "cpu.h" -#else -#include "exec/poison.h" -#endif - -#include "exec/exec-all.h" -#include "exec/tb-hash.h" - -/* Might cause an exception, so have a longjmp destination ready */ -static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, - target_ulong cs_base, - target_ulong cs_top, - uint32_t cheri_flags, - uint32_t flags, uint32_t cflags) -{ - TranslationBlock *tb; - uint32_t hash; - - /* we should never be trying to look up an INVALID tb */ - tcg_debug_assert(!(cflags & CF_INVALID)); - - hash = tb_jmp_cache_hash_func(pc); - tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]); - - if (likely(tb && - tb->pc == pc && - tb->cs_base == cs_base && - tb->cs_top == cs_top && - tb->cheri_flags == cheri_flags && - tb->flags == flags && - tb->trace_vcpu_dstate == *cpu->trace_dstate && - tb_cflags(tb) == cflags)) { - return tb; - } - tb = tb_htable_lookup(cpu, pc, cs_base, cs_top, cheri_flags, flags, cflags); - - if (tb == NULL) { - return NULL; - } - qatomic_set(&cpu->tb_jmp_cache[hash], tb); - return tb; -} - -#endif /* EXEC_TB_LOOKUP_H */ diff --git a/include/exec/translate-all.h b/include/exec/translate-all.h index a557b4e2bb9..9f646389afe 100644 --- a/include/exec/translate-all.h +++ b/include/exec/translate-all.h @@ -33,6 +33,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end); void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr); #ifdef CONFIG_USER_ONLY +void page_protect(tb_page_addr_t page_addr); int page_unprotect(target_ulong address, uintptr_t pc); #endif diff --git a/include/exec/translator.h b/include/exec/translator.h index 91919f49ad7..e9eb2945a49 100644 --- a/include/exec/translator.h +++ b/include/exec/translator.h @@ -23,6 +23,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/plugin-gen.h" +#include 
"exec/translate-all.h" #include "tcg/tcg.h" @@ -87,6 +88,17 @@ typedef struct DisasContextBase { bool log_instr_enabled; uint8_t printf_used_ptr; #endif +#ifdef CONFIG_USER_ONLY + /* + * Guest address of the last byte of the last protected page. + * + * Pages containing the translated instructions are made non-writable in + * order to achieve consistency in case another thread is modifying the + * code while translate_insn() fetches the instruction bytes piecemeal. + * Such writer threads are blocked on mmap_lock() in page_unprotect(). + */ + target_ulong page_protect_end; +#endif } DisasContextBase; #ifdef TARGET_CHERI @@ -115,15 +127,6 @@ typedef struct DisasContextBase { * @insn_start: * Emit the tcg_gen_insn_start opcode. * - * @breakpoint_check: - * When called, the breakpoint has already been checked to match the PC, - * but the target may decide the breakpoint missed the address - * (e.g., due to conditions encoded in their flags). Return true to - * indicate that the breakpoint did hit, in which case no more breakpoints - * are checked. If the breakpoint did hit, emit any code required to - * signal the exception, and set db->is_jmp as necessary to terminate - * the main loop. - * * @translate_insn: * Disassemble one instruction and set db->pc_next for the start * of the following instruction. Set db->is_jmp as necessary to @@ -139,8 +142,6 @@ typedef struct TranslatorOps { void (*init_disas_context)(DisasContextBase *db, CPUState *cpu); void (*tb_start)(DisasContextBase *db, CPUState *cpu); void (*insn_start)(DisasContextBase *db, CPUState *cpu); - bool (*breakpoint_check)(DisasContextBase *db, CPUState *cpu, - const CPUBreakpoint *bp); void (*translate_insn)(DisasContextBase *db, CPUState *cpu); void (*tb_stop)(DisasContextBase *db, CPUState *cpu); void (*disas_log)(const DisasContextBase *db, CPUState *cpu); @@ -171,6 +172,16 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, void translator_loop_temp_check(DisasContextBase *db); +/** + * translator_use_goto_tb + * @db: Disassembly context + * @dest: target pc of the goto + * + * Return true if goto_tb is allowed between the current TB + * and the destination PC. 
+ */ +bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest); + /* * Translator Load Functions * @@ -183,27 +194,23 @@ void translator_loop_temp_check(DisasContextBase *db); */ #define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn) \ - static inline type \ - fullname ## _swap(CPUArchState *env, abi_ptr pc, bool do_swap) \ - { \ - type ret = load_fn(env, pc); \ - if (do_swap) { \ - ret = swap_fn(ret); \ - } \ - plugin_insn_append(&ret, sizeof(ret)); \ - return ret; \ - } \ - \ - static inline type fullname(CPUArchState *env, abi_ptr pc) \ + type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \ + abi_ptr pc, bool do_swap); \ + static inline type fullname(CPUArchState *env, \ + DisasContextBase *dcbase, abi_ptr pc) \ { \ - return fullname ## _swap(env, pc, false); \ + return fullname ## _swap(env, dcbase, pc, false); \ } -GEN_TRANSLATOR_LD(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */) -GEN_TRANSLATOR_LD(translator_ldsw, int16_t, cpu_ldsw_code, bswap16) -GEN_TRANSLATOR_LD(translator_lduw, uint16_t, cpu_lduw_code, bswap16) -GEN_TRANSLATOR_LD(translator_ldl, uint32_t, cpu_ldl_code, bswap32) -GEN_TRANSLATOR_LD(translator_ldq, uint64_t, cpu_ldq_code, bswap64) +#define FOR_EACH_TRANSLATOR_LD(F) \ + F(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */) \ + F(translator_ldsw, int16_t, cpu_ldsw_code, bswap16) \ + F(translator_lduw, uint16_t, cpu_lduw_code, bswap16) \ + F(translator_ldl, uint32_t, cpu_ldl_code, bswap32) \ + F(translator_ldq, uint64_t, cpu_ldq_code, bswap64) + +FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD) + #undef GEN_TRANSLATOR_LD #endif /* EXEC__TRANSLATOR_H */ diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h index 2f0674fbdde..a98d759cd3a 100644 --- a/include/fpu/softfloat-helpers.h +++ b/include/fpu/softfloat-helpers.h @@ -48,8 +48,8 @@ this code that are retained. =============================================================================== */ -#ifndef _SOFTFLOAT_HELPERS_H_ -#define _SOFTFLOAT_HELPERS_H_ +#ifndef SOFTFLOAT_HELPERS_H +#define SOFTFLOAT_HELPERS_H #include "fpu/softfloat-types.h" @@ -69,7 +69,7 @@ static inline void set_float_exception_flags(int val, float_status *status) status->float_exception_flags = val; } -static inline void set_floatx80_rounding_precision(int val, +static inline void set_floatx80_rounding_precision(FloatX80RoundPrec val, float_status *status) { status->floatx80_rounding_precision = val; @@ -120,7 +120,8 @@ static inline int get_float_exception_flags(float_status *status) return status->float_exception_flags; } -static inline int get_floatx80_rounding_precision(float_status *status) +static inline FloatX80RoundPrec +get_floatx80_rounding_precision(float_status *status) { return status->floatx80_rounding_precision; } diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h index a35ec2893a2..f35cdbfa636 100644 --- a/include/fpu/softfloat-macros.h +++ b/include/fpu/softfloat-macros.h @@ -8,7 +8,6 @@ * so some portions are provided under: * the SoftFloat-2a license * the BSD license - * GPL-v2-or-later * * Any future contributions to this file after December 1st 2014 will be * taken to be licensed under the Softfloat-2a license unless specifically @@ -75,14 +74,47 @@ this code that are retained. * THE POSSIBILITY OF SUCH DAMAGE. */ -/* Portions of this work are licensed under the terms of the GNU GPL, - * version 2 or later. See the COPYING file in the top-level directory. 
- */ - #ifndef FPU_SOFTFLOAT_MACROS_H #define FPU_SOFTFLOAT_MACROS_H #include "fpu/softfloat-types.h" +#include "qemu/host-utils.h" + +/** + * shl_double: double-word merging left shift + * @l: left or most-significant word + * @r: right or least-significant word + * @c: shift count + * + * Shift @l left by @c bits, shifting in bits from @r. + */ +static inline uint64_t shl_double(uint64_t l, uint64_t r, int c) +{ +#if defined(__x86_64__) + asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c)); + return l; +#else + return c ? (l << c) | (r >> (64 - c)) : l; +#endif +} + +/** + * shr_double: double-word merging right shift + * @l: left or most-significant word + * @r: right or least-significant word + * @c: shift count + * + * Shift @r right by @c bits, shifting in bits from @l. + */ +static inline uint64_t shr_double(uint64_t l, uint64_t r, int c) +{ +#if defined(__x86_64__) + asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c)); + return r; +#else + return c ? (r >> c) | (l << (64 - c)) : r; +#endif +} /*---------------------------------------------------------------------------- | Shifts `a' right by the number of bits given in `count'. If any nonzero @@ -403,16 +435,12 @@ static inline void | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. *----------------------------------------------------------------------------*/ -static inline void - add128( - uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr ) +static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, + uint64_t *z0Ptr, uint64_t *z1Ptr) { - uint64_t z1; - - z1 = a1 + b1; - *z1Ptr = z1; - *z0Ptr = a0 + b0 + ( z1 < a1 ); - + bool c = 0; + *z1Ptr = uadd64_carry(a1, b1, &c); + *z0Ptr = uadd64_carry(a0, b0, &c); } /*---------------------------------------------------------------------------- @@ -423,34 +451,14 @@ static inline void | `z1Ptr', and `z2Ptr'. *----------------------------------------------------------------------------*/ -static inline void - add192( - uint64_t a0, - uint64_t a1, - uint64_t a2, - uint64_t b0, - uint64_t b1, - uint64_t b2, - uint64_t *z0Ptr, - uint64_t *z1Ptr, - uint64_t *z2Ptr - ) +static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t b0, uint64_t b1, uint64_t b2, + uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr) { - uint64_t z0, z1, z2; - int8_t carry0, carry1; - - z2 = a2 + b2; - carry1 = ( z2 < a2 ); - z1 = a1 + b1; - carry0 = ( z1 < a1 ); - z0 = a0 + b0; - z1 += carry1; - z0 += ( z1 < carry1 ); - z0 += carry0; - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - + bool c = 0; + *z2Ptr = uadd64_carry(a2, b2, &c); + *z1Ptr = uadd64_carry(a1, b1, &c); + *z0Ptr = uadd64_carry(a0, b0, &c); } /*---------------------------------------------------------------------------- @@ -461,14 +469,12 @@ static inline void | `z1Ptr'. *----------------------------------------------------------------------------*/ -static inline void - sub128( - uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr ) +static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, + uint64_t *z0Ptr, uint64_t *z1Ptr) { - - *z1Ptr = a1 - b1; - *z0Ptr = a0 - b0 - ( a1 < b1 ); - + bool c = 0; + *z1Ptr = usub64_borrow(a1, b1, &c); + *z0Ptr = usub64_borrow(a0, b0, &c); } /*---------------------------------------------------------------------------- @@ -479,34 +485,14 @@ static inline void | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. 
*----------------------------------------------------------------------------*/ -static inline void - sub192( - uint64_t a0, - uint64_t a1, - uint64_t a2, - uint64_t b0, - uint64_t b1, - uint64_t b2, - uint64_t *z0Ptr, - uint64_t *z1Ptr, - uint64_t *z2Ptr - ) +static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t b0, uint64_t b1, uint64_t b2, + uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr) { - uint64_t z0, z1, z2; - int8_t borrow0, borrow1; - - z2 = a2 - b2; - borrow1 = ( a2 < b2 ); - z1 = a1 - b1; - borrow0 = ( a1 < b1 ); - z0 = a0 - b0; - z0 -= ( z1 < borrow1 ); - z1 -= borrow1; - z0 -= borrow0; - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - + bool c = 0; + *z2Ptr = usub64_borrow(a2, b2, &c); + *z1Ptr = usub64_borrow(a1, b1, &c); + *z0Ptr = usub64_borrow(a0, b0, &c); } /*---------------------------------------------------------------------------- @@ -515,27 +501,10 @@ static inline void | `z0Ptr' and `z1Ptr'. *----------------------------------------------------------------------------*/ -static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr ) +static inline void +mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr) { - uint32_t aHigh, aLow, bHigh, bLow; - uint64_t z0, zMiddleA, zMiddleB, z1; - - aLow = a; - aHigh = a>>32; - bLow = b; - bHigh = b>>32; - z1 = ( (uint64_t) aLow ) * bLow; - zMiddleA = ( (uint64_t) aLow ) * bHigh; - zMiddleB = ( (uint64_t) aHigh ) * bLow; - z0 = ( (uint64_t) aHigh ) * bHigh; - zMiddleA += zMiddleB; - z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); - zMiddleA <<= 32; - z1 += zMiddleA; - z0 += ( z1 < zMiddleA ); - *z1Ptr = z1; - *z0Ptr = z0; - + mulu64(z1Ptr, z0Ptr, a, b); } /*---------------------------------------------------------------------------- @@ -546,24 +515,14 @@ static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *----------------------------------------------------------------------------*/ static inline void - mul128By64To192( - uint64_t a0, - uint64_t a1, - uint64_t b, - uint64_t *z0Ptr, - uint64_t *z1Ptr, - uint64_t *z2Ptr - ) +mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b, + uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr) { - uint64_t z0, z1, z2, more1; - - mul64To128( a1, b, &z1, &z2 ); - mul64To128( a0, b, &z0, &more1 ); - add128( z0, more1, 0, z1, &z0, &z1 ); - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; + uint64_t z0, z1, m1; + mul64To128(a1, b, &m1, z2Ptr); + mul64To128(a0, b, &z0, &z1); + add128(z0, z1, 0, m1, z0Ptr, z1Ptr); } /*---------------------------------------------------------------------------- @@ -573,34 +532,21 @@ static inline void | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 
*----------------------------------------------------------------------------*/ -static inline void - mul128To256( - uint64_t a0, - uint64_t a1, - uint64_t b0, - uint64_t b1, - uint64_t *z0Ptr, - uint64_t *z1Ptr, - uint64_t *z2Ptr, - uint64_t *z3Ptr - ) +static inline void mul128To256(uint64_t a0, uint64_t a1, + uint64_t b0, uint64_t b1, + uint64_t *z0Ptr, uint64_t *z1Ptr, + uint64_t *z2Ptr, uint64_t *z3Ptr) { - uint64_t z0, z1, z2, z3; - uint64_t more1, more2; - - mul64To128( a1, b1, &z2, &z3 ); - mul64To128( a1, b0, &z1, &more2 ); - add128( z1, more2, 0, z2, &z1, &z2 ); - mul64To128( a0, b0, &z0, &more1 ); - add128( z0, more1, 0, z1, &z0, &z1 ); - mul64To128( a0, b1, &more1, &more2 ); - add128( more1, more2, 0, z2, &more1, &z2 ); - add128( z0, z1, 0, more1, &z0, &z1 ); - *z3Ptr = z3; - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; + uint64_t z0, z1, z2; + uint64_t m0, m1, m2, n1, n2; + mul64To128(a1, b0, &m1, &m2); + mul64To128(a0, b1, &n1, &n2); + mul64To128(a1, b1, &z2, z3Ptr); + mul64To128(a0, b0, &z0, &z1); + + add192( 0, m1, m2, 0, n1, n2, &m0, &m1, &m2); + add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr); } /*---------------------------------------------------------------------------- @@ -634,83 +580,6 @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b) } -/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd - * (https://gmplib.org/repo/gmp/file/tip/longlong.h) - * - * Licensed under the GPLv2/LGPLv3 - */ -static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, - uint64_t n0, uint64_t d) -{ -#if defined(__x86_64__) - uint64_t q; - asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d)); - return q; -#elif defined(__s390x__) && !defined(__clang__) - /* Need to use a TImode type to get an even register pair for DLGR. */ - unsigned __int128 n = (unsigned __int128)n1 << 64 | n0; - asm("dlgr %0, %1" : "+r"(n) : "r"(d)); - *r = n >> 64; - return n; -#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7) - /* From Power ISA 2.06, programming note for divdeu. */ - uint64_t q1, q2, Q, r1, r2, R; - asm("divdeu %0,%2,%4; divdu %1,%3,%4" - : "=&r"(q1), "=r"(q2) - : "r"(n1), "r"(n0), "r"(d)); - r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */ - r2 = n0 - (q2 * d); - Q = q1 + q2; - R = r1 + r2; - if (R >= d || R < r2) { /* overflow implies R > d */ - Q += 1; - R -= d; - } - *r = R; - return Q; -#else - uint64_t d0, d1, q0, q1, r1, r0, m; - - d0 = (uint32_t)d; - d1 = d >> 32; - - r1 = n1 % d1; - q1 = n1 / d1; - m = q1 * d0; - r1 = (r1 << 32) | (n0 >> 32); - if (r1 < m) { - q1 -= 1; - r1 += d; - if (r1 >= d) { - if (r1 < m) { - q1 -= 1; - r1 += d; - } - } - } - r1 -= m; - - r0 = r1 % d1; - q0 = r1 / d1; - m = q0 * d0; - r0 = (r0 << 32) | (uint32_t)n0; - if (r0 < m) { - q0 -= 1; - r0 += d; - if (r0 >= d) { - if (r0 < m) { - q0 -= 1; - r0 += d; - } - } - } - r0 -= m; - - *r = r0; - return (q1 << 32) | q0; -#endif -} - /*---------------------------------------------------------------------------- | Returns an approximation to the square root of the 32-bit significand given | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of @@ -794,4 +663,38 @@ static inline bool ne128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) return a0 != b0 || a1 != b1; } +/* + * Similarly, comparisons of 192-bit values. 
+ */ + +static inline bool eq192(uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t b0, uint64_t b1, uint64_t b2) +{ + return ((a0 ^ b0) | (a1 ^ b1) | (a2 ^ b2)) == 0; +} + +static inline bool le192(uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t b0, uint64_t b1, uint64_t b2) +{ + if (a0 != b0) { + return a0 < b0; + } + if (a1 != b1) { + return a1 < b1; + } + return a2 <= b2; +} + +static inline bool lt192(uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t b0, uint64_t b1, uint64_t b2) +{ + if (a0 != b0) { + return a0 < b0; + } + if (a1 != b1) { + return a1 < b1; + } + return a2 < b2; +} + #endif diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h index 8a3f20fae9e..5bcbd041f74 100644 --- a/include/fpu/softfloat-types.h +++ b/include/fpu/softfloat-types.h @@ -134,8 +134,10 @@ typedef enum __attribute__((__packed__)) { float_round_up = 2, float_round_to_zero = 3, float_round_ties_away = 4, - /* Not an IEEE rounding mode: round to the closest odd mantissa value */ + /* Not an IEEE rounding mode: round to closest odd, overflow to max */ float_round_to_odd = 5, + /* Not an IEEE rounding mode: round to closest odd, overflow to inf */ + float_round_to_odd_inf = 6, } FloatRoundMode; /* @@ -152,6 +154,14 @@ enum { float_flag_output_denormal = 128 }; +/* + * Rounding precision for floatx80. + */ +typedef enum __attribute__((__packed__)) { + floatx80_precision_x, + floatx80_precision_d, + floatx80_precision_s, +} FloatX80RoundPrec; /* * Floating Point Status. Individual architectures may maintain @@ -163,7 +173,7 @@ enum { typedef struct float_status { FloatRoundMode float_rounding_mode; uint8_t float_exception_flags; - signed char floatx80_rounding_precision; + FloatX80RoundPrec floatx80_rounding_precision; bool tininess_before_rounding; /* should denormalised results go to zero and set the inexact flag? */ bool flush_to_zero; diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index 78ad5ca738b..a249991e612 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -100,7 +100,10 @@ typedef enum { | Routine to raise any or all of the software IEC/IEEE floating-point | exception flags. 
*----------------------------------------------------------------------------*/ -void float_raise(uint8_t flags, float_status *status); +static inline void float_raise(uint8_t flags, float_status *status) +{ + status->float_exception_flags |= flags; +} /*---------------------------------------------------------------------------- | If `a' is denormal and we are in flush-to-zero mode then set the @@ -240,6 +243,8 @@ float16 float16_minnum(float16, float16, float_status *status); float16 float16_maxnum(float16, float16, float_status *status); float16 float16_minnummag(float16, float16, float_status *status); float16 float16_maxnummag(float16, float16, float_status *status); +float16 float16_minimum_number(float16, float16, float_status *status); +float16 float16_maximum_number(float16, float16, float_status *status); float16 float16_sqrt(float16, float_status *status); FloatRelation float16_compare(float16, float16, float_status *status); FloatRelation float16_compare_quiet(float16, float16, float_status *status); @@ -419,6 +424,8 @@ bfloat16 bfloat16_minnum(bfloat16, bfloat16, float_status *status); bfloat16 bfloat16_maxnum(bfloat16, bfloat16, float_status *status); bfloat16 bfloat16_minnummag(bfloat16, bfloat16, float_status *status); bfloat16 bfloat16_maxnummag(bfloat16, bfloat16, float_status *status); +bfloat16 bfloat16_minimum_number(bfloat16, bfloat16, float_status *status); +bfloat16 bfloat16_maximum_number(bfloat16, bfloat16, float_status *status); bfloat16 bfloat16_sqrt(bfloat16, float_status *status); FloatRelation bfloat16_compare(bfloat16, bfloat16, float_status *status); FloatRelation bfloat16_compare_quiet(bfloat16, bfloat16, float_status *status); @@ -586,6 +593,8 @@ float32 float32_minnum(float32, float32, float_status *status); float32 float32_maxnum(float32, float32, float_status *status); float32 float32_minnummag(float32, float32, float_status *status); float32 float32_maxnummag(float32, float32, float_status *status); +float32 float32_minimum_number(float32, float32, float_status *status); +float32 float32_maximum_number(float32, float32, float_status *status); bool float32_is_quiet_nan(float32, float_status *status); bool float32_is_signaling_nan(float32, float_status *status); float32 float32_silence_nan(float32, float_status *status); @@ -775,6 +784,8 @@ float64 float64_minnum(float64, float64, float_status *status); float64 float64_maxnum(float64, float64, float_status *status); float64 float64_minnummag(float64, float64, float_status *status); float64 float64_maxnummag(float64, float64, float_status *status); +float64 float64_minimum_number(float64, float64, float_status *status); +float64 float64_maximum_number(float64, float64, float_status *status); bool float64_is_quiet_nan(float64 a, float_status *status); bool float64_is_signaling_nan(float64, float_status *status); float64 float64_silence_nan(float64, float_status *status); @@ -1149,7 +1160,7 @@ floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status); | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign, +floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status); @@ -1162,7 +1173,7 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign, | normalized. 
*----------------------------------------------------------------------------*/ -floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, +floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status); @@ -1194,11 +1205,21 @@ float128 float128_round_to_int(float128, float_status *status); float128 float128_add(float128, float128, float_status *status); float128 float128_sub(float128, float128, float_status *status); float128 float128_mul(float128, float128, float_status *status); +float128 float128_muladd(float128, float128, float128, int, + float_status *status); float128 float128_div(float128, float128, float_status *status); float128 float128_rem(float128, float128, float_status *status); float128 float128_sqrt(float128, float_status *status); FloatRelation float128_compare(float128, float128, float_status *status); FloatRelation float128_compare_quiet(float128, float128, float_status *status); +float128 float128_min(float128, float128, float_status *status); +float128 float128_max(float128, float128, float_status *status); +float128 float128_minnum(float128, float128, float_status *status); +float128 float128_maxnum(float128, float128, float_status *status); +float128 float128_minnummag(float128, float128, float_status *status); +float128 float128_maxnummag(float128, float128, float_status *status); +float128 float128_minimum_number(float128, float128, float_status *status); +float128 float128_maximum_number(float128, float128, float_status *status); bool float128_is_quiet_nan(float128, float_status *status); bool float128_is_signaling_nan(float128, float_status *status); float128 float128_silence_nan(float128, float_status *status); diff --git a/include/glib-compat.h b/include/glib-compat.h index 695a96f7ea6..9e95c888f54 100644 --- a/include/glib-compat.h +++ b/include/glib-compat.h @@ -19,12 +19,12 @@ /* Ask for warnings for anything that was marked deprecated in * the defined version, or before. It is a candidate for rewrite. */ -#define GLIB_VERSION_MIN_REQUIRED GLIB_VERSION_2_48 +#define GLIB_VERSION_MIN_REQUIRED GLIB_VERSION_2_56 /* Ask for warnings if code tries to use function that did not * exist in the defined version. These risk breaking builds */ -#define GLIB_VERSION_MAX_ALLOWED GLIB_VERSION_2_48 +#define GLIB_VERSION_MAX_ALLOWED GLIB_VERSION_2_56 #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" @@ -68,15 +68,6 @@ * without generating warnings. */ -#if defined(_WIN32) && !GLIB_CHECK_VERSION(2, 50, 0) -/* - * g_poll has a problem on Windows when using - * timeouts < 10ms, so use wrapper. 
- */ -#define g_poll(fds, nfds, timeout) g_poll_fixed(fds, nfds, timeout) -gint g_poll_fixed(GPollFD *fds, guint nfds, gint timeout); -#endif - #if defined(G_OS_UNIX) /* * Note: The fallback implementation is not MT-safe, and it returns a copy of @@ -100,6 +91,23 @@ g_unix_get_passwd_entry_qemu(const gchar *user_name, GError **error) } #endif /* G_OS_UNIX */ +static inline bool +qemu_g_test_slow(void) +{ + static int cached = -1; + if (cached == -1) { + cached = g_test_slow() || getenv("G_TEST_SLOW") != NULL; + } + return cached; +} + +#undef g_test_slow +#undef g_test_thorough +#undef g_test_quick +#define g_test_slow() qemu_g_test_slow() +#define g_test_thorough() qemu_g_test_slow() +#define g_test_quick() (!qemu_g_test_slow()) + #pragma GCC diagnostic pop #endif diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h index cf9f44299ca..c97e8633ad8 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h @@ -48,31 +48,6 @@ typedef struct AcpiRsdpData { unsigned *xsdt_tbl_offset; } AcpiRsdpData; -/* Table structure from Linux kernel (the ACPI tables are under the - BSD license) */ - - -#define ACPI_TABLE_HEADER_DEF /* ACPI common table header */ \ - uint32_t signature; /* ACPI signature (4 ASCII characters) */ \ - uint32_t length; /* Length of table, in bytes, including header */ \ - uint8_t revision; /* ACPI Specification minor version # */ \ - uint8_t checksum; /* To make sum of entire table == 0 */ \ - uint8_t oem_id[6] \ - QEMU_NONSTRING; /* OEM identification */ \ - uint8_t oem_table_id[8] \ - QEMU_NONSTRING; /* OEM table identification */ \ - uint32_t oem_revision; /* OEM revision number */ \ - uint8_t asl_compiler_id[4] \ - QEMU_NONSTRING; /* ASL compiler vendor ID */ \ - uint32_t asl_compiler_revision; /* ASL compiler revision number */ - - -/* ACPI common table header */ -struct AcpiTableHeader { - ACPI_TABLE_HEADER_DEF -} QEMU_PACKED; -typedef struct AcpiTableHeader AcpiTableHeader; - struct AcpiGenericAddress { uint8_t space_id; /* Address space where struct or register exists */ uint8_t bit_width; /* Size in bits of given register */ @@ -80,7 +55,7 @@ struct AcpiGenericAddress { uint8_t access_width; /* ACPI 3.0: Minimum Access size (ACPI 3.0), ACPI 2.0: Reserved, Table 5-1 */ uint64_t address; /* 64-bit address of struct or register */ -} QEMU_PACKED; +}; typedef struct AcpiFadtData { struct AcpiGenericAddress pm1a_cnt; /* PM1a_CNT_BLK */ @@ -117,505 +92,4 @@ typedef struct AcpiFadtData { #define ACPI_FADT_ARM_PSCI_COMPLIANT (1 << 0) #define ACPI_FADT_ARM_PSCI_USE_HVC (1 << 1) -/* - * Serial Port Console Redirection Table (SPCR), Rev. 1.02 - * - * For .interface_type see Debug Port Table 2 (DBG2) serial port - * subtypes in Table 3, Rev. 
May 22, 2012 - */ -struct AcpiSerialPortConsoleRedirection { - ACPI_TABLE_HEADER_DEF - uint8_t interface_type; - uint8_t reserved1[3]; - struct AcpiGenericAddress base_address; - uint8_t interrupt_types; - uint8_t irq; - uint32_t gsi; - uint8_t baud; - uint8_t parity; - uint8_t stopbits; - uint8_t flowctrl; - uint8_t term_type; - uint8_t reserved2; - uint16_t pci_device_id; - uint16_t pci_vendor_id; - uint8_t pci_bus; - uint8_t pci_slot; - uint8_t pci_func; - uint32_t pci_flags; - uint8_t pci_seg; - uint32_t reserved3; -} QEMU_PACKED; -typedef struct AcpiSerialPortConsoleRedirection - AcpiSerialPortConsoleRedirection; - -/* - * ACPI 1.0 Root System Description Table (RSDT) - */ -struct AcpiRsdtDescriptorRev1 { - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint32_t table_offset_entry[]; /* Array of pointers to other */ - /* ACPI tables */ -} QEMU_PACKED; -typedef struct AcpiRsdtDescriptorRev1 AcpiRsdtDescriptorRev1; - -/* - * ACPI 2.0 eXtended System Description Table (XSDT) - */ -struct AcpiXsdtDescriptorRev2 { - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint64_t table_offset_entry[]; /* Array of pointers to other */ - /* ACPI tables */ -} QEMU_PACKED; -typedef struct AcpiXsdtDescriptorRev2 AcpiXsdtDescriptorRev2; - -/* - * ACPI 1.0 Firmware ACPI Control Structure (FACS) - */ -struct AcpiFacsDescriptorRev1 { - uint32_t signature; /* ACPI Signature */ - uint32_t length; /* Length of structure, in bytes */ - uint32_t hardware_signature; /* Hardware configuration signature */ - uint32_t firmware_waking_vector; /* ACPI OS waking vector */ - uint32_t global_lock; /* Global Lock */ - uint32_t flags; - uint8_t resverved3 [40]; /* Reserved - must be zero */ -} QEMU_PACKED; -typedef struct AcpiFacsDescriptorRev1 AcpiFacsDescriptorRev1; - -/* - * Differentiated System Description Table (DSDT) - */ - -/* - * MADT values and structures - */ - -/* Values for MADT PCATCompat */ - -#define ACPI_DUAL_PIC 0 -#define ACPI_MULTIPLE_APIC 1 - -/* Master MADT */ - -struct AcpiMultipleApicTable { - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint32_t local_apic_address; /* Physical address of local APIC */ - uint32_t flags; -} QEMU_PACKED; -typedef struct AcpiMultipleApicTable AcpiMultipleApicTable; - -/* Values for Type in APIC sub-headers */ - -#define ACPI_APIC_PROCESSOR 0 -#define ACPI_APIC_IO 1 -#define ACPI_APIC_XRUPT_OVERRIDE 2 -#define ACPI_APIC_NMI 3 -#define ACPI_APIC_LOCAL_NMI 4 -#define ACPI_APIC_ADDRESS_OVERRIDE 5 -#define ACPI_APIC_IO_SAPIC 6 -#define ACPI_APIC_LOCAL_SAPIC 7 -#define ACPI_APIC_XRUPT_SOURCE 8 -#define ACPI_APIC_LOCAL_X2APIC 9 -#define ACPI_APIC_LOCAL_X2APIC_NMI 10 -#define ACPI_APIC_GENERIC_CPU_INTERFACE 11 -#define ACPI_APIC_GENERIC_DISTRIBUTOR 12 -#define ACPI_APIC_GENERIC_MSI_FRAME 13 -#define ACPI_APIC_GENERIC_REDISTRIBUTOR 14 -#define ACPI_APIC_GENERIC_TRANSLATOR 15 -#define ACPI_APIC_RESERVED 16 /* 16 and greater are reserved */ - -/* - * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE) - */ -#define ACPI_SUB_HEADER_DEF /* Common ACPI sub-structure header */\ - uint8_t type; \ - uint8_t length; - -/* Sub-structures for MADT */ - -struct AcpiMadtProcessorApic { - ACPI_SUB_HEADER_DEF - uint8_t processor_id; /* ACPI processor id */ - uint8_t local_apic_id; /* Processor's local APIC id */ - uint32_t flags; -} QEMU_PACKED; -typedef struct AcpiMadtProcessorApic AcpiMadtProcessorApic; - -struct AcpiMadtIoApic { - ACPI_SUB_HEADER_DEF - uint8_t io_apic_id; /* I/O APIC ID */ - uint8_t reserved; /* Reserved - must be zero */ - uint32_t 
address; /* APIC physical address */ - uint32_t interrupt; /* Global system interrupt where INTI - * lines start */ -} QEMU_PACKED; -typedef struct AcpiMadtIoApic AcpiMadtIoApic; - -struct AcpiMadtIntsrcovr { - ACPI_SUB_HEADER_DEF - uint8_t bus; - uint8_t source; - uint32_t gsi; - uint16_t flags; -} QEMU_PACKED; -typedef struct AcpiMadtIntsrcovr AcpiMadtIntsrcovr; - -struct AcpiMadtLocalNmi { - ACPI_SUB_HEADER_DEF - uint8_t processor_id; /* ACPI processor id */ - uint16_t flags; /* MPS INTI flags */ - uint8_t lint; /* Local APIC LINT# */ -} QEMU_PACKED; -typedef struct AcpiMadtLocalNmi AcpiMadtLocalNmi; - -struct AcpiMadtProcessorX2Apic { - ACPI_SUB_HEADER_DEF - uint16_t reserved; - uint32_t x2apic_id; /* Processor's local x2APIC ID */ - uint32_t flags; - uint32_t uid; /* Processor object _UID */ -} QEMU_PACKED; -typedef struct AcpiMadtProcessorX2Apic AcpiMadtProcessorX2Apic; - -struct AcpiMadtLocalX2ApicNmi { - ACPI_SUB_HEADER_DEF - uint16_t flags; /* MPS INTI flags */ - uint32_t uid; /* Processor object _UID */ - uint8_t lint; /* Local APIC LINT# */ - uint8_t reserved[3]; /* Local APIC LINT# */ -} QEMU_PACKED; -typedef struct AcpiMadtLocalX2ApicNmi AcpiMadtLocalX2ApicNmi; - -struct AcpiMadtGenericCpuInterface { - ACPI_SUB_HEADER_DEF - uint16_t reserved; - uint32_t cpu_interface_number; - uint32_t uid; - uint32_t flags; - uint32_t parking_version; - uint32_t performance_interrupt; - uint64_t parked_address; - uint64_t base_address; - uint64_t gicv_base_address; - uint64_t gich_base_address; - uint32_t vgic_interrupt; - uint64_t gicr_base_address; - uint64_t arm_mpidr; -} QEMU_PACKED; - -typedef struct AcpiMadtGenericCpuInterface AcpiMadtGenericCpuInterface; - -/* GICC CPU Interface Flags */ -#define ACPI_MADT_GICC_ENABLED 1 - -struct AcpiMadtGenericDistributor { - ACPI_SUB_HEADER_DEF - uint16_t reserved; - uint32_t gic_id; - uint64_t base_address; - uint32_t global_irq_base; - /* ACPI 5.1 Errata 1228 Present GIC version in MADT table */ - uint8_t version; - uint8_t reserved2[3]; -} QEMU_PACKED; - -typedef struct AcpiMadtGenericDistributor AcpiMadtGenericDistributor; - -struct AcpiMadtGenericMsiFrame { - ACPI_SUB_HEADER_DEF - uint16_t reserved; - uint32_t gic_msi_frame_id; - uint64_t base_address; - uint32_t flags; - uint16_t spi_count; - uint16_t spi_base; -} QEMU_PACKED; - -typedef struct AcpiMadtGenericMsiFrame AcpiMadtGenericMsiFrame; - -struct AcpiMadtGenericRedistributor { - ACPI_SUB_HEADER_DEF - uint16_t reserved; - uint64_t base_address; - uint32_t range_length; -} QEMU_PACKED; - -typedef struct AcpiMadtGenericRedistributor AcpiMadtGenericRedistributor; - -struct AcpiMadtGenericTranslator { - ACPI_SUB_HEADER_DEF - uint16_t reserved; - uint32_t translation_id; - uint64_t base_address; - uint32_t reserved2; -} QEMU_PACKED; - -typedef struct AcpiMadtGenericTranslator AcpiMadtGenericTranslator; - -/* - * Generic Timer Description Table (GTDT) - */ -#define ACPI_GTDT_INTERRUPT_MODE_LEVEL (0 << 0) -#define ACPI_GTDT_INTERRUPT_MODE_EDGE (1 << 0) -#define ACPI_GTDT_CAP_ALWAYS_ON (1 << 2) - -struct AcpiGenericTimerTable { - ACPI_TABLE_HEADER_DEF - uint64_t counter_block_addresss; - uint32_t reserved; - uint32_t secure_el1_interrupt; - uint32_t secure_el1_flags; - uint32_t non_secure_el1_interrupt; - uint32_t non_secure_el1_flags; - uint32_t virtual_timer_interrupt; - uint32_t virtual_timer_flags; - uint32_t non_secure_el2_interrupt; - uint32_t non_secure_el2_flags; - uint64_t counter_read_block_address; - uint32_t platform_timer_count; - uint32_t platform_timer_offset; -} QEMU_PACKED; 
-typedef struct AcpiGenericTimerTable AcpiGenericTimerTable; - -/* - * HPET Description Table - */ -struct Acpi20Hpet { - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint32_t timer_block_id; - struct AcpiGenericAddress addr; - uint8_t hpet_number; - uint16_t min_tick; - uint8_t page_protect; -} QEMU_PACKED; -typedef struct Acpi20Hpet Acpi20Hpet; - -/* - * SRAT (NUMA topology description) table - */ - -struct AcpiSystemResourceAffinityTable { - ACPI_TABLE_HEADER_DEF - uint32_t reserved1; - uint32_t reserved2[2]; -} QEMU_PACKED; -typedef struct AcpiSystemResourceAffinityTable AcpiSystemResourceAffinityTable; - -#define ACPI_SRAT_PROCESSOR_APIC 0 -#define ACPI_SRAT_MEMORY 1 -#define ACPI_SRAT_PROCESSOR_x2APIC 2 -#define ACPI_SRAT_PROCESSOR_GICC 3 - -struct AcpiSratProcessorAffinity { - ACPI_SUB_HEADER_DEF - uint8_t proximity_lo; - uint8_t local_apic_id; - uint32_t flags; - uint8_t local_sapic_eid; - uint8_t proximity_hi[3]; - uint32_t reserved; -} QEMU_PACKED; -typedef struct AcpiSratProcessorAffinity AcpiSratProcessorAffinity; - -struct AcpiSratProcessorX2ApicAffinity { - ACPI_SUB_HEADER_DEF - uint16_t reserved; - uint32_t proximity_domain; - uint32_t x2apic_id; - uint32_t flags; - uint32_t clk_domain; - uint32_t reserved2; -} QEMU_PACKED; -typedef struct AcpiSratProcessorX2ApicAffinity AcpiSratProcessorX2ApicAffinity; - -struct AcpiSratMemoryAffinity { - ACPI_SUB_HEADER_DEF - uint32_t proximity; - uint16_t reserved1; - uint64_t base_addr; - uint64_t range_length; - uint32_t reserved2; - uint32_t flags; - uint32_t reserved3[2]; -} QEMU_PACKED; -typedef struct AcpiSratMemoryAffinity AcpiSratMemoryAffinity; - -struct AcpiSratProcessorGiccAffinity { - ACPI_SUB_HEADER_DEF - uint32_t proximity; - uint32_t acpi_processor_uid; - uint32_t flags; - uint32_t clock_domain; -} QEMU_PACKED; - -typedef struct AcpiSratProcessorGiccAffinity AcpiSratProcessorGiccAffinity; - -/* - * TCPA Description Table - * - * Following Level 00, Rev 00.37 of specs: - * http://www.trustedcomputinggroup.org/resources/tcg_acpi_specification - */ -struct Acpi20Tcpa { - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint16_t platform_class; - uint32_t log_area_minimum_length; - uint64_t log_area_start_address; -} QEMU_PACKED; -typedef struct Acpi20Tcpa Acpi20Tcpa; - -/* DMAR - DMA Remapping table r2.2 */ -struct AcpiTableDmar { - ACPI_TABLE_HEADER_DEF - uint8_t host_address_width; /* Maximum DMA physical addressability */ - uint8_t flags; - uint8_t reserved[10]; -} QEMU_PACKED; -typedef struct AcpiTableDmar AcpiTableDmar; - -/* Masks for Flags field above */ -#define ACPI_DMAR_INTR_REMAP 1 -#define ACPI_DMAR_X2APIC_OPT_OUT (1 << 1) - -/* Values for sub-structure type for DMAR */ -enum { - ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, /* DRHD */ - ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, /* RMRR */ - ACPI_DMAR_TYPE_ATSR = 2, /* ATSR */ - ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3, /* RHSR */ - ACPI_DMAR_TYPE_ANDD = 4, /* ANDD */ - ACPI_DMAR_TYPE_RESERVED = 5 /* Reserved for furture use */ -}; - -/* - * Sub-structures for DMAR - */ - -/* Device scope structure for DRHD. 
*/ -struct AcpiDmarDeviceScope { - uint8_t entry_type; - uint8_t length; - uint16_t reserved; - uint8_t enumeration_id; - uint8_t bus; - struct { - uint8_t device; - uint8_t function; - } path[]; -} QEMU_PACKED; -typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope; - -/* Type 0: Hardware Unit Definition */ -struct AcpiDmarHardwareUnit { - uint16_t type; - uint16_t length; - uint8_t flags; - uint8_t reserved; - uint16_t pci_segment; /* The PCI Segment associated with this unit */ - uint64_t address; /* Base address of remapping hardware register-set */ - AcpiDmarDeviceScope scope[]; -} QEMU_PACKED; -typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit; - -/* Type 2: Root Port ATS Capability Reporting Structure */ -struct AcpiDmarRootPortATS { - uint16_t type; - uint16_t length; - uint8_t flags; - uint8_t reserved; - uint16_t pci_segment; - AcpiDmarDeviceScope scope[]; -} QEMU_PACKED; -typedef struct AcpiDmarRootPortATS AcpiDmarRootPortATS; - -/* Masks for Flags field above */ -#define ACPI_DMAR_INCLUDE_PCI_ALL 1 -#define ACPI_DMAR_ATSR_ALL_PORTS 1 - -/* - * Input Output Remapping Table (IORT) - * Conforms to "IO Remapping Table System Software on ARM Platforms", - * Document number: ARM DEN 0049B, October 2015 - */ - -struct AcpiIortTable { - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint32_t node_count; - uint32_t node_offset; - uint32_t reserved; -} QEMU_PACKED; -typedef struct AcpiIortTable AcpiIortTable; - -/* - * IORT node types - */ - -#define ACPI_IORT_NODE_HEADER_DEF /* Node format common fields */ \ - uint8_t type; \ - uint16_t length; \ - uint8_t revision; \ - uint32_t reserved; \ - uint32_t mapping_count; \ - uint32_t mapping_offset; - -/* Values for node Type above */ -enum { - ACPI_IORT_NODE_ITS_GROUP = 0x00, - ACPI_IORT_NODE_NAMED_COMPONENT = 0x01, - ACPI_IORT_NODE_PCI_ROOT_COMPLEX = 0x02, - ACPI_IORT_NODE_SMMU = 0x03, - ACPI_IORT_NODE_SMMU_V3 = 0x04 -}; - -struct AcpiIortIdMapping { - uint32_t input_base; - uint32_t id_count; - uint32_t output_base; - uint32_t output_reference; - uint32_t flags; -} QEMU_PACKED; -typedef struct AcpiIortIdMapping AcpiIortIdMapping; - -struct AcpiIortMemoryAccess { - uint32_t cache_coherency; - uint8_t hints; - uint16_t reserved; - uint8_t memory_flags; -} QEMU_PACKED; -typedef struct AcpiIortMemoryAccess AcpiIortMemoryAccess; - -struct AcpiIortItsGroup { - ACPI_IORT_NODE_HEADER_DEF - uint32_t its_count; - uint32_t identifiers[]; -} QEMU_PACKED; -typedef struct AcpiIortItsGroup AcpiIortItsGroup; - -#define ACPI_IORT_SMMU_V3_COHACC_OVERRIDE 1 - -struct AcpiIortSmmu3 { - ACPI_IORT_NODE_HEADER_DEF - uint64_t base_address; - uint32_t flags; - uint32_t reserved2; - uint64_t vatos_address; - uint32_t model; - uint32_t event_gsiv; - uint32_t pri_gsiv; - uint32_t gerr_gsiv; - uint32_t sync_gsiv; - AcpiIortIdMapping id_mapping_array[]; -} QEMU_PACKED; -typedef struct AcpiIortSmmu3 AcpiIortSmmu3; - -struct AcpiIortRC { - ACPI_IORT_NODE_HEADER_DEF - AcpiIortMemoryAccess memory_properties; - uint32_t ats_attribute; - uint32_t pci_segment_number; - AcpiIortIdMapping id_mapping_array[]; -} QEMU_PACKED; -typedef struct AcpiIortRC AcpiIortRC; - #endif diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h index 9e8a76f2e2a..cc0d3707458 100644 --- a/include/hw/acpi/acpi.h +++ b/include/hw/acpi/acpi.h @@ -47,6 +47,8 @@ #define ACPI_PM_PROP_PM_IO_BASE "pm_io_base" #define ACPI_PM_PROP_GPE0_BLK "gpe0_blk" #define ACPI_PM_PROP_GPE0_BLK_LEN "gpe0_blk_len" +#define ACPI_PM_PROP_ACPI_PCIHP_BRIDGE "acpi-pci-hotplug-with-bridge-support" +#define 
ACPI_PM_PROP_ACPI_PCI_ROOTHP "acpi-root-pci-hotplug" /* PM Timer ticks per second (HZ) */ #define PM_TIMER_FREQUENCY 3579545 diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h index 769ff55c7ee..ea6056ab926 100644 --- a/include/hw/acpi/acpi_dev_interface.h +++ b/include/hw/acpi/acpi_dev_interface.h @@ -53,6 +53,7 @@ struct AcpiDeviceIfClass { void (*ospm_status)(AcpiDeviceIf *adev, ACPIOSTInfoList ***list); void (*send_event)(AcpiDeviceIf *adev, AcpiEventStatusBits ev); void (*madt_cpu)(AcpiDeviceIf *adev, int uid, - const CPUArchIdList *apic_ids, GArray *entry); + const CPUArchIdList *apic_ids, GArray *entry, + bool force_enabled); }; #endif diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 471266d7391..8346003a224 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -413,10 +413,37 @@ Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target); Aml *aml_object_type(Aml *object); void build_append_int_noprefix(GArray *table, uint64_t value, int size); -void -build_header(BIOSLinker *linker, GArray *table_data, - AcpiTableHeader *h, const char *sig, int len, uint8_t rev, - const char *oem_id, const char *oem_table_id); + +typedef struct AcpiTable { + const char *sig; + const uint8_t rev; + const char *oem_id; + const char *oem_table_id; + /* private vars tracking table state */ + GArray *array; + unsigned table_offset; +} AcpiTable; + +/** + * acpi_table_begin: + * initializes table header and keeps track of + * table data/offsets + * @desc: ACPI table descriptor + * @array: blob where the ACPI table will be composed/stored. + */ +void acpi_table_begin(AcpiTable *desc, GArray *array); + +/** + * acpi_table_end: + * sets actual table length and tells bios loader + * where table is for the later initialization on + * guest side. 
+ * @linker: reference to BIOSLinker object to use for the table + * @table: ACPI table descriptor that was used with @acpi_table_begin + * counterpart + */ +void acpi_table_end(BIOSLinker *linker, AcpiTable *table); + void *acpi_data_push(GArray *table_data, unsigned size); unsigned acpi_data_len(GArray *table); void acpi_add_table(GArray *table_offsets, GArray *table_data); @@ -456,12 +483,15 @@ Aml *build_crs(PCIHostState *host, CrsRangeSet *range_set, uint32_t io_offset, uint32_t mmio32_offset, uint64_t mmio64_offset, uint16_t bus_nr_offset); -void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, +void build_srat_memory(GArray *table_data, uint64_t base, uint64_t len, int node, MemoryAffinityFlags flags); void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms, const char *oem_id, const char *oem_table_id); +void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + const char *oem_id, const char *oem_table_id); + void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, const char *oem_id, const char *oem_table_id); diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h index 6bed92e8fc5..d49217c445f 100644 --- a/include/hw/acpi/generic_event_device.h +++ b/include/hw/acpi/generic_event_device.h @@ -70,8 +70,6 @@ OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED) #define TYPE_ACPI_GED_X86 "acpi-ged-x86" -#define ACPI_GED_X86(obj) \ - OBJECT_CHECK(AcpiGedX86State, (obj), TYPE_ACPI_GED_X86) #define ACPI_GED_EVT_SEL_OFFSET 0x0 #define ACPI_GED_EVT_SEL_LEN 0x4 diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h index 2ae8bc1ded3..674f6958e90 100644 --- a/include/hw/acpi/ghes.h +++ b/include/hw/acpi/ghes.h @@ -64,6 +64,7 @@ enum { typedef struct AcpiGhesState { uint64_t ghes_addr_le; + bool present; /* True if GHES is present at all on this board */ } AcpiGhesState; void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker); @@ -72,4 +73,12 @@ void acpi_build_hest(GArray *table_data, BIOSLinker *linker, void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s, GArray *hardware_errors); int acpi_ghes_record_errors(uint8_t notify, uint64_t error_physical_addr); + +/** + * acpi_ghes_present: Report whether ACPI GHES table is present + * + * Returns: true if the system has an ACPI GHES table and it is + * safe to call acpi_ghes_record_errors() to record a memory error. 
+ */ +bool acpi_ghes_present(void); #endif diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index df519e40b57..7ca92843c6b 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -24,10 +24,13 @@ #include "hw/acpi/acpi.h" #include "hw/acpi/cpu_hotplug.h" #include "hw/acpi/cpu.h" +#include "hw/acpi/pcihp.h" #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/acpi_dev_interface.h" #include "hw/acpi/tco.h" +#define ACPI_PCIHP_ADDR_ICH9 0x0cc0 + typedef struct ICH9LPCPMRegs { /* * In ich9 spec says that pm1_cnt register is 32bit width and @@ -53,6 +56,9 @@ typedef struct ICH9LPCPMRegs { AcpiCpuHotplug gpe_cpu; CPUHotplugState cpuhp_state; + bool keep_pci_slot_hpc; + bool use_acpi_hotplug_bridge; + AcpiPciHpState acpi_pci_hotplug; MemHotplugState acpi_memory_hotplug; uint8_t disable_s3; diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h index 2dd90aea309..af1a169fc32 100644 --- a/include/hw/acpi/pcihp.h +++ b/include/hw/acpi/pcihp.h @@ -55,7 +55,8 @@ typedef struct AcpiPciHpState { } AcpiPciHpState; void acpi_pcihp_init(Object *owner, AcpiPciHpState *, PCIBus *root, - MemoryRegion *address_space_io, bool bridges_enabled); + MemoryRegion *address_space_io, bool bridges_enabled, + uint16_t io_base); void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); diff --git a/include/hw/acpi/tpm.h b/include/hw/acpi/tpm.h index 1a2a57a21f0..559ba6906c8 100644 --- a/include/hw/acpi/tpm.h +++ b/include/hw/acpi/tpm.h @@ -21,6 +21,8 @@ #include "hw/acpi/aml-build.h" #include "sysemu/tpm.h" +#ifdef CONFIG_TPM + #define TPM_TIS_ADDR_BASE 0xFED40000 #define TPM_TIS_ADDR_SIZE 0x5000 @@ -209,4 +211,6 @@ REG32(CRB_DATA_BUFFER, 0x80) void tpm_build_ppi_acpi(TPMIf *tpm, Aml *dev); +#endif /* CONFIG_TPM */ + #endif /* HW_ACPI_TPM_H */ diff --git a/include/hw/adc/aspeed_adc.h b/include/hw/adc/aspeed_adc.h new file mode 100644 index 00000000000..2f166e8be11 --- /dev/null +++ b/include/hw/adc/aspeed_adc.h @@ -0,0 +1,55 @@ +/* + * Aspeed ADC + * + * Copyright 2017-2021 IBM Corp. 
+ * + * Andrew Jeffery + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_ADC_ASPEED_ADC_H +#define HW_ADC_ASPEED_ADC_H + +#include "hw/sysbus.h" + +#define TYPE_ASPEED_ADC "aspeed.adc" +#define TYPE_ASPEED_2400_ADC TYPE_ASPEED_ADC "-ast2400" +#define TYPE_ASPEED_2500_ADC TYPE_ASPEED_ADC "-ast2500" +#define TYPE_ASPEED_2600_ADC TYPE_ASPEED_ADC "-ast2600" +OBJECT_DECLARE_TYPE(AspeedADCState, AspeedADCClass, ASPEED_ADC) + +#define TYPE_ASPEED_ADC_ENGINE "aspeed.adc.engine" +OBJECT_DECLARE_SIMPLE_TYPE(AspeedADCEngineState, ASPEED_ADC_ENGINE) + +#define ASPEED_ADC_NR_CHANNELS 16 +#define ASPEED_ADC_NR_REGS (0xD0 >> 2) + +struct AspeedADCEngineState { + /* */ + SysBusDevice parent; + + MemoryRegion mmio; + qemu_irq irq; + uint32_t engine_id; + uint32_t nr_channels; + uint32_t regs[ASPEED_ADC_NR_REGS]; +}; + +struct AspeedADCState { + /* */ + SysBusDevice parent; + + MemoryRegion mmio; + qemu_irq irq; + + AspeedADCEngineState engines[2]; +}; + +struct AspeedADCClass { + SysBusDeviceClass parent_class; + + uint32_t nr_engines; +}; + +#endif /* HW_ADC_ASPEED_ADC_H */ diff --git a/include/hw/misc/max111x.h b/include/hw/adc/max111x.h similarity index 100% rename from include/hw/misc/max111x.h rename to include/hw/adc/max111x.h diff --git a/include/hw/misc/zynq-xadc.h b/include/hw/adc/zynq-xadc.h similarity index 100% rename from include/hw/misc/zynq-xadc.h rename to include/hw/adc/zynq-xadc.h diff --git a/include/hw/arm/allwinner-h3.h b/include/hw/arm/allwinner-h3.h index cc308a5d2c9..63025fb27c8 100644 --- a/include/hw/arm/allwinner-h3.h +++ b/include/hw/arm/allwinner-h3.h @@ -18,7 +18,7 @@ */ /* - * The Allwinner H3 is a System on Chip containing four ARM Cortex A7 + * The Allwinner H3 is a System on Chip containing four ARM Cortex-A7 * processor cores. Features and specifications include DDR2/DDR3 memory, * SD/MMC storage cards, 10/100/1000Mbit Ethernet, USB 2.0, HDMI and * various I/O modules. diff --git a/include/hw/arm/armsse.h b/include/hw/arm/armsse.h index 36592be62c5..9648e7a4193 100644 --- a/include/hw/arm/armsse.h +++ b/include/hw/arm/armsse.h @@ -198,6 +198,8 @@ struct ARMSSE { MemoryRegion alias2; MemoryRegion alias3[SSE_MAX_CPUS]; MemoryRegion sram[MAX_SRAM_BANKS]; + MemoryRegion itcm; + MemoryRegion dtcm; qemu_irq *exp_irqs[SSE_MAX_CPUS]; qemu_irq ppc0_irq; diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h index 189b23a8ceb..b7ba0ff409c 100644 --- a/include/hw/arm/armv7m.h +++ b/include/hw/arm/armv7m.h @@ -12,8 +12,10 @@ #include "hw/sysbus.h" #include "hw/intc/armv7m_nvic.h" +#include "hw/misc/armv7m_ras.h" #include "target/arm/idau.h" #include "qom/object.h" +#include "hw/clock.h" #define TYPE_BITBAND "ARM-bitband-memory" OBJECT_DECLARE_SIMPLE_TYPE(BitBandState, BITBAND) @@ -46,9 +48,12 @@ OBJECT_DECLARE_SIMPLE_TYPE(ARMv7MState, ARMV7M) * devices will be automatically layered on top of this view.) 
* + Property "idau": IDAU interface (forwarded to CPU object) * + Property "init-svtor": secure VTOR reset value (forwarded to CPU object) + * + Property "init-nsvtor": non-secure VTOR reset value (forwarded to CPU object) * + Property "vfp": enable VFP (forwarded to CPU object) * + Property "dsp": enable DSP (forwarded to CPU object) * + Property "enable-bitband": expose bitbanded IO + * + Clock input "refclk" is the external reference clock for the systick timers + * + Clock input "cpuclk" is the main CPU clock */ struct ARMv7MState { /*< private >*/ @@ -57,11 +62,31 @@ struct ARMv7MState { NVICState nvic; BitBandState bitband[ARMV7M_NUM_BITBANDS]; ARMCPU *cpu; + ARMv7MRAS ras; + SysTickState systick[M_REG_NUM_BANKS]; /* MemoryRegion we pass to the CPU, with our devices layered on * top of the ones the board provides in board_memory. */ MemoryRegion container; + /* + * MemoryRegion which passes the transaction to either the S or the + * NS systick device depending on the transaction attributes + */ + MemoryRegion systickmem; + /* + * MemoryRegion which enforces the S/NS handling of the systick + * device NS alias region and passes the transaction to the + * NS systick device if appropriate. + */ + MemoryRegion systick_ns_mem; + /* Ditto, for the sysregs region provided by the NVIC */ + MemoryRegion sysreg_ns_mem; + /* MR providing default PPB behaviour */ + MemoryRegion defaultmem; + + Clock *refclk; + Clock *cpuclk; /* Properties */ char *cpu_type; @@ -69,6 +94,7 @@ struct ARMv7MState { MemoryRegion *board_memory; Object *idau; uint32_t init_svtor; + uint32_t init_nsvtor; bool enable_bitband; bool start_powered_off; bool vfp; diff --git a/include/hw/arm/aspeed.h b/include/hw/arm/aspeed.h index c9747b15fc5..cbeacb214ca 100644 --- a/include/hw/arm/aspeed.h +++ b/include/hw/arm/aspeed.h @@ -38,6 +38,7 @@ struct AspeedMachineClass { uint32_t num_cs; uint32_t macs_mask; void (*i2c_init)(AspeedMachineState *bmc); + uint32_t uart_default; }; diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h index 9359d6da336..8139358549d 100644 --- a/include/hw/arm/aspeed_soc.h +++ b/include/hw/arm/aspeed_soc.h @@ -15,12 +15,14 @@ #include "hw/cpu/a15mpcore.h" #include "hw/intc/aspeed_vic.h" #include "hw/misc/aspeed_scu.h" +#include "hw/adc/aspeed_adc.h" #include "hw/misc/aspeed_sdmc.h" #include "hw/misc/aspeed_xdma.h" #include "hw/timer/aspeed_timer.h" #include "hw/rtc/aspeed_rtc.h" #include "hw/i2c/aspeed_i2c.h" #include "hw/ssi/aspeed_smc.h" +#include "hw/misc/aspeed_hace.h" #include "hw/watchdog/wdt_aspeed.h" #include "hw/net/ftgmac100.h" #include "target/arm/cpu.h" @@ -50,7 +52,9 @@ struct AspeedSoCState { AspeedTimerCtrlState timerctrl; AspeedI2CState i2c; AspeedSCUState scu; + AspeedHACEState hace; AspeedXDMAState xdma; + AspeedADCState adc; AspeedSMCState fmc; AspeedSMCState spi[ASPEED_SPIS_NUM]; EHCISysBusState ehci[ASPEED_EHCIS_NUM]; @@ -63,6 +67,7 @@ struct AspeedSoCState { AspeedSDHCIState sdhci; AspeedSDHCIState emmc; AspeedLPCState lpc; + uint32_t uart_default; }; #define TYPE_ASPEED_SOC "aspeed-soc" @@ -133,6 +138,7 @@ enum { ASPEED_DEV_XDMA, ASPEED_DEV_EMMC, ASPEED_DEV_KCS, + ASPEED_DEV_HACE, }; #endif /* ASPEED_SOC_H */ diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h index 479e2346e80..d864879421a 100644 --- a/include/hw/arm/bcm2835_peripherals.h +++ b/include/hw/arm/bcm2835_peripherals.h @@ -24,6 +24,7 @@ #include "hw/misc/bcm2835_mphi.h" #include "hw/misc/bcm2835_thermal.h" #include "hw/misc/bcm2835_cprman.h" +#include 
"hw/misc/bcm2835_powermgt.h" #include "hw/sd/sdhci.h" #include "hw/sd/bcm2835_sdhost.h" #include "hw/gpio/bcm2835_gpio.h" @@ -48,7 +49,7 @@ struct BCM2835PeripheralState { BCM2835MphiState mphi; UnimplementedDeviceState txp; UnimplementedDeviceState armtmr; - UnimplementedDeviceState powermgt; + BCM2835PowerMgtState powermgt; BCM2835CprmanState cprman; PL011State uart0; BCM2835AuxState aux; diff --git a/include/hw/arm/fsl-imx7.h b/include/hw/arm/fsl-imx7.h index f5d527a4906..1c5fa6fd676 100644 --- a/include/hw/arm/fsl-imx7.h +++ b/include/hw/arm/fsl-imx7.h @@ -174,6 +174,11 @@ enum FslIMX7MemoryMap { FSL_IMX7_UART6_ADDR = 0x30A80000, FSL_IMX7_UART7_ADDR = 0x30A90000, + FSL_IMX7_SAI1_ADDR = 0x308A0000, + FSL_IMX7_SAI2_ADDR = 0x308B0000, + FSL_IMX7_SAI3_ADDR = 0x308C0000, + FSL_IMX7_SAIn_SIZE = 0x10000, + FSL_IMX7_ENET1_ADDR = 0x30BE0000, FSL_IMX7_ENET2_ADDR = 0x30BF0000, diff --git a/include/hw/arm/msf2-soc.h b/include/hw/arm/msf2-soc.h index d4061846855..ce417a6266a 100644 --- a/include/hw/arm/msf2-soc.h +++ b/include/hw/arm/msf2-soc.h @@ -30,6 +30,7 @@ #include "hw/misc/msf2-sysreg.h" #include "hw/ssi/mss-spi.h" #include "hw/net/msf2-emac.h" +#include "hw/clock.h" #include "qom/object.h" #define TYPE_MSF2_SOC "msf2-soc" @@ -57,7 +58,8 @@ struct MSF2State { uint64_t envm_size; uint64_t esram_size; - uint32_t m3clk; + Clock *m3clk; + Clock *refclk; uint8_t apb0div; uint8_t apb1div; @@ -65,6 +67,10 @@ struct MSF2State { MSSTimerState timer; MSSSpiState spi[MSF2_NUM_SPIS]; MSF2EmacState emac; + + MemoryRegion nvm; + MemoryRegion nvm_alias; + MemoryRegion sram; }; #endif diff --git a/include/hw/arm/npcm7xx.h b/include/hw/arm/npcm7xx.h index 61ecc57ab90..ce593235d94 100644 --- a/include/hw/arm/npcm7xx.h +++ b/include/hw/arm/npcm7xx.h @@ -35,6 +35,7 @@ #include "hw/usb/hcd-ehci.h" #include "hw/usb/hcd-ohci.h" #include "target/arm/cpu.h" +#include "hw/sd/npcm7xx_sdhci.h" #define NPCM7XX_MAX_NUM_CPUS (2) @@ -103,6 +104,7 @@ typedef struct NPCM7xxState { OHCISysBusState ohci; NPCM7xxFIUState fiu[2]; NPCM7xxEMCState emc[2]; + NPCM7xxSDHCIState mmc; } NPCM7xxState; #define TYPE_NPCM7XX "npcm7xx" diff --git a/include/hw/arm/nrf51_soc.h b/include/hw/arm/nrf51_soc.h index f8a6725b775..e52a56e75e0 100644 --- a/include/hw/arm/nrf51_soc.h +++ b/include/hw/arm/nrf51_soc.h @@ -17,6 +17,7 @@ #include "hw/gpio/nrf51_gpio.h" #include "hw/nvram/nrf51_nvm.h" #include "hw/timer/nrf51_timer.h" +#include "hw/clock.h" #include "qom/object.h" #define TYPE_NRF51_SOC "nrf51-soc" @@ -50,6 +51,7 @@ struct NRF51State { MemoryRegion container; + Clock *sysclk; }; #endif diff --git a/include/hw/arm/stm32f100_soc.h b/include/hw/arm/stm32f100_soc.h new file mode 100644 index 00000000000..40cd415b284 --- /dev/null +++ b/include/hw/arm/stm32f100_soc.h @@ -0,0 +1,65 @@ +/* + * STM32F100 SoC + * + * Copyright (c) 2021 Alexandre Iooss + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_ARM_STM32F100_SOC_H +#define HW_ARM_STM32F100_SOC_H + +#include "hw/char/stm32f2xx_usart.h" +#include "hw/ssi/stm32f2xx_spi.h" +#include "hw/arm/armv7m.h" +#include "qom/object.h" +#include "hw/clock.h" + +#define TYPE_STM32F100_SOC "stm32f100-soc" +OBJECT_DECLARE_SIMPLE_TYPE(STM32F100State, STM32F100_SOC) + +#define STM_NUM_USARTS 3 +#define STM_NUM_SPIS 2 + +#define FLASH_BASE_ADDRESS 0x08000000 +#define FLASH_SIZE (128 * 1024) +#define SRAM_BASE_ADDRESS 0x20000000 +#define SRAM_SIZE (8 * 1024) + +struct STM32F100State { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + char *cpu_type; + + ARMv7MState armv7m; + + STM32F2XXUsartState usart[STM_NUM_USARTS]; + STM32F2XXSPIState spi[STM_NUM_SPIS]; + + MemoryRegion sram; + MemoryRegion flash; + MemoryRegion flash_alias; + + Clock *sysclk; + Clock *refclk; +}; + +#endif diff --git a/include/hw/arm/stm32f205_soc.h b/include/hw/arm/stm32f205_soc.h index 985ff63aa9e..849d3ed8891 100644 --- a/include/hw/arm/stm32f205_soc.h +++ b/include/hw/arm/stm32f205_soc.h @@ -32,6 +32,7 @@ #include "hw/or-irq.h" #include "hw/ssi/stm32f2xx_spi.h" #include "hw/arm/armv7m.h" +#include "hw/clock.h" #include "qom/object.h" #define TYPE_STM32F205_SOC "stm32f205-soc" @@ -63,6 +64,13 @@ struct STM32F205State { STM32F2XXSPIState spi[STM_NUM_SPIS]; qemu_or_irq *adc_irqs; + + MemoryRegion sram; + MemoryRegion flash; + MemoryRegion flash_alias; + + Clock *sysclk; + Clock *refclk; }; #endif diff --git a/include/hw/arm/stm32f405_soc.h b/include/hw/arm/stm32f405_soc.h index 347105e709b..5bb0c8d5697 100644 --- a/include/hw/arm/stm32f405_soc.h +++ b/include/hw/arm/stm32f405_soc.h @@ -68,6 +68,9 @@ struct STM32F405State { MemoryRegion sram; MemoryRegion flash; MemoryRegion flash_alias; + + Clock *sysclk; + Clock *refclk; }; #endif diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index 921416f918b..dc6b66ffc8f 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -120,15 +120,18 @@ struct VirtMachineClass { MachineClass parent; bool disallow_affinity_adjustment; bool no_its; + bool no_tcg_its; bool no_pmu; bool claim_edge_triggered_timers; bool smbios_old_sys_ver; bool no_highmem_ecam; - bool no_ged; /* Machines < 4.2 has no support for ACPI GED device */ + bool no_ged; /* Machines < 4.2 have no support for ACPI GED device */ bool kvm_no_adjvtime; bool no_kvm_steal_time; bool acpi_expose_flash; bool no_secure_gpio; + /* Machines < 6.2 have no support for describing cpu topology to guest */ + bool no_cpu_topology; }; struct VirtMachineState { @@ -141,12 +144,14 @@ struct VirtMachineState { bool highmem; bool highmem_ecam; bool its; + bool tcg_its; bool virt; bool ras; bool mte; OnOffAuto acpi; VirtGICType gic_version; VirtIOMMUType iommu; + bool default_bus_bypass_iommu; VirtMSIControllerType msi_controller; uint16_t virtio_iommu_bdf; struct arm_boot_info bootinfo; diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h index 22a8fa5d11b..895ba12c61e 100644 --- a/include/hw/arm/xlnx-versal.h +++ 
b/include/hw/arm/xlnx-versal.h @@ -24,6 +24,8 @@ #include "qom/object.h" #include "hw/usb/xlnx-usb-subsystem.h" #include "hw/misc/xlnx-versal-xramc.h" +#include "hw/nvram/xlnx-bbram.h" +#include "hw/nvram/xlnx-versal-efuse.h" #define TYPE_XLNX_VERSAL "xlnx-versal" OBJECT_DECLARE_SIMPLE_TYPE(Versal, XLNX_VERSAL) @@ -79,6 +81,10 @@ struct Versal { } iou; XlnxZynqMPRTC rtc; + XlnxBBRam bbram; + XlnxEFuse efuse; + XlnxVersalEFuseCtrl efuse_ctrl; + XlnxVersalEFuseCache efuse_cache; } pmc; struct { @@ -105,8 +111,10 @@ struct Versal { #define VERSAL_GEM1_WAKE_IRQ_0 59 #define VERSAL_ADMA_IRQ_0 60 #define VERSAL_XRAM_IRQ_0 79 +#define VERSAL_BBRAM_APB_IRQ_0 121 #define VERSAL_RTC_APB_ERR_IRQ 121 #define VERSAL_SD0_IRQ_0 126 +#define VERSAL_EFUSE_IRQ 139 #define VERSAL_RTC_ALARM_IRQ 142 #define VERSAL_RTC_SECONDS_IRQ 143 @@ -167,9 +175,18 @@ struct Versal { #define MM_IOU_SCNTRS_SIZE 0x10000 #define MM_FPD_CRF 0xfd1a0000U #define MM_FPD_CRF_SIZE 0x140000 +#define MM_FPD_FPD_APU 0xfd5c0000 +#define MM_FPD_FPD_APU_SIZE 0x100 #define MM_PMC_SD0 0xf1040000U #define MM_PMC_SD0_SIZE 0x10000 +#define MM_PMC_BBRAM_CTRL 0xf11f0000 +#define MM_PMC_BBRAM_CTRL_SIZE 0x00050 +#define MM_PMC_EFUSE_CTRL 0xf1240000 +#define MM_PMC_EFUSE_CTRL_SIZE 0x00104 +#define MM_PMC_EFUSE_CACHE 0xf1250000 +#define MM_PMC_EFUSE_CACHE_SIZE 0x00C00 + #define MM_PMC_CRP 0xf1260000U #define MM_PMC_CRP_SIZE 0x10000 #define MM_PMC_RTC 0xf12a0000 diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h index d3e2ef97f67..062e637fe49 100644 --- a/include/hw/arm/xlnx-zynqmp.h +++ b/include/hw/arm/xlnx-zynqmp.h @@ -36,6 +36,8 @@ #include "qom/object.h" #include "net/can_emu.h" #include "hw/dma/xlnx_csu_dma.h" +#include "hw/nvram/xlnx-bbram.h" +#include "hw/nvram/xlnx-zynqmp-efuse.h" #define TYPE_XLNX_ZYNQMP "xlnx-zynqmp" OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPState, XLNX_ZYNQMP) @@ -79,6 +81,11 @@ OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPState, XLNX_ZYNQMP) #define XLNX_ZYNQMP_MAX_RAM_SIZE (XLNX_ZYNQMP_MAX_LOW_RAM_SIZE + \ XLNX_ZYNQMP_MAX_HIGH_RAM_SIZE) +/* + * Unimplemented mmio regions needed to boot some images. 
+ */ +#define XLNX_ZYNQMP_NUM_UNIMP_AREAS 1 + struct XlnxZynqMPState { /*< private >*/ DeviceState parent_obj; @@ -95,6 +102,11 @@ struct XlnxZynqMPState { MemoryRegion *ddr_ram; MemoryRegion ddr_ram_low, ddr_ram_high; + XlnxBBRam bbram; + XlnxEFuse efuse; + XlnxZynqMPEFuse efuse_ctrl; + + MemoryRegion mr_unimp[XLNX_ZYNQMP_NUM_UNIMP_AREAS]; CadenceGEMState gem[XLNX_ZYNQMP_NUM_GEMS]; CadenceUARTState uart[XLNX_ZYNQMP_NUM_UARTS]; diff --git a/include/hw/block/block.h b/include/hw/block/block.h index c172cbe65f1..5902c0440a5 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -19,6 +19,7 @@ typedef struct BlockConf { BlockBackend *blk; + OnOffAuto backend_defaults; uint32_t physical_block_size; uint32_t logical_block_size; uint32_t min_io_size; @@ -48,6 +49,8 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) } #define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf) \ + DEFINE_PROP_ON_OFF_AUTO("backend_defaults", _state, \ + _conf.backend_defaults, ON_OFF_AUTO_AUTO), \ DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \ _conf.logical_block_size), \ DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \ diff --git a/include/hw/block/flash.h b/include/hw/block/flash.h index 7dde0adcee7..86d8363bb09 100644 --- a/include/hw/block/flash.h +++ b/include/hw/block/flash.h @@ -74,6 +74,6 @@ typedef struct { uint8_t ecc_digest(ECCState *s, uint8_t sample); void ecc_reset(ECCState *s); -extern VMStateDescription vmstate_ecc_state; +extern const VMStateDescription vmstate_ecc_state; #endif diff --git a/include/hw/boards.h b/include/hw/boards.h index ad6c8fd5376..9c1c1901046 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -26,7 +26,6 @@ OBJECT_DECLARE_TYPE(MachineState, MachineClass, MACHINE) extern MachineState *current_machine; void machine_run_board_init(MachineState *machine); -bool machine_smp_parse(MachineState *ms, QemuOpts *opts, Error **errp); bool machine_usb(MachineState *machine); int machine_phandle_start(MachineState *machine); bool machine_dump_guest_core(MachineState *machine); @@ -35,6 +34,7 @@ HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine); void machine_set_cpu_numa_node(MachineState *machine, const CpuInstanceProperties *props, Error **errp); +void smp_parse(MachineState *ms, SMPConfiguration *config, Error **errp); /** * machine_class_allow_dynamic_sysbus_dev: Add type to list of valid devices @@ -52,6 +52,21 @@ void machine_set_cpu_numa_node(MachineState *machine, */ void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type); +/** + * device_type_is_dynamic_sysbus: Check if type is an allowed sysbus device + * type for the machine class. + * @mc: Machine class + * @type: type to check (should be a subtype of TYPE_SYS_BUS_DEVICE) + * + * Returns: true if @type is a type in the machine's list of + * dynamically pluggable sysbus devices; otherwise false. + * + * Check if the QOM type @type is in the list of allowed sysbus device + * types (see machine_class_allowed_dynamic_sysbus_dev()). + * Note that if @type has a parent type in the list, it is allowed too. 
+ */ +bool device_type_is_dynamic_sysbus(MachineClass *mc, const char *type); + /** * device_is_dynamic_sysbus: test whether device is a dynamic sysbus device * @mc: Machine class @@ -109,6 +124,16 @@ typedef struct { CPUArchId cpus[]; } CPUArchIdList; +/** + * SMPCompatProps: + * @prefer_sockets - whether sockets are preferred over cores in smp parsing + * @dies_supported - whether dies are supported by the machine + */ +typedef struct { + bool prefer_sockets; + bool dies_supported; +} SMPCompatProps; + /** * MachineClass: * @deprecation_reason: If set, the machine is marked as deprecated. The @@ -170,10 +195,6 @@ typedef struct { * kvm-type may be NULL if it is not needed. * @numa_mem_supported: * true if '--numa node.mem' option is supported and false otherwise - * @smp_parse: - * The function pointer to hook different machine specific functions for - * parsing "smp-opts" from QemuOpts to MachineState::CpuTopology and more - * machine specific topology fields, such as smp_dies for PCMachine. * @hotplug_allowed: * If the hook is provided, then it'll be called for each device * hotplug to check whether the device hotplug is allowed. Return @@ -210,7 +231,6 @@ struct MachineClass { void (*reset)(MachineState *state); void (*wakeup)(MachineState *state); int (*kvm_type)(MachineState *machine, const char *arg); - void (*smp_parse)(MachineState *ms, QemuOpts *opts); BlockInterfaceType block_default_type; int units_per_default_bus; @@ -248,6 +268,7 @@ struct MachineClass { bool nvdimm_supported; bool numa_mem_supported; bool auto_enable_numa; + SMPCompatProps smp_props; const char *default_ram_id; HotplugHandler *(*get_hotplug_handler)(MachineState *machine, @@ -275,16 +296,18 @@ typedef struct DeviceMemoryState { /** * CpuTopology: * @cpus: the number of present logical processors on the machine - * @cores: the number of cores in one package - * @threads: the number of threads in one core * @sockets: the number of sockets on the machine + * @dies: the number of dies in one socket + * @cores: the number of cores in one die + * @threads: the number of threads in one core * @max_cpus: the maximum number of logical processors on the machine */ typedef struct CpuTopology { unsigned int cpus; + unsigned int sockets; + unsigned int dies; unsigned int cores; unsigned int threads; - unsigned int sockets; unsigned int max_cpus; } CpuTopology; @@ -294,7 +317,6 @@ typedef struct CpuTopology { struct MachineState { /*< private >*/ Object parent_obj; - Notifier sysbus_notifier; /*< public >*/ @@ -353,6 +375,12 @@ struct MachineState { } \ type_init(machine_initfn##_register_types) +extern GlobalProperty hw_compat_6_1[]; +extern const size_t hw_compat_6_1_len; + +extern GlobalProperty hw_compat_6_0[]; +extern const size_t hw_compat_6_0_len; + extern GlobalProperty hw_compat_5_2[]; extern const size_t hw_compat_5_2_len; diff --git a/include/hw/char/avr_usart.h b/include/hw/char/avr_usart.h index bb575324036..62eaa1528ef 100644 --- a/include/hw/char/avr_usart.h +++ b/include/hw/char/avr_usart.h @@ -24,7 +24,6 @@ #include "hw/sysbus.h" #include "chardev/char-fe.h" -#include "hw/hw.h" #include "qom/object.h" /* Offsets of registers. 
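A minimal sketch (the "demo" machine is hypothetical) of how a machine class would opt into the SMPCompatProps knobs introduced above, assuming the usual MachineClass class_init pattern:

static void demo_machine_class_init(ObjectClass *oc, void *data)
{
    MachineClass *mc = MACHINE_CLASS(oc);

    /* keep the historical "prefer sockets over cores" -smp behaviour */
    mc->smp_props.prefer_sockets = true;
    /* this board exposes no die level in its CPU topology */
    mc->smp_props.dies_supported = false;
}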
*/ diff --git a/include/hw/char/goldfish_tty.h b/include/hw/char/goldfish_tty.h index b9dd67362a6..7503d2fa1e1 100644 --- a/include/hw/char/goldfish_tty.h +++ b/include/hw/char/goldfish_tty.h @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * Goldfish TTY * diff --git a/include/hw/char/ibex_uart.h b/include/hw/char/ibex_uart.h index 546f958eb8a..a39985516a4 100644 --- a/include/hw/char/ibex_uart.h +++ b/include/hw/char/ibex_uart.h @@ -31,43 +31,6 @@ #include "qemu/timer.h" #include "qom/object.h" -REG32(INTR_STATE, 0x00) - FIELD(INTR_STATE, TX_WATERMARK, 0, 1) - FIELD(INTR_STATE, RX_WATERMARK, 1, 1) - FIELD(INTR_STATE, TX_EMPTY, 2, 1) - FIELD(INTR_STATE, RX_OVERFLOW, 3, 1) -REG32(INTR_ENABLE, 0x04) -REG32(INTR_TEST, 0x08) -REG32(CTRL, 0x0C) - FIELD(CTRL, TX_ENABLE, 0, 1) - FIELD(CTRL, RX_ENABLE, 1, 1) - FIELD(CTRL, NF, 2, 1) - FIELD(CTRL, SLPBK, 4, 1) - FIELD(CTRL, LLPBK, 5, 1) - FIELD(CTRL, PARITY_EN, 6, 1) - FIELD(CTRL, PARITY_ODD, 7, 1) - FIELD(CTRL, RXBLVL, 8, 2) - FIELD(CTRL, NCO, 16, 16) -REG32(STATUS, 0x10) - FIELD(STATUS, TXFULL, 0, 1) - FIELD(STATUS, RXFULL, 1, 1) - FIELD(STATUS, TXEMPTY, 2, 1) - FIELD(STATUS, RXIDLE, 4, 1) - FIELD(STATUS, RXEMPTY, 5, 1) -REG32(RDATA, 0x14) -REG32(WDATA, 0x18) -REG32(FIFO_CTRL, 0x1c) - FIELD(FIFO_CTRL, RXRST, 0, 1) - FIELD(FIFO_CTRL, TXRST, 1, 1) - FIELD(FIFO_CTRL, RXILVL, 2, 3) - FIELD(FIFO_CTRL, TXILVL, 5, 2) -REG32(FIFO_STATUS, 0x20) - FIELD(FIFO_STATUS, TXLVL, 0, 5) - FIELD(FIFO_STATUS, RXLVL, 16, 5) -REG32(OVRD, 0x24) -REG32(VAL, 0x28) -REG32(TIMEOUT_CTRL, 0x2c) - #define IBEX_UART_TX_FIFO_SIZE 16 #define IBEX_UART_CLOCK 50000000 /* 50MHz clock */ diff --git a/include/hw/char/lm32_juart.h b/include/hw/char/lm32_juart.h deleted file mode 100644 index 6fce2783266..00000000000 --- a/include/hw/char/lm32_juart.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef QEMU_HW_CHAR_LM32_JUART_H -#define QEMU_HW_CHAR_LM32_JUART_H - -#include "hw/qdev-core.h" - -#define TYPE_LM32_JUART "lm32-juart" - -uint32_t lm32_juart_get_jtx(DeviceState *d); -uint32_t lm32_juart_get_jrx(DeviceState *d); -void lm32_juart_set_jtx(DeviceState *d, uint32_t jtx); -void lm32_juart_set_jrx(DeviceState *d, uint32_t jrx); - -#endif /* QEMU_HW_CHAR_LM32_JUART_H */ diff --git a/include/hw/char/mchp_pfsoc_mmuart.h b/include/hw/char/mchp_pfsoc_mmuart.h index f61990215f0..b0e14ca3554 100644 --- a/include/hw/char/mchp_pfsoc_mmuart.h +++ b/include/hw/char/mchp_pfsoc_mmuart.h @@ -28,18 +28,25 @@ #ifndef HW_MCHP_PFSOC_MMUART_H #define HW_MCHP_PFSOC_MMUART_H +#include "hw/sysbus.h" #include "hw/char/serial.h" -#define MCHP_PFSOC_MMUART_REG_SIZE 52 +#define MCHP_PFSOC_MMUART_REG_COUNT 13 + +#define TYPE_MCHP_PFSOC_UART "mchp.pfsoc.uart" +OBJECT_DECLARE_SIMPLE_TYPE(MchpPfSoCMMUartState, MCHP_PFSOC_UART) typedef struct MchpPfSoCMMUartState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion container; MemoryRegion iomem; - hwaddr base; - qemu_irq irq; - SerialMM *serial; + SerialMM serial_mm; - uint32_t reg[MCHP_PFSOC_MMUART_REG_SIZE / sizeof(uint32_t)]; + uint32_t reg[MCHP_PFSOC_MMUART_REG_COUNT]; } MchpPfSoCMMUartState; /** diff --git a/include/hw/char/shakti_uart.h b/include/hw/char/shakti_uart.h new file mode 100644 index 00000000000..526c408233f --- /dev/null +++ b/include/hw/char/shakti_uart.h @@ -0,0 +1,74 @@ +/* + * SHAKTI UART + * + * Copyright (c) 2021 Vijai Kumar K + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation 
files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_SHAKTI_UART_H +#define HW_SHAKTI_UART_H + +#include "hw/sysbus.h" +#include "chardev/char-fe.h" + +#define SHAKTI_UART_BAUD 0x00 +#define SHAKTI_UART_TX 0x04 +#define SHAKTI_UART_RX 0x08 +#define SHAKTI_UART_STATUS 0x0C +#define SHAKTI_UART_DELAY 0x10 +#define SHAKTI_UART_CONTROL 0x14 +#define SHAKTI_UART_INT_EN 0x18 +#define SHAKTI_UART_IQ_CYCLES 0x1C +#define SHAKTI_UART_RX_THRES 0x20 + +#define SHAKTI_UART_STATUS_TX_EMPTY (1 << 0) +#define SHAKTI_UART_STATUS_TX_FULL (1 << 1) +#define SHAKTI_UART_STATUS_RX_NOT_EMPTY (1 << 2) +#define SHAKTI_UART_STATUS_RX_FULL (1 << 3) +/* 9600 8N1 is the default setting */ +/* Reg value = (50000000 Hz)/(16 * 9600)*/ +#define SHAKTI_UART_BAUD_DEFAULT 0x0145 +#define SHAKTI_UART_CONTROL_DEFAULT 0x0100 + +#define TYPE_SHAKTI_UART "shakti-uart" +#define SHAKTI_UART(obj) \ + OBJECT_CHECK(ShaktiUartState, (obj), TYPE_SHAKTI_UART) + +typedef struct { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion mmio; + + uint32_t uart_baud; + uint32_t uart_tx; + uint32_t uart_rx; + uint32_t uart_status; + uint32_t uart_delay; + uint32_t uart_control; + uint32_t uart_interrupt; + uint32_t uart_iq_cycles; + uint32_t uart_rx_threshold; + + CharBackend chr; +} ShaktiUartState; + +#endif /* HW_SHAKTI_UART_H */ diff --git a/include/hw/char/sifive_uart.h b/include/hw/char/sifive_uart.h index 3e962be6592..7f6c79f8bdb 100644 --- a/include/hw/char/sifive_uart.h +++ b/include/hw/char/sifive_uart.h @@ -21,6 +21,7 @@ #define HW_SIFIVE_UART_H #include "chardev/char-fe.h" +#include "hw/qdev-properties.h" #include "hw/sysbus.h" #include "qom/object.h" @@ -49,12 +50,10 @@ enum { #define SIFIVE_UART_GET_TXCNT(txctrl) ((txctrl >> 16) & 0x7) #define SIFIVE_UART_GET_RXCNT(rxctrl) ((rxctrl >> 16) & 0x7) +#define SIFIVE_UART_RX_FIFO_SIZE 8 #define TYPE_SIFIVE_UART "riscv.sifive.uart" - -typedef struct SiFiveUARTState SiFiveUARTState; -DECLARE_INSTANCE_CHECKER(SiFiveUARTState, SIFIVE_UART, - TYPE_SIFIVE_UART) +OBJECT_DECLARE_SIMPLE_TYPE(SiFiveUARTState, SIFIVE_UART) struct SiFiveUARTState { /*< private >*/ @@ -64,8 +63,8 @@ struct SiFiveUARTState { qemu_irq irq; MemoryRegion mmio; CharBackend chr; - uint8_t rx_fifo[8]; - unsigned int rx_fifo_len; + uint8_t rx_fifo[SIFIVE_UART_RX_FIFO_SIZE]; + uint8_t rx_fifo_len; uint32_t ie; uint32_t ip; uint32_t txctrl; diff --git a/include/hw/clock.h b/include/hw/clock.h index a7187eab95e..5c927cee7f8 100644 --- a/include/hw/clock.h +++ b/include/hw/clock.h @@ -81,6 +81,10 @@ struct Clock { void *callback_opaque; unsigned int callback_events; + /* Ratio of the parent clock to run the child 
clocks at */ + uint32_t multiplier; + uint32_t divider; + /* Clocks are organized in a clock tree */ Clock *source; QLIST_HEAD(, Clock) children; @@ -319,10 +323,7 @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns) if (clk->period == 0) { return 0; } - /* - * Ignore divu128() return value as we've caught div-by-zero and don't - * need different behaviour for overflow. - */ + divu128(&lo, &hi, clk->period); return lo; } @@ -350,4 +351,29 @@ static inline bool clock_is_enabled(const Clock *clk) */ char *clock_display_freq(Clock *clk); +/** + * clock_set_mul_div: set multiplier/divider for child clocks + * @clk: clock + * @multiplier: multiplier value + * @divider: divider value + * + * By default, a Clock's children will all run with the same period + * as their parent. This function allows you to adjust the multiplier + * and divider used to derive the child clock frequency. + * For example, setting a multiplier of 2 and a divider of 3 + * will run child clocks with a period 2/3 of the parent clock, + * so if the parent clock is an 8MHz clock the children will + * be 12MHz. + * + * Setting the multiplier to 0 will stop the child clocks. + * Setting the divider to 0 is a programming error (diagnosed with + * an assertion failure). + * Setting a multiplier value that results in the child period + * overflowing is not diagnosed. + * + * Note that this function does not call clock_propagate(); the + * caller should do that if necessary. + */ +void clock_set_mul_div(Clock *clk, uint32_t multiplier, uint32_t divider); + #endif /* QEMU_HW_CLOCK_H */ diff --git a/include/hw/core/accel-cpu.h b/include/hw/core/accel-cpu.h index 24a6697412e..5dbfd799553 100644 --- a/include/hw/core/accel-cpu.h +++ b/include/hw/core/accel-cpu.h @@ -32,7 +32,7 @@ typedef struct AccelCPUClass { void (*cpu_class_init)(CPUClass *cc); void (*cpu_instance_init)(CPUState *cpu); - void (*cpu_realizefn)(CPUState *cpu, Error **errp); + bool (*cpu_realizefn)(CPUState *cpu, Error **errp); } AccelCPUClass; #endif /* ACCEL_CPU_H */ diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 4dc5739759f..f93b042d6c3 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -83,6 +83,9 @@ struct TCGCPUOps; /* see accel-cpu.h */ struct AccelCPUClass; +/* see sysemu-cpu-ops.h */ +struct SysemuCPUOps; + /** * CPUClass: * @class_by_name: Callback to map -cpu command line model name to an @@ -90,16 +93,9 @@ struct AccelCPUClass; * @parse_features: Callback to parse command line arguments. * @reset_dump_flags: #CPUDumpFlags to use for reset logging. * @has_work: Callback for checking if there is work to do. - * @virtio_is_big_endian: Callback to return %true if a CPU which supports - * runtime configurable endianness is currently big-endian. Non-configurable - * CPUs can use the default implementation of this method. This method should - * not be used by any callers other than the pre-1.0 virtio devices. * @memory_rw_debug: Callback for GDB memory access. * @dump_state: Callback for dumping state. - * @dump_statistics: Callback for dumping statistics. * @get_arch_id: Callback for getting architecture-dependent CPU ID. - * @get_paging_enabled: Callback for inquiring whether paging is enabled. - * @get_memory_mapping: Callback for obtaining the memory mappings. * @set_pc: Callback for setting the Program Counter register. 
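The multiplier/divider semantics documented above are easiest to see in a short usage sketch (clock and device names are assumptions, not part of the patch): an 8 MHz parent with mul=2, div=3 gives children a period 2/3 of the parent's, i.e. 12 MHz.

static void demo_clock_mul_div(DeviceState *dev)
{
    Clock *parent = clock_new(OBJECT(dev), "parent");
    Clock *child  = clock_new(OBJECT(dev), "child");

    clock_set_source(child, parent);
    clock_set_hz(parent, 8 * 1000 * 1000);   /* 8 MHz source */
    clock_set_mul_div(parent, 2, 3);         /* child period = 2/3 of parent */
    clock_propagate(parent);                 /* children now observe 12 MHz */
}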
This * should have the semantics used by the target architecture when * setting the PC from a source such as an ELF file entry point; @@ -108,24 +104,11 @@ struct AccelCPUClass; * If the target behaviour here is anything other than "set * the PC register to the value passed in" then the target must * also implement the synchronize_from_tb hook. - * @get_phys_page_debug: Callback for obtaining a physical address. - * @get_phys_page_attrs_debug: Callback for obtaining a physical address and the - * associated memory transaction attributes to use for the access. - * CPUs which use memory transaction attributes should implement this - * instead of get_phys_page_debug. - * @asidx_from_attrs: Callback to return the CPU AddressSpace to use for - * a memory access with the specified memory transaction attributes. * @gdb_read_register: Callback for letting GDB read a register. * @gdb_write_register: Callback for letting GDB write a register. - * @write_elf64_note: Callback for writing a CPU-specific ELF note to a - * 64-bit VM coredump. - * @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF - * note to a 32-bit VM coredump. - * @write_elf32_note: Callback for writing a CPU-specific ELF note to a - * 32-bit VM coredump. - * @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF - * note to a 32-bit VM coredump. - * @vmsd: State description for migration. + * @gdb_adjust_breakpoint: Callback for adjusting the address of a + * breakpoint. Used by AVR to handle a gdb mis-feature with + * its Harvard architecture split code and data. * @gdb_num_core_regs: Number of core registers accessible to GDB. * @gdb_core_xml_file: File name for core registers GDB XML description. * @gdb_stop_before_watchpoint: Indicates whether GDB expects the CPU to stop @@ -151,36 +134,16 @@ struct CPUClass { ObjectClass *(*class_by_name)(const char *cpu_model); void (*parse_features)(const char *typename, char *str, Error **errp); - int reset_dump_flags; bool (*has_work)(CPUState *cpu); - bool (*virtio_is_big_endian)(CPUState *cpu); int (*memory_rw_debug)(CPUState *cpu, vaddr addr, uint8_t *buf, int len, bool is_write); void (*dump_state)(CPUState *cpu, FILE *, int flags); - GuestPanicInformation* (*get_crash_info)(CPUState *cpu); - void (*dump_statistics)(CPUState *cpu, int flags); int64_t (*get_arch_id)(CPUState *cpu); - bool (*get_paging_enabled)(const CPUState *cpu); - void (*get_memory_mapping)(CPUState *cpu, MemoryMappingList *list, - Error **errp); void (*set_pc)(CPUState *cpu, vaddr value); - hwaddr (*get_phys_page_debug)(CPUState *cpu, vaddr addr); - hwaddr (*get_phys_page_attrs_debug)(CPUState *cpu, vaddr addr, - MemTxAttrs *attrs); - int (*asidx_from_attrs)(CPUState *cpu, MemTxAttrs attrs); int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg); int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg); + vaddr (*gdb_adjust_breakpoint)(CPUState *cpu, vaddr addr); - int (*write_elf64_note)(WriteCoreDumpFunction f, CPUState *cpu, - int cpuid, void *opaque); - int (*write_elf64_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, - void *opaque); - int (*write_elf32_note)(WriteCoreDumpFunction f, CPUState *cpu, - int cpuid, void *opaque); - int (*write_elf32_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, - void *opaque); - - const VMStateDescription *vmsd; const char *gdb_core_xml_file; gchar * (*gdb_arch_name)(CPUState *cpu); const char * (*gdb_get_dynamic_xml)(CPUState *cpu, const char *xmlname); @@ -188,13 +151,26 @@ struct CPUClass { void 
(*disas_set_info)(CPUState *cpu, disassemble_info *info); const char *deprecation_note; - /* Keep non-pointer data at the end to minimize holes. */ - int gdb_num_core_regs; - bool gdb_stop_before_watchpoint; struct AccelCPUClass *accel_cpu; + /* when system emulation is not available, this pointer is NULL */ + const struct SysemuCPUOps *sysemu_ops; + /* when TCG is not available, this pointer is NULL */ - struct TCGCPUOps *tcg_ops; + const struct TCGCPUOps *tcg_ops; + + /* + * if not NULL, this is called in order for the CPUClass to initialize + * class data that depends on the accelerator, see accel/accel-common.c. + */ + void (*init_accel_cpu)(struct AccelCPUClass *accel_cpu, CPUClass *cc); + + /* + * Keep non-pointer data at the end to minimize holes. + */ + int reset_dump_flags; + int gdb_num_core_regs; + bool gdb_stop_before_watchpoint; }; /* @@ -248,6 +224,7 @@ struct KVMState; struct kvm_run; struct hax_vcpu_state; +struct hvf_vcpu_state; #define TB_JMP_CACHE_BITS 12 #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS) @@ -326,6 +303,10 @@ struct qemu_work_item; * @ignore_memory_transaction_failures: Cached copy of the MachineState * flag of the same name: allows the board to suppress calling of the * CPU do_transaction_failed hook function. + * @kvm_dirty_gfns: Points to the KVM dirty ring for this CPU when KVM dirty + * ring is enabled. + * @kvm_fetch_index: Keeps the index that we last fetched from the per-vCPU + * dirty ring structure. * @log_state: The per-cpu instruction logging state. * * State of one CPU core or thread. @@ -399,9 +380,13 @@ struct CPUState { */ uintptr_t mem_io_pc; + /* Only used in KVM */ int kvm_fd; struct KVMState *kvm_state; struct kvm_run *kvm_run; + struct kvm_dirty_gfn *kvm_dirty_gfns; + uint32_t kvm_fetch_index; + uint64_t dirty_pages; /* Used for events with 'vcpu' and *without* the 'disabled' properties */ DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); @@ -435,7 +420,7 @@ struct CPUState { struct hax_vcpu_state *hax_vcpu; - int hvf_fd; + struct hvf_vcpu_state *hvf; /* track IOMMUs whose translations we've cached in the TCG TLB */ GArray *iommu_notifiers; @@ -565,16 +550,6 @@ enum CPUDumpFlags { */ void cpu_dump_state(CPUState *cpu, FILE *f, int flags); -/** - * cpu_dump_statistics: - * @cpu: The CPU whose state is to be dumped. - * @flags: Flags what to dump. - * - * Dump CPU statistics to the current monitor if we have one, else to - * stdout. - */ -void cpu_dump_statistics(CPUState *cpu, int flags); - #ifndef CONFIG_USER_ONLY /** * cpu_get_phys_page_attrs_debug: @@ -589,18 +564,8 @@ void cpu_dump_statistics(CPUState *cpu, int flags); * * Returns: Corresponding physical page address or -1 if no page found. */ -static inline hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr, - MemTxAttrs *attrs) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - if (cc->get_phys_page_attrs_debug) { - return cc->get_phys_page_attrs_debug(cpu, addr, attrs); - } - /* Fallback for CPUs which don't implement the _attrs_ hook */ - *attrs = MEMTXATTRS_UNSPECIFIED; - return cc->get_phys_page_debug(cpu, addr); -} +hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr, + MemTxAttrs *attrs); /** * cpu_get_phys_page_debug: @@ -612,12 +577,7 @@ static inline hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr, * * Returns: Corresponding physical page address or -1 if no page found. 
*/ -static inline hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr) -{ - MemTxAttrs attrs = {}; - - return cpu_get_phys_page_attrs_debug(cpu, addr, &attrs); -} +hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); /** cpu_asidx_from_attrs: * @cpu: CPU @@ -626,17 +586,16 @@ static inline hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr) * Returns the address space index specifying the CPU AddressSpace * to use for a memory access with the given transaction attributes. */ -static inline int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - int ret = 0; +int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs); - if (cc->asidx_from_attrs) { - ret = cc->asidx_from_attrs(cpu, attrs); - assert(ret < cpu->num_ases && ret >= 0); - } - return ret; -} +/** + * cpu_virtio_is_big_endian: + * @cpu: CPU + + * Returns %true if a CPU which supports runtime configurable endianness + * is currently big-endian. + */ +bool cpu_virtio_is_big_endian(CPUState *cpu); #endif /* CONFIG_USER_ONLY */ @@ -1060,6 +1019,7 @@ void QEMU_NORETURN cpu_abort(CPUState *cpu, const char *fmt, ...) GCC_FMT_ATTR(2, 3); /* $(top_srcdir)/cpu.c */ +void cpu_class_init_props(DeviceClass *dc); void cpu_exec_initfn(CPUState *cpu); void cpu_exec_realizefn(CPUState *cpu, Error **errp); void cpu_exec_unrealizefn(CPUState *cpu); @@ -1078,10 +1038,8 @@ bool target_words_bigendian(void); #ifdef NEED_CPU_H #ifdef CONFIG_SOFTMMU + extern const VMStateDescription vmstate_cpu_common; -#else -#define vmstate_cpu_common vmstate_dummy -#endif #define VMSTATE_CPU() { \ .name = "parent_obj", \ @@ -1090,6 +1048,7 @@ extern const VMStateDescription vmstate_cpu_common; .flags = VMS_STRUCT, \ .offset = 0, \ } +#endif /* CONFIG_SOFTMMU */ #endif /* NEED_CPU_H */ diff --git a/include/hw/core/sysemu-cpu-ops.h b/include/hw/core/sysemu-cpu-ops.h new file mode 100644 index 00000000000..a9ba39e5f25 --- /dev/null +++ b/include/hw/core/sysemu-cpu-ops.h @@ -0,0 +1,92 @@ +/* + * CPU operations specific to system emulation + * + * Copyright (c) 2012 SUSE LINUX Products GmbH + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef SYSEMU_CPU_OPS_H +#define SYSEMU_CPU_OPS_H + +#include "hw/core/cpu.h" + +/* + * struct SysemuCPUOps: System operations specific to a CPU class + */ +typedef struct SysemuCPUOps { + /** + * @get_memory_mapping: Callback for obtaining the memory mappings. + */ + void (*get_memory_mapping)(CPUState *cpu, MemoryMappingList *list, + Error **errp); + /** + * @get_paging_enabled: Callback for inquiring whether paging is enabled. + */ + bool (*get_paging_enabled)(const CPUState *cpu); + /** + * @get_phys_page_debug: Callback for obtaining a physical address. + */ + hwaddr (*get_phys_page_debug)(CPUState *cpu, vaddr addr); + /** + * @get_phys_page_attrs_debug: Callback for obtaining a physical address + * and the associated memory transaction attributes to use for the + * access. + * CPUs which use memory transaction attributes should implement this + * instead of get_phys_page_debug. + */ + hwaddr (*get_phys_page_attrs_debug)(CPUState *cpu, vaddr addr, + MemTxAttrs *attrs); + /** + * @asidx_from_attrs: Callback to return the CPU AddressSpace to use for + * a memory access with the specified memory transaction attributes. + */ + int (*asidx_from_attrs)(CPUState *cpu, MemTxAttrs attrs); + /** + * @get_crash_info: Callback for reporting guest crash information in + * GUEST_PANICKED events. 
+ */ + GuestPanicInformation* (*get_crash_info)(CPUState *cpu); + /** + * @write_elf32_note: Callback for writing a CPU-specific ELF note to a + * 32-bit VM coredump. + */ + int (*write_elf32_note)(WriteCoreDumpFunction f, CPUState *cpu, + int cpuid, void *opaque); + /** + * @write_elf64_note: Callback for writing a CPU-specific ELF note to a + * 64-bit VM coredump. + */ + int (*write_elf64_note)(WriteCoreDumpFunction f, CPUState *cpu, + int cpuid, void *opaque); + /** + * @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF + * note to a 32-bit VM coredump. + */ + int (*write_elf32_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, + void *opaque); + /** + * @write_elf64_qemunote: Callback for writing a CPU- and QEMU-specific ELF + * note to a 64-bit VM coredump. + */ + int (*write_elf64_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, + void *opaque); + /** + * @virtio_is_big_endian: Callback to return %true if a CPU which supports + * runtime configurable endianness is currently big-endian. + * Non-configurable CPUs can use the default implementation of this method. + * This method should not be used by any callers other than the pre-1.0 + * virtio devices. + */ + bool (*virtio_is_big_endian)(CPUState *cpu); + + /** + * @legacy_vmsd: Legacy state for migration. + * Do not use in new targets, use #DeviceClass::vmsd instead. + */ + const VMStateDescription *legacy_vmsd; + +} SysemuCPUOps; + +#endif /* SYSEMU_CPU_OPS_H */ diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h index 72d791438c2..e13898553af 100644 --- a/include/hw/core/tcg-cpu-ops.h +++ b/include/hw/core/tcg-cpu-ops.h @@ -35,33 +35,37 @@ struct TCGCPUOps { void (*cpu_exec_enter)(CPUState *cpu); /** @cpu_exec_exit: Callback for cpu_exec cleanup */ void (*cpu_exec_exit)(CPUState *cpu); - /** @cpu_exec_interrupt: Callback for processing interrupts in cpu_exec */ - bool (*cpu_exec_interrupt)(CPUState *cpu, int interrupt_request); + /** @debug_excp_handler: Callback for handling debug exceptions */ + void (*debug_excp_handler)(CPUState *cpu); + +#ifdef NEED_CPU_H +#if defined(CONFIG_USER_ONLY) && defined(TARGET_I386) /** - * @do_interrupt: Callback for interrupt handling. + * @fake_user_interrupt: Callback for 'fake exception' handling. * - * note that this is in general SOFTMMU only, but it actually isn't - * because of an x86 hack (accel/tcg/cpu-exec.c), so we cannot put it - * in the SOFTMMU section in general. + * Simulate 'fake exception' which will be handled outside the + * cpu execution loop (hack for x86 user mode). + */ + void (*fake_user_interrupt)(CPUState *cpu); +#else + /** + * @do_interrupt: Callback for interrupt handling. */ void (*do_interrupt)(CPUState *cpu); +#endif /* !CONFIG_USER_ONLY || !TARGET_I386 */ +#ifdef CONFIG_SOFTMMU + /** @cpu_exec_interrupt: Callback for processing interrupts in cpu_exec */ + bool (*cpu_exec_interrupt)(CPUState *cpu, int interrupt_request); /** - * @tlb_fill: Handle a softmmu tlb miss or user-only address fault + * @tlb_fill: Handle a softmmu tlb miss * - * For system mode, if the access is valid, call tlb_set_page - * and return true; if the access is invalid, and probe is - * true, return false; otherwise raise an exception and do - * not return. For user-only mode, always raise an exception - * and do not return. + * If the access is valid, call tlb_set_page and return true; + * if the access is invalid and probe is true, return false; + * otherwise raise an exception and do not return. 
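A minimal sketch (the "demo" target and its helpers are hypothetical) of how a target now supplies the hooks that used to live directly in CPUClass: it fills a SysemuCPUOps table and points cc->sysemu_ops at it from its class_init.

#include "hw/core/sysemu-cpu-ops.h"

/* hypothetical target-provided helpers */
static hwaddr demo_cpu_get_phys_page_debug(CPUState *cs, vaddr addr);
extern const VMStateDescription vmstate_demo_cpu;

static const struct SysemuCPUOps demo_sysemu_ops = {
    .get_phys_page_debug = demo_cpu_get_phys_page_debug,
    .legacy_vmsd         = &vmstate_demo_cpu,
};

static void demo_cpu_class_init(ObjectClass *oc, void *data)
{
    CPUClass *cc = CPU_CLASS(oc);

    cc->sysemu_ops = &demo_sysemu_ops;
}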
*/ bool (*tlb_fill)(CPUState *cpu, vaddr address, int size, MMUAccessType access_type, int mmu_idx, bool probe, uintptr_t retaddr); - /** @debug_excp_handler: Callback for handling debug exceptions */ - void (*debug_excp_handler)(CPUState *cpu); - -#ifdef NEED_CPU_H -#ifdef CONFIG_SOFTMMU /** * @do_transaction_failed: Callback for handling failed memory transactions * (ie bus faults or external aborts; not MMU faults) @@ -72,10 +76,11 @@ struct TCGCPUOps { MemTxResult response, uintptr_t retaddr); /** * @do_unaligned_access: Callback for unaligned access handling + * The callback must exit via raising an exception. */ void (*do_unaligned_access)(CPUState *cpu, vaddr addr, MMUAccessType access_type, - int mmu_idx, uintptr_t retaddr); + int mmu_idx, uintptr_t retaddr) QEMU_NORETURN; /** * @adjust_watchpoint_address: hack for cpu_check_watchpoint used by ARM @@ -88,6 +93,12 @@ struct TCGCPUOps { */ bool (*debug_check_watchpoint)(CPUState *cpu, CPUWatchpoint *wp); + /** + * @debug_check_breakpoint: return true if the architectural + * breakpoint whose PC has matched should really fire. + */ + bool (*debug_check_breakpoint)(CPUState *cpu); + /** * @io_recompile_replay_branch: Callback for cpu_io_recompile. * @@ -98,6 +109,55 @@ struct TCGCPUOps { */ bool (*io_recompile_replay_branch)(CPUState *cpu, const TranslationBlock *tb); +#else + /** + * record_sigsegv: + * @cpu: cpu context + * @addr: faulting guest address + * @access_type: access was read/write/execute + * @maperr: true for invalid page, false for permission fault + * @ra: host pc for unwinding + * + * We are about to raise SIGSEGV with si_code set for @maperr, + * and si_addr set for @addr. Record anything further needed + * for the signal ucontext_t. + * + * If the emulated kernel does not provide anything to the signal + * handler with anything besides the user context registers, and + * the siginfo_t, then this hook need do nothing and may be omitted. + * Otherwise, record the data and return; the caller will raise + * the signal, unwind the cpu state, and return to the main loop. + * + * If it is simpler to re-use the sysemu tlb_fill code, @ra is provided + * so that a "normal" cpu exception can be raised. In this case, + * the signal must be raised by the architecture cpu_loop. + */ + void (*record_sigsegv)(CPUState *cpu, vaddr addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra); + /** + * record_sigbus: + * @cpu: cpu context + * @addr: misaligned guest address + * @access_type: access was read/write/execute + * @ra: host pc for unwinding + * + * We are about to raise SIGBUS with si_code BUS_ADRALN, + * and si_addr set for @addr. Record anything further needed + * for the signal ucontext_t. + * + * If the emulated kernel does not provide the signal handler with + * anything besides the user context registers, and the siginfo_t, + * then this hook need do nothing and may be omitted. + * Otherwise, record the data and return; the caller will raise + * the signal, unwind the cpu state, and return to the main loop. + * + * If it is simpler to re-use the sysemu do_unaligned_access code, + * @ra is provided so that a "normal" cpu exception can be raised. + * In this case, the signal must be raised by the architecture cpu_loop. 
+ */ + void (*record_sigbus)(CPUState *cpu, vaddr addr, + MMUAccessType access_type, uintptr_t ra); #endif /* CONFIG_SOFTMMU */ #endif /* NEED_CPU_H */ diff --git a/include/hw/display/edid.h b/include/hw/display/edid.h index 1f8fc9b3750..520f8ec2027 100644 --- a/include/hw/display/edid.h +++ b/include/hw/display/edid.h @@ -11,6 +11,7 @@ typedef struct qemu_edid_info { uint32_t prefy; uint32_t maxx; uint32_t maxy; + uint32_t refresh_rate; } qemu_edid_info; void qemu_edid_generate(uint8_t *edid, size_t size, @@ -21,10 +22,11 @@ void qemu_edid_region_io(MemoryRegion *region, Object *owner, uint32_t qemu_edid_dpi_to_mm(uint32_t dpi, uint32_t res); -#define DEFINE_EDID_PROPERTIES(_state, _edid_info) \ - DEFINE_PROP_UINT32("xres", _state, _edid_info.prefx, 0), \ - DEFINE_PROP_UINT32("yres", _state, _edid_info.prefy, 0), \ - DEFINE_PROP_UINT32("xmax", _state, _edid_info.maxx, 0), \ - DEFINE_PROP_UINT32("ymax", _state, _edid_info.maxy, 0) +#define DEFINE_EDID_PROPERTIES(_state, _edid_info) \ + DEFINE_PROP_UINT32("xres", _state, _edid_info.prefx, 0), \ + DEFINE_PROP_UINT32("yres", _state, _edid_info.prefy, 0), \ + DEFINE_PROP_UINT32("xmax", _state, _edid_info.maxx, 0), \ + DEFINE_PROP_UINT32("ymax", _state, _edid_info.maxy, 0), \ + DEFINE_PROP_UINT32("refresh_rate", _state, _edid_info.refresh_rate, 0) #endif /* EDID_H */ diff --git a/include/hw/display/macfb.h b/include/hw/display/macfb.h index 80806b0306a..e52775aa215 100644 --- a/include/hw/display/macfb.h +++ b/include/hw/display/macfb.h @@ -14,9 +14,42 @@ #define MACFB_H #include "exec/memory.h" +#include "hw/irq.h" #include "ui/console.h" +#include "qemu/timer.h" #include "qom/object.h" +typedef enum { + MACFB_DISPLAY_APPLE_21_COLOR = 0, + MACFB_DISPLAY_APPLE_PORTRAIT = 1, + MACFB_DISPLAY_APPLE_12_RGB = 2, + MACFB_DISPLAY_APPLE_2PAGE_MONO = 3, + MACFB_DISPLAY_NTSC_UNDERSCAN = 4, + MACFB_DISPLAY_NTSC_OVERSCAN = 5, + MACFB_DISPLAY_APPLE_12_MONO = 6, + MACFB_DISPLAY_APPLE_13_RGB = 7, + MACFB_DISPLAY_16_COLOR = 8, + MACFB_DISPLAY_PAL1_UNDERSCAN = 9, + MACFB_DISPLAY_PAL1_OVERSCAN = 10, + MACFB_DISPLAY_PAL2_UNDERSCAN = 11, + MACFB_DISPLAY_PAL2_OVERSCAN = 12, + MACFB_DISPLAY_VGA = 13, + MACFB_DISPLAY_SVGA = 14, +} MacfbDisplayType; + +typedef struct MacFbMode { + uint8_t type; + uint8_t depth; + uint32_t mode_ctrl1; + uint32_t mode_ctrl2; + uint32_t width; + uint32_t height; + uint32_t stride; + uint32_t offset; +} MacFbMode; + +#define MACFB_NUM_REGS 8 + typedef struct MacfbState { MemoryRegion mem_vram; MemoryRegion mem_ctrl; @@ -28,6 +61,15 @@ typedef struct MacfbState { uint8_t color_palette[256 * 3]; uint32_t width, height; /* in pixels */ uint8_t depth; + uint8_t type; + + uint32_t regs[MACFB_NUM_REGS]; + MacFbMode *mode; + + uint32_t irq_state; + uint32_t irq_mask; + QEMUTimer *vbl_timer; + qemu_irq irq; } MacfbState; #define TYPE_MACFB "sysbus-macfb" @@ -46,6 +88,7 @@ struct MacfbNubusDeviceClass { DeviceClass parent_class; DeviceRealize parent_realize; + DeviceUnrealize parent_unrealize; }; diff --git a/include/hw/display/milkymist_tmu2.h b/include/hw/display/milkymist_tmu2.h deleted file mode 100644 index fdce9535a1e..00000000000 --- a/include/hw/display/milkymist_tmu2.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * QEMU model of the Milkymist texture mapping unit. 
- * - * Copyright (c) 2010 Michael Walle - * Copyright (c) 2010 Sebastien Bourdeauducq - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/tmu2.pdf - * - */ - -#ifndef HW_DISPLAY_MILKYMIST_TMU2_H -#define HW_DISPLAY_MILKYMIST_TMU2_H - -#include "exec/hwaddr.h" -#include "hw/qdev-core.h" - -#if defined(CONFIG_X11) && defined(CONFIG_OPENGL) -DeviceState *milkymist_tmu2_create(hwaddr base, qemu_irq irq); -#else -static inline DeviceState *milkymist_tmu2_create(hwaddr base, qemu_irq irq) -{ - return NULL; -} -#endif - -#endif /* HW_DISPLAY_MILKYMIST_TMU2_H */ diff --git a/include/hw/display/vga.h b/include/hw/display/vga.h index ca0003dbfd9..5f7825e0e36 100644 --- a/include/hw/display/vga.h +++ b/include/hw/display/vga.h @@ -11,6 +11,12 @@ #include "exec/hwaddr.h" +/* + * modules can reference this symbol to avoid being loaded + * into system emulators without vga support + */ +extern bool have_vga; + enum vga_retrace_method { VGA_RETRACE_DUMB, VGA_RETRACE_PRECISE diff --git a/include/hw/dma/xlnx-zdma.h b/include/hw/dma/xlnx-zdma.h index 6602e7ffa72..efc75217d59 100644 --- a/include/hw/dma/xlnx-zdma.h +++ b/include/hw/dma/xlnx-zdma.h @@ -56,7 +56,7 @@ struct XlnxZDMA { MemoryRegion iomem; MemTxAttrs attr; MemoryRegion *dma_mr; - AddressSpace *dma_as; + AddressSpace dma_as; qemu_irq irq_zdma_ch_imr; struct { diff --git a/include/hw/dma/xlnx_csu_dma.h b/include/hw/dma/xlnx_csu_dma.h index 204d94c6737..9e9dc551e99 100644 --- a/include/hw/dma/xlnx_csu_dma.h +++ b/include/hw/dma/xlnx_csu_dma.h @@ -30,7 +30,7 @@ typedef struct XlnxCSUDMA { MemoryRegion iomem; MemTxAttrs attr; MemoryRegion *dma_mr; - AddressSpace *dma_as; + AddressSpace dma_as; qemu_irq irq; StreamSink *tx_dev; /* Used as generic StreamSink */ ptimer_state *src_timer; diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h index 6ee458e7bc3..995de8495c2 100644 --- a/include/hw/elf_ops.h +++ b/include/hw/elf_ops.h @@ -312,25 +312,26 @@ static struct elf_note *glue(get_elf_note_type, SZ)(struct elf_note *nhdr, return nhdr; } -static int glue(load_elf, SZ)(const char *name, int fd, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, - int must_swab, uint64_t *pentry, - uint64_t *lowaddr, uint64_t *highaddr, - uint32_t *pflags, int elf_machine, - int clear_lsb, int data_swab, - AddressSpace *as, bool load_rom, - symbol_fn_t sym_cb) +static ssize_t glue(load_elf, SZ)(const char *name, int fd, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, + int must_swab, uint64_t *pentry, + uint64_t *lowaddr, uint64_t *highaddr, + uint32_t *pflags, int elf_machine, + int clear_lsb, int data_swab, + AddressSpace *as, bool load_rom, + symbol_fn_t sym_cb) { struct elfhdr ehdr; struct elf_phdr *phdr = NULL, *ph; - int size, i, total_size; + int 
size, i; + ssize_t total_size; elf_word mem_size, file_size, data_offset; uint64_t addr, low = (uint64_t)-1, high = 0; GMappedFile *mapped_file = NULL; uint8_t *data = NULL; - int ret = ELF_LOAD_FAILED; + ssize_t ret = ELF_LOAD_FAILED; if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr)) goto fail; @@ -368,14 +369,6 @@ static int glue(load_elf, SZ)(const char *name, int fd, } } break; - case EM_MOXIE: - if (ehdr.e_machine != EM_MOXIE) { - if (ehdr.e_machine != EM_MOXIE_OLD) { - ret = ELF_LOAD_WRONG_ARCH; - goto fail; - } - } - break; case EM_MIPS: case EM_NANOMIPS: if ((ehdr.e_machine != EM_MIPS) && @@ -490,7 +483,7 @@ static int glue(load_elf, SZ)(const char *name, int fd, } } - if (mem_size > INT_MAX - total_size) { + if (mem_size > SSIZE_MAX - total_size) { ret = ELF_LOAD_TOO_BIG; goto fail; } diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h index 02a0ced0a09..5a0dd0c8cff 100644 --- a/include/hw/firmware/smbios.h +++ b/include/hw/firmware/smbios.h @@ -258,6 +258,17 @@ struct smbios_type_32 { uint8_t boot_status; } QEMU_PACKED; +/* SMBIOS type 41 - Onboard Devices Extended Information */ +struct smbios_type_41 { + struct smbios_structure_header header; + uint8_t reference_designation_str; + uint8_t device_type; + uint8_t device_type_instance; + uint16_t segment_group_number; + uint8_t bus_number; + uint8_t device_number; +} QEMU_PACKED; + /* SMBIOS type 127 -- End-of-table */ struct smbios_type_127 { struct smbios_structure_header header; @@ -273,5 +284,6 @@ void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, const unsigned int mem_array_size, uint8_t **tables, size_t *tables_len, - uint8_t **anchor, size_t *anchor_len); + uint8_t **anchor, size_t *anchor_len, + Error **errp); #endif /* QEMU_SMBIOS_H */ diff --git a/include/hw/gpio/aspeed_gpio.h b/include/hw/gpio/aspeed_gpio.h index e1636ce7fea..801846befb3 100644 --- a/include/hw/gpio/aspeed_gpio.h +++ b/include/hw/gpio/aspeed_gpio.h @@ -17,9 +17,9 @@ OBJECT_DECLARE_TYPE(AspeedGPIOState, AspeedGPIOClass, ASPEED_GPIO) #define ASPEED_GPIO_MAX_NR_SETS 8 +#define ASPEED_GPIOS_PER_SET 32 #define ASPEED_REGS_PER_BANK 14 #define ASPEED_GPIO_MAX_NR_REGS (ASPEED_REGS_PER_BANK * ASPEED_GPIO_MAX_NR_SETS) -#define ASPEED_GPIO_NR_PINS 228 #define ASPEED_GROUPS_PER_SET 4 #define ASPEED_GPIO_NR_DEBOUNCE_REGS 3 #define ASPEED_CHARS_PER_GROUP_LABEL 4 @@ -60,7 +60,6 @@ struct AspeedGPIOClass { const GPIOSetProperties *props; uint32_t nr_gpio_pins; uint32_t nr_gpio_sets; - uint32_t gap; const AspeedGPIOReg *reg_table; }; @@ -72,7 +71,7 @@ struct AspeedGPIOState { MemoryRegion iomem; int pending; qemu_irq irq; - qemu_irq gpios[ASPEED_GPIO_NR_PINS]; + qemu_irq gpios[ASPEED_GPIO_MAX_NR_SETS][ASPEED_GPIOS_PER_SET]; /* Parallel GPIO Registers */ uint32_t debounce_regs[ASPEED_GPIO_NR_DEBOUNCE_REGS]; diff --git a/include/hw/i2c/aspeed_i2c.h b/include/hw/i2c/aspeed_i2c.h index 565f8330662..4b9be09274c 100644 --- a/include/hw/i2c/aspeed_i2c.h +++ b/include/hw/i2c/aspeed_i2c.h @@ -36,7 +36,11 @@ OBJECT_DECLARE_TYPE(AspeedI2CState, AspeedI2CClass, ASPEED_I2C) struct AspeedI2CState; -typedef struct AspeedI2CBus { +#define TYPE_ASPEED_I2C_BUS "aspeed.i2c.bus" +OBJECT_DECLARE_SIMPLE_TYPE(AspeedI2CBus, ASPEED_I2C_BUS) +struct AspeedI2CBus { + SysBusDevice parent_obj; + struct AspeedI2CState *controller; MemoryRegion mr; @@ -54,7 +58,7 @@ typedef struct AspeedI2CBus { uint32_t pool_ctrl; uint32_t dma_addr; uint32_t dma_len; -} AspeedI2CBus; +}; struct AspeedI2CState { SysBusDevice parent_obj; diff --git 
a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h index 277dd9f2d6d..5ca3b708c0b 100644 --- a/include/hw/i2c/i2c.h +++ b/include/hw/i2c/i2c.h @@ -16,6 +16,7 @@ enum i2c_event { I2C_NACK /* Masker NACKed a receive byte. */ }; +typedef struct I2CNodeList I2CNodeList; #define TYPE_I2C_SLAVE "i2c-slave" OBJECT_DECLARE_TYPE(I2CSlave, I2CSlaveClass, @@ -39,6 +40,16 @@ struct I2CSlaveClass { * return code is not used and should be zero. */ int (*event)(I2CSlave *s, enum i2c_event event); + + /* + * Check if this device matches the address provided. Returns bool of + * true if it matches (or broadcast), and updates the device list, false + * otherwise. + * + * If broadcast is true, match should add the device and return true. + */ + bool (*match_and_add)(I2CSlave *candidate, uint8_t address, bool broadcast, + I2CNodeList *current_devs); }; struct I2CSlave { @@ -58,22 +69,58 @@ struct I2CNode { QLIST_ENTRY(I2CNode) next; }; +typedef QLIST_HEAD(I2CNodeList, I2CNode) I2CNodeList; + struct I2CBus { BusState qbus; - QLIST_HEAD(, I2CNode) current_devs; + I2CNodeList current_devs; uint8_t saved_address; bool broadcast; }; I2CBus *i2c_init_bus(DeviceState *parent, const char *name); -void i2c_set_slave_address(I2CSlave *dev, uint8_t address); int i2c_bus_busy(I2CBus *bus); -int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv); + +/** + * i2c_start_transfer: start a transfer on an I2C bus. + * + * @bus: #I2CBus to be used + * @address: address of the slave + * @is_recv: indicates the transfer direction + * + * When @is_recv is a known boolean constant, use the + * i2c_start_recv() or i2c_start_send() helper instead. + * + * Returns: 0 on success, -1 on error + */ +int i2c_start_transfer(I2CBus *bus, uint8_t address, bool is_recv); + +/** + * i2c_start_recv: start a 'receive' transfer on an I2C bus. + * + * @bus: #I2CBus to be used + * @address: address of the slave + * + * Returns: 0 on success, -1 on error + */ +int i2c_start_recv(I2CBus *bus, uint8_t address); + +/** + * i2c_start_send: start a 'send' transfer on an I2C bus. + * + * @bus: #I2CBus to be used + * @address: address of the slave + * + * Returns: 0 on success, -1 on error + */ +int i2c_start_send(I2CBus *bus, uint8_t address); + void i2c_end_transfer(I2CBus *bus); void i2c_nack(I2CBus *bus); -int i2c_send_recv(I2CBus *bus, uint8_t *data, bool send); int i2c_send(I2CBus *bus, uint8_t data); uint8_t i2c_recv(I2CBus *bus); +bool i2c_scan_bus(I2CBus *bus, uint8_t address, bool broadcast, + I2CNodeList *current_devs); /** * Create an I2C slave device on the heap. @@ -127,8 +174,12 @@ I2CSlave *i2c_slave_create_simple(I2CBus *bus, const char *name, uint8_t addr); */ bool i2c_slave_realize_and_unref(I2CSlave *dev, I2CBus *bus, Error **errp); -/* lm832x.c */ -void lm832x_key_event(DeviceState *dev, int key, int state); +/** + * Set the I2C bus address of a slave device + * @dev: I2C slave device + * @address: I2C address of the slave when put on a bus + */ +void i2c_slave_set_address(I2CSlave *dev, uint8_t address); extern const VMStateDescription vmstate_i2c_slave; diff --git a/include/hw/i2c/i2c_mux_pca954x.h b/include/hw/i2c/i2c_mux_pca954x.h new file mode 100644 index 00000000000..8aaf9bbc394 --- /dev/null +++ b/include/hw/i2c/i2c_mux_pca954x.h @@ -0,0 +1,19 @@ +#ifndef QEMU_I2C_MUX_PCA954X +#define QEMU_I2C_MUX_PCA954X + +#include "hw/i2c/i2c.h" + +#define TYPE_PCA9546 "pca9546" +#define TYPE_PCA9548 "pca9548" + +/** + * Retrieves the i2c bus associated with the specified channel on this i2c + * mux. + * @mux: an i2c mux device. 
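For illustration, here is a minimal master-side sketch built only from the helpers declared in the i2c.h hunk above (i2c_start_send(), i2c_start_recv(), i2c_send(), i2c_recv(), i2c_end_transfer()); the slave address, register index and wrapper name are hypothetical and not part of this patch:

    /* Hypothetical wrapper: read one register from a slave at dev_addr. */
    static int demo_i2c_read_reg(I2CBus *bus, uint8_t dev_addr,
                                 uint8_t reg, uint8_t *val)
    {
        if (i2c_start_send(bus, dev_addr)) {
            return -1;                        /* address not acknowledged */
        }
        i2c_send(bus, reg);                   /* select the register to read */
        if (i2c_start_recv(bus, dev_addr)) {  /* repeated START, read phase */
            i2c_end_transfer(bus);
            return -1;
        }
        *val = i2c_recv(bus);                 /* one data byte from the slave */
        i2c_end_transfer(bus);                /* STOP condition */
        return 0;
    }

When the transfer direction is only known at run time, i2c_start_transfer() with its bool is_recv argument remains the right entry point, as the comment above notes.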
+ * @channel: the i2c channel requested + * + * Returns: a pointer to the associated i2c bus. + */ +I2CBus *pca954x_i2c_get_bus(I2CSlave *mux, uint8_t channel); + +#endif diff --git a/include/hw/i2c/pmbus_device.h b/include/hw/i2c/pmbus_device.h new file mode 100644 index 00000000000..62bd38c83fb --- /dev/null +++ b/include/hw/i2c/pmbus_device.h @@ -0,0 +1,517 @@ +/* + * QEMU PMBus device emulation + * + * Copyright 2021 Google LLC + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_PMBUS_DEVICE_H +#define HW_PMBUS_DEVICE_H + +#include "qemu/bitops.h" +#include "hw/i2c/smbus_slave.h" + +enum pmbus_registers { + PMBUS_PAGE = 0x00, /* R/W byte */ + PMBUS_OPERATION = 0x01, /* R/W byte */ + PMBUS_ON_OFF_CONFIG = 0x02, /* R/W byte */ + PMBUS_CLEAR_FAULTS = 0x03, /* Send Byte */ + PMBUS_PHASE = 0x04, /* R/W byte */ + PMBUS_PAGE_PLUS_WRITE = 0x05, /* Block Write-only */ + PMBUS_PAGE_PLUS_READ = 0x06, /* Block Read-only */ + PMBUS_WRITE_PROTECT = 0x10, /* R/W byte */ + PMBUS_STORE_DEFAULT_ALL = 0x11, /* Send Byte */ + PMBUS_RESTORE_DEFAULT_ALL = 0x12, /* Send Byte */ + PMBUS_STORE_DEFAULT_CODE = 0x13, /* Write-only Byte */ + PMBUS_RESTORE_DEFAULT_CODE = 0x14, /* Write-only Byte */ + PMBUS_STORE_USER_ALL = 0x15, /* Send Byte */ + PMBUS_RESTORE_USER_ALL = 0x16, /* Send Byte */ + PMBUS_STORE_USER_CODE = 0x17, /* Write-only Byte */ + PMBUS_RESTORE_USER_CODE = 0x18, /* Write-only Byte */ + PMBUS_CAPABILITY = 0x19, /* Read-Only byte */ + PMBUS_QUERY = 0x1A, /* Write-Only */ + PMBUS_SMBALERT_MASK = 0x1B, /* Block read, Word write */ + PMBUS_VOUT_MODE = 0x20, /* R/W byte */ + PMBUS_VOUT_COMMAND = 0x21, /* R/W word */ + PMBUS_VOUT_TRIM = 0x22, /* R/W word */ + PMBUS_VOUT_CAL_OFFSET = 0x23, /* R/W word */ + PMBUS_VOUT_MAX = 0x24, /* R/W word */ + PMBUS_VOUT_MARGIN_HIGH = 0x25, /* R/W word */ + PMBUS_VOUT_MARGIN_LOW = 0x26, /* R/W word */ + PMBUS_VOUT_TRANSITION_RATE = 0x27, /* R/W word */ + PMBUS_VOUT_DROOP = 0x28, /* R/W word */ + PMBUS_VOUT_SCALE_LOOP = 0x29, /* R/W word */ + PMBUS_VOUT_SCALE_MONITOR = 0x2A, /* R/W word */ + PMBUS_COEFFICIENTS = 0x30, /* Read-only block 5 bytes */ + PMBUS_POUT_MAX = 0x31, /* R/W word */ + PMBUS_MAX_DUTY = 0x32, /* R/W word */ + PMBUS_FREQUENCY_SWITCH = 0x33, /* R/W word */ + PMBUS_VIN_ON = 0x35, /* R/W word */ + PMBUS_VIN_OFF = 0x36, /* R/W word */ + PMBUS_INTERLEAVE = 0x37, /* R/W word */ + PMBUS_IOUT_CAL_GAIN = 0x38, /* R/W word */ + PMBUS_IOUT_CAL_OFFSET = 0x39, /* R/W word */ + PMBUS_FAN_CONFIG_1_2 = 0x3A, /* R/W byte */ + PMBUS_FAN_COMMAND_1 = 0x3B, /* R/W word */ + PMBUS_FAN_COMMAND_2 = 0x3C, /* R/W word */ + PMBUS_FAN_CONFIG_3_4 = 0x3D, /* R/W byte */ + PMBUS_FAN_COMMAND_3 = 0x3E, /* R/W word */ + PMBUS_FAN_COMMAND_4 = 0x3F, /* R/W word */ + PMBUS_VOUT_OV_FAULT_LIMIT = 0x40, /* R/W word */ + PMBUS_VOUT_OV_FAULT_RESPONSE = 0x41, /* R/W byte */ + PMBUS_VOUT_OV_WARN_LIMIT = 0x42, /* R/W word */ + PMBUS_VOUT_UV_WARN_LIMIT = 0x43, /* R/W word */ + PMBUS_VOUT_UV_FAULT_LIMIT = 0x44, /* R/W word */ + PMBUS_VOUT_UV_FAULT_RESPONSE = 0x45, /* R/W byte */ + PMBUS_IOUT_OC_FAULT_LIMIT = 0x46, /* R/W word */ + PMBUS_IOUT_OC_FAULT_RESPONSE = 0x47, /* R/W byte */ + PMBUS_IOUT_OC_LV_FAULT_LIMIT = 0x48, /* R/W word */ + PMBUS_IOUT_OC_LV_FAULT_RESPONSE = 0x49, /* R/W byte */ + PMBUS_IOUT_OC_WARN_LIMIT = 0x4A, /* R/W word */ + PMBUS_IOUT_UC_FAULT_LIMIT = 0x4B, /* R/W word */ + PMBUS_IOUT_UC_FAULT_RESPONSE = 0x4C, /* R/W byte */ + PMBUS_OT_FAULT_LIMIT = 0x4F, /* R/W word */ + PMBUS_OT_FAULT_RESPONSE = 0x50, /* R/W byte */ + PMBUS_OT_WARN_LIMIT = 0x51, /* R/W word */ + 
PMBUS_UT_WARN_LIMIT = 0x52, /* R/W word */ + PMBUS_UT_FAULT_LIMIT = 0x53, /* R/W word */ + PMBUS_UT_FAULT_RESPONSE = 0x54, /* R/W byte */ + PMBUS_VIN_OV_FAULT_LIMIT = 0x55, /* R/W word */ + PMBUS_VIN_OV_FAULT_RESPONSE = 0x56, /* R/W byte */ + PMBUS_VIN_OV_WARN_LIMIT = 0x57, /* R/W word */ + PMBUS_VIN_UV_WARN_LIMIT = 0x58, /* R/W word */ + PMBUS_VIN_UV_FAULT_LIMIT = 0x59, /* R/W word */ + PMBUS_VIN_UV_FAULT_RESPONSE = 0x5A, /* R/W byte */ + PMBUS_IIN_OC_FAULT_LIMIT = 0x5B, /* R/W word */ + PMBUS_IIN_OC_FAULT_RESPONSE = 0x5C, /* R/W byte */ + PMBUS_IIN_OC_WARN_LIMIT = 0x5D, /* R/W word */ + PMBUS_POWER_GOOD_ON = 0x5E, /* R/W word */ + PMBUS_POWER_GOOD_OFF = 0x5F, /* R/W word */ + PMBUS_TON_DELAY = 0x60, /* R/W word */ + PMBUS_TON_RISE = 0x61, /* R/W word */ + PMBUS_TON_MAX_FAULT_LIMIT = 0x62, /* R/W word */ + PMBUS_TON_MAX_FAULT_RESPONSE = 0x63, /* R/W byte */ + PMBUS_TOFF_DELAY = 0x64, /* R/W word */ + PMBUS_TOFF_FALL = 0x65, /* R/W word */ + PMBUS_TOFF_MAX_WARN_LIMIT = 0x66, /* R/W word */ + PMBUS_POUT_OP_FAULT_LIMIT = 0x68, /* R/W word */ + PMBUS_POUT_OP_FAULT_RESPONSE = 0x69, /* R/W byte */ + PMBUS_POUT_OP_WARN_LIMIT = 0x6A, /* R/W word */ + PMBUS_PIN_OP_WARN_LIMIT = 0x6B, /* R/W word */ + PMBUS_STATUS_BYTE = 0x78, /* R/W byte */ + PMBUS_STATUS_WORD = 0x79, /* R/W word */ + PMBUS_STATUS_VOUT = 0x7A, /* R/W byte */ + PMBUS_STATUS_IOUT = 0x7B, /* R/W byte */ + PMBUS_STATUS_INPUT = 0x7C, /* R/W byte */ + PMBUS_STATUS_TEMPERATURE = 0x7D, /* R/W byte */ + PMBUS_STATUS_CML = 0x7E, /* R/W byte */ + PMBUS_STATUS_OTHER = 0x7F, /* R/W byte */ + PMBUS_STATUS_MFR_SPECIFIC = 0x80, /* R/W byte */ + PMBUS_STATUS_FANS_1_2 = 0x81, /* R/W byte */ + PMBUS_STATUS_FANS_3_4 = 0x82, /* R/W byte */ + PMBUS_READ_EIN = 0x86, /* Read-Only block 5 bytes */ + PMBUS_READ_EOUT = 0x87, /* Read-Only block 5 bytes */ + PMBUS_READ_VIN = 0x88, /* Read-Only word */ + PMBUS_READ_IIN = 0x89, /* Read-Only word */ + PMBUS_READ_VCAP = 0x8A, /* Read-Only word */ + PMBUS_READ_VOUT = 0x8B, /* Read-Only word */ + PMBUS_READ_IOUT = 0x8C, /* Read-Only word */ + PMBUS_READ_TEMPERATURE_1 = 0x8D, /* Read-Only word */ + PMBUS_READ_TEMPERATURE_2 = 0x8E, /* Read-Only word */ + PMBUS_READ_TEMPERATURE_3 = 0x8F, /* Read-Only word */ + PMBUS_READ_FAN_SPEED_1 = 0x90, /* Read-Only word */ + PMBUS_READ_FAN_SPEED_2 = 0x91, /* Read-Only word */ + PMBUS_READ_FAN_SPEED_3 = 0x92, /* Read-Only word */ + PMBUS_READ_FAN_SPEED_4 = 0x93, /* Read-Only word */ + PMBUS_READ_DUTY_CYCLE = 0x94, /* Read-Only word */ + PMBUS_READ_FREQUENCY = 0x95, /* Read-Only word */ + PMBUS_READ_POUT = 0x96, /* Read-Only word */ + PMBUS_READ_PIN = 0x97, /* Read-Only word */ + PMBUS_REVISION = 0x98, /* Read-Only byte */ + PMBUS_MFR_ID = 0x99, /* R/W block */ + PMBUS_MFR_MODEL = 0x9A, /* R/W block */ + PMBUS_MFR_REVISION = 0x9B, /* R/W block */ + PMBUS_MFR_LOCATION = 0x9C, /* R/W block */ + PMBUS_MFR_DATE = 0x9D, /* R/W block */ + PMBUS_MFR_SERIAL = 0x9E, /* R/W block */ + PMBUS_APP_PROFILE_SUPPORT = 0x9F, /* Read-Only block-read */ + PMBUS_MFR_VIN_MIN = 0xA0, /* Read-Only word */ + PMBUS_MFR_VIN_MAX = 0xA1, /* Read-Only word */ + PMBUS_MFR_IIN_MAX = 0xA2, /* Read-Only word */ + PMBUS_MFR_PIN_MAX = 0xA3, /* Read-Only word */ + PMBUS_MFR_VOUT_MIN = 0xA4, /* Read-Only word */ + PMBUS_MFR_VOUT_MAX = 0xA5, /* Read-Only word */ + PMBUS_MFR_IOUT_MAX = 0xA6, /* Read-Only word */ + PMBUS_MFR_POUT_MAX = 0xA7, /* Read-Only word */ + PMBUS_MFR_TAMBIENT_MAX = 0xA8, /* Read-Only word */ + PMBUS_MFR_TAMBIENT_MIN = 0xA9, /* Read-Only word */ + PMBUS_MFR_EFFICIENCY_LL = 0xAA, /* Read-Only block 
14 bytes */ + PMBUS_MFR_EFFICIENCY_HL = 0xAB, /* Read-Only block 14 bytes */ + PMBUS_MFR_PIN_ACCURACY = 0xAC, /* Read-Only byte */ + PMBUS_IC_DEVICE_ID = 0xAD, /* Read-Only block-read */ + PMBUS_IC_DEVICE_REV = 0xAE, /* Read-Only block-read */ + PMBUS_MFR_MAX_TEMP_1 = 0xC0, /* R/W word */ + PMBUS_MFR_MAX_TEMP_2 = 0xC1, /* R/W word */ + PMBUS_MFR_MAX_TEMP_3 = 0xC2, /* R/W word */ +}; + +/* STATUS_WORD */ +#define PB_STATUS_VOUT BIT(15) +#define PB_STATUS_IOUT_POUT BIT(14) +#define PB_STATUS_INPUT BIT(13) +#define PB_STATUS_WORD_MFR BIT(12) +#define PB_STATUS_POWER_GOOD_N BIT(11) +#define PB_STATUS_FAN BIT(10) +#define PB_STATUS_OTHER BIT(9) +#define PB_STATUS_UNKNOWN BIT(8) +/* STATUS_BYTE */ +#define PB_STATUS_BUSY BIT(7) +#define PB_STATUS_OFF BIT(6) +#define PB_STATUS_VOUT_OV BIT(5) +#define PB_STATUS_IOUT_OC BIT(4) +#define PB_STATUS_VIN_UV BIT(3) +#define PB_STATUS_TEMPERATURE BIT(2) +#define PB_STATUS_CML BIT(1) +#define PB_STATUS_NONE_ABOVE BIT(0) + +/* STATUS_VOUT */ +#define PB_STATUS_VOUT_OV_FAULT BIT(7) /* Output Overvoltage Fault */ +#define PB_STATUS_VOUT_OV_WARN BIT(6) /* Output Overvoltage Warning */ +#define PB_STATUS_VOUT_UV_WARN BIT(5) /* Output Undervoltage Warning */ +#define PB_STATUS_VOUT_UV_FAULT BIT(4) /* Output Undervoltage Fault */ +#define PB_STATUS_VOUT_MAX BIT(3) +#define PB_STATUS_VOUT_TON_MAX_FAULT BIT(2) +#define PB_STATUS_VOUT_TOFF_MAX_WARN BIT(1) + +/* STATUS_IOUT */ +#define PB_STATUS_IOUT_OC_FAULT BIT(7) /* Output Overcurrent Fault */ +#define PB_STATUS_IOUT_OC_LV_FAULT BIT(6) /* Output OC And Low Voltage Fault */ +#define PB_STATUS_IOUT_OC_WARN BIT(5) /* Output Overcurrent Warning */ +#define PB_STATUS_IOUT_UC_FAULT BIT(4) /* Output Undercurrent Fault */ +#define PB_STATUS_CURR_SHARE BIT(3) /* Current Share Fault */ +#define PB_STATUS_PWR_LIM_MODE BIT(2) /* In Power Limiting Mode */ +#define PB_STATUS_POUT_OP_FAULT BIT(1) /* Output Overpower Fault */ +#define PB_STATUS_POUT_OP_WARN BIT(0) /* Output Overpower Warning */ + +/* STATUS_INPUT */ +#define PB_STATUS_INPUT_VIN_OV_FAULT BIT(7) /* Input Overvoltage Fault */ +#define PB_STATUS_INPUT_VIN_OV_WARN BIT(6) /* Input Overvoltage Warning */ +#define PB_STATUS_INPUT_VIN_UV_WARN BIT(5) /* Input Undervoltage Warning */ +#define PB_STATUS_INPUT_VIN_UV_FAULT BIT(4) /* Input Undervoltage Fault */ +#define PB_STATUS_INPUT_IIN_OC_FAULT BIT(2) /* Input Overcurrent Fault */ +#define PB_STATUS_INPUT_IIN_OC_WARN BIT(1) /* Input Overcurrent Warning */ +#define PB_STATUS_INPUT_PIN_OP_WARN BIT(0) /* Input Overpower Warning */ + +/* STATUS_TEMPERATURE */ +#define PB_STATUS_OT_FAULT BIT(7) /* Overtemperature Fault */ +#define PB_STATUS_OT_WARN BIT(6) /* Overtemperature Warning */ +#define PB_STATUS_UT_WARN BIT(5) /* Undertemperature Warning */ +#define PB_STATUS_UT_FAULT BIT(4) /* Undertemperature Fault */ + +/* STATUS_CML */ +#define PB_CML_FAULT_INVALID_CMD BIT(7) /* Invalid/Unsupported Command */ +#define PB_CML_FAULT_INVALID_DATA BIT(6) /* Invalid/Unsupported Data */ +#define PB_CML_FAULT_PEC BIT(5) /* Packet Error Check Failed */ +#define PB_CML_FAULT_MEMORY BIT(4) /* Memory Fault Detected */ +#define PB_CML_FAULT_PROCESSOR BIT(3) /* Processor Fault Detected */ +#define PB_CML_FAULT_OTHER_COMM BIT(1) /* Other communication fault */ +#define PB_CML_FAULT_OTHER_MEM_LOGIC BIT(0) /* Other Memory Or Logic Fault */ + +/* OPERATION*/ +#define PB_OP_ON BIT(7) /* PSU is switched on */ +#define PB_OP_MARGIN_HIGH BIT(5) /* PSU vout is set to margin high */ +#define PB_OP_MARGIN_LOW BIT(4) /* PSU vout is set to margin low */ + 
+/* PAGES */ +#define PB_MAX_PAGES 0x1F +#define PB_ALL_PAGES 0xFF + +#define TYPE_PMBUS_DEVICE "pmbus-device" +OBJECT_DECLARE_TYPE(PMBusDevice, PMBusDeviceClass, + PMBUS_DEVICE) + +/* flags */ +#define PB_HAS_COEFFICIENTS BIT_ULL(9) +#define PB_HAS_VIN BIT_ULL(10) +#define PB_HAS_VOUT BIT_ULL(11) +#define PB_HAS_VOUT_MARGIN BIT_ULL(12) +#define PB_HAS_VIN_RATING BIT_ULL(13) +#define PB_HAS_VOUT_RATING BIT_ULL(14) +#define PB_HAS_VOUT_MODE BIT_ULL(15) +#define PB_HAS_IOUT BIT_ULL(21) +#define PB_HAS_IIN BIT_ULL(22) +#define PB_HAS_IOUT_RATING BIT_ULL(23) +#define PB_HAS_IIN_RATING BIT_ULL(24) +#define PB_HAS_IOUT_GAIN BIT_ULL(25) +#define PB_HAS_POUT BIT_ULL(30) +#define PB_HAS_PIN BIT_ULL(31) +#define PB_HAS_EIN BIT_ULL(32) +#define PB_HAS_EOUT BIT_ULL(33) +#define PB_HAS_POUT_RATING BIT_ULL(34) +#define PB_HAS_PIN_RATING BIT_ULL(35) +#define PB_HAS_TEMPERATURE BIT_ULL(40) +#define PB_HAS_TEMP2 BIT_ULL(41) +#define PB_HAS_TEMP3 BIT_ULL(42) +#define PB_HAS_TEMP_RATING BIT_ULL(43) +#define PB_HAS_MFR_INFO BIT_ULL(50) + +struct PMBusDeviceClass { + SMBusDeviceClass parent_class; + uint8_t device_num_pages; + + /** + * Implement quick_cmd, receive byte, and write_data to support non-standard + * PMBus functionality + */ + void (*quick_cmd)(PMBusDevice *dev, uint8_t read); + int (*write_data)(PMBusDevice *dev, const uint8_t *buf, uint8_t len); + uint8_t (*receive_byte)(PMBusDevice *dev); +}; + +/* + * According to the spec, each page may offer the full range of PMBus commands + * available for each output or non-PMBus device. + * Therefore, we can't assume that any registers will always be the same across + * all pages. + * The page 0xFF is intended for writes to all pages + */ +typedef struct PMBusPage { + uint64_t page_flags; + + uint8_t page; /* R/W byte */ + uint8_t operation; /* R/W byte */ + uint8_t on_off_config; /* R/W byte */ + uint8_t write_protect; /* R/W byte */ + uint8_t phase; /* R/W byte */ + uint8_t vout_mode; /* R/W byte */ + uint16_t vout_command; /* R/W word */ + uint16_t vout_trim; /* R/W word */ + uint16_t vout_cal_offset; /* R/W word */ + uint16_t vout_max; /* R/W word */ + uint16_t vout_margin_high; /* R/W word */ + uint16_t vout_margin_low; /* R/W word */ + uint16_t vout_transition_rate; /* R/W word */ + uint16_t vout_droop; /* R/W word */ + uint16_t vout_scale_loop; /* R/W word */ + uint16_t vout_scale_monitor; /* R/W word */ + uint8_t coefficients[5]; /* Read-only block 5 bytes */ + uint16_t pout_max; /* R/W word */ + uint16_t max_duty; /* R/W word */ + uint16_t frequency_switch; /* R/W word */ + uint16_t vin_on; /* R/W word */ + uint16_t vin_off; /* R/W word */ + uint16_t iout_cal_gain; /* R/W word */ + uint16_t iout_cal_offset; /* R/W word */ + uint8_t fan_config_1_2; /* R/W byte */ + uint16_t fan_command_1; /* R/W word */ + uint16_t fan_command_2; /* R/W word */ + uint8_t fan_config_3_4; /* R/W byte */ + uint16_t fan_command_3; /* R/W word */ + uint16_t fan_command_4; /* R/W word */ + uint16_t vout_ov_fault_limit; /* R/W word */ + uint8_t vout_ov_fault_response; /* R/W byte */ + uint16_t vout_ov_warn_limit; /* R/W word */ + uint16_t vout_uv_warn_limit; /* R/W word */ + uint16_t vout_uv_fault_limit; /* R/W word */ + uint8_t vout_uv_fault_response; /* R/W byte */ + uint16_t iout_oc_fault_limit; /* R/W word */ + uint8_t iout_oc_fault_response; /* R/W byte */ + uint16_t iout_oc_lv_fault_limit; /* R/W word */ + uint8_t iout_oc_lv_fault_response; /* R/W byte */ + uint16_t iout_oc_warn_limit; /* R/W word */ + uint16_t iout_uc_fault_limit; /* R/W word */ + uint8_t 
iout_uc_fault_response; /* R/W byte */ + uint16_t ot_fault_limit; /* R/W word */ + uint8_t ot_fault_response; /* R/W byte */ + uint16_t ot_warn_limit; /* R/W word */ + uint16_t ut_warn_limit; /* R/W word */ + uint16_t ut_fault_limit; /* R/W word */ + uint8_t ut_fault_response; /* R/W byte */ + uint16_t vin_ov_fault_limit; /* R/W word */ + uint8_t vin_ov_fault_response; /* R/W byte */ + uint16_t vin_ov_warn_limit; /* R/W word */ + uint16_t vin_uv_warn_limit; /* R/W word */ + uint16_t vin_uv_fault_limit; /* R/W word */ + uint8_t vin_uv_fault_response; /* R/W byte */ + uint16_t iin_oc_fault_limit; /* R/W word */ + uint8_t iin_oc_fault_response; /* R/W byte */ + uint16_t iin_oc_warn_limit; /* R/W word */ + uint16_t power_good_on; /* R/W word */ + uint16_t power_good_off; /* R/W word */ + uint16_t ton_delay; /* R/W word */ + uint16_t ton_rise; /* R/W word */ + uint16_t ton_max_fault_limit; /* R/W word */ + uint8_t ton_max_fault_response; /* R/W byte */ + uint16_t toff_delay; /* R/W word */ + uint16_t toff_fall; /* R/W word */ + uint16_t toff_max_warn_limit; /* R/W word */ + uint16_t pout_op_fault_limit; /* R/W word */ + uint8_t pout_op_fault_response; /* R/W byte */ + uint16_t pout_op_warn_limit; /* R/W word */ + uint16_t pin_op_warn_limit; /* R/W word */ + uint16_t status_word; /* R/W word */ + uint8_t status_vout; /* R/W byte */ + uint8_t status_iout; /* R/W byte */ + uint8_t status_input; /* R/W byte */ + uint8_t status_temperature; /* R/W byte */ + uint8_t status_cml; /* R/W byte */ + uint8_t status_other; /* R/W byte */ + uint8_t status_mfr_specific; /* R/W byte */ + uint8_t status_fans_1_2; /* R/W byte */ + uint8_t status_fans_3_4; /* R/W byte */ + uint8_t read_ein[5]; /* Read-Only block 5 bytes */ + uint8_t read_eout[5]; /* Read-Only block 5 bytes */ + uint16_t read_vin; /* Read-Only word */ + uint16_t read_iin; /* Read-Only word */ + uint16_t read_vcap; /* Read-Only word */ + uint16_t read_vout; /* Read-Only word */ + uint16_t read_iout; /* Read-Only word */ + uint16_t read_temperature_1; /* Read-Only word */ + uint16_t read_temperature_2; /* Read-Only word */ + uint16_t read_temperature_3; /* Read-Only word */ + uint16_t read_fan_speed_1; /* Read-Only word */ + uint16_t read_fan_speed_2; /* Read-Only word */ + uint16_t read_fan_speed_3; /* Read-Only word */ + uint16_t read_fan_speed_4; /* Read-Only word */ + uint16_t read_duty_cycle; /* Read-Only word */ + uint16_t read_frequency; /* Read-Only word */ + uint16_t read_pout; /* Read-Only word */ + uint16_t read_pin; /* Read-Only word */ + uint8_t revision; /* Read-Only byte */ + const char *mfr_id; /* R/W block */ + const char *mfr_model; /* R/W block */ + const char *mfr_revision; /* R/W block */ + const char *mfr_location; /* R/W block */ + const char *mfr_date; /* R/W block */ + const char *mfr_serial; /* R/W block */ + const char *app_profile_support; /* Read-Only block-read */ + uint16_t mfr_vin_min; /* Read-Only word */ + uint16_t mfr_vin_max; /* Read-Only word */ + uint16_t mfr_iin_max; /* Read-Only word */ + uint16_t mfr_pin_max; /* Read-Only word */ + uint16_t mfr_vout_min; /* Read-Only word */ + uint16_t mfr_vout_max; /* Read-Only word */ + uint16_t mfr_iout_max; /* Read-Only word */ + uint16_t mfr_pout_max; /* Read-Only word */ + uint16_t mfr_tambient_max; /* Read-Only word */ + uint16_t mfr_tambient_min; /* Read-Only word */ + uint8_t mfr_efficiency_ll[14]; /* Read-Only block 14 bytes */ + uint8_t mfr_efficiency_hl[14]; /* Read-Only block 14 bytes */ + uint8_t mfr_pin_accuracy; /* Read-Only byte */ + uint16_t mfr_max_temp_1; 
/* R/W word */ + uint16_t mfr_max_temp_2; /* R/W word */ + uint16_t mfr_max_temp_3; /* R/W word */ +} PMBusPage; + +/* State */ +struct PMBusDevice { + SMBusDevice smb; + + uint8_t num_pages; + uint8_t code; + uint8_t page; + + /* + * PMBus registers are stored in a PMBusPage structure allocated by + * calling pmbus_pages_alloc() + */ + PMBusPage *pages; + uint8_t capability; + + + int32_t in_buf_len; + uint8_t *in_buf; + int32_t out_buf_len; + uint8_t out_buf[SMBUS_DATA_MAX_LEN]; +}; + +/** + * Direct mode coefficients + * @var m - mantissa + * @var b - offset + * @var R - exponent + */ +typedef struct PMBusCoefficients { + int32_t m; /* mantissa */ + int64_t b; /* offset */ + int32_t R; /* exponent */ +} PMBusCoefficients; + +/** + * Convert sensor values to direct mode format + * + * Y = (m * x - b) * 10^R + * + * @return uint32_t + */ +uint16_t pmbus_data2direct_mode(PMBusCoefficients c, uint32_t value); + +/** + * Convert direct mode formatted data into sensor reading + * + * X = (Y * 10^-R - b) / m + * + * @return uint32_t + */ +uint32_t pmbus_direct_mode2data(PMBusCoefficients c, uint16_t value); + +/** + * @brief Send a block of data over PMBus + * Assumes that the bytes in the block are already ordered correctly, + * also assumes the length has been prepended to the block if necessary + * | low_byte | ... | high_byte | + * @param state - maintains state of the PMBus device + * @param data - byte array to be sent by device + * @param len - number + */ +void pmbus_send(PMBusDevice *state, const uint8_t *data, uint16_t len); +void pmbus_send8(PMBusDevice *state, uint8_t data); +void pmbus_send16(PMBusDevice *state, uint16_t data); +void pmbus_send32(PMBusDevice *state, uint32_t data); +void pmbus_send64(PMBusDevice *state, uint64_t data); + +/** + * @brief Send a string over PMBus with length prepended. + * Length is calculated using str_len() + */ +void pmbus_send_string(PMBusDevice *state, const char *data); + +/** + * @brief Receive data over PMBus + * These methods help track how much data is being received over PMBus + * Log to GUEST_ERROR if too much or too little is sent. + */ +uint8_t pmbus_receive8(PMBusDevice *pmdev); +uint16_t pmbus_receive16(PMBusDevice *pmdev); +uint32_t pmbus_receive32(PMBusDevice *pmdev); +uint64_t pmbus_receive64(PMBusDevice *pmdev); + +/** + * PMBus page config must be called before any page is first used. + * It will allocate memory for all the pages if needed. + * Passed in flags overwrite existing flags if any. + * @param page_index the page to which the flags are applied, setting page_index + * to 0xFF applies the passed in flags to all pages. + * @param flags + */ +int pmbus_page_config(PMBusDevice *pmdev, uint8_t page_index, uint64_t flags); + +/** + * Update the status registers when sensor values change. 
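As a worked example of the direct-format conversion declared above (the coefficients here are arbitrary, not taken from this patch): with m = 1, b = 0 and R = 2, a sensor value of X = 25 encodes as Y = (1 * 25 + 0) * 10^2 = 2500, and decoding gives X = (2500 * 10^-2 - 0) / 1 = 25. Note that the PMBus specification states the direct data format as Y = (m * X + b) * 10^R, which is the form the inverse given above (X = (Y * 10^-R - b) / m) corresponds to.

    /* Usage sketch with made-up coefficients (not taken from this patch). */
    PMBusCoefficients vout_coeff = { .m = 1, .b = 0, .R = 2 };
    uint16_t wire = pmbus_data2direct_mode(vout_coeff, 25);   /* expect 2500 */
    uint32_t x    = pmbus_direct_mode2data(vout_coeff, wire); /* expect 25 */

Similarly, a device model would typically declare its capabilities before any page is first used, e.g. pmbus_page_config(pmdev, PB_ALL_PAGES, PB_HAS_VOUT | PB_HAS_TEMPERATURE); passing PB_ALL_PAGES (0xFF) applies the flags to every page, per the comment above.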
+ * Useful if modifying sensors through qmp, this way status registers get + * updated + */ +void pmbus_check_limits(PMBusDevice *pmdev); + +extern const VMStateDescription vmstate_pmbus_device; + +#define VMSTATE_PMBUS_DEVICE(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(PMBusDevice), \ + .vmsd = &vmstate_pmbus_device, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, PMBusDevice), \ +} + +#endif diff --git a/include/hw/i386/hostmem-epc.h b/include/hw/i386/hostmem-epc.h new file mode 100644 index 00000000000..846c7260854 --- /dev/null +++ b/include/hw/i386/hostmem-epc.h @@ -0,0 +1,28 @@ +/* + * SGX EPC backend + * + * Copyright (C) 2019 Intel Corporation + * + * Authors: + * Sean Christopherson + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_HOSTMEM_EPC_H +#define QEMU_HOSTMEM_EPC_H + +#include "sysemu/hostmem.h" + +#define TYPE_MEMORY_BACKEND_EPC "memory-backend-epc" + +#define MEMORY_BACKEND_EPC(obj) \ + OBJECT_CHECK(HostMemoryBackendEpc, (obj), TYPE_MEMORY_BACKEND_EPC) + +typedef struct HostMemoryBackendEpc HostMemoryBackendEpc; + +struct HostMemoryBackendEpc { + HostMemoryBackend parent_obj; +}; + +#endif diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h index f25f8374413..4d9c732d4b2 100644 --- a/include/hw/i386/microvm.h +++ b/include/hw/i386/microvm.h @@ -104,6 +104,10 @@ struct MicrovmMachineState { Notifier machine_done; Notifier powerdown_req; struct GPEXConfig gpex; + + /* device tree */ + void *fdt; + uint32_t ioapic_phandle[2]; }; #define TYPE_MICROVM_MACHINE MACHINE_TYPE_NAME("microvm") diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index dcf060b7918..9ab39e428f8 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -12,6 +12,7 @@ #include "hw/acpi/acpi_dev_interface.h" #include "hw/hotplug.h" #include "qom/object.h" +#include "hw/i386/sgx-epc.h" #define HPET_INTCAP "hpet-intcap" @@ -19,7 +20,6 @@ * PCMachineState: * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling * @boot_cpus: number of present VCPUs - * @smp_dies: number of dies per one package */ typedef struct PCMachineState { /*< private >*/ @@ -35,6 +35,7 @@ typedef struct PCMachineState { I2CBus *smbus; PFlashCFI01 *flash[2]; ISADevice *pcspk; + DeviceState *iommu; /* Configuration options: */ uint64_t max_ram_below_4g; @@ -45,14 +46,13 @@ typedef struct PCMachineState { bool sata_enabled; bool pit_enabled; bool hpet_enabled; + bool default_bus_bypass_iommu; uint64_t max_fw_size; - /* NUMA information: */ - uint64_t numa_nodes; - uint64_t *node_mem; - /* ACPI Memory hotplug IO base address */ hwaddr memhp_io_base; + + SGXEPCState sgx_epc; } PCMachineState; #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" @@ -117,9 +117,6 @@ struct PCMachineClass { /* generate legacy CPU hotplug AML */ bool legacy_cpu_hotplug; - /* use DMA capable linuxboot option rom */ - bool linuxboot_dma_enabled; - /* use PVH to load kernels that support this feature */ bool pvh_enabled; @@ -139,8 +136,6 @@ extern int fd_bootchk; void pc_acpi_smi_interrupt(void *opaque, int irq, int level); -void pc_smp_parse(MachineState *ms, QemuOpts *opts); - void pc_guest_info_init(PCMachineState *pcms); #define PCI_HOST_PROP_PCI_HOLE_START "pci-hole-start" @@ -191,11 +186,21 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms); void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory); bool pc_system_ovmf_table_find(const 
char *entry, uint8_t **data, int *data_len); +void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, size_t flash_size); - -/* acpi-build.c */ +/* hw/i386/acpi-common.c */ void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, - const CPUArchIdList *apic_ids, GArray *entry); + const CPUArchIdList *apic_ids, GArray *entry, + bool force_enabled); + +/* sgx.c */ +void pc_machine_init_sgx_epc(PCMachineState *pcms); + +extern GlobalProperty pc_compat_6_1[]; +extern const size_t pc_compat_6_1_len; + +extern GlobalProperty pc_compat_6_0[]; +extern const size_t pc_compat_6_0_len; extern GlobalProperty pc_compat_5_2[]; extern const size_t pc_compat_5_2_len; diff --git a/include/hw/i386/sgx-epc.h b/include/hw/i386/sgx-epc.h new file mode 100644 index 00000000000..a6a65be854f --- /dev/null +++ b/include/hw/i386/sgx-epc.h @@ -0,0 +1,67 @@ +/* + * SGX EPC device + * + * Copyright (C) 2019 Intel Corporation + * + * Authors: + * Sean Christopherson + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_SGX_EPC_H +#define QEMU_SGX_EPC_H + +#include "hw/i386/hostmem-epc.h" + +#define TYPE_SGX_EPC "sgx-epc" +#define SGX_EPC(obj) \ + OBJECT_CHECK(SGXEPCDevice, (obj), TYPE_SGX_EPC) +#define SGX_EPC_CLASS(oc) \ + OBJECT_CLASS_CHECK(SGXEPCDeviceClass, (oc), TYPE_SGX_EPC) +#define SGX_EPC_GET_CLASS(obj) \ + OBJECT_GET_CLASS(SGXEPCDeviceClass, (obj), TYPE_SGX_EPC) + +#define SGX_EPC_ADDR_PROP "addr" +#define SGX_EPC_SIZE_PROP "size" +#define SGX_EPC_MEMDEV_PROP "memdev" + +/** + * SGXEPCDevice: + * @addr: starting guest physical address, where @SGXEPCDevice is mapped. + * Default value: 0, means that address is auto-allocated. + * @hostmem: host memory backend providing memory for @SGXEPCDevice + */ +typedef struct SGXEPCDevice { + /* private */ + DeviceState parent_obj; + + /* public */ + uint64_t addr; + HostMemoryBackendEpc *hostmem; +} SGXEPCDevice; + +/* + * @base: address in guest physical address space where EPC regions start + * @mr: address space container for memory devices + */ +typedef struct SGXEPCState { + uint64_t base; + uint64_t size; + + MemoryRegion mr; + + struct SGXEPCDevice **sections; + int nr_sections; +} SGXEPCState; + +bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size); + +static inline uint64_t sgx_epc_above_4g_end(SGXEPCState *sgx_epc) +{ + assert(sgx_epc != NULL && sgx_epc->base >= 0x100000000ULL); + + return sgx_epc->base + sgx_epc->size; +} + +#endif diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h index 9de92d33a11..5ba0c056d60 100644 --- a/include/hw/i386/x86-iommu.h +++ b/include/hw/i386/x86-iommu.h @@ -33,12 +33,6 @@ OBJECT_DECLARE_TYPE(X86IOMMUState, X86IOMMUClass, X86_IOMMU_DEVICE) typedef struct X86IOMMUIrq X86IOMMUIrq; typedef struct X86IOMMU_MSIMessage X86IOMMU_MSIMessage; -typedef enum IommuType { - TYPE_INTEL, - TYPE_AMD, - TYPE_NONE -} IommuType; - struct X86IOMMUClass { SysBusDeviceClass parent; /* Intel/AMD specific realize() hook */ @@ -71,7 +65,6 @@ struct X86IOMMUState { OnOffAuto intr_supported; /* Whether vIOMMU supports IR */ bool dt_supported; /* Whether vIOMMU supports DT */ bool pt_supported; /* Whether vIOMMU supports pass-through */ - IommuType type; /* IOMMU type - AMD/Intel */ QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */ }; @@ -140,11 +133,6 @@ struct X86IOMMU_MSIMessage { */ X86IOMMUState *x86_iommu_get_default(void); -/* - * x86_iommu_get_type - get IOMMU type - */ -IommuType x86_iommu_get_type(void); 
- /** * x86_iommu_iec_register_notifier - register IEC (Interrupt Entry * Cache) notifiers diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index c09b648dff2..bb1cfb88966 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -38,6 +38,8 @@ struct X86MachineClass { bool save_tsc_khz; /* Enables contiguous-apic-ID mode */ bool compat_apic_id_mode; + /* use DMA capable linuxboot option rom */ + bool fwcfg_dma_enabled; }; struct X86MachineState { @@ -62,7 +64,7 @@ struct X86MachineState { unsigned pci_irq_mask; unsigned apic_id_limit; uint16_t boot_cpus; - unsigned smp_dies; + SgxEPCList *sgx_epc_list; OnOffAuto smm; OnOffAuto acpi; @@ -74,12 +76,20 @@ struct X86MachineState { * will be translated to MSI messages in the address space. */ AddressSpace *ioapic_as; + + /* + * Ratelimit enforced on detected bus locks in guest. + * The default value of the bus_lock_ratelimit is 0 per second, + * which means no limitation on the guest's bus locks. + */ + uint64_t bus_lock_ratelimit; }; #define X86_MACHINE_SMM "smm" #define X86_MACHINE_ACPI "acpi" #define X86_MACHINE_OEM_ID "x-oem-id" #define X86_MACHINE_OEM_TABLE_ID "x-oem-table-id" +#define X86_MACHINE_BUS_LOCK_RATELIMIT "bus-lock-ratelimit" #define TYPE_X86_MACHINE MACHINE_TYPE_NAME("x86") OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE) @@ -112,8 +122,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, void x86_load_linux(X86MachineState *x86ms, FWCfgState *fw_cfg, int acpi_data_size, - bool pvh_enabled, - bool linuxboot_dma_enabled); + bool pvh_enabled); bool x86_machine_is_smm_enabled(const X86MachineState *x86ms); bool x86_machine_is_acpi_enabled(const X86MachineState *x86ms); diff --git a/include/hw/ide/internal.h b/include/hw/ide/internal.h index 2d09162eeb7..97e7e59dc58 100644 --- a/include/hw/ide/internal.h +++ b/include/hw/ide/internal.h @@ -624,7 +624,7 @@ int ide_init_drive(IDEState *s, BlockBackend *blk, IDEDriveKind kind, int chs_trans, Error **errp); void ide_init2(IDEBus *bus, qemu_irq irq); void ide_exit(IDEState *s); -void ide_init_ioport(IDEBus *bus, ISADevice *isa, int iobase, int iobase2); +int ide_init_ioport(IDEBus *bus, ISADevice *isa, int iobase, int iobase2); void ide_register_restart_cb(IDEBus *bus); void ide_exec_cmd(IDEBus *bus, uint32_t val); @@ -648,8 +648,8 @@ void ide_atapi_cmd(IDEState *s); void ide_atapi_cmd_reply_end(IDEState *s); /* hw/ide/qdev.c */ -void ide_bus_new(IDEBus *idebus, size_t idebus_size, DeviceState *dev, - int bus_id, int max_units); +void ide_bus_init(IDEBus *idebus, size_t idebus_size, DeviceState *dev, + int bus_id, int max_units); IDEDevice *ide_create_drive(IDEBus *bus, int unit, DriveInfo *drive); int ide_handle_rw_error(IDEState *s, int error, int op); diff --git a/include/hw/input/lm832x.h b/include/hw/input/lm832x.h new file mode 100644 index 00000000000..2a58ccf8916 --- /dev/null +++ b/include/hw/input/lm832x.h @@ -0,0 +1,28 @@ +/* + * National Semiconductor LM8322/8323 GPIO keyboard & PWM chips. + * + * Copyright (C) 2008 Nokia Corporation + * Written by Andrzej Zaborowski + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef HW_INPUT_LM832X +#define HW_INPUT_LM832X + +#define TYPE_LM8323 "lm8323" + +void lm832x_key_event(DeviceState *dev, int key, int state); + +#endif diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h index 91491a2f664..fc38e4b7dca 100644 --- a/include/hw/intc/arm_gicv3_common.h +++ b/include/hw/intc/arm_gicv3_common.h @@ -36,6 +36,8 @@ #define GICV3_MAXIRQ 1020 #define GICV3_MAXSPI (GICV3_MAXIRQ - GIC_INTERNAL) +#define GICV3_LPI_INTID_START 8192 + #define GICV3_REDIST_SIZE 0x20000 /* Number of SGI target-list bits */ @@ -202,23 +204,41 @@ struct GICv3CPUState { * real state above; it doesn't need to be migrated. */ PendingIrq hppi; + + /* + * Cached information recalculated from LPI tables + * in guest memory + */ + PendingIrq hpplpi; + /* This is temporary working state, to avoid a malloc in gicv3_update() */ bool seenbetter; }; +/* + * The redistributor pages might be split into more than one region + * on some machine types if there are many CPUs. + */ +typedef struct GICv3RedistRegion { + GICv3State *gic; + MemoryRegion iomem; + uint32_t cpuidx; /* index of first CPU this region covers */ +} GICv3RedistRegion; + struct GICv3State { /*< private >*/ SysBusDevice parent_obj; /*< public >*/ MemoryRegion iomem_dist; /* Distributor */ - MemoryRegion *iomem_redist; /* Redistributor Regions */ + GICv3RedistRegion *redist_regions; /* Redistributor Regions */ uint32_t *redist_region_count; /* redistributor count within each region */ uint32_t nb_redist_regions; /* number of redist regions */ uint32_t num_cpu; uint32_t num_irq; uint32_t revision; + bool lpi_enable; bool security_extn; bool irq_reset_nonsecure; bool gicd_no_migration_shift_bug; @@ -226,6 +246,9 @@ struct GICv3State { int dev_fd; /* kvm device fd if backed by kvm vgic support */ Error *migration_blocker; + MemoryRegion *dma; + AddressSpace dma_as; + /* Distributor */ /* for a GIC with the security extensions the NS banked version of this @@ -293,6 +316,6 @@ struct ARMGICv3CommonClass { }; void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, - const MemoryRegionOps *ops, Error **errp); + const MemoryRegionOps *ops); #endif diff --git a/include/hw/intc/arm_gicv3_its_common.h b/include/hw/intc/arm_gicv3_its_common.h index 5a0952b4049..4e79145dde3 100644 --- a/include/hw/intc/arm_gicv3_its_common.h +++ b/include/hw/intc/arm_gicv3_its_common.h @@ -25,17 +25,41 @@ #include "hw/intc/arm_gicv3_common.h" #include "qom/object.h" +#define TYPE_ARM_GICV3_ITS "arm-gicv3-its" + #define ITS_CONTROL_SIZE 0x10000 #define ITS_TRANS_SIZE 0x10000 #define ITS_SIZE (ITS_CONTROL_SIZE + ITS_TRANS_SIZE) #define GITS_CTLR 0x0 #define GITS_IIDR 0x4 +#define GITS_TYPER 0x8 #define GITS_CBASER 0x80 #define GITS_CWRITER 0x88 #define GITS_CREADR 0x90 #define GITS_BASER 0x100 +#define GITS_TRANSLATER 0x0040 + +typedef struct { + bool valid; + bool indirect; + uint16_t entry_sz; + uint32_t page_sz; + uint32_t max_entries; + union { + uint32_t max_devids; + uint32_t max_collids; + } maxids; + uint64_t base_addr; +} TableDesc; + +typedef struct { + bool valid; + uint32_t max_entries; + uint64_t base_addr; +} CmdQDesc; + struct GICv3ITSState { SysBusDevice parent_obj; @@ -52,17 +76,23 @@ struct GICv3ITSState { /* Registers */ uint32_t ctlr; uint32_t iidr; + uint64_t typer; uint64_t cbaser; uint64_t cwriter; uint64_t creadr; 
uint64_t baser[8]; + TableDesc dt; + TableDesc ct; + CmdQDesc cq; + Error *migration_blocker; }; typedef struct GICv3ITSState GICv3ITSState; -void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops); +void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops, + const MemoryRegionOps *tops); #define TYPE_ARM_GICV3_ITS_COMMON "arm-gicv3-its-common" typedef struct GICv3ITSCommonClass GICv3ITSCommonClass; diff --git a/include/hw/intc/armv7m_nvic.h b/include/hw/intc/armv7m_nvic.h index 39c71e15936..0180c7b0ca1 100644 --- a/include/hw/intc/armv7m_nvic.h +++ b/include/hw/intc/armv7m_nvic.h @@ -80,18 +80,10 @@ struct NVICState { int vectpending_prio; /* group prio of the exeception in vectpending */ MemoryRegion sysregmem; - MemoryRegion sysreg_ns_mem; - MemoryRegion systickmem; - MemoryRegion systick_ns_mem; - MemoryRegion ras_mem; - MemoryRegion container; - MemoryRegion defaultmem; uint32_t num_irq; qemu_irq excpout; qemu_irq sysresetreq; - - SysTickState systick[M_REG_NUM_BANKS]; }; #endif diff --git a/include/hw/intc/goldfish_pic.h b/include/hw/intc/goldfish_pic.h index ad13ab37fc3..e9d552f7968 100644 --- a/include/hw/intc/goldfish_pic.h +++ b/include/hw/intc/goldfish_pic.h @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * Goldfish PIC * diff --git a/include/hw/intc/ibex_plic.h b/include/hw/intc/ibex_plic.h index 7fc495db992..d596436e064 100644 --- a/include/hw/intc/ibex_plic.h +++ b/include/hw/intc/ibex_plic.h @@ -60,6 +60,8 @@ struct IbexPlicState { uint32_t threshold_base; uint32_t claim_base; + + qemu_irq *external_irqs; }; #endif /* HW_IBEX_PLIC_H */ diff --git a/include/hw/intc/m68k_irqc.h b/include/hw/intc/m68k_irqc.h index dbcfcfc2e00..ef91f218122 100644 --- a/include/hw/intc/m68k_irqc.h +++ b/include/hw/intc/m68k_irqc.h @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * QEMU Motorola 680x0 IRQ Controller * diff --git a/include/hw/intc/riscv_aclint.h b/include/hw/intc/riscv_aclint.h new file mode 100644 index 00000000000..229bd08d254 --- /dev/null +++ b/include/hw/intc/riscv_aclint.h @@ -0,0 +1,80 @@ +/* + * RISC-V ACLINT (Advanced Core Local Interruptor) interface + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * Copyright (c) 2017 SiFive, Inc. + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#ifndef HW_RISCV_ACLINT_H +#define HW_RISCV_ACLINT_H + +#include "hw/sysbus.h" + +#define TYPE_RISCV_ACLINT_MTIMER "riscv.aclint.mtimer" + +#define RISCV_ACLINT_MTIMER(obj) \ + OBJECT_CHECK(RISCVAclintMTimerState, (obj), TYPE_RISCV_ACLINT_MTIMER) + +typedef struct RISCVAclintMTimerState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion mmio; + uint32_t hartid_base; + uint32_t num_harts; + uint32_t timecmp_base; + uint32_t time_base; + uint32_t aperture_size; + uint32_t timebase_freq; + qemu_irq *timer_irqs; +} RISCVAclintMTimerState; + +DeviceState *riscv_aclint_mtimer_create(hwaddr addr, hwaddr size, + uint32_t hartid_base, uint32_t num_harts, + uint32_t timecmp_base, uint32_t time_base, uint32_t timebase_freq, + bool provide_rdtime); + +#define TYPE_RISCV_ACLINT_SWI "riscv.aclint.swi" + +#define RISCV_ACLINT_SWI(obj) \ + OBJECT_CHECK(RISCVAclintSwiState, (obj), TYPE_RISCV_ACLINT_SWI) + +typedef struct RISCVAclintSwiState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion mmio; + uint32_t hartid_base; + uint32_t num_harts; + uint32_t sswi; + qemu_irq *soft_irqs; +} RISCVAclintSwiState; + +DeviceState *riscv_aclint_swi_create(hwaddr addr, uint32_t hartid_base, + uint32_t num_harts, bool sswi); + +enum { + RISCV_ACLINT_DEFAULT_MTIMECMP = 0x0, + RISCV_ACLINT_DEFAULT_MTIME = 0x7ff8, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE = 0x8000, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ = 10000000, + RISCV_ACLINT_MAX_HARTS = 4095, + RISCV_ACLINT_SWI_SIZE = 0x4000 +}; + +#endif diff --git a/include/hw/intc/sifive_clint.h b/include/hw/intc/sifive_clint.h deleted file mode 100644 index a30be0f3d6f..00000000000 --- a/include/hw/intc/sifive_clint.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * SiFive CLINT (Core Local Interruptor) interface - * - * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu - * Copyright (c) 2017 SiFive, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2 or later, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . 
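A hypothetical board-wiring sketch using the ACLINT creation helpers and the default layout constants declared above; the base address 0x2000000 and the hart count are placeholders, not values taken from this patch:

    /* One MSWI device followed by an MTIMER, laid out back to back. */
    hwaddr aclint_base = 0x2000000;   /* placeholder base address */
    uint32_t num_harts = 4;           /* placeholder hart count */

    riscv_aclint_swi_create(aclint_base, 0, num_harts, false /* sswi */);
    riscv_aclint_mtimer_create(aclint_base + RISCV_ACLINT_SWI_SIZE,
                               RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
                               0, num_harts,
                               RISCV_ACLINT_DEFAULT_MTIMECMP,
                               RISCV_ACLINT_DEFAULT_MTIME,
                               RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ,
                               true /* provide_rdtime */);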
- */ - -#ifndef HW_SIFIVE_CLINT_H -#define HW_SIFIVE_CLINT_H - -#include "hw/sysbus.h" - -#define TYPE_SIFIVE_CLINT "riscv.sifive.clint" - -#define SIFIVE_CLINT(obj) \ - OBJECT_CHECK(SiFiveCLINTState, (obj), TYPE_SIFIVE_CLINT) - -typedef struct SiFiveCLINTState { - /*< private >*/ - SysBusDevice parent_obj; - - /*< public >*/ - MemoryRegion mmio; - uint32_t hartid_base; - uint32_t num_harts; - uint32_t sip_base; - uint32_t timecmp_base; - uint32_t time_base; - uint32_t aperture_size; - uint32_t timebase_freq; -} SiFiveCLINTState; - -DeviceState *sifive_clint_create(hwaddr addr, hwaddr size, - uint32_t hartid_base, uint32_t num_harts, uint32_t sip_base, - uint32_t timecmp_base, uint32_t time_base, uint32_t timebase_freq, - bool provide_rdtime); - -enum { - SIFIVE_SIP_BASE = 0x0, - SIFIVE_TIMECMP_BASE = 0x4000, - SIFIVE_TIME_BASE = 0xBFF8 -}; - -enum { - SIFIVE_CLINT_TIMEBASE_FREQ = 10000000 -}; - -#endif diff --git a/include/hw/intc/sifive_plic.h b/include/hw/intc/sifive_plic.h index 1e451a270c7..134cf39a96b 100644 --- a/include/hw/intc/sifive_plic.h +++ b/include/hw/intc/sifive_plic.h @@ -72,9 +72,13 @@ struct SiFivePLICState { uint32_t context_base; uint32_t context_stride; uint32_t aperture_size; + + qemu_irq *m_external_irqs; + qemu_irq *s_external_irqs; }; DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, + uint32_t num_harts, uint32_t hartid_base, uint32_t num_sources, uint32_t num_priorities, uint32_t priority_base, uint32_t pending_base, uint32_t enable_base, diff --git a/include/hw/ipack/ipack.h b/include/hw/ipack/ipack.h index 75014e74ae1..cbcdda509d3 100644 --- a/include/hw/ipack/ipack.h +++ b/include/hw/ipack/ipack.h @@ -73,9 +73,9 @@ extern const VMStateDescription vmstate_ipack_device; VMSTATE_STRUCT(_field, _state, 1, vmstate_ipack_device, IPackDevice) IPackDevice *ipack_device_find(IPackBus *bus, int32_t slot); -void ipack_bus_new_inplace(IPackBus *bus, size_t bus_size, - DeviceState *parent, - const char *name, uint8_t n_slots, - qemu_irq_handler handler); +void ipack_bus_init(IPackBus *bus, size_t bus_size, + DeviceState *parent, + uint8_t n_slots, + qemu_irq_handler handler); #endif diff --git a/include/hw/isa/isa.h b/include/hw/isa/isa.h index ddaae89a853..d4417b34b63 100644 --- a/include/hw/isa/isa.h +++ b/include/hw/isa/isa.h @@ -132,12 +132,15 @@ void isa_register_ioport(ISADevice *dev, MemoryRegion *io, uint16_t start); * @portio: the ports, sorted by offset. * @opaque: passed into the portio callbacks. * @name: passed into memory_region_init_io. + * + * Returns: 0 on success, negative error code otherwise (e.g. 
if the + * ISA bus is not available) */ -void isa_register_portio_list(ISADevice *dev, - PortioList *piolist, - uint16_t start, - const MemoryRegionPortio *portio, - void *opaque, const char *name); +int isa_register_portio_list(ISADevice *dev, + PortioList *piolist, + uint16_t start, + const MemoryRegionPortio *portio, + void *opaque, const char *name); static inline ISABus *isa_bus_from_device(ISADevice *d) { diff --git a/include/hw/isa/vt82c686.h b/include/hw/isa/vt82c686.h index 9b6d610e838..56ac141be38 100644 --- a/include/hw/isa/vt82c686.h +++ b/include/hw/isa/vt82c686.h @@ -1,11 +1,15 @@ #ifndef HW_VT82C686_H #define HW_VT82C686_H +#include "hw/pci/pci.h" + #define TYPE_VT82C686B_ISA "vt82c686b-isa" -#define TYPE_VT82C686B_SUPERIO "vt82c686b-superio" #define TYPE_VT82C686B_PM "vt82c686b-pm" +#define TYPE_VT8231_ISA "vt8231-isa" #define TYPE_VT8231_PM "vt8231-pm" #define TYPE_VIA_AC97 "via-ac97" #define TYPE_VIA_MC97 "via-mc97" +void via_isa_set_irq(PCIDevice *d, int n, int level); + #endif diff --git a/include/hw/lm32/lm32_pic.h b/include/hw/lm32/lm32_pic.h deleted file mode 100644 index 9e5e038437c..00000000000 --- a/include/hw/lm32/lm32_pic.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef QEMU_HW_LM32_PIC_H -#define QEMU_HW_LM32_PIC_H - - -uint32_t lm32_pic_get_ip(DeviceState *d); -uint32_t lm32_pic_get_im(DeviceState *d); -void lm32_pic_set_ip(DeviceState *d, uint32_t ip); -void lm32_pic_set_im(DeviceState *d, uint32_t im); - -#endif /* QEMU_HW_LM32_PIC_H */ diff --git a/include/hw/loader.h b/include/hw/loader.h index cbfc1848737..4fa485bd61c 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -90,7 +90,7 @@ int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz); #define ELF_LOAD_WRONG_ARCH -3 #define ELF_LOAD_WRONG_ENDIAN -4 #define ELF_LOAD_TOO_BIG -5 -const char *load_elf_strerror(int error); +const char *load_elf_strerror(ssize_t error); /** load_elf_ram_sym: * @filename: Path of ELF file @@ -128,48 +128,48 @@ const char *load_elf_strerror(int error); typedef void (*symbol_fn_t)(const char *st_name, int st_info, uint64_t st_value, uint64_t st_size); -int load_elf_ram_sym(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, - uint64_t *lowaddr, uint64_t *highaddr, uint32_t *pflags, - int big_endian, int elf_machine, - int clear_lsb, int data_swab, - AddressSpace *as, bool load_rom, symbol_fn_t sym_cb); +ssize_t load_elf_ram_sym(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, + uint64_t *lowaddr, uint64_t *highaddr, + uint32_t *pflags, int big_endian, int elf_machine, + int clear_lsb, int data_swab, + AddressSpace *as, bool load_rom, symbol_fn_t sym_cb); /** load_elf_ram: * Same as load_elf_ram_sym(), but doesn't allow the caller to specify a * symbol callback function */ -int load_elf_ram(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, - uint64_t *highaddr, uint32_t *pflags, int big_endian, - int elf_machine, int clear_lsb, int data_swab, - AddressSpace *as, bool load_rom); +ssize_t load_elf_ram(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, + uint64_t *lowaddr, uint64_t *highaddr, uint32_t 
*pflags, + int big_endian, int elf_machine, int clear_lsb, + int data_swab, AddressSpace *as, bool load_rom); /** load_elf_as: * Same as load_elf_ram(), but always loads the elf as ROM */ -int load_elf_as(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, - uint64_t *highaddr, uint32_t *pflags, int big_endian, - int elf_machine, int clear_lsb, int data_swab, - AddressSpace *as); +ssize_t load_elf_as(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, uint32_t *pflags, int big_endian, + int elf_machine, int clear_lsb, int data_swab, + AddressSpace *as); /** load_elf: * Same as load_elf_as(), but doesn't allow the caller to specify an * AddressSpace. */ -int load_elf(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, - uint64_t *highaddr, uint32_t *pflags, int big_endian, - int elf_machine, int clear_lsb, int data_swab); +ssize_t load_elf(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, uint32_t *pflags, int big_endian, + int elf_machine, int clear_lsb, int data_swab); /** load_elf_hdr: * @filename: Path of ELF file @@ -336,12 +336,6 @@ void hmp_info_roms(Monitor *mon, const QDict *qdict); #define rom_add_blob_fixed_as(_f, _b, _l, _a, _as) \ rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, _as, true) -#define PC_ROM_MIN_VGA 0xc0000 -#define PC_ROM_MIN_OPTION 0xc8000 -#define PC_ROM_MAX 0xe0000 -#define PC_ROM_ALIGN 0x800 -#define PC_ROM_SIZE (PC_ROM_MAX - PC_ROM_MIN_VGA) - int rom_add_vga(const char *file); int rom_add_option(const char *file, int32_t bootindex); diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h index 3d3db82641f..1473e6db625 100644 --- a/include/hw/mem/pc-dimm.h +++ b/include/hw/mem/pc-dimm.h @@ -56,9 +56,6 @@ struct PCDIMMDevice { * PCDIMMDeviceClass: * @realize: called after common dimm is realized so that the dimm based * devices get the chance to do specified operations. - * @get_vmstate_memory_region: returns #MemoryRegion which indicates the - * memory of @dimm should be kept during live migration. Will not fail - * after the device was realized. */ struct PCDIMMDeviceClass { /* private */ @@ -66,8 +63,6 @@ struct PCDIMMDeviceClass { /* public */ void (*realize)(PCDIMMDevice *dimm, Error **errp); - MemoryRegion *(*get_vmstate_memory_region)(PCDIMMDevice *dimm, - Error **errp); }; void pc_dimm_pre_plug(PCDIMMDevice *dimm, MachineState *machine, diff --git a/include/hw/misc/armv7m_ras.h b/include/hw/misc/armv7m_ras.h new file mode 100644 index 00000000000..ba6daccf3fc --- /dev/null +++ b/include/hw/misc/armv7m_ras.h @@ -0,0 +1,37 @@ +/* + * Arm M-profile RAS (Reliability, Availability and Serviceability) block + * + * Copyright (c) 2021 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* + * This is a model of the RAS register block of an M-profile CPU + * (the registers starting at 0xE0005000 with ERRFRn). 
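A generic sysbus instantiation sketch for the RAS block introduced by this header (it exposes its registers as a single sysbus MMIO region, described just below); this is only an assumption about how a caller might wire it up, not necessarily how the armv7m container integrates it, and error handling is reduced to &error_fatal:

    /* Create the RAS block and map its register bank at the architected
     * base address mentioned above. */
    DeviceState *ras = qdev_new(TYPE_ARMV7M_RAS);
    sysbus_realize_and_unref(SYS_BUS_DEVICE(ras), &error_fatal);
    sysbus_mmio_map(SYS_BUS_DEVICE(ras), 0, 0xe0005000);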
+ * + * QEMU interface: + * + sysbus MMIO region 0: the register bank + * + * The QEMU implementation currently provides "minimal RAS" only. + */ + +#ifndef HW_MISC_ARMV7M_RAS_H +#define HW_MISC_ARMV7M_RAS_H + +#include "hw/sysbus.h" + +#define TYPE_ARMV7M_RAS "armv7m-ras" +OBJECT_DECLARE_SIMPLE_TYPE(ARMv7MRAS, ARMV7M_RAS) + +struct ARMv7MRAS { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; +}; + +#endif diff --git a/include/hw/misc/aspeed_hace.h b/include/hw/misc/aspeed_hace.h new file mode 100644 index 00000000000..94d5ada95fa --- /dev/null +++ b/include/hw/misc/aspeed_hace.h @@ -0,0 +1,43 @@ +/* + * ASPEED Hash and Crypto Engine + * + * Copyright (C) 2021 IBM Corp. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef ASPEED_HACE_H +#define ASPEED_HACE_H + +#include "hw/sysbus.h" + +#define TYPE_ASPEED_HACE "aspeed.hace" +#define TYPE_ASPEED_AST2400_HACE TYPE_ASPEED_HACE "-ast2400" +#define TYPE_ASPEED_AST2500_HACE TYPE_ASPEED_HACE "-ast2500" +#define TYPE_ASPEED_AST2600_HACE TYPE_ASPEED_HACE "-ast2600" +OBJECT_DECLARE_TYPE(AspeedHACEState, AspeedHACEClass, ASPEED_HACE) + +#define ASPEED_HACE_NR_REGS (0x64 >> 2) + +struct AspeedHACEState { + SysBusDevice parent; + + MemoryRegion iomem; + qemu_irq irq; + + uint32_t regs[ASPEED_HACE_NR_REGS]; + + MemoryRegion *dram_mr; + AddressSpace dram_as; +}; + + +struct AspeedHACEClass { + SysBusDeviceClass parent_class; + + uint32_t src_mask; + uint32_t dest_mask; + uint32_t hash_mask; +}; + +#endif /* _ASPEED_HACE_H_ */ diff --git a/include/hw/misc/aspeed_scu.h b/include/hw/misc/aspeed_scu.h index d49bfb02fbd..c14aff2bcbb 100644 --- a/include/hw/misc/aspeed_scu.h +++ b/include/hw/misc/aspeed_scu.h @@ -43,6 +43,8 @@ struct AspeedSCUState { #define AST2500_A1_SILICON_REV 0x04010303U #define AST2600_A0_SILICON_REV 0x05000303U #define AST2600_A1_SILICON_REV 0x05010303U +#define AST2600_A2_SILICON_REV 0x05020303U +#define AST2600_A3_SILICON_REV 0x05030303U #define ASPEED_IS_AST2500(si_rev) ((((si_rev) >> 24) & 0xff) == 0x04) diff --git a/include/hw/misc/aspeed_xdma.h b/include/hw/misc/aspeed_xdma.h index a2dea96984f..b1478fd1c68 100644 --- a/include/hw/misc/aspeed_xdma.h +++ b/include/hw/misc/aspeed_xdma.h @@ -13,7 +13,10 @@ #include "qom/object.h" #define TYPE_ASPEED_XDMA "aspeed.xdma" -OBJECT_DECLARE_SIMPLE_TYPE(AspeedXDMAState, ASPEED_XDMA) +#define TYPE_ASPEED_2400_XDMA TYPE_ASPEED_XDMA "-ast2400" +#define TYPE_ASPEED_2500_XDMA TYPE_ASPEED_XDMA "-ast2500" +#define TYPE_ASPEED_2600_XDMA TYPE_ASPEED_XDMA "-ast2600" +OBJECT_DECLARE_TYPE(AspeedXDMAState, AspeedXDMAClass, ASPEED_XDMA) #define ASPEED_XDMA_NUM_REGS (ASPEED_XDMA_REG_SIZE / sizeof(uint32_t)) #define ASPEED_XDMA_REG_SIZE 0x7C @@ -28,4 +31,16 @@ struct AspeedXDMAState { uint32_t regs[ASPEED_XDMA_NUM_REGS]; }; +struct AspeedXDMAClass { + SysBusDeviceClass parent_class; + + uint8_t cmdq_endp; + uint8_t cmdq_wrp; + uint8_t cmdq_rdp; + uint8_t intr_ctrl; + uint32_t intr_ctrl_mask; + uint8_t intr_status; + uint32_t intr_complete; +}; + #endif /* ASPEED_XDMA_H */ diff --git a/include/hw/misc/avr_power.h b/include/hw/misc/avr_power.h index 707df030b18..388e421aa7b 100644 --- a/include/hw/misc/avr_power.h +++ b/include/hw/misc/avr_power.h @@ -26,7 +26,6 @@ #define HW_MISC_AVR_POWER_H #include "hw/sysbus.h" -#include "hw/hw.h" #include "qom/object.h" diff --git a/include/hw/misc/bcm2835_powermgt.h b/include/hw/misc/bcm2835_powermgt.h new file mode 100644 index 00000000000..303b9a6f684 --- /dev/null +++ b/include/hw/misc/bcm2835_powermgt.h @@ -0,0 +1,29 
@@ +/* + * BCM2835 Power Management emulation + * + * Copyright (C) 2017 Marcin Chojnacki + * Copyright (C) 2021 Nolan Leake + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_POWERMGT_H +#define BCM2835_POWERMGT_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_BCM2835_POWERMGT "bcm2835-powermgt" +OBJECT_DECLARE_SIMPLE_TYPE(BCM2835PowerMgtState, BCM2835_POWERMGT) + +struct BCM2835PowerMgtState { + SysBusDevice busdev; + MemoryRegion iomem; + + uint32_t rstc; + uint32_t rsts; + uint32_t wdog; +}; + +#endif diff --git a/include/hw/misc/mac_via.h b/include/hw/misc/mac_via.h index 3058b30685a..b4455658660 100644 --- a/include/hw/misc/mac_via.h +++ b/include/hw/misc/mac_via.h @@ -15,6 +15,8 @@ #include "qom/object.h" +#define VIA_SIZE 0x2000 + /* VIA 1 */ #define VIA1_IRQ_ONE_SECOND_BIT 0 #define VIA1_IRQ_60HZ_BIT 1 @@ -24,11 +26,11 @@ #define VIA1_IRQ_NB 8 -#define VIA1_IRQ_ONE_SECOND (1 << VIA1_IRQ_ONE_SECOND_BIT) -#define VIA1_IRQ_60HZ (1 << VIA1_IRQ_60HZ_BIT) -#define VIA1_IRQ_ADB_READY (1 << VIA1_IRQ_ADB_READY_BIT) -#define VIA1_IRQ_ADB_DATA (1 << VIA1_IRQ_ADB_DATA_BIT) -#define VIA1_IRQ_ADB_CLOCK (1 << VIA1_IRQ_ADB_CLOCK_BIT) +#define VIA1_IRQ_ONE_SECOND (1 << VIA1_IRQ_ONE_SECOND_BIT) +#define VIA1_IRQ_60HZ (1 << VIA1_IRQ_60HZ_BIT) +#define VIA1_IRQ_ADB_READY (1 << VIA1_IRQ_ADB_READY_BIT) +#define VIA1_IRQ_ADB_DATA (1 << VIA1_IRQ_ADB_DATA_BIT) +#define VIA1_IRQ_ADB_CLOCK (1 << VIA1_IRQ_ADB_CLOCK_BIT) #define TYPE_MOS6522_Q800_VIA1 "mos6522-q800-via1" @@ -38,9 +40,36 @@ struct MOS6522Q800VIA1State { /*< private >*/ MOS6522State parent_obj; + MemoryRegion via_mem; + qemu_irq irqs[VIA1_IRQ_NB]; + qemu_irq auxmode_irq; uint8_t last_b; + + /* RTC */ uint8_t PRAM[256]; + BlockBackend *blk; + VMChangeStateEntry *vmstate; + + uint32_t tick_offset; + + uint8_t data_out; + int data_out_cnt; + uint8_t data_in; + uint8_t data_in_cnt; + uint8_t cmd; + int wprotect; + int alt; + + /* ADB */ + ADBBusState adb_bus; + qemu_irq adb_data_ready; + int adb_data_in_size; + int adb_data_in_index; + int adb_data_out_index; + uint8_t adb_data_in[128]; + uint8_t adb_data_out[16]; + uint8_t adb_autopoll_cmd; /* external timers */ QEMUTimer *one_second_timer; @@ -52,18 +81,28 @@ struct MOS6522Q800VIA1State { /* VIA 2 */ #define VIA2_IRQ_SCSI_DATA_BIT 0 -#define VIA2_IRQ_SLOT_BIT 1 +#define VIA2_IRQ_NUBUS_BIT 1 #define VIA2_IRQ_UNUSED_BIT 2 #define VIA2_IRQ_SCSI_BIT 3 #define VIA2_IRQ_ASC_BIT 4 #define VIA2_IRQ_NB 8 -#define VIA2_IRQ_SCSI_DATA (1 << VIA2_IRQ_SCSI_DATA_BIT) -#define VIA2_IRQ_SLOT (1 << VIA2_IRQ_SLOT_BIT) -#define VIA2_IRQ_UNUSED (1 << VIA2_IRQ_SCSI_BIT) -#define VIA2_IRQ_SCSI (1 << VIA2_IRQ_UNUSED_BIT) -#define VIA2_IRQ_ASC (1 << VIA2_IRQ_ASC_BIT) +#define VIA2_IRQ_SCSI_DATA (1 << VIA2_IRQ_SCSI_DATA_BIT) +#define VIA2_IRQ_NUBUS (1 << VIA2_IRQ_NUBUS_BIT) +#define VIA2_IRQ_UNUSED (1 << VIA2_IRQ_SCSI_BIT) +#define VIA2_IRQ_SCSI (1 << VIA2_IRQ_UNUSED_BIT) +#define VIA2_IRQ_ASC (1 << VIA2_IRQ_ASC_BIT) + +#define VIA2_NUBUS_IRQ_NB 7 + +#define VIA2_NUBUS_IRQ_9 0 +#define VIA2_NUBUS_IRQ_A 1 +#define VIA2_NUBUS_IRQ_B 2 +#define VIA2_NUBUS_IRQ_C 3 +#define VIA2_NUBUS_IRQ_D 4 +#define VIA2_NUBUS_IRQ_E 5 +#define VIA2_NUBUS_IRQ_INTVIDEO 6 #define TYPE_MOS6522_Q800_VIA2 "mos6522-q800-via2" OBJECT_DECLARE_SIMPLE_TYPE(MOS6522Q800VIA2State, MOS6522_Q800_VIA2) @@ -71,47 +110,8 @@ OBJECT_DECLARE_SIMPLE_TYPE(MOS6522Q800VIA2State, MOS6522_Q800_VIA2) struct MOS6522Q800VIA2State { /*< private >*/ 
MOS6522State parent_obj; -}; - -#define TYPE_MAC_VIA "mac_via" -OBJECT_DECLARE_SIMPLE_TYPE(MacVIAState, MAC_VIA) - -struct MacVIAState { - SysBusDevice busdev; - - VMChangeStateEntry *vmstate; - - /* MMIO */ - MemoryRegion mmio; - MemoryRegion via1mem; - MemoryRegion via2mem; - - /* VIAs */ - MOS6522Q800VIA1State mos6522_via1; - MOS6522Q800VIA2State mos6522_via2; - - /* RTC */ - uint32_t tick_offset; - - uint8_t data_out; - int data_out_cnt; - uint8_t data_in; - uint8_t data_in_cnt; - uint8_t cmd; - int wprotect; - int alt; - BlockBackend *blk; - - /* ADB */ - ADBBusState adb_bus; - qemu_irq adb_data_ready; - int adb_data_in_size; - int adb_data_in_index; - int adb_data_out_index; - uint8_t adb_data_in[128]; - uint8_t adb_data_out[16]; - uint8_t adb_autopoll_cmd; + MemoryRegion via_mem; }; #endif diff --git a/include/hw/misc/mps2-scc.h b/include/hw/misc/mps2-scc.h index 49d070616aa..3b2d13ac9c3 100644 --- a/include/hw/misc/mps2-scc.h +++ b/include/hw/misc/mps2-scc.h @@ -9,6 +9,24 @@ * (at your option) any later version. */ +/* + * This is a model of the Serial Communication Controller (SCC) + * block found in most MPS FPGA images. + * + * QEMU interface: + * + sysbus MMIO region 0: the register bank + * + QOM property "scc-cfg4": value of the read-only CFG4 register + * + QOM property "scc-aid": value of the read-only SCC_AID register + * + QOM property "scc-id": value of the read-only SCC_ID register + * + QOM property "scc-cfg0": reset value of the CFG0 register + * + QOM property array "oscclk": reset values of the OSCCLK registers + * (which are accessed via the SYS_CFG channel provided by this device) + * + named GPIO output "remap": this tracks the value of CFG0 register + * bit 0. Boards where this bit controls memory remapping should + * connect this GPIO line to a function performing that mapping. + * Boards where bit 0 has no special function should leave the GPIO + * output disconnected. 
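Illustrative sketch (not part of the patch): how a board model might consume the "remap" GPIO output and the "scc-cfg0" property documented above. The board_remap_update() handler and the property value are hypothetical; the QOM property names come from the comment above.

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/irq.h"
#include "hw/sysbus.h"
#include "hw/qdev-properties.h"
#include "hw/misc/mps2-scc.h"

/* Hypothetical handler: level is 1 whenever CFG0 bit 0 is set by the guest */
static void board_remap_update(void *opaque, int irq, int level)
{
    /* board-specific: e.g. alias SRAM over address 0 while level != 0 */
}

static void board_wire_scc(MPS2SCC *scc, void *board_opaque)
{
    /* example reset value only; real boards use their own CFG0 value */
    qdev_prop_set_uint32(DEVICE(scc), "scc-cfg0", 0);
    sysbus_realize(SYS_BUS_DEVICE(scc), &error_fatal);
    qdev_connect_gpio_out_named(DEVICE(scc), "remap", 0,
                                qemu_allocate_irq(board_remap_update,
                                                  board_opaque, 0));
}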
+ */ #ifndef MPS2_SCC_H #define MPS2_SCC_H @@ -43,6 +61,9 @@ struct MPS2SCC { uint32_t num_oscclk; uint32_t *oscclk; uint32_t *oscclk_reset; + uint32_t cfg0_reset; + + qemu_irq remap; }; #endif diff --git a/include/hw/misc/stm32f4xx_exti.h b/include/hw/misc/stm32f4xx_exti.h index 24b6fa7724b..ea6b0097b0e 100644 --- a/include/hw/misc/stm32f4xx_exti.h +++ b/include/hw/misc/stm32f4xx_exti.h @@ -26,7 +26,6 @@ #define HW_STM_EXTI_H #include "hw/sysbus.h" -#include "hw/hw.h" #include "qom/object.h" #define EXTI_IMR 0x00 diff --git a/include/hw/misc/stm32f4xx_syscfg.h b/include/hw/misc/stm32f4xx_syscfg.h index 8c31feccd37..6f8ca49228b 100644 --- a/include/hw/misc/stm32f4xx_syscfg.h +++ b/include/hw/misc/stm32f4xx_syscfg.h @@ -26,7 +26,6 @@ #define HW_STM_SYSCFG_H #include "hw/sysbus.h" -#include "hw/hw.h" #include "qom/object.h" #define SYSCFG_MEMRMP 0x00 diff --git a/include/hw/misc/virt_ctrl.h b/include/hw/misc/virt_ctrl.h index edfadc46950..25a237e5187 100644 --- a/include/hw/misc/virt_ctrl.h +++ b/include/hw/misc/virt_ctrl.h @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * Virt system Controller */ diff --git a/include/hw/nubus/mac-nubus-bridge.h b/include/hw/nubus/mac-nubus-bridge.h index 36aa098dd4b..70ab50ab2d6 100644 --- a/include/hw/nubus/mac-nubus-bridge.h +++ b/include/hw/nubus/mac-nubus-bridge.h @@ -12,13 +12,18 @@ #include "hw/nubus/nubus.h" #include "qom/object.h" +#define MAC_NUBUS_FIRST_SLOT 0x9 +#define MAC_NUBUS_LAST_SLOT 0xe +#define MAC_NUBUS_SLOT_NB (MAC_NUBUS_LAST_SLOT - MAC_NUBUS_FIRST_SLOT + 1) + #define TYPE_MAC_NUBUS_BRIDGE "mac-nubus-bridge" -OBJECT_DECLARE_SIMPLE_TYPE(MacNubusState, MAC_NUBUS_BRIDGE) +OBJECT_DECLARE_SIMPLE_TYPE(MacNubusBridge, MAC_NUBUS_BRIDGE) -struct MacNubusState { - SysBusDevice sysbus_dev; +struct MacNubusBridge { + NubusBridge parent_obj; - NubusBus *bus; + MemoryRegion super_slot_alias; + MemoryRegion slot_alias; }; #endif diff --git a/include/hw/nubus/nubus.h b/include/hw/nubus/nubus.h index e2b5cf260ba..b3b4d2eadb4 100644 --- a/include/hw/nubus/nubus.h +++ b/include/hw/nubus/nubus.h @@ -10,17 +10,23 @@ #define HW_NUBUS_NUBUS_H #include "hw/qdev-properties.h" +#include "hw/sysbus.h" #include "exec/address-spaces.h" #include "qom/object.h" +#include "qemu/units.h" #define NUBUS_SUPER_SLOT_SIZE 0x10000000U -#define NUBUS_SUPER_SLOT_NB 0x9 +#define NUBUS_SUPER_SLOT_NB 0xe + +#define NUBUS_SLOT_BASE (NUBUS_SUPER_SLOT_SIZE * \ + (NUBUS_SUPER_SLOT_NB + 1)) #define NUBUS_SLOT_SIZE 0x01000000 -#define NUBUS_SLOT_NB 0xF +#define NUBUS_FIRST_SLOT 0x0 +#define NUBUS_LAST_SLOT 0xf +#define NUBUS_SLOT_NB (NUBUS_LAST_SLOT - NUBUS_FIRST_SLOT + 1) -#define NUBUS_FIRST_SLOT 0x9 -#define NUBUS_LAST_SLOT 0xF +#define NUBUS_IRQS 16 #define TYPE_NUBUS_DEVICE "nubus-device" OBJECT_DECLARE_SIMPLE_TYPE(NubusDevice, NUBUS_DEVICE) @@ -29,40 +35,41 @@ OBJECT_DECLARE_SIMPLE_TYPE(NubusDevice, NUBUS_DEVICE) OBJECT_DECLARE_SIMPLE_TYPE(NubusBus, NUBUS_BUS) #define TYPE_NUBUS_BRIDGE "nubus-bridge" +OBJECT_DECLARE_SIMPLE_TYPE(NubusBridge, NUBUS_BRIDGE); struct NubusBus { BusState qbus; + AddressSpace nubus_as; + MemoryRegion nubus_mr; + MemoryRegion super_slot_io; MemoryRegion slot_io; - int current_slot; + uint16_t slot_available_mask; + + qemu_irq irqs[NUBUS_IRQS]; }; +#define NUBUS_DECL_ROM_MAX_SIZE (128 * KiB) + struct NubusDevice { DeviceState qdev; - int slot_nb; + int32_t slot; + MemoryRegion super_slot_mem; MemoryRegion slot_mem; - /* Format Block */ - - MemoryRegion fblock_io; + char *romfile; + 
MemoryRegion decl_rom; +}; - uint32_t rom_length; - uint32_t rom_crc; - uint8_t rom_rev; - uint8_t rom_format; - uint8_t byte_lanes; - int32_t directory_offset; +void nubus_set_irq(NubusDevice *nd, int level); - /* ROM */ +struct NubusBridge { + SysBusDevice parent_obj; - MemoryRegion rom_io; - const uint8_t *rom; + NubusBus bus; }; -void nubus_register_rom(NubusDevice *dev, const uint8_t *rom, uint32_t size, - int revision, int format, uint8_t byte_lanes); - #endif diff --git a/include/hw/nvram/xlnx-bbram.h b/include/hw/nvram/xlnx-bbram.h new file mode 100644 index 00000000000..87d59ef3c0c --- /dev/null +++ b/include/hw/nvram/xlnx-bbram.h @@ -0,0 +1,54 @@ +/* + * QEMU model of the Xilinx BBRAM Battery Backed RAM + * + * Copyright (c) 2015-2021 Xilinx Inc. + * + * Written by Edgar E. Iglesias + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef XLNX_BBRAM_H +#define XLNX_BBRAM_H + +#include "sysemu/block-backend.h" +#include "hw/qdev-core.h" +#include "hw/irq.h" +#include "hw/sysbus.h" +#include "hw/register.h" + +#define RMAX_XLNX_BBRAM ((0x4c / 4) + 1) + +#define TYPE_XLNX_BBRAM "xlnx,bbram-ctrl" +OBJECT_DECLARE_SIMPLE_TYPE(XlnxBBRam, XLNX_BBRAM); + +struct XlnxBBRam { + SysBusDevice parent_obj; + qemu_irq irq_bbram; + + BlockBackend *blk; + + uint32_t crc_zpads; + bool bbram8_wo; + bool blk_ro; + + uint32_t regs[RMAX_XLNX_BBRAM]; + RegisterInfo regs_info[RMAX_XLNX_BBRAM]; +}; + +#endif diff --git a/include/hw/nvram/xlnx-efuse.h b/include/hw/nvram/xlnx-efuse.h new file mode 100644 index 00000000000..58414e468b5 --- /dev/null +++ b/include/hw/nvram/xlnx-efuse.h @@ -0,0 +1,132 @@ +/* + * QEMU model of the Xilinx eFuse core + * + * Copyright (c) 2015 Xilinx Inc. + * + * Written by Edgar E. Iglesias + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
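Illustrative sketch (not part of the patch): how a Nubus card model might drive its slot interrupt through the nubus_set_irq() helper declared in the hw/nubus/nubus.h hunk above. The card model itself is hypothetical.

#include "qemu/osdep.h"
#include "hw/nubus/nubus.h"

/* Hypothetical card model: assert the slot IRQ while work is pending */
static void my_card_set_pending(NubusDevice *nd, bool pending)
{
    /* the bridge routes this to the VIA2 Nubus interrupt for the card's slot */
    nubus_set_irq(nd, pending ? 1 : 0);
}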
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef XLNX_EFUSE_H
+#define XLNX_EFUSE_H
+
+#include "sysemu/block-backend.h"
+#include "hw/qdev-core.h"
+
+#define TYPE_XLNX_EFUSE "xlnx,efuse"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxEFuse, XLNX_EFUSE);
+
+struct XlnxEFuse {
+    DeviceState parent_obj;
+    BlockBackend *blk;
+    bool blk_ro;
+    uint32_t *fuse32;
+
+    DeviceState *dev;
+
+    bool init_tbits;
+
+    uint8_t efuse_nr;
+    uint32_t efuse_size;
+
+    uint32_t *ro_bits;
+    uint32_t ro_bits_cnt;
+};
+
+/**
+ * xlnx_efuse_calc_crc:
+ * @data: an array of 32-bit words for which the CRC should be computed
+ * @u32_cnt: the array size in number of 32-bit words
+ * @zpads: the number of 32-bit zeros prepended to @data before computation
+ *
+ * This function is used to compute the CRC for an array of 32-bit words,
+ * using Xilinx-specific data padding.
+ *
+ * Returns: the computed 32-bit CRC
+ */
+uint32_t xlnx_efuse_calc_crc(const uint32_t *data, unsigned u32_cnt,
+                             unsigned zpads);
+
+/**
+ * xlnx_efuse_get_bit:
+ * @s: the efuse object
+ * @bit: the efuse bit-address to read
+ *
+ * Returns: the bit, 0 or 1, at @bit of object @s
+ */
+bool xlnx_efuse_get_bit(XlnxEFuse *s, unsigned int bit);
+
+/**
+ * xlnx_efuse_set_bit:
+ * @s: the efuse object
+ * @bit: the efuse bit-address to be set to 1
+ *
+ * Returns: true on success, false on failure
+ */
+bool xlnx_efuse_set_bit(XlnxEFuse *s, unsigned int bit);
+
+/**
+ * xlnx_efuse_k256_check:
+ * @s: the efuse object
+ * @crc: the 32-bit CRC to be compared with
+ * @start: the efuse bit-address (which must be a multiple of 32) of the
+ *         start of a 256-bit array
+ *
+ * This function computes the CRC of a 256-bit array starting at @start
+ * and then compares it to the given @crc
+ *
+ * Returns: true if @crc matches the computed CRC, false otherwise
+ */
+bool xlnx_efuse_k256_check(XlnxEFuse *s, uint32_t crc, unsigned start);
+
+/**
+ * xlnx_efuse_tbits_check:
+ * @s: the efuse object
+ *
+ * This function inspects a number of efuse bits at specific addresses
+ * to see if they match a validation pattern. Each pattern is a group
+ * of 4 bits, and there are 3 groups.
+ *
+ * Returns: a 3-bit mask, where a bit of '1' means the corresponding
+ * group has a valid pattern.
+ */
+uint32_t xlnx_efuse_tbits_check(XlnxEFuse *s);
+
+/**
+ * xlnx_efuse_get_row:
+ * @s: the efuse object
+ * @bit: the efuse bit address for which a 32-bit value is read
+ *
+ * Returns: the entire 32 bits of the efuse row starting at a bit
+ * address that is a multiple of 32 and contains the bit at @bit
+ */
+static inline uint32_t xlnx_efuse_get_row(XlnxEFuse *s, unsigned int bit)
+{
+    if (!(s->fuse32)) {
+        return 0;
+    } else {
+        unsigned int row_idx = bit / 32;
+
+        assert(row_idx < (s->efuse_size * s->efuse_nr / 32));
+        return s->fuse32[row_idx];
+    }
+}
+
+#endif
diff --git a/include/hw/nvram/xlnx-versal-efuse.h b/include/hw/nvram/xlnx-versal-efuse.h
new file mode 100644
index 00000000000..a873dc5cb01
--- /dev/null
+++ b/include/hw/nvram/xlnx-versal-efuse.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Xilinx Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef XLNX_VERSAL_EFUSE_H
+#define XLNX_VERSAL_EFUSE_H
+
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "hw/nvram/xlnx-efuse.h"
+
+#define XLNX_VERSAL_EFUSE_CTRL_R_MAX ((0x100 / 4) + 1)
+
+#define TYPE_XLNX_VERSAL_EFUSE_CTRL "xlnx,versal-efuse"
+#define TYPE_XLNX_VERSAL_EFUSE_CACHE "xlnx,pmc-efuse-cache"
+
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalEFuseCtrl, XLNX_VERSAL_EFUSE_CTRL);
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalEFuseCache, XLNX_VERSAL_EFUSE_CACHE);
+
+struct XlnxVersalEFuseCtrl {
+    SysBusDevice parent_obj;
+    qemu_irq irq_efuse_imr;
+
+    XlnxEFuse *efuse;
+
+    void *extra_pg0_lock_spec; /* Opaque property */
+    uint32_t extra_pg0_lock_n16;
+
+    uint32_t regs[XLNX_VERSAL_EFUSE_CTRL_R_MAX];
+    RegisterInfo regs_info[XLNX_VERSAL_EFUSE_CTRL_R_MAX];
+};
+
+struct XlnxVersalEFuseCache {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+
+    XlnxEFuse *efuse;
+};
+
+/**
+ * xlnx_versal_efuse_read_row:
+ * @s: the efuse object
+ * @bit: the bit-address within the 32-bit row to be read
+ * @denied: if non-NULL, set to true if the row is write-only
+ *
+ * Returns: the 32-bit word containing address @bit; 0 if @denied is true
+ */
+uint32_t xlnx_versal_efuse_read_row(XlnxEFuse *s, uint32_t bit, bool *denied);
+
+#endif
diff --git a/include/hw/nvram/xlnx-zynqmp-efuse.h b/include/hw/nvram/xlnx-zynqmp-efuse.h
new file mode 100644
index 00000000000..6b051ec4f15
--- /dev/null
+++ b/include/hw/nvram/xlnx-zynqmp-efuse.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Xilinx Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef XLNX_ZYNQMP_EFUSE_H +#define XLNX_ZYNQMP_EFUSE_H + +#include "hw/irq.h" +#include "hw/sysbus.h" +#include "hw/register.h" +#include "hw/nvram/xlnx-efuse.h" + +#define XLNX_ZYNQMP_EFUSE_R_MAX ((0x10fc / 4) + 1) + +#define TYPE_XLNX_ZYNQMP_EFUSE "xlnx,zynqmp-efuse" +OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPEFuse, XLNX_ZYNQMP_EFUSE); + +struct XlnxZynqMPEFuse { + SysBusDevice parent_obj; + qemu_irq irq; + + XlnxEFuse *efuse; + uint32_t regs[XLNX_ZYNQMP_EFUSE_R_MAX]; + RegisterInfo regs_info[XLNX_ZYNQMP_EFUSE_R_MAX]; +}; + +#endif diff --git a/include/hw/pci-host/gpex.h b/include/hw/pci-host/gpex.h index d48a020a952..fcf8b638200 100644 --- a/include/hw/pci-host/gpex.h +++ b/include/hw/pci-host/gpex.h @@ -49,8 +49,12 @@ struct GPEXHost { MemoryRegion io_ioport; MemoryRegion io_mmio; + MemoryRegion io_ioport_window; + MemoryRegion io_mmio_window; qemu_irq irq[GPEX_NUM_IRQS]; int irq_num[GPEX_NUM_IRQS]; + + bool allow_unmapped_accesses; }; struct GPEXConfig { diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h index 24fd53942ca..f068aaba8fd 100644 --- a/include/hw/pci-host/i440fx.h +++ b/include/hw/pci-host/i440fx.h @@ -11,7 +11,6 @@ #ifndef HW_PCI_I440FX_H #define HW_PCI_I440FX_H -#include "hw/hw.h" #include "hw/pci/pci_bus.h" #include "hw/pci-host/pam.h" #include "qom/object.h" @@ -46,6 +45,5 @@ PCIBus *i440fx_init(const char *host_type, const char *pci_type, MemoryRegion *pci_memory, MemoryRegion *ram_memory); -PCIBus *find_i440fx(void); #endif diff --git a/include/hw/pci-host/mv64361.h b/include/hw/pci-host/mv64361.h new file mode 100644 index 00000000000..9cdb35cb3cf --- /dev/null +++ b/include/hw/pci-host/mv64361.h @@ -0,0 +1,8 @@ +#ifndef MV64361_H +#define MV64361_H + +#define TYPE_MV64361 "mv64361" + +PCIBus *mv64361_get_pci_bus(DeviceState *dev, int n); + +#endif diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 6be4e0c460c..e7cdf2d5ec5 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -268,6 +268,7 @@ typedef struct PCIReqIDCache PCIReqIDCache; struct PCIDevice { DeviceState qdev; bool partially_hotplugged; + bool has_power; /* PCI config space */ uint8_t *config; @@ -401,13 +402,17 @@ typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin); OBJECT_DECLARE_TYPE(PCIBus, PCIBusClass, PCI_BUS) #define TYPE_PCIE_BUS "PCIE" +typedef void (*pci_bus_dev_fn)(PCIBus *b, PCIDevice *d, void *opaque); +typedef void (*pci_bus_fn)(PCIBus *b, void *opaque); +typedef void *(*pci_bus_ret_fn)(PCIBus *b, void *opaque); + bool pci_bus_is_express(PCIBus *bus); -void pci_root_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent, - const char *name, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, - uint8_t devfn_min, const char *typename); +void pci_root_bus_init(PCIBus *bus, size_t bus_size, DeviceState *parent, + const char *name, + MemoryRegion *address_space_mem, + MemoryRegion *address_space_io, + uint8_t devfn_min, const char *typename); PCIBus *pci_root_bus_new(DeviceState *parent, const char *name, MemoryRegion *address_space_mem, MemoryRegion *address_space_io, @@ -450,6 +455,7 @@ static inline PCIBus *pci_get_bus(const PCIDevice *dev) return PCI_BUS(qdev_get_parent_bus(DEVICE(dev))); } int pci_bus_num(PCIBus *s); +void 
pci_bus_range(PCIBus *bus, int *min_bus, int *max_bus); static inline int pci_dev_bus_num(const PCIDevice *dev) { return pci_bus_num(pci_get_bus(dev)); @@ -457,29 +463,30 @@ static inline int pci_dev_bus_num(const PCIDevice *dev) int pci_bus_numa_node(PCIBus *bus); void pci_for_each_device(PCIBus *bus, int bus_num, - void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque), + pci_bus_dev_fn fn, void *opaque); void pci_for_each_device_reverse(PCIBus *bus, int bus_num, - void (*fn)(PCIBus *bus, PCIDevice *d, - void *opaque), + pci_bus_dev_fn fn, void *opaque); -void pci_for_each_bus_depth_first(PCIBus *bus, - void *(*begin)(PCIBus *bus, void *parent_state), - void (*end)(PCIBus *bus, void *state), - void *parent_state); +void pci_for_each_device_under_bus(PCIBus *bus, + pci_bus_dev_fn fn, void *opaque); +void pci_for_each_device_under_bus_reverse(PCIBus *bus, + pci_bus_dev_fn fn, + void *opaque); +void pci_for_each_bus_depth_first(PCIBus *bus, pci_bus_ret_fn begin, + pci_bus_fn end, void *parent_state); PCIDevice *pci_get_function_0(PCIDevice *pci_dev); /* Use this wrapper when specific scan order is not required. */ static inline -void pci_for_each_bus(PCIBus *bus, - void (*fn)(PCIBus *bus, void *opaque), - void *opaque) +void pci_for_each_bus(PCIBus *bus, pci_bus_fn fn, void *opaque) { pci_for_each_bus_depth_first(bus, NULL, fn, opaque); } PCIBus *pci_device_root_bus(const PCIDevice *d); const char *pci_root_bus_path(PCIDevice *dev); +bool pci_bus_bypass_iommu(PCIBus *bus); PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn); int pci_qdev_find_device(const char *id, PCIDevice **pdev); void pci_bus_get_w64_range(PCIBus *bus, Range *range); @@ -902,5 +909,6 @@ extern const VMStateDescription vmstate_pci_device; } MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); +void pci_set_power(PCIDevice *pci_dev, bool state); #endif diff --git a/include/hw/pci/pci_host.h b/include/hw/pci/pci_host.h index 52e038c0196..c6f4eb45851 100644 --- a/include/hw/pci/pci_host.h +++ b/include/hw/pci/pci_host.h @@ -43,6 +43,7 @@ struct PCIHostState { uint32_t config_reg; bool mig_enabled; PCIBus *bus; + bool bypass_iommu; QLIST_ENTRY(PCIHostState) next; }; diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index ea28dcc850b..11abe22d460 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -204,15 +204,17 @@ #define PCI_VENDOR_ID_XILINX 0x10ee #define PCI_VENDOR_ID_VIA 0x1106 -#define PCI_DEVICE_ID_VIA_ISA_BRIDGE 0x0686 +#define PCI_DEVICE_ID_VIA_82C686B_ISA 0x0686 #define PCI_DEVICE_ID_VIA_IDE 0x0571 #define PCI_DEVICE_ID_VIA_UHCI 0x3038 #define PCI_DEVICE_ID_VIA_82C686B_PM 0x3057 #define PCI_DEVICE_ID_VIA_AC97 0x3058 #define PCI_DEVICE_ID_VIA_MC97 0x3068 +#define PCI_DEVICE_ID_VIA_8231_ISA 0x8231 #define PCI_DEVICE_ID_VIA_8231_PM 0x8235 #define PCI_VENDOR_ID_MARVELL 0x11ab +#define PCI_DEVICE_ID_MARVELL_MV6436X 0x6460 #define PCI_VENDOR_ID_SILICON_MOTION 0x126f #define PCI_DEVICE_ID_SM501 0x0501 @@ -225,6 +227,9 @@ #define PCI_VENDOR_ID_FREESCALE 0x1957 #define PCI_DEVICE_ID_MPC8533E 0x0030 +#define PCI_VENDOR_ID_BAIDU 0x1d22 +#define PCI_DEVICE_ID_KUNLUN_VF 0x3685 + #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_82378 0x0484 #define PCI_DEVICE_ID_INTEL_82441 0x1237 diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h index bea8ecad0fd..e25b289ce84 100644 --- a/include/hw/pci/pcie_port.h +++ b/include/hw/pci/pcie_port.h @@ -57,8 +57,11 @@ struct PCIESlot { /* Disable ACS (really for a pcie_root_port) */ bool disable_acs; - /* 
Indicates whether hot-plug is enabled on the slot */ + /* Indicates whether any type of hot-plug is allowed on the slot */ bool hotplug; + + bool native_hotplug; + QLIST_ENTRY(PCIESlot) next; }; diff --git a/include/hw/ppc/openpic.h b/include/hw/ppc/openpic.h index 74ff44bff0a..ebdaf8a4932 100644 --- a/include/hw/ppc/openpic.h +++ b/include/hw/ppc/openpic.h @@ -21,7 +21,6 @@ enum { typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines; -#define OPENPIC_MODEL_RAVEN 0 #define OPENPIC_MODEL_FSL_MPIC_20 1 #define OPENPIC_MODEL_FSL_MPIC_42 2 #define OPENPIC_MODEL_KEYLARGO 3 @@ -32,13 +31,6 @@ typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines; #define OPENPIC_MAX_IRQ (OPENPIC_MAX_SRC + OPENPIC_MAX_IPI + \ OPENPIC_MAX_TMR) -/* Raven */ -#define RAVEN_MAX_CPU 2 -#define RAVEN_MAX_EXT 48 -#define RAVEN_MAX_IRQ 64 -#define RAVEN_MAX_TMR OPENPIC_MAX_TMR -#define RAVEN_MAX_IPI OPENPIC_MAX_IPI - /* KeyLargo */ #define KEYLARGO_MAX_CPU 4 #define KEYLARGO_MAX_EXT 64 @@ -49,14 +41,6 @@ typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines; /* Timers don't exist but this makes the code happy... */ #define KEYLARGO_TMR_IRQ (KEYLARGO_IPI_IRQ + KEYLARGO_MAX_IPI) -/* Interrupt definitions */ -#define RAVEN_FE_IRQ (RAVEN_MAX_EXT) /* Internal functional IRQ */ -#define RAVEN_ERR_IRQ (RAVEN_MAX_EXT + 1) /* Error IRQ */ -#define RAVEN_TMR_IRQ (RAVEN_MAX_EXT + 2) /* First timer IRQ */ -#define RAVEN_IPI_IRQ (RAVEN_TMR_IRQ + RAVEN_MAX_TMR) /* First IPI IRQ */ -/* First doorbell IRQ */ -#define RAVEN_DBL_IRQ (RAVEN_IPI_IRQ + (RAVEN_MAX_CPU * RAVEN_MAX_IPI)) - typedef struct FslMpicInfo { int max_ext; } FslMpicInfo; @@ -67,7 +51,8 @@ typedef enum IRQType { IRQ_TYPE_FSLSPECIAL, /* FSL timer/IPI interrupt, edge, no polarity */ } IRQType; -/* Round up to the nearest 64 IRQs so that the queue length +/* + * Round up to the nearest 64 IRQs so that the queue length * won't change when moving between 32 and 64 bit hosts. */ #define IRQQUEUE_SIZE_BITS ((OPENPIC_MAX_IRQ + 63) & ~63) @@ -117,8 +102,10 @@ typedef struct OpenPICTimer { bool qemu_timer_active; /* Is the qemu_timer is running? */ struct QEMUTimer *qemu_timer; struct OpenPICState *opp; /* Device timer is part of. */ - /* The QEMU_CLOCK_VIRTUAL time (in ns) corresponding to the last - current_count written or read, only defined if qemu_timer_active. */ + /* + * The QEMU_CLOCK_VIRTUAL time (in ns) corresponding to the last + * current_count written or read, only defined if qemu_timer_active. + */ uint64_t origin_time; } OpenPICTimer; diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index d69cee17b23..aa08d79d24d 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -170,29 +170,10 @@ DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER8NVL, DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER9, TYPE_PNV_CHIP_POWER9) -#define TYPE_PNV_CHIP_POWER10 PNV_CHIP_TYPE_NAME("power10_v1.0") +#define TYPE_PNV_CHIP_POWER10 PNV_CHIP_TYPE_NAME("power10_v2.0") DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER10, TYPE_PNV_CHIP_POWER10) -/* - * This generates a HW chip id depending on an index, as found on a - * two socket system with dual chip modules : - * - * 0x0, 0x1, 0x10, 0x11 - * - * 4 chips should be the maximum - * - * TODO: use a machine property to define the chip ids - */ -#define PNV_CHIP_HWID(i) ((((i) & 0x3e) << 3) | ((i) & 0x1)) - -/* - * Converts back a HW chip id to an index. This is useful to calculate - * the MMIO addresses of some controllers which depend on the chip id. 
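Illustrative sketch (not part of the patch): the pci_bus_dev_fn callback style used by the new pci_for_each_device_under_bus() helper declared in the hw/pci/pci.h hunk above. The counting callback is hypothetical.

#include "qemu/osdep.h"
#include "hw/pci/pci.h"

/* Hypothetical pci_bus_dev_fn: count the devices present on one bus */
static void count_dev(PCIBus *bus, PCIDevice *d, void *opaque)
{
    (*(int *)opaque)++;
}

static int count_devices_on_bus(PCIBus *bus)
{
    int count = 0;

    /* unlike pci_for_each_device(), no bus-number lookup is performed */
    pci_for_each_device_under_bus(bus, count_dev, &count);
    return count;
}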
- */ -#define PNV_CHIP_INDEX(chip) \ - (((chip)->chip_id >> 2) * 2 + ((chip)->chip_id & 0x3)) - PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip, uint32_t pir); #define TYPE_PNV_MACHINE MACHINE_TYPE_NAME("powernv") @@ -256,11 +237,11 @@ void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor); #define PNV_OCC_COMMON_AREA_SIZE 0x0000000000800000ull #define PNV_OCC_COMMON_AREA_BASE 0x7fff800000ull #define PNV_OCC_SENSOR_BASE(chip) (PNV_OCC_COMMON_AREA_BASE + \ - PNV_OCC_SENSOR_DATA_BLOCK_BASE(PNV_CHIP_INDEX(chip))) + PNV_OCC_SENSOR_DATA_BLOCK_BASE((chip)->chip_id)) #define PNV_HOMER_SIZE 0x0000000000400000ull #define PNV_HOMER_BASE(chip) \ - (0x7ffd800000ull + ((uint64_t)PNV_CHIP_INDEX(chip)) * PNV_HOMER_SIZE) + (0x7ffd800000ull + ((uint64_t)(chip)->chip_id) * PNV_HOMER_SIZE) /* @@ -279,16 +260,16 @@ void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor); */ #define PNV_ICP_SIZE 0x0000000000100000ull #define PNV_ICP_BASE(chip) \ - (0x0003ffff80000000ull + (uint64_t) PNV_CHIP_INDEX(chip) * PNV_ICP_SIZE) + (0x0003ffff80000000ull + (uint64_t) (chip)->chip_id * PNV_ICP_SIZE) #define PNV_PSIHB_SIZE 0x0000000000100000ull #define PNV_PSIHB_BASE(chip) \ - (0x0003fffe80000000ull + (uint64_t)PNV_CHIP_INDEX(chip) * PNV_PSIHB_SIZE) + (0x0003fffe80000000ull + (uint64_t)(chip)->chip_id * PNV_PSIHB_SIZE) #define PNV_PSIHB_FSP_SIZE 0x0000000100000000ull #define PNV_PSIHB_FSP_BASE(chip) \ - (0x0003ffe000000000ull + (uint64_t)PNV_CHIP_INDEX(chip) * \ + (0x0003ffe000000000ull + (uint64_t)(chip)->chip_id * \ PNV_PSIHB_FSP_SIZE) /* @@ -324,11 +305,11 @@ void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor); #define PNV9_OCC_COMMON_AREA_SIZE 0x0000000000800000ull #define PNV9_OCC_COMMON_AREA_BASE 0x203fff800000ull #define PNV9_OCC_SENSOR_BASE(chip) (PNV9_OCC_COMMON_AREA_BASE + \ - PNV_OCC_SENSOR_DATA_BLOCK_BASE(PNV_CHIP_INDEX(chip))) + PNV_OCC_SENSOR_DATA_BLOCK_BASE((chip)->chip_id)) #define PNV9_HOMER_SIZE 0x0000000000400000ull #define PNV9_HOMER_BASE(chip) \ - (0x203ffd800000ull + ((uint64_t)PNV_CHIP_INDEX(chip)) * PNV9_HOMER_SIZE) + (0x203ffd800000ull + ((uint64_t)(chip)->chip_id) * PNV9_HOMER_SIZE) /* * POWER10 MMIO base addresses - 16TB stride per chip diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h index 6ecee98a76e..c22eab2e1f6 100644 --- a/include/hw/ppc/pnv_core.h +++ b/include/hw/ppc/pnv_core.h @@ -67,7 +67,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(PnvQuad, PNV_QUAD) struct PnvQuad { DeviceState parent_obj; - uint32_t id; + uint32_t quad_id; MemoryRegion xscom_regs; }; #endif /* PPC_PNV_CORE_H */ diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index bf7cab7a2ce..ee7504b9760 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -12,6 +12,7 @@ #include "hw/ppc/spapr_xive.h" /* For SpaprXive */ #include "hw/ppc/xics.h" /* For ICSState */ #include "hw/ppc/spapr_tpm_proxy.h" +#include "hw/ppc/vof.h" struct SpaprVioBus; struct SpaprPhbState; @@ -74,8 +75,10 @@ typedef enum { #define SPAPR_CAP_CCF_ASSIST 0x09 /* Implements PAPR FWNMI option */ #define SPAPR_CAP_FWNMI 0x0A +/* Support H_RPT_INVALIDATE */ +#define SPAPR_CAP_RPT_INVALIDATE 0x0B /* Num Caps */ -#define SPAPR_CAP_NUM (SPAPR_CAP_FWNMI + 1) +#define SPAPR_CAP_NUM (SPAPR_CAP_RPT_INVALIDATE + 1) /* * Capability Values @@ -95,25 +98,32 @@ typedef enum { #define SPAPR_CAP_FIXED_CCD 0x03 #define SPAPR_CAP_FIXED_NA 0x10 /* Lets leave a bit of a gap... 
*/ -#define FDT_MAX_SIZE 0x100000 +#define FDT_MAX_SIZE 0x200000 + +/* Max number of GPUs per system */ +#define NVGPU_MAX_NUM 6 + +/* Max number of NUMA nodes */ +#define NUMA_NODES_MAX_NUM (MAX_NODES + NVGPU_MAX_NUM) /* - * NUMA related macros. MAX_DISTANCE_REF_POINTS was taken - * from Linux kernel arch/powerpc/mm/numa.h. It represents the - * amount of associativity domains for non-CPU resources. + * NUMA FORM1 macros. FORM1_DIST_REF_POINTS was taken from + * MAX_DISTANCE_REF_POINTS in arch/powerpc/mm/numa.h from Linux + * kernel source. It represents the amount of associativity domains + * for non-CPU resources. * - * NUMA_ASSOC_SIZE is the base array size of an ibm,associativity + * FORM1_NUMA_ASSOC_SIZE is the base array size of an ibm,associativity * array for any non-CPU resource. - * - * VCPU_ASSOC_SIZE represents the size of ibm,associativity array - * for CPUs, which has an extra element (vcpu_id) in the end. */ -#define MAX_DISTANCE_REF_POINTS 4 -#define NUMA_ASSOC_SIZE (MAX_DISTANCE_REF_POINTS + 1) -#define VCPU_ASSOC_SIZE (NUMA_ASSOC_SIZE + 1) +#define FORM1_DIST_REF_POINTS 4 +#define FORM1_NUMA_ASSOC_SIZE (FORM1_DIST_REF_POINTS + 1) -/* Max number of these GPUsper a physical box */ -#define NVGPU_MAX_NUM 6 +/* + * FORM2 NUMA affinity has a single associativity domain, giving + * us a assoc size of 2. + */ +#define FORM2_DIST_REF_POINTS 1 +#define FORM2_NUMA_ASSOC_SIZE (FORM2_DIST_REF_POINTS + 1) typedef struct SpaprCapabilities SpaprCapabilities; struct SpaprCapabilities { @@ -142,6 +152,7 @@ struct SpaprMachineClass { hwaddr rma_limit; /* clamp the RMA to this size */ bool pre_5_1_assoc_refpoints; bool pre_5_2_numa_associativity; + bool pre_6_2_numa_affinity; bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, @@ -180,6 +191,7 @@ struct SpaprMachineState { uint64_t kernel_addr; uint32_t initrd_base; long initrd_size; + Vof *vof; uint64_t rtc_offset; /* Now used only during incoming migration */ struct PPCTimebase tb; bool has_graphics; @@ -223,6 +235,9 @@ struct SpaprMachineState { int fwnmi_machine_check_interlock; QemuCond fwnmi_machine_check_interlock_cond; + /* Set by -boot */ + char *boot_device; + /*< public >*/ char *kvm_type; char *host_model; @@ -242,7 +257,8 @@ struct SpaprMachineState { unsigned gpu_numa_id; SpaprTpmProxy *tpm_proxy; - uint32_t numa_assoc_array[MAX_NODES + NVGPU_MAX_NUM][NUMA_ASSOC_SIZE]; + uint32_t FORM1_assoc_array[NUMA_NODES_MAX_NUM][FORM1_NUMA_ASSOC_SIZE]; + uint32_t FORM2_assoc_array[NUMA_NODES_MAX_NUM][FORM2_NUMA_ASSOC_SIZE]; Error *fwnmi_migration_blocker; }; @@ -363,7 +379,7 @@ struct SpaprMachineState { /* Values for 2nd argument to H_SET_MODE */ #define H_SET_MODE_RESOURCE_SET_CIABR 1 -#define H_SET_MODE_RESOURCE_SET_DAWR 2 +#define H_SET_MODE_RESOURCE_SET_DAWR0 2 #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 #define H_SET_MODE_RESOURCE_LE 4 @@ -395,10 +411,13 @@ struct SpaprMachineState { #define H_CPU_CHAR_THR_RECONF_TRIG PPC_BIT(6) #define H_CPU_CHAR_CACHE_COUNT_DIS PPC_BIT(7) #define H_CPU_CHAR_BCCTR_FLUSH_ASSIST PPC_BIT(9) + #define H_CPU_BEHAV_FAVOUR_SECURITY PPC_BIT(0) #define H_CPU_BEHAV_L1D_FLUSH_PR PPC_BIT(1) #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR PPC_BIT(2) #define H_CPU_BEHAV_FLUSH_COUNT_CACHE PPC_BIT(5) +#define H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY PPC_BIT(7) +#define H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS PPC_BIT(8) /* Each control block has to be on a 4K boundary */ #define H_CB_ALIGNMENT 4096 @@ -538,8 +557,10 @@ struct SpaprMachineState { #define H_SCM_BIND_MEM 0x3EC #define H_SCM_UNBIND_MEM 
0x3F0 #define H_SCM_UNBIND_ALL 0x3FC +#define H_SCM_HEALTH 0x400 +#define H_RPT_INVALIDATE 0x448 -#define MAX_HCALL_OPCODE H_SCM_UNBIND_ALL +#define MAX_HCALL_OPCODE H_RPT_INVALIDATE /* The hcalls above are standardized in PAPR and implemented by pHyp * as well. @@ -554,7 +575,9 @@ struct SpaprMachineState { /* Client Architecture support */ #define KVMPPC_H_CAS (KVMPPC_HCALL_BASE + 0x2) #define KVMPPC_H_UPDATE_DT (KVMPPC_HCALL_BASE + 0x3) -#define KVMPPC_HCALL_MAX KVMPPC_H_UPDATE_DT +/* 0x4 was used for KVMPPC_H_UPDATE_PHANDLE in SLOF */ +#define KVMPPC_H_VOF_CLIENT (KVMPPC_HCALL_BASE + 0x5) +#define KVMPPC_HCALL_MAX KVMPPC_H_VOF_CLIENT /* * The hcall range 0xEF00 to 0xEF80 is reserved for use in facilitating @@ -581,6 +604,12 @@ typedef target_ulong (*spapr_hcall_fn)(PowerPCCPU *cpu, SpaprMachineState *sm, void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn); target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, target_ulong *args); +target_ulong softmmu_resize_hpt_prepare(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong shift); +target_ulong softmmu_resize_hpt_commit(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong flags, target_ulong shift); +bool is_ram_address(SpaprMachineState *spapr, hwaddr addr); +void push_sregs_to_kvm_pr(SpaprMachineState *spapr); /* Virtual Processor Area structure constants */ #define VPA_MIN_SIZE 640 @@ -760,7 +789,7 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr); #define SPAPR_IS_PCI_LIOBN(liobn) (!!((liobn) & 0x80000000)) #define SPAPR_PCI_DMA_WINDOW_NUM(liobn) ((liobn) & 0xff) -#define RTAS_SIZE 2048 +#define RTAS_MIN_SIZE 20 /* hv_rtas_size in SLOF */ #define RTAS_ERROR_LOG_MAX 2048 /* Offset from rtas-base where error log is placed */ @@ -820,6 +849,7 @@ void spapr_dt_events(SpaprMachineState *sm, void *fdt); void close_htab_fd(SpaprMachineState *spapr); void spapr_setup_hpt(SpaprMachineState *spapr); void spapr_free_hpt(SpaprMachineState *spapr); +void spapr_check_mmu_mode(bool guest_radix); SpaprTceTable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn); void spapr_tce_table_enable(SpaprTceTable *tcet, uint32_t page_shift, uint64_t bus_offset, @@ -921,6 +951,7 @@ extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv; extern const VMStateDescription vmstate_spapr_cap_large_decr; extern const VMStateDescription vmstate_spapr_cap_ccf_assist; extern const VMStateDescription vmstate_spapr_cap_fwnmi; +extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate; static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap) { @@ -945,4 +976,16 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); hwaddr spapr_get_rtas_addr(void); bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr); + +void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, Error **errp); +void spapr_vof_quiesce(MachineState *ms); +bool spapr_vof_setprop(MachineState *ms, const char *path, const char *propname, + void *val, int vallen); +target_ulong spapr_h_vof_client(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args); +target_ulong spapr_vof_client_architecture_support(MachineState *ms, + CPUState *cs, + target_ulong ovec_addr); +void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt); + #endif /* HW_SPAPR_H */ diff --git a/include/hw/ppc/spapr_numa.h b/include/hw/ppc/spapr_numa.h index 6f9f02d3de2..7cb33674006 100644 --- 
a/include/hw/ppc/spapr_numa.h +++ b/include/hw/ppc/spapr_numa.h @@ -24,6 +24,7 @@ */ void spapr_numa_associativity_init(SpaprMachineState *spapr, MachineState *machine); +void spapr_numa_associativity_check(SpaprMachineState *spapr); void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas); void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt, int offset, int nodeid); diff --git a/include/hw/ppc/spapr_nvdimm.h b/include/hw/ppc/spapr_nvdimm.h index 73be250e2ac..764f999f547 100644 --- a/include/hw/ppc/spapr_nvdimm.h +++ b/include/hw/ppc/spapr_nvdimm.h @@ -11,19 +11,9 @@ #define HW_SPAPR_NVDIMM_H #include "hw/mem/nvdimm.h" -#include "hw/ppc/spapr.h" -/* - * The nvdimm size should be aligned to SCM block size. - * The SCM block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE - * inorder to have SCM regions not to overlap with dimm memory regions. - * The SCM devices can have variable block sizes. For now, fixing the - * block size to the minimum value. - */ -#define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE - -/* Have an explicit check for alignment */ -QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE); +typedef struct SpaprDrc SpaprDrc; +typedef struct SpaprMachineState SpaprMachineState; int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, void *fdt, int *fdt_start_offset, Error **errp); diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h index 48b716a060c..c3e8b98e7e2 100644 --- a/include/hw/ppc/spapr_ovec.h +++ b/include/hw/ppc/spapr_ovec.h @@ -49,6 +49,7 @@ typedef struct SpaprOptionVector SpaprOptionVector; /* option vector 5 */ #define OV5_DRCONF_MEMORY OV_BIT(2, 2) #define OV5_FORM1_AFFINITY OV_BIT(5, 0) +#define OV5_FORM2_AFFINITY OV_BIT(5, 2) #define OV5_HP_EVT OV_BIT(6, 5) #define OV5_HPT_RESIZE OV_BIT(6, 7) #define OV5_DRMEM_V2 OV_BIT(22, 0) diff --git a/include/hw/ppc/vof.h b/include/hw/ppc/vof.h new file mode 100644 index 00000000000..97fdef758bf --- /dev/null +++ b/include/hw/ppc/vof.h @@ -0,0 +1,60 @@ +/* + * Virtual Open Firmware + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef HW_VOF_H +#define HW_VOF_H + +typedef struct Vof { + uint64_t top_addr; /* copied from rma_size */ + GArray *claimed; /* array of SpaprOfClaimed */ + uint64_t claimed_base; + GHashTable *of_instances; /* ihandle -> SpaprOfInstance */ + uint32_t of_instance_last; + char *bootargs; + long fw_size; +} Vof; + +int vof_client_call(MachineState *ms, Vof *vof, void *fdt, + target_ulong args_real); +uint64_t vof_claim(Vof *vof, uint64_t virt, uint64_t size, uint64_t align); +void vof_init(Vof *vof, uint64_t top_addr, Error **errp); +void vof_cleanup(Vof *vof); +void vof_build_dt(void *fdt, Vof *vof); +uint32_t vof_client_open_store(void *fdt, Vof *vof, const char *nodename, + const char *prop, const char *path); + +#define TYPE_VOF_MACHINE_IF "vof-machine-if" + +typedef struct VofMachineIfClass VofMachineIfClass; +DECLARE_CLASS_CHECKERS(VofMachineIfClass, VOF_MACHINE, TYPE_VOF_MACHINE_IF) + +struct VofMachineIfClass { + InterfaceClass parent; + target_ulong (*client_architecture_support)(MachineState *ms, CPUState *cs, + target_ulong vec); + void (*quiesce)(MachineState *ms); + bool (*setprop)(MachineState *ms, const char *path, const char *propname, + void *val, int vallen); +}; + +/* + * Initial stack size is from + * https://www.devicetree.org/open-firmware/bindings/ppc/release/ppc-2_1.html#REF27292 + * + * "Client programs shall be invoked with a valid stack pointer (r1) with + * 
at least 32K bytes of memory available for stack growth". + */ +#define VOF_STACK_SIZE 0x8000 + +#define VOF_MEM_READ(pa, buf, size) \ + address_space_read(&address_space_memory, \ + (pa), MEMTXATTRS_UNSPECIFIED, (buf), (size)) +#define VOF_MEM_WRITE(pa, buf, size) \ + address_space_write(&address_space_memory, \ + (pa), MEMTXATTRS_UNSPECIFIED, (buf), (size)) + +#define PROM_ERROR (~0U) + +#endif /* HW_VOF_H */ diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index 445eccfe6b7..b8ab0bf7490 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -261,6 +261,10 @@ static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno) #define XIVE_ESB_QUEUED (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q) #define XIVE_ESB_OFF XIVE_ESB_VAL_Q +bool xive_esb_trigger(uint8_t *pq); +bool xive_esb_eoi(uint8_t *pq); +uint8_t xive_esb_set(uint8_t *pq, uint8_t value); + /* * "magic" Event State Buffer (ESB) MMIO offsets. * @@ -282,6 +286,30 @@ static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno) uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno); uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq); +/* + * Source status helpers + */ +static inline void xive_source_set_status(XiveSource *xsrc, uint32_t srcno, + uint8_t status, bool enable) +{ + if (enable) { + xsrc->status[srcno] |= status; + } else { + xsrc->status[srcno] &= ~status; + } +} + +static inline void xive_source_set_asserted(XiveSource *xsrc, uint32_t srcno, + bool enable) +{ + xive_source_set_status(xsrc, srcno, XIVE_STATUS_ASSERTED, enable); +} + +static inline bool xive_source_is_asserted(XiveSource *xsrc, uint32_t srcno) +{ + return xsrc->status[srcno] & XIVE_STATUS_ASSERTED; +} + void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon); @@ -331,6 +359,11 @@ struct XiveTCTX { XivePresenter *xptr; }; +static inline uint32_t xive_tctx_word2(uint8_t *ring) +{ + return *((uint32_t *) &ring[TM_WORD2]); +} + /* * XIVE Router */ @@ -404,6 +437,10 @@ int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, uint8_t format, uint8_t nvt_blk, uint32_t nvt_idx, bool cam_ignore, uint32_t logic_serv); +bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool cam_ignore, uint8_t priority, + uint32_t logic_serv); /* * XIVE Fabric (Interface between Interrupt Controller and Machine) @@ -450,6 +487,17 @@ struct XiveENDSource { */ #define XIVE_PRIORITY_MAX 7 +/* + * Convert a priority number to an Interrupt Pending Buffer (IPB) + * register, which indicates a pending interrupt at the priority + * corresponding to the bit number + */ +static inline uint8_t xive_priority_to_ipb(uint8_t priority) +{ + return priority > XIVE_PRIORITY_MAX ? 
+ 0 : 1 << (XIVE_PRIORITY_MAX - priority); +} + /* * XIVE Thread Interrupt Management Aera (TIMA) * diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index bafc311bfa1..20d3066595e 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -26,6 +26,7 @@ typedef enum DeviceCategory { DEVICE_CATEGORY_SOUND, DEVICE_CATEGORY_MISC, DEVICE_CATEGORY_CPU, + DEVICE_CATEGORY_WATCHDOG, DEVICE_CATEGORY_MAX } DeviceCategory; @@ -176,11 +177,12 @@ struct DeviceState { Object parent_obj; /*< public >*/ - const char *id; + char *id; char *canonical_path; bool realized; bool pending_deleted_event; - QemuOpts *opts; + int64_t pending_deleted_expires_ms; + QDict *opts; int hotplugged; bool allow_unplug_during_migration; BusState *parent_bus; @@ -201,8 +203,12 @@ struct DeviceListener { * informs qdev if a device should be visible or hidden. We can * hide a failover device depending for example on the device * opts. + * + * On errors, it returns false and errp is set. Device creation + * should fail in this case. */ - bool (*hide_device)(DeviceListener *listener, QemuOpts *device_opts); + bool (*hide_device)(DeviceListener *listener, const QDict *device_opts, + bool from_json, Error **errp); QTAILQ_ENTRY(DeviceListener) link; }; @@ -264,6 +270,7 @@ struct BusState { HotplugHandler *hotplug_handler; int max_index; bool realized; + bool full; int num_children; /* @@ -597,6 +604,10 @@ void qdev_init_gpio_in(DeviceState *dev, qemu_irq_handler handler, int n); * * See qdev_connect_gpio_out() for how code that uses such a device * can connect to one of its output GPIO lines. + * + * There is no need to release the @pins allocated array because it + * will be automatically released when @dev calls its instance_finalize() + * handler. */ void qdev_init_gpio_out(DeviceState *dev, qemu_irq *pins, int n); /** @@ -673,9 +684,9 @@ DeviceState *qdev_find_recursive(BusState *bus, const char *id); typedef int (qbus_walkerfn)(BusState *bus, void *opaque); typedef int (qdev_walkerfn)(DeviceState *dev, void *opaque); -void qbus_create_inplace(void *bus, size_t size, const char *typename, - DeviceState *parent, const char *name); -BusState *qbus_create(const char *typename, DeviceState *parent, const char *name); +void qbus_init(void *bus, size_t size, const char *typename, + DeviceState *parent, const char *name); +BusState *qbus_new(const char *typename, DeviceState *parent, const char *name); bool qbus_realize(BusState *bus, Error **errp); void qbus_unrealize(BusState *bus); @@ -798,18 +809,43 @@ static inline bool qbus_is_hotpluggable(BusState *bus) return bus->hotplug_handler; } +/** + * qbus_mark_full: Mark this bus as full, so no more devices can be attached + * @bus: Bus to mark as full + * + * By default, QEMU will allow devices to be plugged into a bus up + * to the bus class's device count limit. Calling this function + * marks a particular bus as full, so that no more devices can be + * plugged into it. In particular this means that the bus will not + * be considered as a candidate for plugging in devices created by + * the user on the commandline or via the monitor. + * If a machine has multiple buses of a given type, such as I2C, + * where some of those buses in the real hardware are used only for + * internal devices and some are exposed via expansion ports, you + * can use this function to mark the internal-only buses as full + * after you have created all their internal devices. Then user + * created devices will appear on the expansion-port bus where + * guest software expects them. 
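Illustrative sketch (not part of the patch): the usage pattern the qbus_mark_full() comment above describes, assuming a hypothetical SoC with an internal-only I2C bus carrying a TMP105 sensor.

#include "qemu/osdep.h"
#include "hw/i2c/i2c.h"
#include "hw/qdev-core.h"

/* Hypothetical SoC init: keep the management I2C bus off-limits to -device */
static void soc_create_internal_i2c(DeviceState *soc)
{
    I2CBus *bus = i2c_init_bus(soc, "i2c-internal");

    /* create the internal devices first... */
    i2c_slave_create_simple(bus, "tmp105", 0x48);

    /* ...then mark the bus full so user-created devices go elsewhere */
    qbus_mark_full(BUS(bus));
}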
+ */ +static inline void qbus_mark_full(BusState *bus) +{ + bus->full = true; +} + void device_listener_register(DeviceListener *listener); void device_listener_unregister(DeviceListener *listener); /** * @qdev_should_hide_device: - * @opts: QemuOpts as passed on cmdline. + * @opts: options QDict + * @from_json: true if @opts entries are typed, false for all strings + * @errp: pointer to error object * * Check if a device should be added. * When a device is added via qdev_device_add() this will be called, * and return if the device should be added now or not. */ -bool qdev_should_hide_device(QemuOpts *opts); +bool qdev_should_hide_device(const QDict *opts, bool from_json, Error **errp); typedef enum MachineInitPhase { /* current_machine is NULL. */ diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 0ef97d60ce4..f7925f67d03 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -32,6 +32,7 @@ struct PropertyInfo { const char *name; const char *description; const QEnumLookup *enum_table; + bool realized_set_allowed; /* allow setting property on realized device */ int (*print)(Object *obj, Property *prop, char *dest, size_t len); void (*set_default_value)(ObjectProperty *op, const Property *prop); ObjectProperty *(*create)(ObjectClass *oc, const char *name, diff --git a/include/hw/rdma/rdma.h b/include/hw/rdma/rdma.h index e77e43a1709..80b2e531c47 100644 --- a/include/hw/rdma/rdma.h +++ b/include/hw/rdma/rdma.h @@ -31,7 +31,7 @@ typedef struct RdmaProvider RdmaProvider; struct RdmaProviderClass { InterfaceClass parent; - void (*print_statistics)(Monitor *mon, RdmaProvider *obj); + void (*format_statistics)(RdmaProvider *obj, GString *buf); }; #endif diff --git a/include/hw/register.h b/include/hw/register.h index b480e3882cd..6a076cfcdf0 100644 --- a/include/hw/register.h +++ b/include/hw/register.h @@ -204,6 +204,14 @@ RegisterInfoArray *register_init_block32(DeviceState *owner, bool debug_enabled, uint64_t memory_size); +RegisterInfoArray *register_init_block64(DeviceState *owner, + const RegisterAccessInfo *rae, + int num, RegisterInfo *ri, + uint64_t *data, + const MemoryRegionOps *ops, + bool debug_enabled, + uint64_t memory_size); + /** * This function should be called to cleanup the registers that were initialized * when calling register_init_block32(). This function should only be called diff --git a/include/hw/registerfields.h b/include/hw/registerfields.h index 93fa4a84c22..f2a3c9c41f7 100644 --- a/include/hw/registerfields.h +++ b/include/hw/registerfields.h @@ -30,6 +30,10 @@ enum { A_ ## reg = (addr) }; \ enum { R_ ## reg = (addr) / 2 }; +#define REG64(reg, addr) \ + enum { A_ ## reg = (addr) }; \ + enum { R_ ## reg = (addr) / 8 }; + /* Define SHIFT, LENGTH and MASK constants for a field within a register */ /* This macro will define R_FOO_BAR_MASK, R_FOO_BAR_SHIFT and R_FOO_BAR_LENGTH @@ -58,6 +62,8 @@ /* Extract a field from an array of registers */ #define ARRAY_FIELD_EX32(regs, reg, field) \ FIELD_EX32((regs)[R_ ## reg], reg, field) +#define ARRAY_FIELD_EX64(regs, reg, field) \ + FIELD_EX64((regs)[R_ ## reg], reg, field) /* Deposit a register field. 
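Illustrative sketch (not part of the patch): the new 64-bit register-field helpers added above, applied to a hypothetical CTRL register. REG64() maps the byte offset to an index into a uint64_t register array, and FIELD() is the existing helper from this header.

#include "qemu/osdep.h"
#include "hw/registerfields.h"

/* Hypothetical 64-bit register at byte offset 0 with two fields */
REG64(CTRL, 0x0)
    FIELD(CTRL, ENABLE, 0, 1)
    FIELD(CTRL, THRESHOLD, 8, 16)

static uint64_t regs[4];

static unsigned get_threshold(void)
{
    /* R_CTRL == 0x0 / 8, so it indexes the uint64_t array directly */
    return ARRAY_FIELD_EX64(regs, CTRL, THRESHOLD);
}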
* Assigning values larger then the target field will result in @@ -89,7 +95,7 @@ _d; }) #define FIELD_DP64(storage, reg, field, val) ({ \ struct { \ - unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ + uint64_t v:R_ ## reg ## _ ## field ## _LENGTH; \ } _v = { .v = val }; \ uint64_t _d; \ _d = deposit64((storage), R_ ## reg ## _ ## field ## _SHIFT, \ @@ -99,5 +105,7 @@ /* Deposit a field to array of registers. */ #define ARRAY_FIELD_DP32(regs, reg, field, val) \ (regs)[R_ ## reg] = FIELD_DP32((regs)[R_ ## reg], reg, field, val); +#define ARRAY_FIELD_DP64(regs, reg, field, val) \ + (regs)[R_ ## reg] = FIELD_DP64((regs)[R_ ## reg], reg, field, val); #endif diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h index 182322d7ebf..81f5445b21f 100644 --- a/include/hw/riscv/boot.h +++ b/include/hw/riscv/boot.h @@ -24,8 +24,15 @@ #include "hw/loader.h" #include "hw/riscv/riscv_hart.h" +#define RISCV32_BIOS_BIN "opensbi-riscv32-generic-fw_dynamic.bin" +#define RISCV32_BIOS_ELF "opensbi-riscv32-generic-fw_dynamic.elf" +#define RISCV64_BIOS_BIN "opensbi-riscv64-generic-fw_dynamic.bin" +#define RISCV64_BIOS_ELF "opensbi-riscv64-generic-fw_dynamic.elf" + bool riscv_is_32bit(RISCVHartArrayState *harts); +char *riscv_plic_hart_config_string(int hart_count); + target_ulong riscv_calc_kernel_start_addr(RISCVHartArrayState *harts, target_ulong firmware_end_addr); QEMU_WARN_UNUSED_RESULT diff --git a/include/hw/riscv/microchip_pfsoc.h b/include/hw/riscv/microchip_pfsoc.h index d30916f45d4..a0673f5f59c 100644 --- a/include/hw/riscv/microchip_pfsoc.h +++ b/include/hw/riscv/microchip_pfsoc.h @@ -138,7 +138,6 @@ enum { #define MICROCHIP_PFSOC_MANAGEMENT_CPU_COUNT 1 #define MICROCHIP_PFSOC_COMPUTE_CPU_COUNT 4 -#define MICROCHIP_PFSOC_PLIC_HART_CONFIG "MS" #define MICROCHIP_PFSOC_PLIC_NUM_SOURCES 185 #define MICROCHIP_PFSOC_PLIC_NUM_PRIORITIES 7 #define MICROCHIP_PFSOC_PLIC_PRIORITY_BASE 0x04 diff --git a/include/hw/riscv/opentitan.h b/include/hw/riscv/opentitan.h index a5ea3a5e4e5..eac35ef5905 100644 --- a/include/hw/riscv/opentitan.h +++ b/include/hw/riscv/opentitan.h @@ -20,8 +20,9 @@ #define HW_OPENTITAN_H #include "hw/riscv/riscv_hart.h" -#include "hw/intc/ibex_plic.h" +#include "hw/intc/sifive_plic.h" #include "hw/char/ibex_uart.h" +#include "hw/timer/ibex_timer.h" #include "qom/object.h" #define TYPE_RISCV_IBEX_SOC "riscv.lowrisc.ibex.soc" @@ -33,11 +34,13 @@ struct LowRISCIbexSoCState { /*< public >*/ RISCVHartArrayState cpus; - IbexPlicState plic; + SiFivePLICState plic; IbexUartState uart; + IbexTimerState timer; MemoryRegion flash_mem; MemoryRegion rom; + MemoryRegion flash_alias; }; typedef struct OpenTitanState { @@ -52,12 +55,13 @@ enum { IBEX_DEV_ROM, IBEX_DEV_RAM, IBEX_DEV_FLASH, + IBEX_DEV_FLASH_VIRTUAL, IBEX_DEV_UART, IBEX_DEV_GPIO, IBEX_DEV_SPI, IBEX_DEV_I2C, IBEX_DEV_PATTGEN, - IBEX_DEV_RV_TIMER, + IBEX_DEV_TIMER, IBEX_DEV_SENSOR_CTRL, IBEX_DEV_OTP_CTRL, IBEX_DEV_PWRMGR, @@ -79,17 +83,19 @@ enum { IBEX_DEV_ALERT_HANDLER, IBEX_DEV_NMI_GEN, IBEX_DEV_OTBN, + IBEX_DEV_PERI, }; enum { - IBEX_UART_RX_PARITY_ERR_IRQ = 0x28, - IBEX_UART_RX_TIMEOUT_IRQ = 0x27, - IBEX_UART_RX_BREAK_ERR_IRQ = 0x26, - IBEX_UART_RX_FRAME_ERR_IRQ = 0x25, - IBEX_UART_RX_OVERFLOW_IRQ = 0x24, - IBEX_UART_TX_EMPTY_IRQ = 0x23, - IBEX_UART_RX_WATERMARK_IRQ = 0x22, - IBEX_UART_TX_WATERMARK_IRQ = 0x21, + IBEX_TIMER_TIMEREXPIRED0_0 = 126, + IBEX_UART0_RX_PARITY_ERR_IRQ = 8, + IBEX_UART0_RX_TIMEOUT_IRQ = 7, + IBEX_UART0_RX_BREAK_ERR_IRQ = 6, + IBEX_UART0_RX_FRAME_ERR_IRQ = 5, + IBEX_UART0_RX_OVERFLOW_IRQ = 4, + 
IBEX_UART0_TX_EMPTY_IRQ = 3, + IBEX_UART0_RX_WATERMARK_IRQ = 2, + IBEX_UART0_TX_WATERMARK_IRQ = 1, }; #endif diff --git a/include/hw/riscv/shakti_c.h b/include/hw/riscv/shakti_c.h new file mode 100644 index 00000000000..50a2b790860 --- /dev/null +++ b/include/hw/riscv/shakti_c.h @@ -0,0 +1,75 @@ +/* + * Shakti C-class SoC emulation + * + * Copyright (c) 2021 Vijai Kumar K + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_SHAKTI_H +#define HW_SHAKTI_H + +#include "hw/riscv/riscv_hart.h" +#include "hw/boards.h" +#include "hw/char/shakti_uart.h" + +#define TYPE_RISCV_SHAKTI_SOC "riscv.shakti.cclass.soc" +#define RISCV_SHAKTI_SOC(obj) \ + OBJECT_CHECK(ShaktiCSoCState, (obj), TYPE_RISCV_SHAKTI_SOC) + +typedef struct ShaktiCSoCState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + RISCVHartArrayState cpus; + DeviceState *plic; + ShaktiUartState uart; + MemoryRegion rom; + +} ShaktiCSoCState; + +#define TYPE_RISCV_SHAKTI_MACHINE MACHINE_TYPE_NAME("shakti_c") +#define RISCV_SHAKTI_MACHINE(obj) \ + OBJECT_CHECK(ShaktiCMachineState, (obj), TYPE_RISCV_SHAKTI_MACHINE) +typedef struct ShaktiCMachineState { + /*< private >*/ + MachineState parent_obj; + + /*< public >*/ + ShaktiCSoCState soc; +} ShaktiCMachineState; + +enum { + SHAKTI_C_ROM, + SHAKTI_C_RAM, + SHAKTI_C_UART, + SHAKTI_C_GPIO, + SHAKTI_C_PLIC, + SHAKTI_C_CLINT, + SHAKTI_C_I2C, +}; + +#define SHAKTI_C_PLIC_HART_CONFIG "MS" +/* Including Interrupt ID 0 (no interrupt)*/ +#define SHAKTI_C_PLIC_NUM_SOURCES 28 +/* Excluding Priority 0 */ +#define SHAKTI_C_PLIC_NUM_PRIORITIES 2 +#define SHAKTI_C_PLIC_PRIORITY_BASE 0x04 +#define SHAKTI_C_PLIC_PENDING_BASE 0x1000 +#define SHAKTI_C_PLIC_ENABLE_BASE 0x2000 +#define SHAKTI_C_PLIC_ENABLE_STRIDE 0x80 +#define SHAKTI_C_PLIC_CONTEXT_BASE 0x200000 +#define SHAKTI_C_PLIC_CONTEXT_STRIDE 0x1000 + +#endif diff --git a/include/hw/riscv/sifive_u.h b/include/hw/riscv/sifive_u.h index 2656b39808a..8f63a183c47 100644 --- a/include/hw/riscv/sifive_u.h +++ b/include/hw/riscv/sifive_u.h @@ -27,6 +27,7 @@ #include "hw/misc/sifive_u_otp.h" #include "hw/misc/sifive_u_prci.h" #include "hw/ssi/sifive_spi.h" +#include "hw/timer/sifive_pwm.h" #define TYPE_RISCV_U_SOC "riscv.sifive.u.soc" #define RISCV_U_SOC(obj) \ @@ -49,6 +50,7 @@ typedef struct SiFiveUSoCState { SiFiveSPIState spi0; SiFiveSPIState spi2; CadenceGEMState gem; + SiFivePwmState pwm[2]; uint32_t serial; char *cpu_type; @@ -92,7 +94,9 @@ enum { SIFIVE_U_DEV_FLASH0, SIFIVE_U_DEV_DRAM, SIFIVE_U_DEV_GEM, - SIFIVE_U_DEV_GEM_MGMT + SIFIVE_U_DEV_GEM_MGMT, + SIFIVE_U_DEV_PWM0, + SIFIVE_U_DEV_PWM1 }; enum { @@ -126,6 +130,14 @@ enum { SIFIVE_U_PDMA_IRQ5 = 28, SIFIVE_U_PDMA_IRQ6 = 29, SIFIVE_U_PDMA_IRQ7 = 30, + SIFIVE_U_PWM0_IRQ0 = 42, + SIFIVE_U_PWM0_IRQ1 = 43, + SIFIVE_U_PWM0_IRQ2 = 44, + SIFIVE_U_PWM0_IRQ3 = 45, + SIFIVE_U_PWM1_IRQ0 = 46, + SIFIVE_U_PWM1_IRQ1 = 47, + SIFIVE_U_PWM1_IRQ2 = 48, + SIFIVE_U_PWM1_IRQ3 = 49, SIFIVE_U_QSPI0_IRQ = 51, SIFIVE_U_GEM_IRQ = 53 }; @@ -144,7 +156,6 @@ enum { #define 
SIFIVE_U_MANAGEMENT_CPU_COUNT 1 #define SIFIVE_U_COMPUTE_CPU_COUNT 4 -#define SIFIVE_U_PLIC_HART_CONFIG "MS" #define SIFIVE_U_PLIC_NUM_SOURCES 54 #define SIFIVE_U_PLIC_NUM_PRIORITIES 7 #define SIFIVE_U_PLIC_PRIORITY_BASE 0x04 diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h index 349fee1f897..b8ef99f3489 100644 --- a/include/hw/riscv/virt.h +++ b/include/hw/riscv/virt.h @@ -43,6 +43,7 @@ struct RISCVVirtState { FWCfgState *fw_cfg; int fdt_size; + bool have_aclint; }; enum { @@ -51,6 +52,7 @@ enum { VIRT_TEST, VIRT_RTC, VIRT_CLINT, + VIRT_ACLINT_SSWI, VIRT_PLIC, VIRT_UART0, VIRT_VIRTIO, @@ -71,7 +73,6 @@ enum { VIRTIO_NDEV = 0x35 /* Arbitrary maximum number of interrupts */ }; -#define VIRT_PLIC_HART_CONFIG "MS" #define VIRT_PLIC_NUM_SOURCES 127 #define VIRT_PLIC_NUM_PRIORITIES 7 #define VIRT_PLIC_PRIORITY_BASE 0x04 diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h index bba7593d2ea..75e53816135 100644 --- a/include/hw/s390x/css.h +++ b/include/hw/s390x/css.h @@ -138,13 +138,16 @@ struct SubchDev { int (*ccw_cb) (SubchDev *, CCW1); void (*disable_cb)(SubchDev *); IOInstEnding (*do_subchannel_work) (SubchDev *); + void (*irb_cb)(SubchDev *, IRB *); SenseId id; void *driver_data; + ESW esw; }; static inline void sch_gen_unit_exception(SubchDev *sch) { - sch->curr_status.scsw.ctrl &= ~SCSW_ACTL_START_PEND; + sch->curr_status.scsw.ctrl &= ~(SCSW_ACTL_DEVICE_ACTIVE | + SCSW_ACTL_SUBCH_ACTIVE); sch->curr_status.scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | SCSW_STCTL_ALERT | @@ -201,6 +204,7 @@ int css_sch_build_schib(SubchDev *sch, CssDevId *dev_id); unsigned int css_find_free_chpid(uint8_t cssid); uint16_t css_build_subchannel_id(SubchDev *sch); void copy_scsw_to_guest(SCSW *dest, const SCSW *src); +void copy_esw_to_guest(ESW *dest, const ESW *src); void css_inject_io_interrupt(SubchDev *sch); void css_reset(void); void css_reset_sch(SubchDev *sch); @@ -215,6 +219,8 @@ void css_clear_sei_pending(void); IOInstEnding s390_ccw_cmd_request(SubchDev *sch); IOInstEnding do_subchannel_work_virtual(SubchDev *sub); IOInstEnding do_subchannel_work_passthrough(SubchDev *sub); +void build_irb_passthrough(SubchDev *sch, IRB *irb); +void build_irb_virtual(SubchDev *sch, IRB *irb); int s390_ccw_halt(SubchDev *sch); int s390_ccw_clear(SubchDev *sch); diff --git a/include/hw/s390x/ioinst.h b/include/hw/s390x/ioinst.h index c6737a30d44..3771fff9d44 100644 --- a/include/hw/s390x/ioinst.h +++ b/include/hw/s390x/ioinst.h @@ -123,10 +123,20 @@ typedef struct SCHIB { uint8_t mda[4]; } QEMU_PACKED SCHIB; +/* format-0 extended-status word */ +typedef struct ESW { + uint32_t word0; /* subchannel logout for format 0 */ + uint32_t erw; + uint64_t word2; /* failing-storage address for format 0 */ + uint32_t word4; /* secondary-CCW address for format 0 */ +} QEMU_PACKED ESW; + +#define ESW_ERW_SENSE 0x01000000 + /* interruption response block */ typedef struct IRB { SCSW scsw; - uint32_t esw[5]; + ESW esw; uint32_t ecw[8]; uint32_t emw[8]; } IRB; diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h index 49ae9f03d31..aa891c178dd 100644 --- a/include/hw/s390x/s390-pci-bus.h +++ b/include/hw/s390x/s390-pci-bus.h @@ -81,9 +81,6 @@ OBJECT_DECLARE_SIMPLE_TYPE(S390PCIIOMMU, S390_PCI_IOMMU) #define ZPCI_SDMA_ADDR 0x100000000ULL #define ZPCI_EDMA_ADDR 0x1ffffffffffffffULL -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1 << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) #define PAGE_DEFAULT_ACC 0 #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) @@ -137,7 +134,7 @@ enum 
ZpciIoatDtype { #define ZPCI_TABLE_BITS 11 #define ZPCI_PT_BITS 8 -#define ZPCI_ST_SHIFT (ZPCI_PT_BITS + PAGE_SHIFT) +#define ZPCI_ST_SHIFT (ZPCI_PT_BITS + TARGET_PAGE_BITS) #define ZPCI_RT_SHIFT (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS) #define ZPCI_RTE_FLAG_MASK 0x3fffULL diff --git a/include/hw/s390x/storage-keys.h b/include/hw/s390x/storage-keys.h index 2888d42d0b4..aa2ec2aae50 100644 --- a/include/hw/s390x/storage-keys.h +++ b/include/hw/s390x/storage-keys.h @@ -28,9 +28,72 @@ struct S390SKeysState { struct S390SKeysClass { DeviceClass parent_class; - int (*skeys_enabled)(S390SKeysState *ks); + + /** + * @skeys_are_enabled: + * + * Check whether storage keys are enabled. If not enabled, they were not + * enabled lazily either by the guest via a storage key instruction or + * by the host during migration. + * + * If disabled, everything not explicitly triggered by the guest, + * such as outgoing migration or dirty/change tracking, should not touch + * storage keys and should not lazily enable it. + * + * @ks: the #S390SKeysState + * + * Returns false if not enabled and true if enabled. + */ + bool (*skeys_are_enabled)(S390SKeysState *ks); + + /** + * @enable_skeys: + * + * Lazily enable storage keys. If this function is not implemented, + * setting a storage key will lazily enable storage keys implicitly + * instead. TCG guests have to make sure to flush the TLB of all CPUs + * if storage keys were not enabled before this call. + * + * @ks: the #S390SKeysState + * + * Returns false if not enabled before this call, and true if already + * enabled. + */ + bool (*enable_skeys)(S390SKeysState *ks); + + /** + * @get_skeys: + * + * Get storage keys for the given PFN range. This call will fail if + * storage keys have not been lazily enabled yet. + * + * Callers have to validate that a GFN is valid before this call. + * + * @ks: the #S390SKeysState + * @start_gfn: the start GFN to get storage keys for + * @count: the number of storage keys to get + * @keys: the byte array where storage keys will be stored to + * + * Returns 0 on success, returns an error if getting a storage key failed. + */ int (*get_skeys)(S390SKeysState *ks, uint64_t start_gfn, uint64_t count, uint8_t *keys); + /** + * @set_skeys: + * + * Set storage keys for the given PFN range. This call will fail if + * storage keys have not been lazily enabled yet and implicit + * enablement is not supported. + * + * Callers have to validate that a GFN is valid before this call. + * + * @ks: the #S390SKeysState + * @start_gfn: the start GFN to set storage keys for + * @count: the number of storage keys to set + * @keys: the byte array where storage keys will be read from + * + * Returns 0 on success, returns an error if setting a storage key failed. 
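+ *
+ * A caller-side sketch (hedged and purely illustrative; @ss and an
+ * already-validated @gfn are assumed to exist in the caller):
+ *
+ *     S390SKeysClass *skc = S390_SKEYS_GET_CLASS(ss);
+ *     uint8_t key = 0;    /* example key value */
+ *
+ *     if (skc->set_skeys(ss, gfn, 1, &key)) {
+ *         /* setting the key failed, handle the error */
+ *     }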
+ */ int (*set_skeys)(S390SKeysState *ks, uint64_t start_gfn, uint64_t count, uint8_t *keys); }; diff --git a/include/hw/s390x/tod.h b/include/hw/s390x/tod.h index ff3195a4bf8..0935e850891 100644 --- a/include/hw/s390x/tod.h +++ b/include/hw/s390x/tod.h @@ -12,7 +12,7 @@ #define HW_S390_TOD_H #include "hw/qdev-core.h" -#include "target/s390x/s390-tod.h" +#include "tcg/s390-tod.h" #include "qom/object.h" typedef struct S390TOD { diff --git a/include/hw/scsi/esp.h b/include/hw/scsi/esp.h index aada3680b75..b1ec27612f6 100644 --- a/include/hw/scsi/esp.h +++ b/include/hw/scsi/esp.h @@ -37,6 +37,7 @@ struct ESPState { SCSIRequest *current_req; Fifo8 cmdfifo; uint8_t cmdfifo_cdb_offset; + uint8_t lun; uint32_t do_cmd; bool data_in_ready; diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h index 0b726bc78c6..a567a5ed86b 100644 --- a/include/hw/scsi/scsi.h +++ b/include/hw/scsi/scsi.h @@ -146,8 +146,34 @@ struct SCSIBus { const SCSIBusInfo *info; }; -void scsi_bus_new(SCSIBus *bus, size_t bus_size, DeviceState *host, - const SCSIBusInfo *info, const char *bus_name); +/** + * scsi_bus_init_named: Initialize a SCSI bus with the specified name + * @bus: SCSIBus object to initialize + * @bus_size: size of @bus object + * @host: Device which owns the bus (generally the SCSI controller) + * @info: structure defining callbacks etc for the controller + * @bus_name: Name to use for this bus + * + * This in-place initializes @bus as a new SCSI bus with a name + * provided by the caller. It is the caller's responsibility to make + * sure that name does not clash with the name of any other bus in the + * system. Unless you need the new bus to have a specific name, you + * should use scsi_bus_new() instead. + */ +void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host, + const SCSIBusInfo *info, const char *bus_name); + +/** + * scsi_bus_init: Initialize a SCSI bus + * + * This in-place-initializes @bus as a new SCSI bus and gives it + * an automatically generated unique name. + */ +static inline void scsi_bus_init(SCSIBus *bus, size_t bus_size, + DeviceState *host, const SCSIBusInfo *info) +{ + scsi_bus_init_named(bus, bus_size, host, info, NULL); +} static inline SCSIBus *scsi_bus_from_device(SCSIDevice *d) { diff --git a/include/hw/sd/npcm7xx_sdhci.h b/include/hw/sd/npcm7xx_sdhci.h new file mode 100644 index 00000000000..d728f0a40de --- /dev/null +++ b/include/hw/sd/npcm7xx_sdhci.h @@ -0,0 +1,65 @@ +/* + * NPCM7xx SD-3.0 / eMMC-4.51 Host Controller + * + * Copyright (c) 2021 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ */ + +#ifndef NPCM7XX_SDHCI_H +#define NPCM7XX_SDHCI_H + +#include "hw/sd/sdhci.h" +#include "qom/object.h" + +#define TYPE_NPCM7XX_SDHCI "npcm7xx.sdhci" +#define NPCM7XX_PRSTVALS_SIZE 6 +#define NPCM7XX_PRSTVALS 0x60 +#define NPCM7XX_PRSTVALS_0 0x0 +#define NPCM7XX_PRSTVALS_1 0x2 +#define NPCM7XX_PRSTVALS_2 0x4 +#define NPCM7XX_PRSTVALS_3 0x6 +#define NPCM7XX_PRSTVALS_4 0x8 +#define NPCM7XX_PRSTVALS_5 0xA +#define NPCM7XX_BOOTTOCTRL 0x10 +#define NPCM7XX_SDHCI_REGSIZE 0x20 + +#define NPCM7XX_PRSNTS_RESET 0x04A00000 +#define NPCM7XX_BLKGAP_RESET 0x80 +#define NPCM7XX_CAPAB_RESET 0x0100200161EE0399 +#define NPCM7XX_MAXCURR_RESET 0x0000000000000005 +#define NPCM7XX_HCVER_RESET 0x1002 + +#define NPCM7XX_PRSTVALS_0_RESET 0x0040 +#define NPCM7XX_PRSTVALS_1_RESET 0x0001 +#define NPCM7XX_PRSTVALS_3_RESET 0x0001 + +OBJECT_DECLARE_SIMPLE_TYPE(NPCM7xxSDHCIState, NPCM7XX_SDHCI) + +typedef struct NPCM7xxRegs { + /* Preset Values Register Field, read-only */ + uint16_t prstvals[NPCM7XX_PRSTVALS_SIZE]; + /* Boot Timeout Control Register, read-write */ + uint32_t boottoctrl; +} NPCM7xxRegisters; + +typedef struct NPCM7xxSDHCIState { + SysBusDevice parent; + + MemoryRegion container; + MemoryRegion iomem; + BusState *bus; + NPCM7xxRegisters regs; + + SDHCIState sdhci; +} NPCM7xxSDHCIState; + +#endif /* NPCM7XX_SDHCI_H */ diff --git a/include/hw/misc/emc141x_regs.h b/include/hw/sensor/emc141x_regs.h similarity index 100% rename from include/hw/misc/emc141x_regs.h rename to include/hw/sensor/emc141x_regs.h diff --git a/hw/misc/tmp105.h b/include/hw/sensor/tmp105.h similarity index 97% rename from hw/misc/tmp105.h rename to include/hw/sensor/tmp105.h index 7c97071ad75..244e2989feb 100644 --- a/hw/misc/tmp105.h +++ b/include/hw/sensor/tmp105.h @@ -15,7 +15,7 @@ #define QEMU_TMP105_H #include "hw/i2c/i2c.h" -#include "hw/misc/tmp105_regs.h" +#include "hw/sensor/tmp105_regs.h" #include "qom/object.h" #define TYPE_TMP105 "tmp105" diff --git a/include/hw/misc/tmp105_regs.h b/include/hw/sensor/tmp105_regs.h similarity index 100% rename from include/hw/misc/tmp105_regs.h rename to include/hw/sensor/tmp105_regs.h diff --git a/include/hw/sh4/sh.h b/include/hw/sh4/sh.h index becb5969790..ec716cdd458 100644 --- a/include/hw/sh4/sh.h +++ b/include/hw/sh4/sh.h @@ -44,25 +44,18 @@ typedef struct { uint16_t portbmask_trigger; /* Return 0 if no action was taken */ int (*port_change_cb) (uint16_t porta, uint16_t portb, - uint16_t * periph_pdtra, - uint16_t * periph_portdira, - uint16_t * periph_pdtrb, - uint16_t * periph_portdirb); + uint16_t *periph_pdtra, + uint16_t *periph_portdira, + uint16_t *periph_pdtrb, + uint16_t *periph_portdirb); } sh7750_io_device; int sh7750_register_io_device(struct SH7750State *s, - sh7750_io_device * device); + sh7750_io_device *device); /* sh_serial.c */ +#define TYPE_SH_SERIAL "sh-serial" #define SH_SERIAL_FEAT_SCIF (1 << 0) -void sh_serial_init(MemoryRegion *sysmem, - hwaddr base, int feat, - uint32_t freq, Chardev *chr, - qemu_irq eri_source, - qemu_irq rxi_source, - qemu_irq txi_source, - qemu_irq tei_source, - qemu_irq bri_source); /* sh7750.c */ qemu_irq sh7750_irl(struct SH7750State *s); diff --git a/include/hw/sh4/sh_intc.h b/include/hw/sh4/sh_intc.h index 65f34250572..f62d5c5e136 100644 --- a/include/hw/sh4/sh_intc.h +++ b/include/hw/sh4/sh_intc.h @@ -58,7 +58,7 @@ struct intc_desc { }; int sh_intc_get_pending_vector(struct intc_desc *desc, int imask); -struct intc_source *sh_intc_source(struct intc_desc *desc, intc_enum id); + void sh_intc_toggle_source(struct intc_source 
*source, int enable_adj, int assert_adj); diff --git a/include/hw/ssi/aspeed_smc.h b/include/hw/ssi/aspeed_smc.h index 16c03fe64f3..e2655558199 100644 --- a/include/hw/ssi/aspeed_smc.h +++ b/include/hw/ssi/aspeed_smc.h @@ -29,64 +29,33 @@ #include "hw/sysbus.h" #include "qom/object.h" -typedef struct AspeedSegments { - hwaddr addr; - uint32_t size; -} AspeedSegments; - struct AspeedSMCState; -typedef struct AspeedSMCController { - const char *name; - uint8_t r_conf; - uint8_t r_ce_ctrl; - uint8_t r_ctrl0; - uint8_t r_timings; - uint8_t nregs_timings; - uint8_t conf_enable_w0; - uint8_t max_peripherals; - const AspeedSegments *segments; - hwaddr flash_window_base; - uint32_t flash_window_size; - bool has_dma; - hwaddr dma_flash_mask; - hwaddr dma_dram_mask; - uint32_t nregs; - uint32_t (*segment_to_reg)(const struct AspeedSMCState *s, - const AspeedSegments *seg); - void (*reg_to_segment)(const struct AspeedSMCState *s, uint32_t reg, - AspeedSegments *seg); -} AspeedSMCController; -typedef struct AspeedSMCFlash { - struct AspeedSMCState *controller; +#define TYPE_ASPEED_SMC_FLASH "aspeed.smc.flash" +OBJECT_DECLARE_SIMPLE_TYPE(AspeedSMCFlash, ASPEED_SMC_FLASH) +struct AspeedSMCFlash { + SysBusDevice parent_obj; - uint8_t id; - uint32_t size; + struct AspeedSMCState *controller; + uint8_t cs; MemoryRegion mmio; - DeviceState *flash; -} AspeedSMCFlash; +}; #define TYPE_ASPEED_SMC "aspeed.smc" OBJECT_DECLARE_TYPE(AspeedSMCState, AspeedSMCClass, ASPEED_SMC) -struct AspeedSMCClass { - SysBusDevice parent_obj; - const AspeedSMCController *ctrl; -}; - #define ASPEED_SMC_R_MAX (0x100 / 4) +#define ASPEED_SMC_CS_MAX 5 struct AspeedSMCState { SysBusDevice parent_obj; - const AspeedSMCController *ctrl; - MemoryRegion mmio; + MemoryRegion mmio_flash_container; MemoryRegion mmio_flash; qemu_irq irq; - int irqline; uint32_t num_cs; qemu_irq *cs_lines; @@ -103,17 +72,45 @@ struct AspeedSMCState { uint8_t r_timings; uint8_t conf_enable_w0; - /* for DMA support */ - uint64_t sdram_base; - AddressSpace flash_as; MemoryRegion *dram_mr; AddressSpace dram_as; - AspeedSMCFlash *flashes; + AspeedSMCFlash flashes[ASPEED_SMC_CS_MAX]; uint8_t snoop_index; uint8_t snoop_dummies; }; +typedef struct AspeedSegments { + hwaddr addr; + uint32_t size; +} AspeedSegments; + +struct AspeedSMCClass { + SysBusDeviceClass parent_obj; + + uint8_t r_conf; + uint8_t r_ce_ctrl; + uint8_t r_ctrl0; + uint8_t r_timings; + uint8_t nregs_timings; + uint8_t conf_enable_w0; + uint8_t max_peripherals; + const uint32_t *resets; + const AspeedSegments *segments; + hwaddr flash_window_base; + uint32_t flash_window_size; + uint32_t features; + hwaddr dma_flash_mask; + hwaddr dma_dram_mask; + uint32_t nregs; + uint32_t (*segment_to_reg)(const AspeedSMCState *s, + const AspeedSegments *seg); + void (*reg_to_segment)(const AspeedSMCState *s, uint32_t reg, + AspeedSegments *seg); + void (*dma_ctrl)(AspeedSMCState *s, uint32_t value); + int (*addr_width)(const AspeedSMCState *s); +}; + #endif /* ASPEED_SMC_H */ diff --git a/include/hw/timer/armv7m_systick.h b/include/hw/timer/armv7m_systick.h index 84496faaf96..ee09b138810 100644 --- a/include/hw/timer/armv7m_systick.h +++ b/include/hw/timer/armv7m_systick.h @@ -15,11 +15,23 @@ #include "hw/sysbus.h" #include "qom/object.h" #include "hw/ptimer.h" +#include "hw/clock.h" #define TYPE_SYSTICK "armv7m_systick" OBJECT_DECLARE_SIMPLE_TYPE(SysTickState, SYSTICK) +/* + * QEMU interface: + * + sysbus MMIO region 0 is the register interface (covering + * the registers which are mapped at address 0xE000E010) + 
* + sysbus IRQ 0 is the interrupt line to the NVIC + * + Clock input "refclk" is the external reference clock + * (used when SYST_CSR.CLKSOURCE == 0) + * + Clock input "cpuclk" is the main CPU clock + * (used when SYST_CSR.CLKSOURCE == 1) + */ + struct SysTickState { /*< private >*/ SysBusDevice parent_obj; @@ -31,28 +43,8 @@ struct SysTickState { ptimer_state *ptimer; MemoryRegion iomem; qemu_irq irq; + Clock *refclk; + Clock *cpuclk; }; -/* - * Multiplication factor to convert from system clock ticks to qemu timer - * ticks. This should be set (by board code, usually) to a value - * equal to NANOSECONDS_PER_SECOND / frq, where frq is the clock frequency - * in Hz of the CPU. - * - * This value is used by the systick device when it is running in - * its "use the CPU clock" mode (ie when SYST_CSR.CLKSOURCE == 1) to - * set how fast the timer should tick. - * - * TODO: we should refactor this so that rather than using a global - * we use a device property or something similar. This is complicated - * because (a) the property would need to be plumbed through from the - * board code down through various layers to the systick device - * and (b) the property needs to be modifiable after realize, because - * the stellaris board uses this to implement the behaviour where the - * guest can reprogram the PLL registers to downclock the CPU, and the - * systick device needs to react accordingly. Possibly this should - * be deferred until we have a good API for modelling clock trees. - */ -extern int system_clock_scale; - #endif diff --git a/include/hw/timer/avr_timer16.h b/include/hw/timer/avr_timer16.h index 05362543378..a1a032a24dc 100644 --- a/include/hw/timer/avr_timer16.h +++ b/include/hw/timer/avr_timer16.h @@ -30,7 +30,6 @@ #include "hw/sysbus.h" #include "qemu/timer.h" -#include "hw/hw.h" #include "qom/object.h" enum NextInterrupt { diff --git a/include/hw/timer/ibex_timer.h b/include/hw/timer/ibex_timer.h new file mode 100644 index 00000000000..b6f69b38ee7 --- /dev/null +++ b/include/hw/timer/ibex_timer.h @@ -0,0 +1,54 @@ +/* + * QEMU lowRISC Ibex Timer device + * + * Copyright (c) 2021 Western Digital + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef HW_IBEX_TIMER_H +#define HW_IBEX_TIMER_H + +#include "hw/sysbus.h" + +#define TYPE_IBEX_TIMER "ibex-timer" +OBJECT_DECLARE_SIMPLE_TYPE(IbexTimerState, IBEX_TIMER) + +struct IbexTimerState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion mmio; + + uint32_t timer_ctrl; + uint32_t timer_cfg0; + uint32_t timer_compare_lower0; + uint32_t timer_compare_upper0; + uint32_t timer_intr_enable; + uint32_t timer_intr_state; + uint32_t timer_intr_test; + + uint32_t timebase_freq; + + qemu_irq irq; + + qemu_irq m_timer_irq; +}; +#endif /* HW_IBEX_TIMER_H */ diff --git a/include/hw/timer/sifive_pwm.h b/include/hw/timer/sifive_pwm.h new file mode 100644 index 00000000000..6a8cf7b29e4 --- /dev/null +++ b/include/hw/timer/sifive_pwm.h @@ -0,0 +1,62 @@ +/* + * SiFive PWM + * + * Copyright (c) 2020 Western Digital + * + * Author: Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_SIFIVE_PWM_H +#define HW_SIFIVE_PWM_H + +#include "hw/sysbus.h" +#include "qemu/timer.h" +#include "qom/object.h" + +#define TYPE_SIFIVE_PWM "sifive-pwm" + +#define SIFIVE_PWM(obj) \ + OBJECT_CHECK(SiFivePwmState, (obj), TYPE_SIFIVE_PWM) + +#define SIFIVE_PWM_CHANS 4 +#define SIFIVE_PWM_IRQS SIFIVE_PWM_CHANS + +typedef struct SiFivePwmState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion mmio; + QEMUTimer timer[SIFIVE_PWM_CHANS]; + /* + * if en bit(s) set, is the number of ticks when pwmcount was 0 + * if en bit(s) not set, is the number of ticks in pwmcount + */ + uint64_t tick_offset; + uint64_t freq_hz; + + uint32_t pwmcfg; + uint32_t pwmcmp[SIFIVE_PWM_CHANS]; + + qemu_irq irqs[SIFIVE_PWM_IRQS]; +} SiFivePwmState; + +#endif /* HW_SIFIVE_PWM_H */ diff --git a/include/hw/timer/stellaris-gptm.h b/include/hw/timer/stellaris-gptm.h new file mode 100644 index 00000000000..fde1fc6f0c7 --- /dev/null +++ b/include/hw/timer/stellaris-gptm.h @@ -0,0 +1,51 @@ +/* + * Luminary Micro Stellaris General Purpose Timer Module + * + * Copyright (c) 2006 CodeSourcery. + * Written by Paul Brook + * + * This code is licensed under the GPL. 
+ */ + +#ifndef HW_TIMER_STELLARIS_GPTM_H +#define HW_TIMER_STELLARIS_GPTM_H + +#include "qom/object.h" +#include "hw/sysbus.h" +#include "hw/irq.h" +#include "hw/clock.h" + +#define TYPE_STELLARIS_GPTM "stellaris-gptm" +OBJECT_DECLARE_SIMPLE_TYPE(gptm_state, STELLARIS_GPTM) + +/* + * QEMU interface: + * + sysbus MMIO region 0: register bank + * + sysbus IRQ 0: timer interrupt + * + unnamed GPIO output 0: trigger output for the ADC + * + Clock input "clk": the 32-bit countdown timer runs at this speed + */ +struct gptm_state { + SysBusDevice parent_obj; + + MemoryRegion iomem; + uint32_t config; + uint32_t mode[2]; + uint32_t control; + uint32_t state; + uint32_t mask; + uint32_t load[2]; + uint32_t match[2]; + uint32_t prescale[2]; + uint32_t match_prescale[2]; + uint32_t rtc; + int64_t tick[2]; + struct gptm_state *opaque[2]; + QEMUTimer *timer[2]; + /* The timers have an alternate output used to trigger the ADC. */ + qemu_irq trigger; + qemu_irq irq; + Clock *clk; +}; + +#endif diff --git a/include/hw/tricore/tricore_testdevice.h b/include/hw/tricore/tricore_testdevice.h new file mode 100644 index 00000000000..2c56c51bcb8 --- /dev/null +++ b/include/hw/tricore/tricore_testdevice.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2018-2021 Bastian Koppelmann Paderborn University + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + + +#ifndef HW_TRICORE_TESTDEV_H +#define HW_TRICORE_TESTDEV_H + +#include "hw/sysbus.h" +#include "hw/hw.h" + +#define TYPE_TRICORE_TESTDEVICE "tricore_testdevice" +#define TRICORE_TESTDEVICE(obj) \ + OBJECT_CHECK(TriCoreTestDeviceState, (obj), TYPE_TRICORE_TESTDEVICE) + +typedef struct { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion iomem; + +} TriCoreTestDeviceState; + +#endif diff --git a/include/hw/unicore32/puv3.h b/include/hw/unicore32/puv3.h deleted file mode 100644 index f587a1f6228..00000000000 --- a/include/hw/unicore32/puv3.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Misc PKUnity SoC declarations - * - * Copyright (C) 2010-2012 Guan Xuetao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation, or any later version. - * See the COPYING file in the top-level directory. - */ - -#ifndef QEMU_HW_PUV3_H -#define QEMU_HW_PUV3_H - -#define PUV3_REGS_OFFSET (0x1000) /* 4K is reasonable */ - -/* Hardware interrupts */ -#define PUV3_IRQS_NR (32) - -#define PUV3_IRQS_GPIOLOW0 (0) -#define PUV3_IRQS_GPIOLOW1 (1) -#define PUV3_IRQS_GPIOLOW2 (2) -#define PUV3_IRQS_GPIOLOW3 (3) -#define PUV3_IRQS_GPIOLOW4 (4) -#define PUV3_IRQS_GPIOLOW5 (5) -#define PUV3_IRQS_GPIOLOW6 (6) -#define PUV3_IRQS_GPIOLOW7 (7) -#define PUV3_IRQS_GPIOHIGH (8) -#define PUV3_IRQS_PS2_KBD (22) -#define PUV3_IRQS_PS2_AUX (23) -#define PUV3_IRQS_OST0 (26) - -/* All puv3_*.c use DPRINTF for debug. */ -#ifdef DEBUG_PUV3 -#define DPRINTF(fmt, ...) 
printf("%s: " fmt , __func__, ## __VA_ARGS__) -#else -#define DPRINTF(fmt, ...) do {} while (0) -#endif - -#endif /* QEMU_HW_PUV3_H */ diff --git a/include/hw/usb.h b/include/hw/usb.h index 436e07b3040..33668dd0a99 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h @@ -219,6 +219,7 @@ enum USBDeviceFlags { USB_DEV_FLAG_IS_HOST, USB_DEV_FLAG_MSOS_DESC_ENABLE, USB_DEV_FLAG_MSOS_DESC_IN_USE, + USB_DEV_FLAG_IS_SCSI_STORAGE, }; /* definition of a USB device */ @@ -465,7 +466,6 @@ void usb_generic_async_ctrl_complete(USBDevice *s, USBPacket *p); /* usb-linux.c */ void hmp_info_usbhost(Monitor *mon, const QDict *qdict); -bool usb_host_dev_is_scsi_storage(USBDevice *usbdev); /* usb ports of the VM */ @@ -561,6 +561,11 @@ const char *usb_device_get_product_desc(USBDevice *dev); const USBDesc *usb_device_get_usb_desc(USBDevice *dev); +static inline bool usb_device_is_scsi_storage(USBDevice *dev) +{ + return dev->flags & (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); +} + /* quirks.c */ /* In bulk endpoints are streaming data sources (iow behave like isoc eps) */ diff --git a/include/hw/usb/dwc2-regs.h b/include/hw/usb/dwc2-regs.h index 40af23a0bad..a7eb5314854 100644 --- a/include/hw/usb/dwc2-regs.h +++ b/include/hw/usb/dwc2-regs.h @@ -39,8 +39,8 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __DWC2_HW_H__ -#define __DWC2_HW_H__ +#ifndef DWC2_HW_H +#define DWC2_HW_H #define HSOTG_REG(x) (x) diff --git a/include/hw/usb/msd.h b/include/hw/usb/msd.h index 7538c54569b..54e9f38bda4 100644 --- a/include/hw/usb/msd.h +++ b/include/hw/usb/msd.h @@ -17,7 +17,7 @@ enum USBMSDMode { USB_MSDM_CSW /* Command Status. */ }; -struct usb_msd_csw { +struct QEMU_PACKED usb_msd_csw { uint32_t sig; uint32_t tag; uint32_t residue; diff --git a/include/hw/usb/xlnx-usb-subsystem.h b/include/hw/usb/xlnx-usb-subsystem.h index 739bef7f451..999e423951a 100644 --- a/include/hw/usb/xlnx-usb-subsystem.h +++ b/include/hw/usb/xlnx-usb-subsystem.h @@ -22,8 +22,8 @@ * THE SOFTWARE. */ -#ifndef _XLNX_VERSAL_USB_SUBSYSTEM_H_ -#define _XLNX_VERSAL_USB_SUBSYSTEM_H_ +#ifndef XLNX_VERSAL_USB_SUBSYSTEM_H +#define XLNX_VERSAL_USB_SUBSYSTEM_H #include "hw/usb/xlnx-versal-usb2-ctrl-regs.h" #include "hw/usb/hcd-dwc3.h" diff --git a/include/hw/usb/xlnx-versal-usb2-ctrl-regs.h b/include/hw/usb/xlnx-versal-usb2-ctrl-regs.h index 975a717627a..b76dce04195 100644 --- a/include/hw/usb/xlnx-versal-usb2-ctrl-regs.h +++ b/include/hw/usb/xlnx-versal-usb2-ctrl-regs.h @@ -23,8 +23,8 @@ * THE SOFTWARE. 
*/ -#ifndef _XLNX_USB2_REGS_H_ -#define _XLNX_USB2_REGS_H_ +#ifndef XLNX_USB2_REGS_H +#define XLNX_USB2_REGS_H #define TYPE_XILINX_VERSAL_USB2_CTRL_REGS "xlnx.versal-usb2-ctrl-regs" diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 6141162d7ae..8af11b0a769 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -88,9 +88,11 @@ typedef struct VFIOContainer { uint64_t dirty_pgsizes; uint64_t max_dirty_bitmap_size; unsigned long pgsizes; + unsigned int dma_max_mappings; QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; QLIST_ENTRY(VFIOContainer) next; } VFIOContainer; @@ -102,6 +104,16 @@ typedef struct VFIOGuestIOMMU { QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; } VFIOGuestIOMMU; +typedef struct VFIORamDiscardListener { + VFIOContainer *container; + MemoryRegion *mr; + hwaddr offset_within_address_space; + hwaddr size; + uint64_t granularity; + RamDiscardListener listener; + QLIST_ENTRY(VFIORamDiscardListener) next; +} VFIORamDiscardListener; + typedef struct VFIOHostDMAWindow { hwaddr min_iova; hwaddr max_iova; diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index 8a6f8e2a7a3..81bf3109f83 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -37,7 +37,8 @@ struct vhost_scsi_target; struct vhost_iotlb_msg; struct vhost_virtqueue; -typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque); +typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque, + Error **errp); typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev); typedef int (*vhost_backend_memslots_limit)(struct vhost_dev *dev); @@ -97,7 +98,7 @@ typedef int (*vhost_set_config_op)(struct vhost_dev *dev, const uint8_t *data, uint32_t offset, uint32_t size, uint32_t flags); typedef int (*vhost_get_config_op)(struct vhost_dev *dev, uint8_t *config, - uint32_t config_len); + uint32_t config_len, Error **errp); typedef int (*vhost_crypto_create_session_op)(struct vhost_dev *dev, void *session_info, @@ -172,12 +173,6 @@ typedef struct VhostOps { vhost_force_iommu_op vhost_force_iommu; } VhostOps; -extern const VhostOps user_ops; -extern const VhostOps vdpa_ops; - -int vhost_set_backend_type(struct vhost_dev *dev, - VhostBackendType backend_type); - int vhost_backend_update_device_iotlb(struct vhost_dev *dev, uint64_t iova, uint64_t uaddr, uint64_t len, diff --git a/include/hw/virtio/vhost-user-i2c.h b/include/hw/virtio/vhost-user-i2c.h new file mode 100644 index 00000000000..deae47a76d5 --- /dev/null +++ b/include/hw/virtio/vhost-user-i2c.h @@ -0,0 +1,28 @@ +/* + * Vhost-user i2c virtio device + * + * Copyright (c) 2021 Viresh Kumar + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef _QEMU_VHOST_USER_I2C_H +#define _QEMU_VHOST_USER_I2C_H + +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" + +#define TYPE_VHOST_USER_I2C "vhost-user-i2c-device" +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserI2C, VHOST_USER_I2C) + +struct VHostUserI2C { + VirtIODevice parent; + CharBackend chardev; + struct vhost_virtqueue *vhost_vq; + struct vhost_dev vhost_dev; + VhostUserState vhost_user; + VirtQueue *vq; + bool connected; +}; + +#endif /* _QEMU_VHOST_USER_I2C_H */ diff --git a/include/hw/virtio/vhost-user-rng.h b/include/hw/virtio/vhost-user-rng.h new file mode 100644 index 00000000000..071539996d1 --- /dev/null +++ b/include/hw/virtio/vhost-user-rng.h @@ -0,0 
+1,33 @@ +/* + * Vhost-user RNG virtio device + * + * Copyright (c) 2021 Mathieu Poirier + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef _QEMU_VHOST_USER_RNG_H +#define _QEMU_VHOST_USER_RNG_H + +#include "hw/virtio/virtio.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" +#include "chardev/char-fe.h" + +#define TYPE_VHOST_USER_RNG "vhost-user-rng" +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserRNG, VHOST_USER_RNG) + +struct VHostUserRNG { + /*< private >*/ + VirtIODevice parent; + CharBackend chardev; + struct vhost_virtqueue *vhost_vq; + struct vhost_dev vhost_dev; + VhostUserState vhost_user; + VirtQueue *req_vq; + bool connected; + + /*< public >*/ +}; + +#endif /* _QEMU_VHOST_USER_RNG_H */ diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 9b81a409da8..3ce79a646df 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -13,15 +13,22 @@ #define HW_VIRTIO_VHOST_VDPA_H #include "hw/virtio/virtio.h" +#include "standard-headers/linux/vhost_types.h" + +typedef struct VhostVDPAHostNotifier { + MemoryRegion mr; + void *addr; +} VhostVDPAHostNotifier; typedef struct vhost_vdpa { int device_fd; + int index; uint32_t msg_type; + bool iotlb_batch_begin_sent; MemoryListener listener; + struct vhost_vdpa_iova_range iova_range; struct vhost_dev *dev; + VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; } VhostVDPA; -extern AddressSpace address_space_memory; -extern int vhost_vdpa_get_device_id(struct vhost_dev *dev, - uint32_t *device_id); #endif diff --git a/include/hw/virtio/vhost-vsock-common.h b/include/hw/virtio/vhost-vsock-common.h index e412b5ee982..d8b565b4dac 100644 --- a/include/hw/virtio/vhost-vsock-common.h +++ b/include/hw/virtio/vhost-vsock-common.h @@ -35,6 +35,9 @@ struct VHostVSockCommon { VirtQueue *trans_vq; QEMUTimer *post_load_timer; + + /* features */ + OnOffAuto seqpacket; }; int vhost_vsock_common_start(VirtIODevice *vdev); @@ -43,5 +46,7 @@ int vhost_vsock_common_pre_save(void *opaque); int vhost_vsock_common_post_load(void *opaque, int version_id); void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name); void vhost_vsock_common_unrealize(VirtIODevice *vdev); +uint64_t vhost_vsock_common_get_features(VirtIODevice *vdev, uint64_t features, + Error **errp); #endif /* _QEMU_VHOST_VSOCK_COMMON_H */ diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 4a8bc75415f..58a73e7b7a1 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -71,9 +71,13 @@ struct vhost_dev { int n_tmp_sections; MemoryRegionSection *tmp_sections; struct vhost_virtqueue *vqs; - int nvqs; + unsigned int nvqs; /* the first virtqueue which would be used by this vhost dev */ int vq_index; + /* one past the last vq index for the virtio device (not vhost) */ + int vq_index_end; + /* if non-zero, minimum required value for max_queues */ + int num_queues; uint64_t features; uint64_t acked_features; uint64_t backend_features; @@ -93,6 +97,10 @@ struct vhost_dev { const VhostDevConfigOps *config_ops; }; +extern const VhostOps kernel_ops; +extern const VhostOps user_ops; +extern const VhostOps vdpa_ops; + struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[2]; @@ -102,7 +110,7 @@ struct vhost_net { int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type, - uint32_t busyloop_timeout); + uint32_t busyloop_timeout, Error **errp); void vhost_dev_cleanup(struct vhost_dev *hdev); int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); 
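+/*
+ * vhost_dev_init() now reports failures through @errp in addition to its
+ * errno-style return value. A hedged caller-side sketch (the device state
+ * @s, @opaque and @busyloop_timeout are assumptions, not part of this patch):
+ *
+ *     Error *local_err = NULL;
+ *
+ *     if (vhost_dev_init(&s->dev, opaque, VHOST_BACKEND_TYPE_KERNEL,
+ *                        busyloop_timeout, &local_err) < 0) {
+ *         error_propagate(errp, local_err);
+ *         return;
+ *     }
+ */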
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev); @@ -128,8 +136,8 @@ int vhost_net_set_backend(struct vhost_dev *hdev, struct vhost_vring_file *file); int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); -int vhost_dev_get_config(struct vhost_dev *dev, uint8_t *config, - uint32_t config_len); +int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config, + uint32_t config_len, Error **errp); int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data, uint32_t offset, uint32_t size, uint32_t flags); /* notifier callback in case vhost device config space changed diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h index ef8abe49c5a..7ab8c9dab05 100644 --- a/include/hw/virtio/virtio-bus.h +++ b/include/hw/virtio/virtio-bus.h @@ -93,6 +93,7 @@ struct VirtioBusClass { */ bool has_variable_vring_alignment; AddressSpace *(*get_dma_as)(DeviceState *d); + bool (*iommu_enabled)(DeviceState *d); }; struct VirtioBusState { @@ -154,5 +155,6 @@ void virtio_bus_release_ioeventfd(VirtioBusState *bus); int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign); /* Tell the bus that the ioeventfd handler is no longer required. */ void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n); - +/* Whether the IOMMU is enabled for this device */ +bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev); #endif /* VIRTIO_BUS_H */ diff --git a/include/hw/virtio/virtio-gpu-bswap.h b/include/hw/virtio/virtio-gpu-bswap.h index 203f9e17189..e2bee8f5955 100644 --- a/include/hw/virtio/virtio-gpu-bswap.h +++ b/include/hw/virtio/virtio-gpu-bswap.h @@ -59,4 +59,20 @@ virtio_gpu_t2d_bswap(struct virtio_gpu_transfer_to_host_2d *t2d) le32_to_cpus(&t2d->padding); } +static inline void +virtio_gpu_create_blob_bswap(struct virtio_gpu_resource_create_blob *cblob) +{ + virtio_gpu_ctrl_hdr_bswap(&cblob->hdr); + le32_to_cpus(&cblob->resource_id); + le32_to_cpus(&cblob->blob_flags); + le64_to_cpus(&cblob->size); +} + +static inline void +virtio_gpu_scanout_blob_bswap(struct virtio_gpu_set_scanout_blob *ssb) +{ + virtio_gpu_bswap_32(ssb, sizeof(*ssb) - sizeof(ssb->offsets[3])); + le32_to_cpus(&ssb->offsets[3]); +} + #endif diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index fae149235c5..acfba7c76c1 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -29,7 +29,10 @@ OBJECT_DECLARE_TYPE(VirtIOGPUBase, VirtIOGPUBaseClass, VIRTIO_GPU_BASE) #define TYPE_VIRTIO_GPU "virtio-gpu-device" -OBJECT_DECLARE_SIMPLE_TYPE(VirtIOGPU, VIRTIO_GPU) +OBJECT_DECLARE_TYPE(VirtIOGPU, VirtIOGPUClass, VIRTIO_GPU) + +#define TYPE_VIRTIO_GPU_GL "virtio-gpu-gl-device" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOGPUGL, VIRTIO_GPU_GL) #define TYPE_VHOST_USER_GPU "vhost-user-gpu" OBJECT_DECLARE_SIMPLE_TYPE(VhostUserGPU, VHOST_USER_GPU) @@ -47,9 +50,23 @@ struct virtio_gpu_simple_resource { uint32_t scanout_bitmask; pixman_image_t *image; uint64_t hostmem; + + uint64_t blob_size; + void *blob; + int dmabuf_fd; + uint8_t *remapped; + QTAILQ_ENTRY(virtio_gpu_simple_resource) next; }; +struct virtio_gpu_framebuffer { + pixman_format_code_t format; + uint32_t bytes_pp; + uint32_t width, height; + uint32_t stride; + uint32_t offset; +}; + struct virtio_gpu_scanout { QemuConsole *con; DisplaySurface *ds; @@ -72,6 +89,7 @@ enum virtio_gpu_base_conf_flags { VIRTIO_GPU_FLAG_STATS_ENABLED, VIRTIO_GPU_FLAG_EDID_ENABLED, VIRTIO_GPU_FLAG_DMABUF_ENABLED, + VIRTIO_GPU_FLAG_BLOB_ENABLED, }; #define virtio_gpu_virgl_enabled(_cfg) \ 
@@ -82,6 +100,8 @@ enum virtio_gpu_base_conf_flags { (_cfg.flags & (1 << VIRTIO_GPU_FLAG_EDID_ENABLED)) #define virtio_gpu_dmabuf_enabled(_cfg) \ (_cfg.flags & (1 << VIRTIO_GPU_FLAG_DMABUF_ENABLED)) +#define virtio_gpu_blob_enabled(_cfg) \ + (_cfg.flags & (1 << VIRTIO_GPU_FLAG_BLOB_ENABLED)) struct virtio_gpu_base_conf { uint32_t max_outputs; @@ -108,7 +128,6 @@ struct VirtIOGPUBase { struct virtio_gpu_config virtio_config; const GraphicHwOps *hw_ops; - bool use_virgl_renderer; int renderer_blocked; int enable; @@ -131,6 +150,12 @@ struct VirtIOGPUBaseClass { DEFINE_PROP_UINT32("xres", _state, _conf.xres, 1024), \ DEFINE_PROP_UINT32("yres", _state, _conf.yres, 768) +typedef struct VGPUDMABuf { + QemuDmaBuf buf; + uint32_t scanout_id; + QTAILQ_ENTRY(VGPUDMABuf) next; +} VGPUDMABuf; + struct VirtIOGPU { VirtIOGPUBase parent_obj; @@ -149,8 +174,6 @@ struct VirtIOGPU { uint64_t hostmem; bool processing_cmdq; - bool renderer_inited; - bool renderer_reset; QEMUTimer *fence_poll; QEMUTimer *print_stats; @@ -161,6 +184,28 @@ struct VirtIOGPU { uint32_t req_3d; uint32_t bytes_3d; } stats; + + struct { + QTAILQ_HEAD(, VGPUDMABuf) bufs; + VGPUDMABuf *primary[VIRTIO_GPU_MAX_SCANOUTS]; + } dmabuf; +}; + +struct VirtIOGPUClass { + VirtIOGPUBaseClass parent; + + void (*handle_ctrl)(VirtIODevice *vdev, VirtQueue *vq); + void (*process_cmd)(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd); + void (*update_cursor_data)(VirtIOGPU *g, + struct virtio_gpu_scanout *s, + uint32_t resource_id); +}; + +struct VirtIOGPUGL { + struct VirtIOGPU parent_obj; + + bool renderer_inited; + bool renderer_reset; }; struct VhostUserGPU { @@ -207,17 +252,35 @@ void virtio_gpu_get_display_info(VirtIOGPU *g, void virtio_gpu_get_edid(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd); int virtio_gpu_create_mapping_iov(VirtIOGPU *g, - struct virtio_gpu_resource_attach_backing *ab, + uint32_t nr_entries, uint32_t offset, struct virtio_gpu_ctrl_command *cmd, - uint64_t **addr, struct iovec **iov); + uint64_t **addr, struct iovec **iov, + uint32_t *niov); void virtio_gpu_cleanup_mapping_iov(VirtIOGPU *g, struct iovec *iov, uint32_t count); void virtio_gpu_process_cmdq(VirtIOGPU *g); +void virtio_gpu_device_realize(DeviceState *qdev, Error **errp); +void virtio_gpu_reset(VirtIODevice *vdev); +void virtio_gpu_simple_process_cmd(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd); +void virtio_gpu_update_cursor_data(VirtIOGPU *g, + struct virtio_gpu_scanout *s, + uint32_t resource_id); + +/* virtio-gpu-udmabuf.c */ +bool virtio_gpu_have_udmabuf(void); +void virtio_gpu_init_udmabuf(struct virtio_gpu_simple_resource *res); +void virtio_gpu_fini_udmabuf(struct virtio_gpu_simple_resource *res); +int virtio_gpu_update_dmabuf(VirtIOGPU *g, + uint32_t scanout_id, + struct virtio_gpu_simple_resource *res, + struct virtio_gpu_framebuffer *fb, + struct virtio_gpu_rect *r); /* virtio-gpu-3d.c */ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd); void virtio_gpu_virgl_fence_poll(VirtIOGPU *g); +void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g); void virtio_gpu_virgl_reset(VirtIOGPU *g); int virtio_gpu_virgl_init(VirtIOGPU *g); int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g); diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h index 273e35c04bc..e2339e5b72e 100644 --- a/include/hw/virtio/virtio-iommu.h +++ b/include/hw/virtio/virtio-iommu.h @@ -26,7 +26,7 @@ #include "qom/object.h" #define TYPE_VIRTIO_IOMMU "virtio-iommu-device" -#define TYPE_VIRTIO_IOMMU_PCI 
"virtio-iommu-device-base" +#define TYPE_VIRTIO_IOMMU_PCI "virtio-iommu-pci" OBJECT_DECLARE_SIMPLE_TYPE(VirtIOIOMMU, VIRTIO_IOMMU) #define TYPE_VIRTIO_IOMMU_MEMORY_REGION "virtio-iommu-memory-region" diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h index 4eeb82d5ddb..a5dd6a493b6 100644 --- a/include/hw/virtio/virtio-mem.h +++ b/include/hw/virtio/virtio-mem.h @@ -65,8 +65,8 @@ struct VirtIOMEM { /* notifiers to notify when "size" changes */ NotifierList size_change_notifiers; - /* don't migrate unplugged memory */ - NotifierWithReturn precopy_notifier; + /* listeners to notify on plug/unplug activity. */ + QLIST_HEAD(, RamDiscardListener) rdl_list; }; struct VirtIOMEMClass { diff --git a/include/hw/virtio/virtio-mmio.h b/include/hw/virtio/virtio-mmio.h index d4c4c386ab0..090f7730e77 100644 --- a/include/hw/virtio/virtio-mmio.h +++ b/include/hw/virtio/virtio-mmio.h @@ -49,12 +49,17 @@ typedef struct VirtIOMMIOQueue { uint32_t used[2]; } VirtIOMMIOQueue; +#define VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD_BIT 1 +#define VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD \ + (1 << VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD_BIT) + struct VirtIOMMIOProxy { /* Generic */ SysBusDevice parent_obj; MemoryRegion iomem; qemu_irq irq; bool legacy; + uint32_t flags; /* Guest accessible state needing migration and reset */ uint32_t host_features_sel; uint32_t guest_features_sel; diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index 7e96d193aa6..eb87032627d 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -21,6 +21,8 @@ #include "qemu/option_int.h" #include "qom/object.h" +#include "ebpf/ebpf_rss.h" + #define TYPE_VIRTIO_NET "virtio-net-device" OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET) @@ -130,6 +132,7 @@ typedef struct VirtioNetRscChain { typedef struct VirtioNetRssData { bool enabled; + bool enabled_software_rss; bool redirect; bool populate_hash; uint32_t hash_types; @@ -191,8 +194,9 @@ struct VirtIONet { NICConf nic_conf; DeviceState *qdev; int multiqueue; - uint16_t max_queues; - uint16_t curr_queues; + uint16_t max_queue_pairs; + uint16_t curr_queue_pairs; + uint16_t max_ncs; size_t config_size; char *netclient_name; char *netclient_type; @@ -206,9 +210,12 @@ struct VirtIONet { bool failover_primary_hidden; bool failover; DeviceListener primary_listener; + QDict *primary_opts; + bool primary_opts_from_json; Notifier migration_state; VirtioNetRssData rss_data; struct NetRxPkt *rx_pkt; + struct EBPFRSSContext ebpf_rss; }; void virtio_net_set_netclient_name(VirtIONet *n, const char *name, diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index b7ece7a6a89..8bab9cfb750 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -43,7 +43,7 @@ typedef struct VirtIOFeature { size_t end; } VirtIOFeature; -size_t virtio_feature_get_config_size(VirtIOFeature *features, +size_t virtio_feature_get_config_size(const VirtIOFeature *features, uint64_t host_features); typedef struct VirtQueue VirtQueue; diff --git a/include/hw/watchdog/wdt_aspeed.h b/include/hw/watchdog/wdt_aspeed.h index 80b03661e30..f945cd6c583 100644 --- a/include/hw/watchdog/wdt_aspeed.h +++ b/include/hw/watchdog/wdt_aspeed.h @@ -44,6 +44,7 @@ struct AspeedWDTClass { uint32_t reset_ctrl_reg; void (*reset_pulse)(AspeedWDTState *s, uint32_t property); void (*wdt_reload)(AspeedWDTState *s); + uint64_t (*sanitize_ctrl)(uint64_t data); }; #endif /* WDT_ASPEED_H */ diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h index 
82e56339dd7..a8118b41acf 100644 --- a/include/hw/xen/xen_common.h +++ b/include/hw/xen/xen_common.h @@ -134,6 +134,12 @@ static inline xenforeignmemory_resource_handle *xenforeignmemory_map_resource( return NULL; } +static inline int xenforeignmemory_unmap_resource( + xenforeignmemory_handle *fmem, xenforeignmemory_resource_handle *fres) +{ + return 0; +} + #endif /* CONFIG_XEN_CTRL_INTERFACE_VERSION < 41100 */ #if CONFIG_XEN_CTRL_INTERFACE_VERSION < 41000 diff --git a/include/libdecnumber/decNumber.h b/include/libdecnumber/decNumber.h index aa115fed079..41bc2a0d36f 100644 --- a/include/libdecnumber/decNumber.h +++ b/include/libdecnumber/decNumber.h @@ -116,12 +116,16 @@ decNumber * decNumberFromUInt32(decNumber *, uint32_t); decNumber *decNumberFromInt64(decNumber *, int64_t); decNumber *decNumberFromUInt64(decNumber *, uint64_t); + decNumber *decNumberFromInt128(decNumber *, uint64_t, int64_t); + decNumber *decNumberFromUInt128(decNumber *, uint64_t, uint64_t); decNumber * decNumberFromString(decNumber *, const char *, decContext *); char * decNumberToString(const decNumber *, char *); char * decNumberToEngString(const decNumber *, char *); uint32_t decNumberToUInt32(const decNumber *, decContext *); int32_t decNumberToInt32(const decNumber *, decContext *); int64_t decNumberIntegralToInt64(const decNumber *dn, decContext *set); + void decNumberIntegralToInt128(const decNumber *dn, decContext *set, + uint64_t *plow, uint64_t *phigh); uint8_t * decNumberGetBCD(const decNumber *, uint8_t *); decNumber * decNumberSetBCD(decNumber *, const uint8_t *, uint32_t); diff --git a/include/libdecnumber/decNumberLocal.h b/include/libdecnumber/decNumberLocal.h index 4d53c077f2a..6198ca85930 100644 --- a/include/libdecnumber/decNumberLocal.h +++ b/include/libdecnumber/decNumberLocal.h @@ -98,7 +98,7 @@ /* Shared lookup tables */ extern const uByte DECSTICKYTAB[10]; /* re-round digits if sticky */ - extern const uLong DECPOWERS[19]; /* powers of ten table */ + extern const uLong DECPOWERS[20]; /* powers of ten table */ /* The following are included from decDPD.h */ extern const uShort DPD2BIN[1024]; /* DPD -> 0-999 */ extern const uShort BIN2DPD[1000]; /* 0-999 -> DPD */ diff --git a/include/migration/blocker.h b/include/migration/blocker.h index acd27018e94..9cebe2ba06a 100644 --- a/include/migration/blocker.h +++ b/include/migration/blocker.h @@ -25,6 +25,22 @@ */ int migrate_add_blocker(Error *reason, Error **errp); +/** + * @migrate_add_blocker_internal - prevent migration from proceeding without + * only-migrate implications + * + * @reason - an error to be returned whenever migration is attempted + * + * @errp - [out] The reason (if any) we cannot block migration right now. + * + * @returns - 0 on success, -EBUSY on failure, with errp set. + * + * Some of the migration blockers can be temporary (e.g., for a few seconds), + * so it shouldn't need to conflict with "-only-migratable". For those cases, + * we can call this function rather than @migrate_add_blocker(). 
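+ *
+ * A hedged usage sketch (the surrounding caller and its @errp are assumed;
+ * the message text is only an example):
+ *
+ *     Error *blocker = NULL;
+ *
+ *     error_setg(&blocker, "feature X is in use, migration is blocked");
+ *     if (migrate_add_blocker_internal(blocker, errp) < 0) {
+ *         error_free(blocker);
+ *         return;
+ *     }
+ *     ...
+ *     migrate_del_blocker(blocker);
+ *     error_free(blocker);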
+ */ +int migrate_add_blocker_internal(Error *reason, Error **errp); + /** * @migrate_del_blocker - remove a blocking error from migration * diff --git a/include/migration/misc.h b/include/migration/misc.h index 738675ef528..465906710de 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -37,7 +37,6 @@ void precopy_infrastructure_init(void); void precopy_add_notifier(NotifierWithReturn *n); void precopy_remove_notifier(NotifierWithReturn *n); int precopy_notify(PrecopyNotifyReason reason, Error **errp); -void precopy_enable_free_page_optimization(void); void ram_mig_init(void); void qemu_guest_free_page_hint(void *addr, size_t len); diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index ab64f7c217b..3f0290f21ad 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -153,6 +153,7 @@ typedef enum { MIG_PRI_DEFAULT = 0, MIG_PRI_IOMMU, /* Must happen before PCI devices */ MIG_PRI_PCI_BUS, /* Must happen before IOMMU */ + MIG_PRI_VIRTIO_MEM, /* Must happen before IOMMU */ MIG_PRI_GICV3_ITS, /* Must happen before PCI devices */ MIG_PRI_GICV3, /* Must happen before the ITS */ MIG_PRI_MAX, @@ -194,8 +195,6 @@ struct VMStateDescription { const VMStateDescription **subsections; }; -extern const VMStateDescription vmstate_dummy; - extern const VMStateInfo vmstate_info_bool; extern const VMStateInfo vmstate_info_int8; diff --git a/include/monitor/hmp-target.h b/include/monitor/hmp-target.h index 60fc92722ae..ffdc15a34b6 100644 --- a/include/monitor/hmp-target.h +++ b/include/monitor/hmp-target.h @@ -48,6 +48,7 @@ void hmp_info_mem(Monitor *mon, const QDict *qdict); void hmp_info_tlb(Monitor *mon, const QDict *qdict); void hmp_mce(Monitor *mon, const QDict *qdict); void hmp_info_local_apic(Monitor *mon, const QDict *qdict); -void hmp_info_io_apic(Monitor *mon, const QDict *qdict); +void hmp_info_sev(Monitor *mon, const QDict *qdict); +void hmp_info_sgx(Monitor *mon, const QDict *qdict); #endif /* MONITOR_HMP_TARGET_H */ diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h index 605d57287ae..96d014826ad 100644 --- a/include/monitor/hmp.h +++ b/include/monitor/hmp.h @@ -15,8 +15,9 @@ #define HMP_H #include "qemu/readline.h" +#include "qapi/qapi-types-common.h" -void hmp_handle_error(Monitor *mon, Error *err); +bool hmp_handle_error(Monitor *mon, Error *err); void hmp_info_name(Monitor *mon, const QDict *qdict); void hmp_info_version(Monitor *mon, const QDict *qdict); @@ -124,10 +125,13 @@ void hmp_info_ramblock(Monitor *mon, const QDict *qdict); void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict); void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict); void hmp_info_memory_size_summary(Monitor *mon, const QDict *qdict); -void hmp_info_sev(Monitor *mon, const QDict *qdict); void hmp_info_replay(Monitor *mon, const QDict *qdict); void hmp_replay_break(Monitor *mon, const QDict *qdict); void hmp_replay_delete_break(Monitor *mon, const QDict *qdict); void hmp_replay_seek(Monitor *mon, const QDict *qdict); +void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict); +void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict); +void hmp_human_readable_text_helper(Monitor *mon, + HumanReadableText *(*qmp_handler)(Error **)); #endif diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h index af3887bb71d..12d395d62d6 100644 --- a/include/monitor/monitor.h +++ b/include/monitor/monitor.h @@ -4,7 +4,7 @@ #include "block/block.h" #include "qapi/qapi-types-misc.h" #include "qemu/readline.h" -#include 
"include/exec/hwaddr.h" +#include "exec/hwaddr.h" typedef struct MonitorHMP MonitorHMP; typedef struct MonitorOptions MonitorOptions; @@ -51,4 +51,9 @@ int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags); void monitor_fdset_dup_fd_remove(int dup_fd); int64_t monitor_fdset_dup_fd_find(int dup_fd); +void monitor_register_hmp(const char *name, bool info, + void (*cmd)(Monitor *mon, const QDict *qdict)); +void monitor_register_hmp_info_hrt(const char *name, + HumanReadableText *(*handler)(Error **errp)); + #endif /* MONITOR_H */ diff --git a/include/monitor/qdev.h b/include/monitor/qdev.h index eaa947d73a3..1d57bf65779 100644 --- a/include/monitor/qdev.h +++ b/include/monitor/qdev.h @@ -9,6 +9,31 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp); int qdev_device_help(QemuOpts *opts); DeviceState *qdev_device_add(QemuOpts *opts, Error **errp); -void qdev_set_id(DeviceState *dev, const char *id); +DeviceState *qdev_device_add_from_qdict(const QDict *opts, + bool from_json, Error **errp); + +/** + * qdev_set_id: parent the device and set its id if provided. + * @dev: device to handle + * @id: id to be given to the device, or NULL. + * + * Returns: the id of the device in case of success; otherwise NULL. + * + * @dev must be unrealized, unparented and must not have an id. + * + * If @id is non-NULL, this function tries to setup @dev qom path as + * "/peripheral/id". If @id is already taken, it fails. If it succeeds, + * the id field of @dev is set to @id (@dev now owns the given @id + * parameter). + * + * If @id is NULL, this function generates a unique name and setups @dev + * qom path as "/peripheral-anon/name". This name is not set as the id + * of @dev. + * + * Upon success, it returns the id/name (generated or provided). The + * returned string is owned by the corresponding child property and must + * not be freed by the caller. 
+ */ +const char *qdev_set_id(DeviceState *dev, char *id, Error **errp); #endif diff --git a/include/net/net.h b/include/net/net.h index 1ef536d7712..523136c7acb 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -61,6 +61,8 @@ typedef int (SetVnetBE)(NetClientState *, bool); typedef struct SocketReadState SocketReadState; typedef void (SocketReadStateFinalize)(SocketReadState *rs); typedef void (NetAnnounce)(NetClientState *); +typedef bool (SetSteeringEBPF)(NetClientState *, int); +typedef bool (NetCheckPeerType)(NetClientState *, ObjectClass *, Error **); typedef struct NetClientInfo { NetClientDriver type; @@ -82,6 +84,8 @@ typedef struct NetClientInfo { SetVnetLE *set_vnet_le; SetVnetBE *set_vnet_be; NetAnnounce *announce; + SetSteeringEBPF *set_steering_ebpf; + NetCheckPeerType *check_peer_type; } NetClientInfo; struct NetClientState { @@ -101,6 +105,7 @@ struct NetClientState { int vnet_hdr_len; bool is_netdev; bool do_not_pad; /* do not pad to the minimum ethernet frame length */ + bool is_datapath; QTAILQ_HEAD(, NetFilterState) filters; }; @@ -132,6 +137,10 @@ NetClientState *qemu_new_net_client(NetClientInfo *info, NetClientState *peer, const char *model, const char *name); +NetClientState *qemu_new_net_control_client(NetClientInfo *info, + NetClientState *peer, + const char *model, + const char *name); NICState *qemu_new_nic(NetClientInfo *info, NICConf *conf, const char *model, diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h index 45e34b7cfcf..b81f9a6f2a0 100644 --- a/include/net/vhost-vdpa.h +++ b/include/net/vhost-vdpa.h @@ -15,7 +15,6 @@ #define TYPE_VHOST_VDPA "vhost-vdpa" struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc); -uint64_t vhost_vdpa_get_acked_features(NetClientState *nc); extern const int vdpa_feature_bits[]; diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h index 172b0051d81..387e913e4e6 100644 --- a/include/net/vhost_net.h +++ b/include/net/vhost_net.h @@ -14,14 +14,17 @@ typedef struct VhostNetOptions { VhostBackendType backend_type; NetClientState *net_backend; uint32_t busyloop_timeout; + unsigned int nvqs; void *opaque; } VhostNetOptions; uint64_t vhost_net_get_max_queues(VHostNetState *net); struct vhost_net *vhost_net_init(VhostNetOptions *options); -int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, int total_queues); -void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, int total_queues); +int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, + int data_queue_pairs, int cvq); +void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, + int data_queue_pairs, int cvq); void vhost_net_cleanup(VHostNetState *net); diff --git a/include/qapi/compat-policy.h b/include/qapi/compat-policy.h index 1083f951228..8b7b25c0b5a 100644 --- a/include/qapi/compat-policy.h +++ b/include/qapi/compat-policy.h @@ -13,10 +13,17 @@ #ifndef QAPI_COMPAT_POLICY_H #define QAPI_COMPAT_POLICY_H +#include "qapi/error.h" #include "qapi/qapi-types-compat.h" extern CompatPolicy compat_policy; +bool compat_policy_input_ok(unsigned special_features, + const CompatPolicy *policy, + ErrorClass error_class, + const char *kind, const char *name, + Error **errp); + /* * Create a QObject input visitor for @obj for use with QMP * diff --git a/include/qapi/forward-visitor.h b/include/qapi/forward-visitor.h new file mode 100644 index 00000000000..50fb3e9d50b --- /dev/null +++ b/include/qapi/forward-visitor.h @@ -0,0 +1,27 @@ +/* + * Forwarding visitor + * + * Copyright Red Hat, Inc. 
2021 + * + * Author: Paolo Bonzini + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef FORWARD_VISITOR_H +#define FORWARD_VISITOR_H + +#include "qapi/visitor.h" + +typedef struct ForwardFieldVisitor ForwardFieldVisitor; + +/* + * The forwarding visitor only expects a single name, @from, to be passed for + * toplevel fields. It is converted to @to and forwarded to the @target visitor. + * Calls within a struct are forwarded without changing the name. + */ +Visitor *visitor_forward_field(Visitor *target, const char *from, const char *to); + +#endif diff --git a/include/qapi/qmp/dispatch.h b/include/qapi/qmp/dispatch.h index 075203dc676..1e4240fd0db 100644 --- a/include/qapi/qmp/dispatch.h +++ b/include/qapi/qmp/dispatch.h @@ -21,12 +21,10 @@ typedef void (QmpCommandFunc)(QDict *, QObject **, Error **); typedef enum QmpCommandOptions { - QCO_NO_OPTIONS = 0x0, QCO_NO_SUCCESS_RESP = (1U << 0), QCO_ALLOW_OOB = (1U << 1), QCO_ALLOW_PRECONFIG = (1U << 2), QCO_COROUTINE = (1U << 3), - QCO_DEPRECATED = (1U << 4), } QmpCommandOptions; typedef struct QmpCommand @@ -35,6 +33,7 @@ typedef struct QmpCommand /* Runs in coroutine context if QCO_COROUTINE is set */ QmpCommandFunc *fn; QmpCommandOptions options; + unsigned special_features; QTAILQ_ENTRY(QmpCommand) node; bool enabled; const char *disable_reason; @@ -43,7 +42,8 @@ typedef struct QmpCommand typedef QTAILQ_HEAD(QmpCommandList, QmpCommand) QmpCommandList; void qmp_register_command(QmpCommandList *cmds, const char *name, - QmpCommandFunc *fn, QmpCommandOptions options); + QmpCommandFunc *fn, QmpCommandOptions options, + unsigned special_features); const QmpCommand *qmp_find_command(const QmpCommandList *cmds, const char *name); void qmp_disable_command(QmpCommandList *cmds, const char *name, diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h index 9934539c1b7..d5b5430e21a 100644 --- a/include/qapi/qmp/qdict.h +++ b/include/qapi/qmp/qdict.h @@ -64,4 +64,7 @@ const char *qdict_get_try_str(const QDict *qdict, const char *key); QDict *qdict_clone_shallow(const QDict *src); +QObject *qdict_crumple(const QDict *src, Error **errp); +void qdict_flatten(QDict *qdict); + #endif /* QDICT_H */ diff --git a/include/qapi/qobject-input-visitor.h b/include/qapi/qobject-input-visitor.h index 8d693888105..95985e25e52 100644 --- a/include/qapi/qobject-input-visitor.h +++ b/include/qapi/qobject-input-visitor.h @@ -15,7 +15,6 @@ #ifndef QOBJECT_INPUT_VISITOR_H #define QOBJECT_INPUT_VISITOR_H -#include "qapi/qapi-types-compat.h" #include "qapi/visitor.h" typedef struct QObjectInputVisitor QObjectInputVisitor; @@ -59,9 +58,6 @@ typedef struct QObjectInputVisitor QObjectInputVisitor; */ Visitor *qobject_input_visitor_new(QObject *obj); -void qobject_input_visitor_set_policy(Visitor *v, - CompatPolicyInput deprecated); - /* * Create a QObject input visitor for @obj for use with keyval_parse() * diff --git a/include/qapi/qobject-output-visitor.h b/include/qapi/qobject-output-visitor.h index f2a2f92a004..2b1726baf55 100644 --- a/include/qapi/qobject-output-visitor.h +++ b/include/qapi/qobject-output-visitor.h @@ -15,7 +15,6 @@ #define QOBJECT_OUTPUT_VISITOR_H #include "qapi/visitor.h" -#include "qapi/qapi-types-compat.h" typedef struct QObjectOutputVisitor QObjectOutputVisitor; @@ -54,7 +53,4 @@ typedef struct QObjectOutputVisitor QObjectOutputVisitor; */ Visitor *qobject_output_visitor_new(QObject **result); -void 
qobject_output_visitor_set_policy(Visitor *v, - CompatPolicyOutput deprecated); - #endif diff --git a/include/qapi/type-helpers.h b/include/qapi/type-helpers.h new file mode 100644 index 00000000000..be1f1815264 --- /dev/null +++ b/include/qapi/type-helpers.h @@ -0,0 +1,14 @@ +/* + * QAPI common helper functions + * + * This file provides helper functions related to types defined + * in the QAPI schema. + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qapi/qapi-types-common.h" + +HumanReadableText *human_readable_text_from_str(GString *str); diff --git a/include/qapi/util.h b/include/qapi/util.h index d7bfb30e25c..81a2b13a333 100644 --- a/include/qapi/util.h +++ b/include/qapi/util.h @@ -11,9 +11,15 @@ #ifndef QAPI_UTIL_H #define QAPI_UTIL_H +typedef enum { + QAPI_DEPRECATED, + QAPI_UNSTABLE, +} QapiSpecialFeature; + typedef struct QEnumLookup { const char *const *array; - int size; + const unsigned char *const special_features; + const int size; } QEnumLookup; const char *qapi_enum_lookup(const QEnumLookup *lookup, int val); diff --git a/include/qapi/visitor-impl.h b/include/qapi/visitor-impl.h index 3b950f6e3db..2badec5ba46 100644 --- a/include/qapi/visitor-impl.h +++ b/include/qapi/visitor-impl.h @@ -114,14 +114,19 @@ struct Visitor void (*optional)(Visitor *v, const char *name, bool *present); /* Optional */ - bool (*deprecated_accept)(Visitor *v, const char *name, Error **errp); + bool (*policy_reject)(Visitor *v, const char *name, + unsigned special_features, Error **errp); /* Optional */ - bool (*deprecated)(Visitor *v, const char *name); + bool (*policy_skip)(Visitor *v, const char *name, + unsigned special_features); /* Must be set */ VisitorType type; + /* Optional */ + struct CompatPolicy compat_policy; + /* Must be set for output visitors, optional otherwise. */ void (*complete)(Visitor *v, void *opaque); diff --git a/include/qapi/visitor.h b/include/qapi/visitor.h index b3c9ef7a810..d53a84c9ba4 100644 --- a/include/qapi/visitor.h +++ b/include/qapi/visitor.h @@ -16,6 +16,7 @@ #define QAPI_VISITOR_H #include "qapi/qapi-builtin-types.h" +#include "qapi/qapi-types-compat.h" /* * The QAPI schema defines both a set of C data types, and a QMP wire @@ -460,22 +461,39 @@ void visit_end_alternate(Visitor *v, void **obj); bool visit_optional(Visitor *v, const char *name, bool *present); /* - * Should we reject deprecated member @name? + * Should we reject member @name due to policy? + * + * @special_features is the member's special features encoded as a + * bitset of QapiSpecialFeature. * * @name must not be NULL. This function is only useful between * visit_start_struct() and visit_end_struct(), since only objects * have deprecated members. */ -bool visit_deprecated_accept(Visitor *v, const char *name, Error **errp); +bool visit_policy_reject(Visitor *v, const char *name, + unsigned special_features, Error **errp); /* - * Should we visit deprecated member @name? + * + * Should we skip member @name due to policy? + * + * @special_features is the member's special features encoded as a + * bitset of QapiSpecialFeature. * * @name must not be NULL. This function is only useful between * visit_start_struct() and visit_end_struct(), since only objects * have deprecated members. 
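To make the intended call pattern concrete, a hedged sketch of how visitor code might consult these hooks for a deprecated member "foo"; the member name, its string type and the surrounding control flow are assumptions, and the real generated code may differ:

    if (visit_policy_reject(v, "foo", 1u << QAPI_DEPRECATED, errp)) {
        return false;                                /* rejected by input policy */
    }
    if (!visit_policy_skip(v, "foo", 1u << QAPI_DEPRECATED)) {
        if (!visit_type_str(v, "foo", &obj->foo, errp)) {
            return false;
        }
    }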
*/ -bool visit_deprecated(Visitor *v, const char *name); +bool visit_policy_skip(Visitor *v, const char *name, + unsigned special_features); + +/* + * Set policy for handling deprecated management interfaces. + * + * Intended use: call visit_set_policy(v, &compat_policy) when + * visiting management interface input or output. + */ +void visit_set_policy(Visitor *v, CompatPolicy *policy); /* * Visit an enum value. diff --git a/include/qemu/accel.h b/include/qemu/accel.h index b9d6d69eb8d..4f4c283f6fc 100644 --- a/include/qemu/accel.h +++ b/include/qemu/accel.h @@ -78,4 +78,17 @@ int accel_init_machine(AccelState *accel, MachineState *ms); void accel_setup_post(MachineState *ms); #endif /* !CONFIG_USER_ONLY */ +/** + * accel_cpu_instance_init: + * @cpu: The CPU that needs to do accel-specific object initializations. + */ +void accel_cpu_instance_init(CPUState *cpu); + +/** + * accel_cpu_realizefn: + * @cpu: The CPU that needs to call accel-specific cpu realization. + * @errp: currently unused. + */ +bool accel_cpu_realizefn(CPUState *cpu, Error **errp); + #endif /* QEMU_ACCEL_H */ diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index 8f4b3a80fbd..112a29910be 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -8,7 +8,7 @@ * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. * - * See docs/devel/atomics.txt for discussion about the guarantees each + * See docs/devel/atomics.rst for discussion about the guarantees each * atomic primitive is meant to provide. */ @@ -60,8 +60,9 @@ (unsigned short)1, \ (expr)+0)))))) -#ifdef __ATOMIC_RELAXED -/* For C11 atomic ops */ +#ifndef __ATOMIC_RELAXED +#error "Expecting C11 atomic ops" +#endif /* Manual memory barriers * @@ -239,212 +240,27 @@ #define qatomic_xor(ptr, n) \ ((void) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST)) -#else /* __ATOMIC_RELAXED */ - -#ifdef __alpha__ -#define smp_read_barrier_depends() asm volatile("mb":::"memory") -#endif - -#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) - -/* - * Because of the strongly ordered storage model, wmb() and rmb() are nops - * here (a compiler barrier only). QEMU doesn't do accesses to write-combining - * qemu memory or non-temporal load/stores from C code. - */ -#define smp_mb_release() barrier() -#define smp_mb_acquire() barrier() - -/* - * __sync_lock_test_and_set() is documented to be an acquire barrier only, - * but it is a full barrier at the hardware level. Add a compiler barrier - * to make it a full barrier also at the compiler level. - */ -#define qatomic_xchg(ptr, i) (barrier(), __sync_lock_test_and_set(ptr, i)) - -#elif defined(_ARCH_PPC) - -/* - * We use an eieio() for wmb() on powerpc. This assumes we don't - * need to order cacheable and non-cacheable stores with respect to - * each other. - * - * smp_mb has the same problem as on x86 for not-very-new GCC - * (http://patchwork.ozlabs.org/patch/126184/, Nov 2011). 
- */ -#define smp_wmb() ({ asm volatile("eieio" ::: "memory"); (void)0; }) -#if defined(__powerpc64__) -#define smp_mb_release() ({ asm volatile("lwsync" ::: "memory"); (void)0; }) -#define smp_mb_acquire() ({ asm volatile("lwsync" ::: "memory"); (void)0; }) -#else -#define smp_mb_release() ({ asm volatile("sync" ::: "memory"); (void)0; }) -#define smp_mb_acquire() ({ asm volatile("sync" ::: "memory"); (void)0; }) -#endif -#define smp_mb() ({ asm volatile("sync" ::: "memory"); (void)0; }) - -#endif /* _ARCH_PPC */ - -/* - * For (host) platforms we don't have explicit barrier definitions - * for, we use the gcc __sync_synchronize() primitive to generate a - * full barrier. This should be safe on all platforms, though it may - * be overkill for smp_mb_acquire() and smp_mb_release(). - */ -#ifndef smp_mb -#define smp_mb() __sync_synchronize() -#endif - -#ifndef smp_mb_acquire -#define smp_mb_acquire() __sync_synchronize() -#endif - -#ifndef smp_mb_release -#define smp_mb_release() __sync_synchronize() -#endif - -#ifndef smp_read_barrier_depends -#define smp_read_barrier_depends() barrier() -#endif - -#ifndef signal_barrier -#define signal_barrier() barrier() -#endif - -/* These will only be atomic if the processor does the fetch or store - * in a single issue memory operation - */ -#define qatomic_read__nocheck(p) (*(__typeof__(*(p)) volatile*) (p)) -#define qatomic_set__nocheck(p, i) ((*(__typeof__(*(p)) volatile*) (p)) = (i)) - -#define qatomic_read(ptr) qatomic_read__nocheck(ptr) -#define qatomic_set(ptr, i) qatomic_set__nocheck(ptr,i) - -/** - * qatomic_rcu_read - reads a RCU-protected pointer to a local variable - * into a RCU read-side critical section. The pointer can later be safely - * dereferenced within the critical section. - * - * This ensures that the pointer copy is invariant thorough the whole critical - * section. - * - * Inserts memory barriers on architectures that require them (currently only - * Alpha) and documents which pointers are protected by RCU. - * - * qatomic_rcu_read also includes a compiler barrier to ensure that - * value-speculative optimizations (e.g. VSS: Value Speculation - * Scheduling) does not perform the data read before the pointer read - * by speculating the value of the pointer. - * - * Should match qatomic_rcu_set(), qatomic_xchg(), qatomic_cmpxchg(). - */ -#define qatomic_rcu_read(ptr) ({ \ - typeof(*ptr) _val = qatomic_read(ptr); \ - smp_read_barrier_depends(); \ - _val; \ -}) - -/** - * qatomic_rcu_set - assigns (publicizes) a pointer to a new data structure - * meant to be read by RCU read-side critical sections. - * - * Documents which pointers will be dereferenced by RCU read-side critical - * sections and adds the required memory barriers on architectures requiring - * them. It also makes sure the compiler does not reorder code initializing the - * data structure before its publication. - * - * Should match qatomic_rcu_read(). - */ -#define qatomic_rcu_set(ptr, i) do { \ - smp_wmb(); \ - qatomic_set(ptr, i); \ -} while (0) - -#define qatomic_load_acquire(ptr) ({ \ - typeof(*ptr) _val = qatomic_read(ptr); \ - smp_mb_acquire(); \ - _val; \ -}) - -#define qatomic_store_release(ptr, i) do { \ - smp_mb_release(); \ - qatomic_set(ptr, i); \ -} while (0) - -#ifndef qatomic_xchg -#if defined(__clang__) -#define qatomic_xchg(ptr, i) __sync_swap(ptr, i) -#else -/* __sync_lock_test_and_set() is documented to be an acquire barrier only. 
*/ -#define qatomic_xchg(ptr, i) (smp_mb(), __sync_lock_test_and_set(ptr, i)) -#endif -#endif -#define qatomic_xchg__nocheck qatomic_xchg - -/* Provide shorter names for GCC atomic builtins. */ -#define qatomic_fetch_inc(ptr) __sync_fetch_and_add(ptr, 1) -#define qatomic_fetch_dec(ptr) __sync_fetch_and_add(ptr, -1) - -#define qatomic_fetch_add(ptr, n) __sync_fetch_and_add(ptr, n) -#define qatomic_fetch_sub(ptr, n) __sync_fetch_and_sub(ptr, n) -#define qatomic_fetch_and(ptr, n) __sync_fetch_and_and(ptr, n) -#define qatomic_fetch_or(ptr, n) __sync_fetch_and_or(ptr, n) -#define qatomic_fetch_xor(ptr, n) __sync_fetch_and_xor(ptr, n) - -#define qatomic_inc_fetch(ptr) __sync_add_and_fetch(ptr, 1) -#define qatomic_dec_fetch(ptr) __sync_add_and_fetch(ptr, -1) -#define qatomic_add_fetch(ptr, n) __sync_add_and_fetch(ptr, n) -#define qatomic_sub_fetch(ptr, n) __sync_sub_and_fetch(ptr, n) -#define qatomic_and_fetch(ptr, n) __sync_and_and_fetch(ptr, n) -#define qatomic_or_fetch(ptr, n) __sync_or_and_fetch(ptr, n) -#define qatomic_xor_fetch(ptr, n) __sync_xor_and_fetch(ptr, n) - -#define qatomic_cmpxchg(ptr, old, new) \ - __sync_val_compare_and_swap(ptr, old, new) -#define qatomic_cmpxchg__nocheck(ptr, old, new) qatomic_cmpxchg(ptr, old, new) - -/* And even shorter names that return void. */ -#define qatomic_inc(ptr) ((void) __sync_fetch_and_add(ptr, 1)) -#define qatomic_dec(ptr) ((void) __sync_fetch_and_add(ptr, -1)) -#define qatomic_add(ptr, n) ((void) __sync_fetch_and_add(ptr, n)) -#define qatomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n)) -#define qatomic_and(ptr, n) ((void) __sync_fetch_and_and(ptr, n)) -#define qatomic_or(ptr, n) ((void) __sync_fetch_and_or(ptr, n)) -#define qatomic_xor(ptr, n) ((void) __sync_fetch_and_xor(ptr, n)) - -#endif /* __ATOMIC_RELAXED */ - -#ifndef smp_wmb #define smp_wmb() smp_mb_release() -#endif -#ifndef smp_rmb #define smp_rmb() smp_mb_acquire() -#endif - -/* This is more efficient than a store plus a fence. */ -#if !defined(__SANITIZE_THREAD__) -#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) -#define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i)) -#endif -#endif /* qatomic_mb_read/set semantics map Java volatile variables. They are * less expensive on some platforms (notably POWER) than fully * sequentially consistent operations. * * As long as they are used as paired operations they are safe to - * use. See docs/devel/atomics.txt for more discussion. + * use. See docs/devel/atomics.rst for more discussion. */ -#ifndef qatomic_mb_read #define qatomic_mb_read(ptr) \ qatomic_load_acquire(ptr) -#endif -#ifndef qatomic_mb_set -#define qatomic_mb_set(ptr, i) do { \ - qatomic_store_release(ptr, i); \ - smp_mb(); \ -} while(0) +#if !defined(__SANITIZE_THREAD__) && \ + (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) +/* This is more efficient than a store plus a fence. */ +# define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i)) +#else +# define qatomic_mb_set(ptr, i) \ + ({ qatomic_store_release(ptr, i); smp_mb(); }) #endif #define qatomic_fetch_inc_nonzero(ptr) ({ \ @@ -455,28 +271,29 @@ _oldn; \ }) -/* Abstractions to access atomically (i.e. 
"once") i64/u64 variables */ -#ifdef CONFIG_ATOMIC64 -static inline int64_t qatomic_read_i64(const int64_t *ptr) -{ - /* use __nocheck because sizeof(void *) might be < sizeof(u64) */ - return qatomic_read__nocheck(ptr); -} - -static inline uint64_t qatomic_read_u64(const uint64_t *ptr) -{ - return qatomic_read__nocheck(ptr); -} - -static inline void qatomic_set_i64(int64_t *ptr, int64_t val) -{ - qatomic_set__nocheck(ptr, val); -} +/* + * Abstractions to access atomically (i.e. "once") i64/u64 variables. + * + * The i386 abi is odd in that by default members are only aligned to + * 4 bytes, which means that 8-byte types can wind up mis-aligned. + * Clang will then warn about this, and emit a call into libatomic. + * + * Use of these types in structures when they will be used with atomic + * operations can avoid this. + */ +typedef int64_t aligned_int64_t __attribute__((aligned(8))); +typedef uint64_t aligned_uint64_t __attribute__((aligned(8))); -static inline void qatomic_set_u64(uint64_t *ptr, uint64_t val) -{ - qatomic_set__nocheck(ptr, val); -} +#ifdef CONFIG_ATOMIC64 +/* Use __nocheck because sizeof(void *) might be < sizeof(u64) */ +#define qatomic_read_i64(P) \ + _Generic(*(P), int64_t: qatomic_read__nocheck(P)) +#define qatomic_read_u64(P) \ + _Generic(*(P), uint64_t: qatomic_read__nocheck(P)) +#define qatomic_set_i64(P, V) \ + _Generic(*(P), int64_t: qatomic_set__nocheck(P, V)) +#define qatomic_set_u64(P, V) \ + _Generic(*(P), uint64_t: qatomic_set__nocheck(P, V)) static inline void qatomic64_init(void) { diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h index ad2bcf45b4f..adb9a1a260b 100644 --- a/include/qemu/atomic128.h +++ b/include/qemu/atomic128.h @@ -6,7 +6,7 @@ * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. * - * See docs/devel/atomics.txt for discussion about the guarantees each + * See docs/devel/atomics.rst for discussion about the guarantees each * atomic primitive is meant to provide. */ diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h index efaa8f94c43..92ee3788bb5 100644 --- a/include/qemu/bitops.h +++ b/include/qemu/bitops.h @@ -140,7 +140,8 @@ static inline int test_bit(long nr, const unsigned long *addr) * @addr: The address to start the search at * @size: The maximum size to search * - * Returns the bit number of the first set bit, or size. + * Returns the bit number of the last set bit, + * or @size if there is no set bit in the bitmap. */ unsigned long find_last_bit(const unsigned long *addr, unsigned long size); @@ -150,6 +151,9 @@ unsigned long find_last_bit(const unsigned long *addr, * @addr: The address to base the search on * @offset: The bitnumber to start searching at * @size: The bitmap size in bits + * + * Returns the bit number of the next set bit, + * or @size if there are no further set bits in the bitmap. */ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, @@ -160,6 +164,9 @@ unsigned long find_next_bit(const unsigned long *addr, * @addr: The address to base the search on * @offset: The bitnumber to start searching at * @size: The bitmap size in bits + * + * Returns the bit number of the next cleared bit, + * or @size if there are no further clear bits in the bitmap. 
*/ unsigned long find_next_zero_bit(const unsigned long *addr, @@ -171,7 +178,8 @@ unsigned long find_next_zero_bit(const unsigned long *addr, * @addr: The address to start the search at * @size: The maximum size to search * - * Returns the bit number of the first set bit. + * Returns the bit number of the first set bit, + * or @size if there is no set bit in the bitmap. */ static inline unsigned long find_first_bit(const unsigned long *addr, unsigned long size) @@ -194,7 +202,8 @@ static inline unsigned long find_first_bit(const unsigned long *addr, * @addr: The address to start the search at * @size: The maximum size to search * - * Returns the bit number of the first cleared bit. + * Returns the bit number of the first cleared bit, + * or @size if there is no clear bit in the bitmap. */ static inline unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) @@ -282,6 +291,35 @@ static inline uint64_t ror64(uint64_t word, unsigned int shift) return (word >> shift) | (word << ((64 - shift) & 63)); } +/** + * hswap32 - swap 16-bit halfwords within a 32-bit value + * @h: value to swap + */ +static inline uint32_t hswap32(uint32_t h) +{ + return rol32(h, 16); +} + +/** + * hswap64 - swap 16-bit halfwords within a 64-bit value + * @h: value to swap + */ +static inline uint64_t hswap64(uint64_t h) +{ + uint64_t m = 0x0000ffff0000ffffull; + h = rol64(h, 32); + return ((h & m) << 16) | ((h >> 16) & m); +} + +/** + * wswap64 - swap 32-bit words within a 64-bit value + * @h: value to swap + */ +static inline uint64_t wswap64(uint64_t h) +{ + return rol64(h, 32); +} + /** * extract32: * @value: the value to extract the bit field from diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h index 4aaf992b5d7..2d3bb8bbedd 100644 --- a/include/qemu/bswap.h +++ b/include/qemu/bswap.h @@ -1,8 +1,6 @@ #ifndef BSWAP_H #define BSWAP_H -#include "fpu/softfloat-types.h" - #ifdef CONFIG_MACHINE_BSWAP_H # include # include @@ -12,7 +10,18 @@ # include #elif defined(CONFIG_BYTESWAP_H) # include +#define BSWAP_FROM_BYTESWAP +# else +#define BSWAP_FROM_FALLBACKS +#endif /* ! CONFIG_MACHINE_BSWAP_H */ +#ifdef __cplusplus +extern "C" { +#endif + +#include "fpu/softfloat-types.h" + +#ifdef BSWAP_FROM_BYTESWAP static inline uint16_t bswap16(uint16_t x) { return bswap_16(x); @@ -27,7 +36,9 @@ static inline uint64_t bswap64(uint64_t x) { return bswap_64(x); } -# else +#endif + +#ifdef BSWAP_FROM_FALLBACKS static inline uint16_t bswap16(uint16_t x) { return (((x & 0x00ff) << 8) | @@ -53,7 +64,10 @@ static inline uint64_t bswap64(uint64_t x) ((x & 0x00ff000000000000ULL) >> 40) | ((x & 0xff00000000000000ULL) >> 56)); } -#endif /* ! CONFIG_MACHINE_BSWAP_H */ +#endif + +#undef BSWAP_FROM_BYTESWAP +#undef BSWAP_FROM_FALLBACKS static inline void bswap16s(uint16_t *s) { @@ -494,4 +508,8 @@ DO_STN_LDN_P(be) #undef le_bswaps #undef be_bswaps +#ifdef __cplusplus +} +#endif + #endif /* BSWAP_H */ diff --git a/include/qemu/co-shared-resource.h b/include/qemu/co-shared-resource.h index 4e4503004ca..78ca5850f8f 100644 --- a/include/qemu/co-shared-resource.h +++ b/include/qemu/co-shared-resource.h @@ -26,15 +26,13 @@ #ifndef QEMU_CO_SHARED_RESOURCE_H #define QEMU_CO_SHARED_RESOURCE_H - +/* Accesses to co-shared-resource API are thread-safe */ typedef struct SharedResource SharedResource; /* * Create SharedResource structure * * @total: total amount of some resource to be shared between clients - * - * Note: this API is not thread-safe. 
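The @size-on-miss convention spelled out above for the find_*_bit helpers gives the usual iteration idiom; in this sketch, bitmap and nbits are assumed to be defined by the caller:

    unsigned long bit;

    for (bit = find_first_bit(bitmap, nbits);
         bit < nbits;
         bit = find_next_bit(bitmap, nbits, bit + 1)) {
        /* bit is the index of a set bit */
    }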
*/ SharedResource *shres_create(uint64_t total); diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 091c45248b0..3baa5e3790f 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -72,18 +72,7 @@ int:(x) ? -1 : 1; \ } -/* QEMU_BUILD_BUG_MSG() emits the message given if _Static_assert is - * supported; otherwise, it will be omitted from the compiler error - * message (but as it remains present in the source code, it can still - * be useful when debugging). */ -#if defined(CONFIG_STATIC_ASSERT) #define QEMU_BUILD_BUG_MSG(x, msg) _Static_assert(!(x), msg) -#elif defined(__COUNTER__) -#define QEMU_BUILD_BUG_MSG(x, msg) typedef QEMU_BUILD_BUG_ON_STRUCT(x) \ - glue(qemu_build_bug_on__, __COUNTER__) __attribute__((unused)) -#else -#define QEMU_BUILD_BUG_MSG(x, msg) -#endif #define QEMU_BUILD_BUG_ON(x) QEMU_BUILD_BUG_MSG(x, "not expecting: " #x) @@ -173,46 +162,6 @@ #define QEMU_ALWAYS_INLINE #endif -/* Implement C11 _Generic via GCC builtins. Example: - * - * QEMU_GENERIC(x, (float, sinf), (long double, sinl), sin) (x) - * - * The first argument is the discriminator. The last is the default value. - * The middle ones are tuples in "(type, expansion)" format. - */ - -/* First, find out the number of generic cases. */ -#define QEMU_GENERIC(x, ...) \ - QEMU_GENERIC_(typeof(x), __VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) - -/* There will be extra arguments, but they are not used. */ -#define QEMU_GENERIC_(x, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, count, ...) \ - QEMU_GENERIC##count(x, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9) - -/* Two more helper macros, this time to extract items from a parenthesized - * list. - */ -#define QEMU_FIRST_(a, b) a -#define QEMU_SECOND_(a, b) b - -/* ... and a final one for the common part of the "recursion". */ -#define QEMU_GENERIC_IF(x, type_then, else_) \ - __builtin_choose_expr(__builtin_types_compatible_p(x, \ - QEMU_FIRST_ type_then), \ - QEMU_SECOND_ type_then, else_) - -/* CPP poor man's "recursion". */ -#define QEMU_GENERIC1(x, a0, ...) (a0) -#define QEMU_GENERIC2(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC1(x, __VA_ARGS__)) -#define QEMU_GENERIC3(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC2(x, __VA_ARGS__)) -#define QEMU_GENERIC4(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC3(x, __VA_ARGS__)) -#define QEMU_GENERIC5(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC4(x, __VA_ARGS__)) -#define QEMU_GENERIC6(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC5(x, __VA_ARGS__)) -#define QEMU_GENERIC7(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC6(x, __VA_ARGS__)) -#define QEMU_GENERIC8(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC7(x, __VA_ARGS__)) -#define QEMU_GENERIC9(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC8(x, __VA_ARGS__)) -#define QEMU_GENERIC10(x, a0, ...) 
QEMU_GENERIC_IF(x, a0, QEMU_GENERIC9(x, __VA_ARGS__)) - /** * qemu_build_not_reached() * diff --git a/include/qemu/config-file.h b/include/qemu/config-file.h index 8d3e53ae4d4..f6054233212 100644 --- a/include/qemu/config-file.h +++ b/include/qemu/config-file.h @@ -1,7 +1,9 @@ #ifndef QEMU_CONFIG_FILE_H #define QEMU_CONFIG_FILE_H +typedef void QEMUConfigCB(const char *group, QDict *qdict, void *opaque, Error **errp); +void qemu_load_module_for_opts(const char *group); QemuOptsList *qemu_find_opts(const char *group); QemuOptsList *qemu_find_opts_err(const char *group, Error **errp); QemuOpts *qemu_find_opts_singleton(const char *group); @@ -14,7 +16,10 @@ void qemu_config_write(FILE *fp); int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname, Error **errp); -int qemu_read_config_file(const char *filename, Error **errp); +/* A default callback for qemu_read_config_file(). */ +void qemu_config_do_parse(const char *group, QDict *qdict, void *opaque, Error **errp); + +int qemu_read_config_file(const char *filename, QEMUConfigCB *f, Error **errp); /* Parse QDict options as a replacement for a config file (allowing multiple enumerated (0..(n-1)) configuration "sections") */ diff --git a/include/qemu/coroutine-tls.h b/include/qemu/coroutine-tls.h new file mode 100644 index 00000000000..1558a826aa0 --- /dev/null +++ b/include/qemu/coroutine-tls.h @@ -0,0 +1,165 @@ +/* + * QEMU Thread Local Storage for coroutines + * + * Copyright Red Hat + * + * SPDX-License-Identifier: LGPL-2.1-or-later + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + * It is forbidden to access Thread Local Storage in coroutines because + * compiler optimizations may cause values to be cached across coroutine + * re-entry. Coroutines can run in more than one thread through the course of + * their life, leading bugs when stale TLS values from the wrong thread are + * used as a result of compiler optimization. + * + * An example is: + * + * ..code-block:: c + * :caption: A coroutine that may see the wrong TLS value + * + * static __thread AioContext *current_aio_context; + * ... + * static void coroutine_fn foo(void) + * { + * aio_notify(current_aio_context); + * qemu_coroutine_yield(); + * aio_notify(current_aio_context); // <-- may be stale after yielding! + * } + * + * This header provides macros for safely defining variables in Thread Local + * Storage: + * + * ..code-block:: c + * :caption: A coroutine that safely uses TLS + * + * QEMU_DEFINE_STATIC_CO_TLS(AioContext *, current_aio_context) + * ... + * static void coroutine_fn foo(void) + * { + * aio_notify(get_current_aio_context()); + * qemu_coroutine_yield(); + * aio_notify(get_current_aio_context()); // <-- safe + * } + */ + +#ifndef QEMU_COROUTINE_TLS_H +#define QEMU_COROUTINE_TLS_H + +/* + * To stop the compiler from caching TLS values we define accessor functions + * with __attribute__((noinline)) plus asm volatile("") to prevent + * optimizations that override noinline. + * + * The compiler can still analyze noinline code and make optimizations based on + * that knowledge, so an inline asm output operand is used to prevent + * optimizations that make assumptions about the address of the TLS variable. + * + * This is fragile and ultimately needs to be solved by a mechanism that is + * guaranteed to work by the compiler (e.g. stackless coroutines), but for now + * we use this approach to prevent issues. 
+ */ + +/** + * QEMU_DECLARE_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Declare an extern variable in Thread Local Storage from a header file: + * + * .. code-block:: c + * :caption: Declaring an extern variable in Thread Local Storage + * + * QEMU_DECLARE_CO_TLS(int, my_count) + * ... + * int c = get_my_count(); + * set_my_count(c + 1); + * *get_ptr_my_count() = 0; + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Declaring a TLS variable using __thread + * + * extern __thread int my_count; + * ... + * int c = my_count; + * my_count = c + 1; + * *(&my_count) = 0; + */ +#define QEMU_DECLARE_CO_TLS(type, var) \ + __attribute__((noinline)) type get_##var(void); \ + __attribute__((noinline)) void set_##var(type v); \ + __attribute__((noinline)) type *get_ptr_##var(void); + +/** + * QEMU_DEFINE_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Define a variable in Thread Local Storage that was previously declared from + * a header file with QEMU_DECLARE_CO_TLS(): + * + * .. code-block:: c + * :caption: Defining a variable in Thread Local Storage + * + * QEMU_DEFINE_CO_TLS(int, my_count) + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Defining a TLS variable using __thread + * + * __thread int my_count; + */ +#define QEMU_DEFINE_CO_TLS(type, var) \ + static __thread type co_tls_##var; \ + type get_##var(void) { asm volatile(""); return co_tls_##var; } \ + void set_##var(type v) { asm volatile(""); co_tls_##var = v; } \ + type *get_ptr_##var(void) \ + { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; } + +/** + * QEMU_DEFINE_STATIC_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Define a static variable in Thread Local Storage: + * + * .. code-block:: c + * :caption: Defining a static variable in Thread Local Storage + * + * QEMU_DEFINE_STATIC_CO_TLS(int, my_count) + * ... + * int c = get_my_count(); + * set_my_count(c + 1); + * *get_ptr_my_count() = 0; + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Defining a static TLS variable using __thread + * + * static __thread int my_count; + * ... + * int c = my_count; + * my_count = c + 1; + * *(&my_count) = 0; + */ +#define QEMU_DEFINE_STATIC_CO_TLS(type, var) \ + static __thread type co_tls_##var; \ + static __attribute__((noinline, unused)) \ + type get_##var(void) \ + { asm volatile(""); return co_tls_##var; } \ + static __attribute__((noinline, unused)) \ + void set_##var(type v) \ + { asm volatile(""); co_tls_##var = v; } \ + static __attribute__((noinline, unused)) \ + type *get_ptr_##var(void) \ + { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; } + +#endif /* QEMU_COROUTINE_TLS_H */ diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h index ce5b9c6851a..4829ff373d3 100644 --- a/include/qemu/coroutine.h +++ b/include/qemu/coroutine.h @@ -210,13 +210,15 @@ void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock); /** * Removes the next coroutine from the CoQueue, and wake it up. * Returns true if a coroutine was removed, false if the queue is empty. + * OK to run from coroutine and non-coroutine context. 
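A short sketch of what the relaxed context requirement for CoQueue wakeups enables; the queue, the bottom half and the function names are illustrative only:

    static CoQueue waiters;              /* initialised with qemu_co_queue_init() */

    static void coroutine_fn wait_for_data(void *opaque)
    {
        qemu_co_queue_wait(&waiters, NULL);      /* yield until woken */
    }

    static void data_ready_bh(void *opaque)
    {
        qemu_co_queue_next(&waiters);            /* now fine outside coroutine context */
    }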
*/ -bool coroutine_fn qemu_co_queue_next(CoQueue *queue); +bool qemu_co_queue_next(CoQueue *queue); /** * Empties the CoQueue; all coroutines are woken up. + * OK to run from coroutine and non-coroutine context. */ -void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue); +void qemu_co_queue_restart_all(CoQueue *queue); /** * Removes the next coroutine from the CoQueue, and wake it up. Unlike @@ -291,20 +293,27 @@ void qemu_co_rwlock_wrlock(CoRwlock *lock); */ void qemu_co_rwlock_unlock(CoRwlock *lock); -typedef struct QemuCoSleepState QemuCoSleepState; +typedef struct QemuCoSleep { + Coroutine *to_wake; +} QemuCoSleep; /** - * Yield the coroutine for a given duration. During this yield, @sleep_state - * (if not NULL) is set to an opaque pointer, which may be used for - * qemu_co_sleep_wake(). Be careful, the pointer is set back to zero when the - * timer fires. Don't save the obtained value to other variables and don't call - * qemu_co_sleep_wake from another aio context. + * Yield the coroutine for a given duration. Initializes @w so that, + * during this yield, it can be passed to qemu_co_sleep_wake() to + * terminate the sleep. */ -void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns, - QemuCoSleepState **sleep_state); +void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w, + QEMUClockType type, int64_t ns); + +/** + * Yield the coroutine until the next call to qemu_co_sleep_wake. + */ +void coroutine_fn qemu_co_sleep(QemuCoSleep *w); + static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns) { - qemu_co_sleep_ns_wakeable(type, ns, NULL); + QemuCoSleep w = { 0 }; + qemu_co_sleep_ns_wakeable(&w, type, ns); } /** @@ -313,7 +322,7 @@ static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns) * qemu_co_sleep_ns() and should be checked to be non-NULL before calling * qemu_co_sleep_wake(). */ -void qemu_co_sleep_wake(QemuCoSleepState *sleep_state); +void qemu_co_sleep_wake(QemuCoSleep *w); /** * Yield until a file descriptor becomes readable diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index cdca2991d8a..ca979dc6ccd 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -23,9 +23,14 @@ * THE SOFTWARE. */ +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. 
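To illustrate the reworked sleep API above, a hedged sketch in which one code path arms a wakeable sleep and another cuts it short; the file-scope QemuCoSleep and the 100 ms duration are assumptions of the sketch:

    static QemuCoSleep my_sleep;

    static void coroutine_fn my_co_wait(void *opaque)
    {
        /* Sleep for up to 100 ms; my_sleep.to_wake is only valid while sleeping. */
        qemu_co_sleep_ns_wakeable(&my_sleep, QEMU_CLOCK_REALTIME, 100 * SCALE_MS);
    }

    static void my_wake(void)
    {
        /* Wakes the sleeper if it is still sleeping, otherwise does nothing. */
        qemu_co_sleep_wake(&my_sleep);
    }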
+ */ + #ifndef HOST_UTILS_H #define HOST_UTILS_H +#include "qemu/compiler.h" #include "qemu/bswap.h" #ifdef CONFIG_INT128 @@ -51,36 +56,32 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) return (__int128_t)a * b / c; } -static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor) +static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh, + uint64_t divisor) { - if (divisor == 0) { - return 1; - } else { - __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow; - __uint128_t result = dividend / divisor; - *plow = result; - *phigh = dividend % divisor; - return result > UINT64_MAX; - } + __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow; + __uint128_t result = dividend / divisor; + + *plow = result; + *phigh = result >> 64; + return dividend % divisor; } -static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor) +static inline int64_t divs128(uint64_t *plow, int64_t *phigh, + int64_t divisor) { - if (divisor == 0) { - return 1; - } else { - __int128_t dividend = ((__int128_t)*phigh << 64) | *plow; - __int128_t result = dividend / divisor; - *plow = result; - *phigh = dividend % divisor; - return result != *plow; - } + __int128_t dividend = ((__int128_t)*phigh << 64) | *plow; + __int128_t result = dividend / divisor; + + *plow = result; + *phigh = result >> 64; + return dividend % divisor; } #else void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b); void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b); -int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor); -int divs128(int64_t *plow, int64_t *phigh, int64_t divisor); +uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor); +int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor); static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) { @@ -272,6 +273,9 @@ static inline int ctpop64(uint64_t val) */ static inline uint8_t revbit8(uint8_t x) { +#if __has_builtin(__builtin_bitreverse8) + return __builtin_bitreverse8(x); +#else /* Assign the correct nibble position. */ x = ((x & 0xf0) >> 4) | ((x & 0x0f) << 4); @@ -281,6 +285,7 @@ static inline uint8_t revbit8(uint8_t x) | ((x & 0x22) << 1) | ((x & 0x11) << 3); return x; +#endif } /** @@ -289,6 +294,9 @@ static inline uint8_t revbit8(uint8_t x) */ static inline uint16_t revbit16(uint16_t x) { +#if __has_builtin(__builtin_bitreverse16) + return __builtin_bitreverse16(x); +#else /* Assign the correct byte position. */ x = bswap16(x); /* Assign the correct nibble position. */ @@ -300,6 +308,7 @@ static inline uint16_t revbit16(uint16_t x) | ((x & 0x2222) << 1) | ((x & 0x1111) << 3); return x; +#endif } /** @@ -308,6 +317,9 @@ static inline uint16_t revbit16(uint16_t x) */ static inline uint32_t revbit32(uint32_t x) { +#if __has_builtin(__builtin_bitreverse32) + return __builtin_bitreverse32(x); +#else /* Assign the correct byte position. */ x = bswap32(x); /* Assign the correct nibble position. */ @@ -319,6 +331,7 @@ static inline uint32_t revbit32(uint32_t x) | ((x & 0x22222222u) << 1) | ((x & 0x11111111u) << 3); return x; +#endif } /** @@ -327,6 +340,9 @@ static inline uint32_t revbit32(uint32_t x) */ static inline uint64_t revbit64(uint64_t x) { +#if __has_builtin(__builtin_bitreverse64) + return __builtin_bitreverse64(x); +#else /* Assign the correct byte position. */ x = bswap64(x); /* Assign the correct nibble position. 
*/ @@ -338,6 +354,325 @@ static inline uint64_t revbit64(uint64_t x) | ((x & 0x2222222222222222ull) << 1) | ((x & 0x1111111111111111ull) << 3); return x; +#endif +} + +/** + * Return the absolute value of a 64-bit integer as an unsigned 64-bit value + */ +static inline uint64_t uabs64(int64_t v) +{ + return v < 0 ? -v : v; +} + +/** + * sadd32_overflow - addition with overflow indication + * @x, @y: addends + * @ret: Output for sum + * + * Computes *@ret = @x + @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret) +{ +#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5 + return __builtin_add_overflow(x, y, ret); +#else + *ret = x + y; + return ((*ret ^ x) & ~(x ^ y)) < 0; +#endif +} + +/** + * sadd64_overflow - addition with overflow indication + * @x, @y: addends + * @ret: Output for sum + * + * Computes *@ret = @x + @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret) +{ +#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5 + return __builtin_add_overflow(x, y, ret); +#else + *ret = x + y; + return ((*ret ^ x) & ~(x ^ y)) < 0; +#endif +} + +/** + * uadd32_overflow - addition with overflow indication + * @x, @y: addends + * @ret: Output for sum + * + * Computes *@ret = @x + @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret) +{ +#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5 + return __builtin_add_overflow(x, y, ret); +#else + *ret = x + y; + return *ret < x; +#endif +} + +/** + * uadd64_overflow - addition with overflow indication + * @x, @y: addends + * @ret: Output for sum + * + * Computes *@ret = @x + @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret) +{ +#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5 + return __builtin_add_overflow(x, y, ret); +#else + *ret = x + y; + return *ret < x; +#endif +} + +/** + * ssub32_overflow - subtraction with overflow indication + * @x: Minuend + * @y: Subtrahend + * @ret: Output for difference + * + * Computes *@ret = @x - @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret) +{ +#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5 + return __builtin_sub_overflow(x, y, ret); +#else + *ret = x - y; + return ((*ret ^ x) & (x ^ y)) < 0; +#endif +} + +/** + * ssub64_overflow - subtraction with overflow indication + * @x: Minuend + * @y: Subtrahend + * @ret: Output for sum + * + * Computes *@ret = @x - @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret) +{ +#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5 + return __builtin_sub_overflow(x, y, ret); +#else + *ret = x - y; + return ((*ret ^ x) & (x ^ y)) < 0; +#endif +} + +/** + * usub32_overflow - subtraction with overflow indication + * @x: Minuend + * @y: Subtrahend + * @ret: Output for sum + * + * Computes *@ret = @x - @y, and returns true if and only if that + * value has been truncated. 
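A minimal usage sketch for the *_overflow helpers above; the accumulate() wrapper is illustrative only:

    static bool accumulate(uint64_t *total, uint64_t value)
    {
        uint64_t sum;

        if (uadd64_overflow(*total, value, &sum)) {
            return false;                /* the addition would wrap around */
        }
        *total = sum;
        return true;
    }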
+ */ +static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret) +{ +#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5 + return __builtin_sub_overflow(x, y, ret); +#else + *ret = x - y; + return x < y; +#endif +} + +/** + * usub64_overflow - subtraction with overflow indication + * @x: Minuend + * @y: Subtrahend + * @ret: Output for sum + * + * Computes *@ret = @x - @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret) +{ +#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5 + return __builtin_sub_overflow(x, y, ret); +#else + *ret = x - y; + return x < y; +#endif +} + +/** + * smul32_overflow - multiplication with overflow indication + * @x, @y: Input multipliers + * @ret: Output for product + * + * Computes *@ret = @x * @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret) +{ +#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5 + return __builtin_mul_overflow(x, y, ret); +#else + int64_t z = (int64_t)x * y; + *ret = z; + return *ret != z; +#endif +} + +/** + * smul64_overflow - multiplication with overflow indication + * @x, @y: Input multipliers + * @ret: Output for product + * + * Computes *@ret = @x * @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret) +{ +#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5 + return __builtin_mul_overflow(x, y, ret); +#else + uint64_t hi, lo; + muls64(&lo, &hi, x, y); + *ret = lo; + return hi != ((int64_t)lo >> 63); +#endif +} + +/** + * umul32_overflow - multiplication with overflow indication + * @x, @y: Input multipliers + * @ret: Output for product + * + * Computes *@ret = @x * @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret) +{ +#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5 + return __builtin_mul_overflow(x, y, ret); +#else + uint64_t z = (uint64_t)x * y; + *ret = z; + return z > UINT32_MAX; +#endif +} + +/** + * umul64_overflow - multiplication with overflow indication + * @x, @y: Input multipliers + * @ret: Output for product + * + * Computes *@ret = @x * @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret) +{ +#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5 + return __builtin_mul_overflow(x, y, ret); +#else + uint64_t hi; + mulu64(ret, &hi, x, y); + return hi != 0; +#endif +} + +/* + * Unsigned 128x64 multiplication. + * Returns true if the result got truncated to 128 bits. + * Otherwise, returns false and the multiplication result via plow and phigh. 
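Ahead of the definition below, a small sketch of how mulu128() is meant to be called on a 128-bit value split across two 64-bit words; value_lo, value_hi and factor are illustrative:

    uint64_t lo = value_lo, hi = value_hi;       /* 128-bit input in {hi, lo} */

    if (mulu128(&lo, &hi, factor)) {
        /* the product does not fit in 128 bits */
    } else {
        /* {hi, lo} now holds the 128-bit product */
    }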
+ */ +static inline bool mulu128(uint64_t *plow, uint64_t *phigh, uint64_t factor) +{ +#if defined(CONFIG_INT128) && \ + (__has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5) + bool res; + __uint128_t r; + __uint128_t f = ((__uint128_t)*phigh << 64) | *plow; + res = __builtin_mul_overflow(f, factor, &r); + + *plow = r; + *phigh = r >> 64; + + return res; +#else + uint64_t dhi = *phigh; + uint64_t dlo = *plow; + uint64_t ahi; + uint64_t blo, bhi; + + if (dhi == 0) { + mulu64(plow, phigh, dlo, factor); + return false; + } + + mulu64(plow, &ahi, dlo, factor); + mulu64(&blo, &bhi, dhi, factor); + + return uadd64_overflow(ahi, blo, phigh) || bhi != 0; +#endif +} + +/** + * uadd64_carry - addition with carry-in and carry-out + * @x, @y: addends + * @pcarry: in-out carry value + * + * Computes @x + @y + *@pcarry, placing the carry-out back + * into *@pcarry and returning the 64-bit sum. + */ +static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry) +{ +#if __has_builtin(__builtin_addcll) + unsigned long long c = *pcarry; + x = __builtin_addcll(x, y, c, &c); + *pcarry = c & 1; + return x; +#else + bool c = *pcarry; + /* This is clang's internal expansion of __builtin_addc. */ + c = uadd64_overflow(x, c, &x); + c |= uadd64_overflow(x, y, &x); + *pcarry = c; + return x; +#endif +} + +/** + * usub64_borrow - subtraction with borrow-in and borrow-out + * @x, @y: addends + * @pborrow: in-out borrow value + * + * Computes @x - @y - *@pborrow, placing the borrow-out back + * into *@pborrow and returning the 64-bit sum. + */ +static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow) +{ +#if __has_builtin(__builtin_subcll) + unsigned long long b = *pborrow; + x = __builtin_subcll(x, y, b, &b); + *pborrow = b & 1; + return x; +#else + bool b = *pborrow; + b = usub64_overflow(x, b, &x); + b |= usub64_overflow(x, y, &x); + *pborrow = b; + return x; +#endif } /* Host type specific sizes of these routines. */ @@ -437,4 +772,81 @@ void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift); */ void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow); +/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd + * (https://gmplib.org/repo/gmp/file/tip/longlong.h) + * + * Licensed under the GPLv2/LGPLv3 + */ +static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, + uint64_t n0, uint64_t d) +{ +#if defined(__x86_64__) + uint64_t q; + asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d)); + return q; +#elif defined(__s390x__) && !defined(__clang__) + /* Need to use a TImode type to get an even register pair for DLGR. */ + unsigned __int128 n = (unsigned __int128)n1 << 64 | n0; + asm("dlgr %0, %1" : "+r"(n) : "r"(d)); + *r = n >> 64; + return n; +#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7) + /* From Power ISA 2.06, programming note for divdeu. 
*/ + uint64_t q1, q2, Q, r1, r2, R; + asm("divdeu %0,%2,%4; divdu %1,%3,%4" + : "=&r"(q1), "=r"(q2) + : "r"(n1), "r"(n0), "r"(d)); + r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */ + r2 = n0 - (q2 * d); + Q = q1 + q2; + R = r1 + r2; + if (R >= d || R < r2) { /* overflow implies R > d */ + Q += 1; + R -= d; + } + *r = R; + return Q; +#else + uint64_t d0, d1, q0, q1, r1, r0, m; + + d0 = (uint32_t)d; + d1 = d >> 32; + + r1 = n1 % d1; + q1 = n1 / d1; + m = q1 * d0; + r1 = (r1 << 32) | (n0 >> 32); + if (r1 < m) { + q1 -= 1; + r1 += d; + if (r1 >= d) { + if (r1 < m) { + q1 -= 1; + r1 += d; + } + } + } + r1 -= m; + + r0 = r1 % d1; + q0 = r1 / d1; + m = q0 * d0; + r0 = (r0 << 32) | (uint32_t)n0; + if (r0 < m) { + q0 -= 1; + r0 += d; + if (r0 >= d) { + if (r0 < m) { + q0 -= 1; + r0 += d; + } + } + } + r0 -= m; + + *r = r0; + return (q1 << 32) | q0; +#endif +} + #endif diff --git a/include/qemu/int128.h b/include/qemu/int128.h index 52fc2384211..b6d517aea4e 100644 --- a/include/qemu/int128.h +++ b/include/qemu/int128.h @@ -1,9 +1,9 @@ #ifndef INT128_H #define INT128_H -#ifdef CONFIG_INT128 #include "qemu/bswap.h" +#ifdef CONFIG_INT128 typedef __int128_t Int128; static inline Int128 int128_make64(uint64_t a) @@ -11,6 +11,11 @@ static inline Int128 int128_make64(uint64_t a) return a; } +static inline Int128 int128_makes64(int64_t a) +{ + return a; +} + static inline Int128 int128_make128(uint64_t lo, uint64_t hi) { return (__uint128_t)hi << 64 | lo; @@ -53,6 +58,11 @@ static inline Int128 int128_exts64(int64_t a) return a; } +static inline Int128 int128_not(Int128 a) +{ + return ~a; +} + static inline Int128 int128_and(Int128 a, Int128 b) { return a & b; @@ -63,6 +73,11 @@ static inline Int128 int128_or(Int128 a, Int128 b) return a | b; } +static inline Int128 int128_xor(Int128 a, Int128 b) +{ + return a ^ b; +} + static inline Int128 int128_rshift(Int128 a, int n) { return a >> n; @@ -150,26 +165,48 @@ static inline void int128_subfrom(Int128 *a, Int128 b) static inline Int128 bswap128(Int128 a) { +#if __has_builtin(__builtin_bswap128) + return __builtin_bswap128(a); +#else return int128_make128(bswap64(int128_gethi(a)), bswap64(int128_getlo(a))); +#endif } #else /* !CONFIG_INT128 */ typedef struct Int128 Int128; +/* + * We guarantee that the in-memory byte representation of an + * Int128 is that of a host-endian-order 128-bit integer + * (whether using this struct or the __int128_t version of the type). + * Some code using this type relies on this (eg when copying it into + * guest memory or a gdb protocol buffer, or by using Int128 in + * a union with other integer types). + */ struct Int128 { +#ifdef HOST_WORDS_BIGENDIAN + int64_t hi; + uint64_t lo; +#else uint64_t lo; int64_t hi; +#endif }; static inline Int128 int128_make64(uint64_t a) { - return (Int128) { a, 0 }; + return (Int128) { .lo = a, .hi = 0 }; +} + +static inline Int128 int128_makes64(int64_t a) +{ + return (Int128) { .lo = a, .hi = a >> 63 }; } static inline Int128 int128_make128(uint64_t lo, uint64_t hi) { - return (Int128) { lo, hi }; + return (Int128) { .lo = lo, .hi = hi }; } static inline uint64_t int128_get64(Int128 a) @@ -200,22 +237,32 @@ static inline Int128 int128_one(void) static inline Int128 int128_2_64(void) { - return (Int128) { 0, 1 }; + return int128_make128(0, 1); } static inline Int128 int128_exts64(int64_t a) { - return (Int128) { .lo = a, .hi = (a < 0) ? -1 : 0 }; + return int128_make128(a, (a < 0) ? 
-1 : 0); +} + +static inline Int128 int128_not(Int128 a) +{ + return int128_make128(~a.lo, ~a.hi); } static inline Int128 int128_and(Int128 a, Int128 b) { - return (Int128) { a.lo & b.lo, a.hi & b.hi }; + return int128_make128(a.lo & b.lo, a.hi & b.hi); } static inline Int128 int128_or(Int128 a, Int128 b) { - return (Int128) { a.lo | b.lo, a.hi | b.hi }; + return int128_make128(a.lo | b.lo, a.hi | b.hi); +} + +static inline Int128 int128_xor(Int128 a, Int128 b) +{ + return int128_make128(a.lo ^ b.lo, a.hi ^ b.hi); } static inline Int128 int128_rshift(Int128 a, int n) @@ -327,5 +374,16 @@ static inline void int128_subfrom(Int128 *a, Int128 b) *a = int128_sub(*a, b); } +static inline Int128 bswap128(Int128 a) +{ + return int128_make128(bswap64(a.hi), bswap64(a.lo)); +} + #endif /* CONFIG_INT128 */ + +static inline void bswap128s(Int128 *s) +{ + *s = bswap128(*s); +} + #endif /* INT128_H */ diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h index b66cf93c4bc..8249edd764e 100644 --- a/include/qemu/iova-tree.h +++ b/include/qemu/iova-tree.h @@ -59,7 +59,7 @@ IOVATree *iova_tree_new(void); * * Return: 0 if succeeded, or <0 if error. */ -int iova_tree_insert(IOVATree *tree, DMAMap *map); +int iova_tree_insert(IOVATree *tree, const DMAMap *map); /** * iova_tree_remove: @@ -74,7 +74,7 @@ int iova_tree_insert(IOVATree *tree, DMAMap *map); * * Return: 0 if succeeded, or <0 if error. */ -int iova_tree_remove(IOVATree *tree, DMAMap *map); +int iova_tree_remove(IOVATree *tree, const DMAMap *map); /** * iova_tree_find: @@ -92,7 +92,7 @@ int iova_tree_remove(IOVATree *tree, DMAMap *map); * user is responsible to make sure the pointer is valid (say, no * concurrent deletion in progress). */ -DMAMap *iova_tree_find(IOVATree *tree, DMAMap *map); +const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map); /** * iova_tree_find_address: @@ -105,7 +105,7 @@ DMAMap *iova_tree_find(IOVATree *tree, DMAMap *map); * * Return: same as iova_tree_find(). */ -DMAMap *iova_tree_find_address(IOVATree *tree, hwaddr iova); +const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova); /** * iova_tree_foreach: diff --git a/include/qemu/job.h b/include/qemu/job.h index efc6fa75449..6e67b6977ff 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -253,8 +253,17 @@ struct JobDriver { /** * If the callback is not NULL, it will be invoked in job_cancel_async + * + * This function must return true if the job will be cancelled + * immediately without any further I/O (mandatory if @force is + * true), and false otherwise. This lets the generic job layer + * know whether a job has been truly (force-)cancelled, or whether + * it is just in a special completion mode (like mirror after + * READY). + * (If the callback is NULL, the job is assumed to terminate + * without I/O.) */ - void (*cancel)(Job *job); + bool (*cancel)(Job *job, bool force); /** Called when the job is freed */ @@ -427,9 +436,15 @@ const char *job_type_str(const Job *job); /** Returns true if the job should not be visible to the management layer. */ bool job_is_internal(Job *job); -/** Returns whether the job is scheduled for cancellation. */ +/** Returns whether the job is being cancelled. */ bool job_is_cancelled(Job *job); +/** + * Returns whether the job is scheduled for cancellation (at an + * indefinite point). + */ +bool job_cancel_requested(Job *job); + /** Returns whether the job is in a completed state. 
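A hedged sketch of a job driver honouring the new cancel() contract described above; MyJob, its embedded Job member and my_job_abort_io() are purely illustrative:

    static bool my_job_cancel(Job *job, bool force)
    {
        MyJob *s = container_of(job, MyJob, common);

        if (force) {
            my_job_abort_io(s);          /* stop immediately, no further I/O */
            return true;                 /* the job is truly cancelled */
        }
        /* Soft cancel: keep running and finish in a special completion mode. */
        return false;
    }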
*/ bool job_is_completed(Job *job); @@ -506,18 +521,18 @@ void job_user_cancel(Job *job, bool force, Error **errp); /** * Synchronously cancel the @job. The completion callback is called - * before the function returns. The job may actually complete - * instead of canceling itself; the circumstances under which this - * happens depend on the kind of job that is active. + * before the function returns. If @force is false, the job may + * actually complete instead of canceling itself; the circumstances + * under which this happens depend on the kind of job that is active. * * Returns the return value from the job if the job actually completed * during the call, or -ECANCELED if it was canceled. * * Callers must hold the AioContext lock of job->aio_context. */ -int job_cancel_sync(Job *job); +int job_cancel_sync(Job *job, bool force); -/** Synchronously cancels all jobs using job_cancel_sync(). */ +/** Synchronously force-cancels all jobs using job_cancel_sync(). */ void job_cancel_sync_all(void); /** diff --git a/include/qemu/lockable.h b/include/qemu/lockable.h index b6200231418..86db7cb04c9 100644 --- a/include/qemu/lockable.h +++ b/include/qemu/lockable.h @@ -24,79 +24,71 @@ struct QemuLockable { QemuLockUnlockFunc *unlock; }; -/* This function gives an error if an invalid, non-NULL pointer type is passed - * to QEMU_MAKE_LOCKABLE. For optimized builds, we can rely on dead-code elimination - * from the compiler, and give the errors already at link time. - */ -#if defined(__OPTIMIZE__) && !defined(__SANITIZE_ADDRESS__) -void unknown_lock_type(void *); -#else -static inline void unknown_lock_type(void *unused) -{ - abort(); -} -#endif - static inline __attribute__((__always_inline__)) QemuLockable * qemu_make_lockable(void *x, QemuLockable *lockable) { - /* We cannot test this in a macro, otherwise we get compiler + /* + * We cannot test this in a macro, otherwise we get compiler * warnings like "the address of 'm' will always evaluate as 'true'". */ return x ? lockable : NULL; } -/* Auxiliary macros to simplify QEMU_MAKE_LOCABLE. */ -#define QEMU_LOCK_FUNC(x) ((QemuLockUnlockFunc *) \ - QEMU_GENERIC(x, \ - (QemuMutex *, qemu_mutex_lock), \ - (QemuRecMutex *, qemu_rec_mutex_lock), \ - (CoMutex *, qemu_co_mutex_lock), \ - (QemuSpin *, qemu_spin_lock), \ - unknown_lock_type)) - -#define QEMU_UNLOCK_FUNC(x) ((QemuLockUnlockFunc *) \ - QEMU_GENERIC(x, \ - (QemuMutex *, qemu_mutex_unlock), \ - (QemuRecMutex *, qemu_rec_mutex_unlock), \ - (CoMutex *, qemu_co_mutex_unlock), \ - (QemuSpin *, qemu_spin_unlock), \ - unknown_lock_type)) - -/* In C, compound literals have the lifetime of an automatic variable. +static inline __attribute__((__always_inline__)) QemuLockable * +qemu_null_lockable(void *x) +{ + if (x != NULL) { + qemu_build_not_reached(); + } + return NULL; +} + +/* + * In C, compound literals have the lifetime of an automatic variable. * In C++ it would be different, but then C++ wouldn't need QemuLockable * either... */ -#define QEMU_MAKE_LOCKABLE_(x) (&(QemuLockable) { \ - .object = (x), \ - .lock = QEMU_LOCK_FUNC(x), \ - .unlock = QEMU_UNLOCK_FUNC(x), \ +#define QML_OBJ_(x, name) (&(QemuLockable) { \ + .object = (x), \ + .lock = (QemuLockUnlockFunc *) qemu_ ## name ## _lock, \ + .unlock = (QemuLockUnlockFunc *) qemu_ ## name ## _unlock \ }) -/* QEMU_MAKE_LOCKABLE - Make a polymorphic QemuLockable +/** + * QEMU_MAKE_LOCKABLE - Make a polymorphic QemuLockable * - * @x: a lock object (currently one of QemuMutex, QemuRecMutex, CoMutex, QemuSpin). 
+ * @x: a lock object (currently one of QemuMutex, QemuRecMutex, + * CoMutex, QemuSpin). * * Returns a QemuLockable object that can be passed around * to a function that can operate with locks of any kind, or * NULL if @x is %NULL. + * + * Note the special case for void *, so that we may pass "NULL". */ -#define QEMU_MAKE_LOCKABLE(x) \ - QEMU_GENERIC(x, \ - (QemuLockable *, (x)), \ - qemu_make_lockable((x), QEMU_MAKE_LOCKABLE_(x))) +#define QEMU_MAKE_LOCKABLE(x) \ + _Generic((x), QemuLockable *: (x), \ + void *: qemu_null_lockable(x), \ + QemuMutex *: qemu_make_lockable(x, QML_OBJ_(x, mutex)), \ + QemuRecMutex *: qemu_make_lockable(x, QML_OBJ_(x, rec_mutex)), \ + CoMutex *: qemu_make_lockable(x, QML_OBJ_(x, co_mutex)), \ + QemuSpin *: qemu_make_lockable(x, QML_OBJ_(x, spin))) -/* QEMU_MAKE_LOCKABLE_NONNULL - Make a polymorphic QemuLockable +/** + * QEMU_MAKE_LOCKABLE_NONNULL - Make a polymorphic QemuLockable * - * @x: a lock object (currently one of QemuMutex, QemuRecMutex, CoMutex, QemuSpin). + * @x: a lock object (currently one of QemuMutex, QemuRecMutex, + * CoMutex, QemuSpin). * * Returns a QemuLockable object that can be passed around * to a function that can operate with locks of any kind. */ -#define QEMU_MAKE_LOCKABLE_NONNULL(x) \ - QEMU_GENERIC(x, \ - (QemuLockable *, (x)), \ - QEMU_MAKE_LOCKABLE_(x)) +#define QEMU_MAKE_LOCKABLE_NONNULL(x) \ + _Generic((x), QemuLockable *: (x), \ + QemuMutex *: QML_OBJ_(x, mutex), \ + QemuRecMutex *: QML_OBJ_(x, rec_mutex), \ + CoMutex *: QML_OBJ_(x, co_mutex), \ + QemuSpin *: QML_OBJ_(x, spin)) static inline void qemu_lockable_lock(QemuLockable *x) { diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h index bbe067db974..8dbc6fcb894 100644 --- a/include/qemu/main-loop.h +++ b/include/qemu/main-loop.h @@ -234,25 +234,6 @@ void event_notifier_set_handler(EventNotifier *e, GSource *iohandler_get_g_source(void); AioContext *iohandler_get_aio_context(void); -#ifdef CONFIG_POSIX -/** - * qemu_add_child_watch: Register a child process for reaping. - * - * Under POSIX systems, a parent process must read the exit status of - * its child processes using waitpid, or the operating system will not - * free some of the resources attached to that process. - * - * This function directs the QEMU main loop to observe a child process - * and call waitpid as soon as it exits; the watch is then removed - * automatically. It is useful whenever QEMU forks a child process - * but will find out about its termination by other means such as a - * "broken pipe". - * - * @pid: The pid that QEMU should observe. - */ -typedef void ChildTerminationHandler(int status, void *opaque); -int qemu_add_child_watch(pid_t pid, ChildTerminationHandler *callback, void* opaque); -#endif /** * qemu_mutex_iothread_locked: Return lock status of the main loop mutex. @@ -313,7 +294,9 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); void qemu_fd_register(int fd); -QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque); +#define qemu_bh_new(cb, opaque) \ + qemu_bh_new_full((cb), (opaque), (stringify(cb))) +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name); void qemu_bh_schedule_idle(QEMUBH *bh); enum { diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h index 456ff87df16..90d0eee7053 100644 --- a/include/qemu/mmap-alloc.h +++ b/include/qemu/mmap-alloc.h @@ -7,18 +7,22 @@ size_t qemu_fd_getpagesize(int fd); size_t qemu_mempath_getpagesize(const char *mem_path); /** - * qemu_ram_mmap: mmap the specified file or device. 
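/*
 * Illustrative sketch (the sketch_* names are hypothetical, not part of the
 * patch): with the _Generic-based QEMU_MAKE_LOCKABLE above, one helper can
 * accept any of the supported lock types, and the matching lock/unlock pair
 * is selected at compile time.
 */
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/lockable.h"

static void sketch_with_lock(QemuLockable *l)
{
    qemu_lockable_lock(l);
    /* ... critical section ... */
    qemu_lockable_unlock(l);
}

static void sketch_callers(QemuMutex *m, QemuSpin *s)
{
    sketch_with_lock(QEMU_MAKE_LOCKABLE(m));  /* resolves to qemu_mutex_lock/unlock */
    sketch_with_lock(QEMU_MAKE_LOCKABLE(s));  /* resolves to qemu_spin_lock/unlock */
}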
+ * qemu_ram_mmap: mmap anonymous memory, the specified file or device. + * + * mmap() abstraction to map guest RAM, simplifying flag handling, taking + * care of alignment requirements and installing guard pages. * * Parameters: * @fd: the file or the device to mmap * @size: the number of bytes to be mmaped * @align: if not zero, specify the alignment of the starting mapping address; * otherwise, the alignment in use will be determined by QEMU. - * @readonly: true for a read-only mapping, false for read/write. - * @shared: map has RAM_SHARED flag. - * @is_pmem: map has RAM_PMEM flag. + * @qemu_map_flags: QEMU_MAP_* flags * @map_offset: map starts at offset of map_offset from the start of fd * + * Internally, MAP_PRIVATE, MAP_ANONYMOUS and MAP_SHARED_VALIDATE are set + * implicitly based on other parameters. + * * Return: * On success, return a pointer to the mapped area. * On failure, return MAP_FAILED. @@ -26,9 +30,7 @@ size_t qemu_mempath_getpagesize(const char *mem_path); void *qemu_ram_mmap(int fd, size_t size, size_t align, - bool readonly, - bool shared, - bool is_pmem, + uint32_t qemu_map_flags, off_t map_offset); void qemu_ram_munmap(int fd, void *ptr, size_t size); diff --git a/include/qemu/module.h b/include/qemu/module.h index 944d403cbd1..5fcc323b2a7 100644 --- a/include/qemu/module.h +++ b/include/qemu/module.h @@ -72,5 +72,84 @@ void module_call_init(module_init_type type); bool module_load_one(const char *prefix, const char *lib_name, bool mayfail); void module_load_qom_one(const char *type); void module_load_qom_all(void); +void module_allow_arch(const char *arch); + +/** + * DOC: module info annotation macros + * + * ``scripts/modinfo-collect.py`` will collect module info, + * using the preprocessor and -DQEMU_MODINFO. + * + * ``scripts/modinfo-generate.py`` will create a module meta-data database + * from the collected information so qemu knows about module + * dependencies and QOM objects implemented by modules. + * + * See ``*.modinfo`` and ``modinfo.c`` in the build directory to check the + * script results. + */ +#ifdef QEMU_MODINFO +# define modinfo(kind, value) \ + MODINFO_START kind value MODINFO_END +#else +# define modinfo(kind, value) +#endif + +/** + * module_obj + * + * @name: QOM type. + * + * This module implements QOM type @name. + */ +#define module_obj(name) modinfo(obj, name) + +/** + * module_dep + * + * @name: module name + * + * This module depends on module @name. + */ +#define module_dep(name) modinfo(dep, name) + +/** + * module_arch + * + * @name: target architecture + * + * This module is for target architecture @arch. + * + * Note that target-dependent modules are tagged automatically, so + * this is only needed in case target-independent modules should be + * restricted. Use case example: the ccw bus is implemented by s390x + * only. + */ +#define module_arch(name) modinfo(arch, name) + +/** + * module_opts + * + * @name: QemuOpts name + * + * This module registers QemuOpts @name. 
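/*
 * Illustrative sketch of the annotation macros above in a hypothetical
 * module source file ("example-device" and "hw-display-example" are made-up
 * names).  The macros expand to nothing in normal builds and are only picked
 * up by scripts/modinfo-collect.py when preprocessed with -DQEMU_MODINFO.
 */
#include "qemu/osdep.h"
#include "qemu/module.h"

#define TYPE_EXAMPLE_DEVICE "example-device"

module_obj(TYPE_EXAMPLE_DEVICE);    /* QOM type implemented by this module */
module_dep("hw-display-example");   /* must be loaded before this module */
module_arch("x86_64");              /* only if the module is arch-restricted */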
+ */ +#define module_opts(name) modinfo(opts, name) + +/* + * module info database + * + * scripts/modinfo-generate.c will build this using the data collected + * by scripts/modinfo-collect.py + */ +typedef struct QemuModinfo QemuModinfo; +struct QemuModinfo { + const char *name; + const char *arch; + const char **objs; + const char **deps; + const char **opts; +}; +extern const QemuModinfo qemu_modinfo[]; +void module_init_info(const QemuModinfo *info); #endif diff --git a/include/qemu/option.h b/include/qemu/option.h index f73e0dc7d95..306bf075750 100644 --- a/include/qemu/option.h +++ b/include/qemu/option.h @@ -119,7 +119,6 @@ QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id, int fail_if_exists, Error **errp); void qemu_opts_reset(QemuOptsList *list); void qemu_opts_loc_restore(QemuOpts *opts); -bool qemu_opts_set(QemuOptsList *list, const char *name, const char *value, Error **errp); const char *qemu_opts_id(QemuOpts *opts); void qemu_opts_set_id(QemuOpts *opts, char *id); void qemu_opts_del(QemuOpts *opts); @@ -130,8 +129,6 @@ QemuOpts *qemu_opts_parse_noisily(QemuOptsList *list, const char *params, bool permit_abbrev); QemuOpts *qemu_opts_parse(QemuOptsList *list, const char *params, bool permit_abbrev, Error **errp); -void qemu_opts_set_defaults(QemuOptsList *list, const char *params, - int permit_abbrev); QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict, Error **errp); QDict *qemu_opts_to_qdict_filtered(QemuOpts *opts, QDict *qdict, @@ -147,7 +144,10 @@ void qemu_opts_print_help(QemuOptsList *list, bool print_caption); void qemu_opts_free(QemuOptsList *list); QemuOptsList *qemu_opts_append(QemuOptsList *dst, QemuOptsList *list); +QDict *keyval_parse_into(QDict *qdict, const char *params, const char *implied_key, + bool *p_help, Error **errp); QDict *keyval_parse(const char *params, const char *implied_key, bool *help, Error **errp); +void keyval_merge(QDict *old, const QDict *new, Error **errp); #endif diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 62f747f8e7b..1d2dfe97974 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -131,10 +131,6 @@ QEMU_EXTERN_C int daemon(int, int); */ #include "glib-compat.h" -#ifdef __cplusplus -extern "C" { -#endif - #ifdef _WIN32 #include "sysemu/os-win32.h" #endif @@ -143,6 +139,10 @@ extern "C" { #include "sysemu/os-posix.h" #endif +#ifdef __cplusplus +extern "C" { +#endif + #include "qemu/typedefs.h" /* @@ -195,6 +195,9 @@ extern "C" { #ifndef MAP_FIXED_NOREPLACE #define MAP_FIXED_NOREPLACE 0 #endif +#ifndef MAP_NORESERVE +#define MAP_NORESERVE 0 +#endif #ifndef ENOMEDIUM #define ENOMEDIUM ENODEV #endif @@ -253,7 +256,7 @@ extern "C" { /* Mac OSX has a bug that incorrectly defines SIZE_MAX with * the wrong type. Our replacement isn't usable in preprocessor * expressions, but it is sufficient for our needs. */ -#if defined(HAVE_BROKEN_SIZE_MAX) && HAVE_BROKEN_SIZE_MAX +#ifdef HAVE_BROKEN_SIZE_MAX #undef SIZE_MAX #define SIZE_MAX ((size_t)-1) #endif @@ -316,11 +319,16 @@ extern "C" { }) #endif -/* Round number down to multiple */ +/* + * Round number down to multiple. Safe when m is not a power of 2 (see + * ROUND_DOWN for a faster version when a power of 2 is guaranteed). + */ #define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m)) -/* Round number up to multiple. Safe when m is not a power of 2 (see - * ROUND_UP for a faster version when a power of 2 is guaranteed) */ +/* + * Round number up to multiple. 
Safe when m is not a power of 2 (see + * ROUND_UP for a faster version when a power of 2 is guaranteed). + */ #define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m)) /* Check if n is a multiple of m */ @@ -345,11 +353,22 @@ extern "C" { /* Check if pointer p is n-bytes aligned */ #define QEMU_PTR_IS_ALIGNED(p, n) QEMU_IS_ALIGNED((uintptr_t)(p), (n)) -/* Round number up to multiple. Requires that d be a power of 2 (see +/* + * Round number down to multiple. Requires that d be a power of 2 (see + * QEMU_ALIGN_UP for a safer but slower version on arbitrary + * numbers); works even if d is a smaller type than n. + */ +#ifndef ROUND_DOWN +#define ROUND_DOWN(n, d) ((n) & -(0 ? (n) : (d))) +#endif + +/* + * Round number up to multiple. Requires that d be a power of 2 (see * QEMU_ALIGN_UP for a safer but slower version on arbitrary - * numbers); works even if d is a smaller type than n. */ + * numbers); works even if d is a smaller type than n. + */ #ifndef ROUND_UP -#define ROUND_UP(n, d) (((n) + (d) - 1) & -(0 ? (n) : (d))) +#define ROUND_UP(n, d) ROUND_DOWN((n) + (d) - 1, (d)) #endif #ifndef DIV_ROUND_UP @@ -370,10 +389,50 @@ extern "C" { int qemu_daemon(int nochdir, int noclose); void *qemu_try_memalign(size_t alignment, size_t size); void *qemu_memalign(size_t alignment, size_t size); -void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared); +void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared, + bool noreserve); void qemu_vfree(void *ptr); void qemu_anon_ram_free(void *ptr, size_t size); +/* + * It's an analog of GLIB's g_autoptr_cleanup_generic_gfree(), used to define + * g_autofree macro. + */ +static inline void qemu_cleanup_generic_vfree(void *p) +{ + void **pp = (void **)p; + qemu_vfree(*pp); +} + +/* + * Analog of g_autofree, but qemu_vfree is called on cleanup instead of g_free. + */ +#define QEMU_AUTO_VFREE __attribute__((cleanup(qemu_cleanup_generic_vfree))) + +/* + * Abstraction of PROT_ and MAP_ flags as passed to mmap(), for example, + * consumed by qemu_ram_mmap(). + */ + +/* Map PROT_READ instead of PROT_READ | PROT_WRITE. */ +#define QEMU_MAP_READONLY (1 << 0) + +/* Use MAP_SHARED instead of MAP_PRIVATE. */ +#define QEMU_MAP_SHARED (1 << 1) + +/* + * Use MAP_SYNC | MAP_SHARED_VALIDATE if supported. Ignored without + * QEMU_MAP_SHARED. If mapping fails, warn and fallback to !QEMU_MAP_SYNC. + */ +#define QEMU_MAP_SYNC (1 << 2) + +/* + * Use MAP_NORESERVE to skip reservation of swap space (or huge pages if + * applicable). Bail out if not supported/effective. 
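/*
 * Illustrative sketch (hypothetical helper, POSIX-only): mapping a
 * file-backed RAM region shared, using the consolidated qemu_map_flags
 * argument instead of the old readonly/shared/is_pmem booleans.  Flags such
 * as QEMU_MAP_NORESERVE (defined just below) can be OR-ed in as needed.
 */
#include "qemu/osdep.h"
#include "qemu/mmap-alloc.h"

static void *sketch_map_backend(int fd, size_t size)
{
    void *ptr = qemu_ram_mmap(fd, size, qemu_fd_getpagesize(fd),
                              QEMU_MAP_SHARED, 0);

    return ptr == MAP_FAILED ? NULL : ptr;
}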
+ */ +#define QEMU_MAP_NORESERVE (1 << 3) + + #define QEMU_MADV_INVALID -1 #if defined(CONFIG_MADVISE) @@ -418,7 +477,7 @@ void qemu_anon_ram_free(void *ptr, size_t size); #ifdef MADV_REMOVE #define QEMU_MADV_REMOVE MADV_REMOVE #else -#define QEMU_MADV_REMOVE QEMU_MADV_INVALID +#define QEMU_MADV_REMOVE QEMU_MADV_DONTNEED #endif #elif defined(CONFIG_POSIX_MADVISE) @@ -432,7 +491,7 @@ void qemu_anon_ram_free(void *ptr, size_t size); #define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID #define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID #define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID -#define QEMU_MADV_REMOVE QEMU_MADV_INVALID +#define QEMU_MADV_REMOVE QEMU_MADV_DONTNEED #else /* no-op */ @@ -520,6 +579,7 @@ void sigaction_invoke(struct sigaction *action, #endif int qemu_madvise(void *addr, size_t len, int advice); +int qemu_mprotect_rw(void *addr, size_t size); int qemu_mprotect_rwx(void *addr, size_t size); int qemu_mprotect_none(void *addr, size_t size); diff --git a/include/qemu/plugin-memory.h b/include/qemu/plugin-memory.h index fbbe99474bd..0f592267279 100644 --- a/include/qemu/plugin-memory.h +++ b/include/qemu/plugin-memory.h @@ -6,8 +6,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _PLUGIN_MEMORY_H_ -#define _PLUGIN_MEMORY_H_ +#ifndef PLUGIN_MEMORY_H +#define PLUGIN_MEMORY_H struct qemu_plugin_hwaddr { bool is_io; @@ -18,7 +18,7 @@ struct qemu_plugin_hwaddr { hwaddr offset; } io; struct { - uint64_t hostaddr; + void *hostaddr; } ram; } v; }; diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h index c5a79a89f0d..145f8a221ac 100644 --- a/include/qemu/plugin.h +++ b/include/qemu/plugin.h @@ -12,6 +12,7 @@ #include "qemu/error-report.h" #include "qemu/queue.h" #include "qemu/option.h" +#include "exec/memopidx.h" /* * Events that plugins can subscribe to. @@ -36,6 +37,25 @@ enum qemu_plugin_event { struct qemu_plugin_desc; typedef QTAILQ_HEAD(, qemu_plugin_desc) QemuPluginList; +/* + * Construct a qemu_plugin_meminfo_t. + */ +static inline qemu_plugin_meminfo_t +make_plugin_meminfo(MemOpIdx oi, enum qemu_plugin_mem_rw rw) +{ + return oi | (rw << 16); +} + +/* + * Extract the memory operation direction from a qemu_plugin_meminfo_t. + * Other portions may be extracted via get_memop and get_mmuidx. + */ +static inline enum qemu_plugin_mem_rw +get_plugin_meminfo_rw(qemu_plugin_meminfo_t i) +{ + return i >> 16; +} + #ifdef CONFIG_PLUGIN extern QemuOptsList qemu_plugin_opts; @@ -79,7 +99,6 @@ enum plugin_dyn_cb_subtype { struct qemu_plugin_dyn_cb { union qemu_plugin_cb_sig f; void *userp; - unsigned tcg_flags; enum plugin_dyn_cb_subtype type; /* @rw applies to mem callbacks only (both regular and inline) */ enum qemu_plugin_mem_rw rw; @@ -144,10 +163,12 @@ struct qemu_plugin_tb { /** * qemu_plugin_tb_insn_get(): get next plugin record for translation. 
- * + * @tb: the internal tb context + * @pc: address of instruction */ static inline -struct qemu_plugin_insn *qemu_plugin_tb_insn_get(struct qemu_plugin_tb *tb) +struct qemu_plugin_insn *qemu_plugin_tb_insn_get(struct qemu_plugin_tb *tb, + uint64_t pc) { struct qemu_plugin_insn *insn; int i, j; @@ -160,6 +181,7 @@ struct qemu_plugin_insn *qemu_plugin_tb_insn_get(struct qemu_plugin_tb *tb) g_byte_array_set_size(insn->data, 0); insn->calls_helpers = false; insn->mem_helper = false; + insn->vaddr = pc; for (i = 0; i < PLUGIN_N_CB_TYPES; i++) { for (j = 0; j < PLUGIN_N_CB_SUBTYPES; j++) { @@ -181,7 +203,8 @@ qemu_plugin_vcpu_syscall(CPUState *cpu, int64_t num, uint64_t a1, uint64_t a6, uint64_t a7, uint64_t a8); void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret); -void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, uint32_t meminfo); +void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, + MemOpIdx oi, enum qemu_plugin_mem_rw rw); void qemu_plugin_flush_cb(void); @@ -191,6 +214,16 @@ void qemu_plugin_add_dyn_cb_arr(GArray *arr); void qemu_plugin_disable_mem_helpers(CPUState *cpu); +/** + * qemu_plugin_user_exit(): clean-up callbacks before calling exit callbacks + * + * This is a user-mode only helper that ensure we have fully cleared + * callbacks from all threads before calling the exit callbacks. This + * is so the plugins themselves don't have to jump through hoops to + * guard against race conditions. + */ +void qemu_plugin_user_exit(void); + #else /* !CONFIG_PLUGIN */ static inline void qemu_plugin_add_opts(void) @@ -235,7 +268,8 @@ void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret) { } static inline void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, - uint32_t meminfo) + MemOpIdx oi, + enum qemu_plugin_mem_rw rw) { } static inline void qemu_plugin_flush_cb(void) @@ -251,6 +285,8 @@ void qemu_plugin_add_dyn_cb_arr(GArray *arr) static inline void qemu_plugin_disable_mem_helpers(CPUState *cpu) { } +static inline void qemu_plugin_user_exit(void) +{ } #endif /* !CONFIG_PLUGIN */ #endif /* QEMU_PLUGIN_H */ diff --git a/include/qemu/progress_meter.h b/include/qemu/progress_meter.h index 9a23ff071c4..dadf822bbf8 100644 --- a/include/qemu/progress_meter.h +++ b/include/qemu/progress_meter.h @@ -27,6 +27,8 @@ #ifndef QEMU_PROGRESS_METER_H #define QEMU_PROGRESS_METER_H +#include "qemu/lockable.h" + typedef struct ProgressMeter { /** * Current progress. 
The unit is arbitrary as long as the ratio between @@ -37,22 +39,24 @@ typedef struct ProgressMeter { /** Estimated current value at the completion of the process */ uint64_t total; + + QemuMutex lock; /* protects concurrent access to above fields */ } ProgressMeter; -static inline void progress_work_done(ProgressMeter *pm, uint64_t done) -{ - pm->current += done; -} - -static inline void progress_set_remaining(ProgressMeter *pm, uint64_t remaining) -{ - pm->total = pm->current + remaining; -} - -static inline void progress_increase_remaining(ProgressMeter *pm, - uint64_t delta) -{ - pm->total += delta; -} +void progress_init(ProgressMeter *pm); +void progress_destroy(ProgressMeter *pm); + +/* Get a snapshot of internal current and total values */ +void progress_get_snapshot(ProgressMeter *pm, uint64_t *current, + uint64_t *total); + +/* Increases the amount of work done so far by @done */ +void progress_work_done(ProgressMeter *pm, uint64_t done); + +/* Sets how much work has to be done to complete to @remaining */ +void progress_set_remaining(ProgressMeter *pm, uint64_t remaining); + +/* Increases the total work to do by @delta */ +void progress_increase_remaining(ProgressMeter *pm, uint64_t delta); #endif /* QEMU_PROGRESS_METER_H */ diff --git a/qemu-options.h b/include/qemu/qemu-options.h similarity index 88% rename from qemu-options.h rename to include/qemu/qemu-options.h index b4ee63cd601..4a62c83c453 100644 --- a/qemu-options.h +++ b/include/qemu/qemu-options.h @@ -29,8 +29,13 @@ #define QEMU_OPTIONS_H enum { -#define QEMU_OPTIONS_GENERATE_ENUM -#include "qemu-options-wrapper.h" + +#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \ + opt_enum, +#define DEFHEADING(text) +#define ARCHHEADING(text, arch_mask) + +#include "qemu-options.def" }; #endif diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h index 97cdfd77618..5f1017201f3 100644 --- a/include/qemu/qemu-plugin.h +++ b/include/qemu/qemu-plugin.h @@ -525,6 +525,15 @@ qemu_plugin_register_vcpu_syscall_ret_cb(qemu_plugin_id_t id, char *qemu_plugin_insn_disas(const struct qemu_plugin_insn *insn); +/** + * qemu_plugin_insn_symbol() - best effort symbol lookup + * @insn: instruction reference + * + * Return a static string referring to the symbol. This is dependent + * on the binary QEMU is running having provided a symbol table. + */ +const char *qemu_plugin_insn_symbol(const struct qemu_plugin_insn *insn); + /** * qemu_plugin_vcpu_for_each() - iterate over the existing vCPU * @id: plugin ID @@ -540,6 +549,19 @@ void qemu_plugin_vcpu_for_each(qemu_plugin_id_t id, void qemu_plugin_register_flush_cb(qemu_plugin_id_t id, qemu_plugin_simple_cb_t cb); +/** + * qemu_plugin_register_atexit_cb() - register exit callback + * @id: plugin ID + * @cb: callback + * @userdata: user data for callback + * + * The @cb function is called once execution has finished. Plugins + * should be able to free all their resources at this point much like + * after a reset/uninstall callback is called. + * + * In user-mode it is possible a few un-instrumented instructions from + * child threads may run before the host kernel reaps the threads. 
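/*
 * Illustrative sketch of a minimal TCG plugin using the atexit hook
 * documented above (the sketch_* names and the message are hypothetical):
 */
#include <qemu-plugin.h>

QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

static void sketch_exit(qemu_plugin_id_t id, void *userdata)
{
    /* callbacks from all vCPUs/threads have been cleared by this point */
    qemu_plugin_outs("sketch plugin: done\n");
}

QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
                                           const qemu_info_t *info,
                                           int argc, char **argv)
{
    qemu_plugin_register_atexit_cb(id, sketch_exit, NULL);
    return 0;
}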
+ */ void qemu_plugin_register_atexit_cb(qemu_plugin_id_t id, qemu_plugin_udata_cb_t cb, void *userdata); @@ -555,4 +577,17 @@ int qemu_plugin_n_max_vcpus(void); */ void qemu_plugin_outs(const char *string); +/** + * qemu_plugin_bool_parse() - parses a boolean argument in the form of + * "=[on|yes|true|off|no|false]" + * + * @name: argument name, the part before the equals sign + * @val: argument value, what's after the equals sign + * @ret: output return value + * + * returns true if the combination @name=@val parses correctly to a boolean + * argument, and false otherwise + */ +bool qemu_plugin_bool_parse(const char *name, const char *val, bool *ret); + #endif /* QEMU_PLUGIN_API_H */ diff --git a/include/qemu/ratelimit.h b/include/qemu/ratelimit.h index 01da8d63f14..48bf59e8572 100644 --- a/include/qemu/ratelimit.h +++ b/include/qemu/ratelimit.h @@ -14,9 +14,11 @@ #ifndef QEMU_RATELIMIT_H #define QEMU_RATELIMIT_H +#include "qemu/lockable.h" #include "qemu/timer.h" typedef struct { + QemuMutex lock; int64_t slice_start_time; int64_t slice_end_time; uint64_t slice_quota; @@ -40,7 +42,12 @@ static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n) int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); double delay_slices; - assert(limit->slice_quota && limit->slice_ns); + QEMU_LOCK_GUARD(&limit->lock); + if (!limit->slice_quota) { + /* Throttling disabled. */ + return 0; + } + assert(limit->slice_ns); if (limit->slice_end_time < now) { /* Previous, possibly extended, time slice finished; reset the @@ -65,11 +72,26 @@ static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n) return limit->slice_end_time - now; } +static inline void ratelimit_init(RateLimit *limit) +{ + qemu_mutex_init(&limit->lock); +} + +static inline void ratelimit_destroy(RateLimit *limit) +{ + qemu_mutex_destroy(&limit->lock); +} + static inline void ratelimit_set_speed(RateLimit *limit, uint64_t speed, uint64_t slice_ns) { + QEMU_LOCK_GUARD(&limit->lock); limit->slice_ns = slice_ns; - limit->slice_quota = MAX(((double)speed * slice_ns) / 1000000000ULL, 1); + if (speed == 0) { + limit->slice_quota = 0; + } else { + limit->slice_quota = MAX(((double)speed * slice_ns) / 1000000000ULL, 1); + } } #endif diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index 515d327cf11..b063c6fde81 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -27,7 +27,9 @@ #include "qemu/thread.h" #include "qemu/queue.h" #include "qemu/atomic.h" +#include "qemu/notify.h" #include "qemu/sys_membarrier.h" +#include "qemu/coroutine-tls.h" #ifdef __cplusplus extern "C" { @@ -66,13 +68,20 @@ struct rcu_reader_data { /* Data used for registry, protected by rcu_registry_lock */ QLIST_ENTRY(rcu_reader_data) node; + + /* + * NotifierList used to force an RCU grace period. Accessed under + * rcu_registry_lock. Note that the notifier is called _outside_ + * the thread! 
+ */ + NotifierList force_rcu; }; -extern __thread struct rcu_reader_data rcu_reader; +QEMU_DECLARE_CO_TLS(struct rcu_reader_data, rcu_reader) static inline void rcu_read_lock(void) { - struct rcu_reader_data *p_rcu_reader = &rcu_reader; + struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader(); unsigned ctr; if (p_rcu_reader->depth++ > 0) { @@ -88,7 +97,7 @@ static inline void rcu_read_lock(void) static inline void rcu_read_unlock(void) { - struct rcu_reader_data *p_rcu_reader = &rcu_reader; + struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader(); assert(p_rcu_reader->depth != 0); if (--p_rcu_reader->depth > 0) { @@ -180,6 +189,13 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(RCUReadAuto, rcu_read_auto_unlock) #define RCU_READ_LOCK_GUARD() \ g_autoptr(RCUReadAuto) _rcu_read_auto __attribute__((unused)) = rcu_read_auto_lock() +/* + * Force-RCU notifiers tell readers that they should exit their + * read-side critical section. + */ +void rcu_add_force_rcu_notifier(Notifier *n); +void rcu_remove_force_rcu_notifier(Notifier *n); + #ifdef __cplusplus } #endif diff --git a/include/qemu/selfmap.h b/include/qemu/selfmap.h index 8382c4c779d..80cf920fbad 100644 --- a/include/qemu/selfmap.h +++ b/include/qemu/selfmap.h @@ -6,8 +6,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _SELFMAP_H_ -#define _SELFMAP_H_ +#ifndef SELFMAP_H +#define SELFMAP_H typedef struct { unsigned long start; diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 7d1f8135767..0c34bf23987 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -111,4 +111,15 @@ SocketAddress *socket_remote_address(int fd, Error **errp); */ SocketAddress *socket_address_flatten(SocketAddressLegacy *addr); +/** + * socket_address_parse_named_fd: + * + * Modify @addr, replacing a named fd by its corresponding number. + * Needed for callers that plan to pass @addr to a context where the + * current monitor is not available. + * + * Return 0 on success. + */ +int socket_address_parse_named_fd(SocketAddress *addr, Error **errp); + #endif /* QEMU_SOCKETS_H */ diff --git a/include/qemu/stats64.h b/include/qemu/stats64.h index fdd3d1b8f98..802402254b6 100644 --- a/include/qemu/stats64.h +++ b/include/qemu/stats64.h @@ -21,7 +21,7 @@ typedef struct Stat64 { #ifdef CONFIG_ATOMIC64 - uint64_t value; + aligned_uint64_t value; #else uint32_t low, high; uint32_t lock; diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h index c903525062e..bda54e599c8 100644 --- a/include/qemu/thread-posix.h +++ b/include/qemu/thread-posix.h @@ -4,11 +4,9 @@ #include #include -typedef QemuMutex QemuRecMutex; -#define qemu_rec_mutex_destroy qemu_mutex_destroy -#define qemu_rec_mutex_lock_impl qemu_mutex_lock_impl -#define qemu_rec_mutex_trylock_impl qemu_mutex_trylock_impl -#define qemu_rec_mutex_unlock qemu_mutex_unlock +#ifdef __FreeBSD__ +#include +#endif struct QemuMutex { pthread_mutex_t lock; @@ -19,6 +17,14 @@ struct QemuMutex { bool initialized; }; +/* + * QemuRecMutex cannot be a typedef of QemuMutex lest we have two + * compatible cases in _Generic. See qemu/lockable.h. 
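/*
 * Illustrative sketch of the force-RCU notifier from the rcu.h hunk above
 * (the sketch_* names are hypothetical): a long-running reader registers a
 * notifier so that, when a writer needs a grace period, the reader can be
 * asked to leave its read-side critical section.
 */
#include "qemu/osdep.h"
#include "qemu/rcu.h"

static bool sketch_exit_requested;

static void sketch_force_rcu(Notifier *n, void *data)
{
    /* runs outside the reader thread; just raise a flag the reader polls */
    qatomic_set(&sketch_exit_requested, true);
}

static void sketch_reader(void)
{
    Notifier n = { .notify = sketch_force_rcu };

    rcu_add_force_rcu_notifier(&n);
    rcu_read_lock();
    while (!qatomic_read(&sketch_exit_requested)) {
        /* ... walk RCU-protected data ... */
    }
    rcu_read_unlock();
    rcu_remove_force_rcu_notifier(&n);
}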
+ */ +typedef struct QemuRecMutex { + QemuMutex m; +} QemuRecMutex; + struct QemuCond { pthread_cond_t cond; bool initialized; diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h index d0a1a9597eb..d95af4498fc 100644 --- a/include/qemu/thread-win32.h +++ b/include/qemu/thread-win32.h @@ -18,12 +18,6 @@ struct QemuRecMutex { bool initialized; }; -void qemu_rec_mutex_destroy(QemuRecMutex *mutex); -void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line); -int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, - int line); -void qemu_rec_mutex_unlock(QemuRecMutex *mutex); - struct QemuCond { CONDITION_VARIABLE var; bool initialized; diff --git a/include/qemu/thread.h b/include/qemu/thread.h index 54357631846..460568d67d5 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -28,6 +28,12 @@ int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line); void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line); void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line); +void qemu_rec_mutex_init(QemuRecMutex *mutex); +void qemu_rec_mutex_destroy(QemuRecMutex *mutex); +void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line); +int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, int line); +void qemu_rec_mutex_unlock_impl(QemuRecMutex *mutex, const char *file, int line); + typedef void (*QemuMutexLockFunc)(QemuMutex *m, const char *f, int l); typedef int (*QemuMutexTrylockFunc)(QemuMutex *m, const char *f, int l); typedef void (*QemuRecMutexLockFunc)(QemuRecMutex *m, const char *f, int l); @@ -104,6 +110,9 @@ extern QemuCondTimedWaitFunc qemu_cond_timedwait_func; #define qemu_mutex_unlock(mutex) \ qemu_mutex_unlock_impl(mutex, __FILE__, __LINE__) +#define qemu_rec_mutex_unlock(mutex) \ + qemu_rec_mutex_unlock_impl(mutex, __FILE__, __LINE__) + static inline void (qemu_mutex_lock)(QemuMutex *mutex) { qemu_mutex_lock(mutex); @@ -129,8 +138,10 @@ static inline int (qemu_rec_mutex_trylock)(QemuRecMutex *mutex) return qemu_rec_mutex_trylock(mutex); } -/* Prototypes for other functions are in thread-posix.h/thread-win32.h. */ -void qemu_rec_mutex_init(QemuRecMutex *mutex); +static inline void (qemu_rec_mutex_unlock)(QemuRecMutex *mutex) +{ + qemu_rec_mutex_unlock(mutex); +} void qemu_cond_init(QemuCond *cond); void qemu_cond_destroy(QemuCond *cond); diff --git a/include/qemu/transactions.h b/include/qemu/transactions.h new file mode 100644 index 00000000000..2f2060acd90 --- /dev/null +++ b/include/qemu/transactions.h @@ -0,0 +1,66 @@ +/* + * Simple transactions API + * + * Copyright (c) 2021 Virtuozzo International GmbH. + * + * Author: + * Vladimir Sementsov-Ogievskiy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * + * = Generic transaction API = + * + * The intended usage is the following: you create "prepare" functions, which + * represents the actions. 
They will usually have Transaction* argument, and + * call tran_add() to register finalization callbacks. For finalization + * callbacks, prepare corresponding TransactionActionDrv structures. + * + * Then, when you need to make a transaction, create an empty Transaction by + * tran_create(), call your "prepare" functions on it, and finally call + * tran_abort() or tran_commit() to finalize the transaction by corresponding + * finalization actions in reverse order. + * + * The clean() functions registered by the drivers in a transaction are called + * last, after all abort() or commit() functions have been called. + */ + +#ifndef QEMU_TRANSACTIONS_H +#define QEMU_TRANSACTIONS_H + +#include + +typedef struct TransactionActionDrv { + void (*abort)(void *opaque); + void (*commit)(void *opaque); + void (*clean)(void *opaque); +} TransactionActionDrv; + +typedef struct Transaction Transaction; + +Transaction *tran_new(void); +void tran_add(Transaction *tran, TransactionActionDrv *drv, void *opaque); +void tran_abort(Transaction *tran); +void tran_commit(Transaction *tran); + +static inline void tran_finalize(Transaction *tran, int ret) +{ + if (ret < 0) { + tran_abort(tran); + } else { + tran_commit(tran); + } +} + +#endif /* QEMU_TRANSACTIONS_H */ diff --git a/include/qemu/vfio-helpers.h b/include/qemu/vfio-helpers.h index 4491c8e1a6e..bde9495b254 100644 --- a/include/qemu/vfio-helpers.h +++ b/include/qemu/vfio-helpers.h @@ -18,7 +18,7 @@ typedef struct QEMUVFIOState QEMUVFIOState; QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp); void qemu_vfio_close(QEMUVFIOState *s); int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size, - bool temporary, uint64_t *iova_list); + bool temporary, uint64_t *iova_list, Error **errp); int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s); void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host); void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index, diff --git a/include/qom/object.h b/include/qom/object.h index 6721cd312e6..fae096f51cc 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -861,6 +861,29 @@ static void do_qemu_init_ ## type_array(void) \ } \ type_init(do_qemu_init_ ## type_array) +/** + * type_print_class_properties: + * @type: a QOM class name + * + * Print the object's class properties to stdout or the monitor. + * Return whether an object was found. + */ +bool type_print_class_properties(const char *type); + +/** + * object_set_properties_from_keyval: + * @obj: a QOM object + * @qdict: a dictionary with the properties to be set + * @from_json: true if leaf values of @qdict are typed, false if they + * are strings + * @errp: pointer to error object + * + * For each key in the dictionary, parse the value string if needed, + * then set the corresponding property in @obj. + */ +void object_set_properties_from_keyval(Object *obj, const QDict *qdict, + bool from_json, Error **errp); + /** * object_class_dynamic_cast_assert: * @klass: The #ObjectClass to attempt to cast. @@ -1520,6 +1543,18 @@ Object *object_resolve_path(const char *path, bool *ambiguous); Object *object_resolve_path_type(const char *path, const char *typename, bool *ambiguous); +/** + * object_resolve_path_at: + * @parent: the object in which to resolve the path + * @path: the path to resolve + * + * This is like object_resolve_path(), except paths not starting with + * a slash are relative to @parent. + * + * Returns: The resolved object or NULL on path lookup failure. 
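/*
 * Illustrative sketch of the transaction API declared above in
 * include/qemu/transactions.h (the example_* names are hypothetical): a
 * prepare function performs its action and registers how to undo it; the
 * caller then commits or aborts everything at once.
 */
#include "qemu/osdep.h"
#include "qemu/transactions.h"

static void example_set_abort(void *opaque)
{
    *(int *)opaque = 0;                  /* roll the change back */
}

static TransactionActionDrv example_set_drv = {
    .abort = example_set_abort,
};

static void example_set_prepare(int *value, Transaction *tran)
{
    *value = 42;                         /* do the action up front */
    tran_add(tran, &example_set_drv, value);
}

static int example_caller(int *value)
{
    Transaction *tran = tran_new();
    int ret = 0;

    example_set_prepare(value, tran);
    /* ... further prepare calls may set ret < 0 on failure ... */
    tran_finalize(tran, ret);            /* commit on success, abort otherwise */
    return ret;
}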
+ */ +Object *object_resolve_path_at(Object *parent, const char *path); + /** * object_resolve_path_component: * @parent: the object in which to resolve the path diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h index 215d01b4eca..204cfb8640d 100644 --- a/include/standard-headers/asm-x86/kvm_para.h +++ b/include/standard-headers/asm-x86/kvm_para.h @@ -33,6 +33,8 @@ #define KVM_FEATURE_PV_SCHED_YIELD 13 #define KVM_FEATURE_ASYNC_PF_INT 14 #define KVM_FEATURE_MSI_EXT_DEST_ID 15 +#define KVM_FEATURE_HC_MAP_GPA_RANGE 16 +#define KVM_FEATURE_MIGRATION_CONTROL 17 #define KVM_HINTS_REALTIME 0 @@ -54,6 +56,7 @@ #define MSR_KVM_POLL_CONTROL 0x4b564d05 #define MSR_KVM_ASYNC_PF_INT 0x4b564d06 #define MSR_KVM_ASYNC_PF_ACK 0x4b564d07 +#define MSR_KVM_MIGRATION_CONTROL 0x4b564d08 struct kvm_steal_time { uint64_t steal; @@ -90,6 +93,16 @@ struct kvm_clock_pairing { /* MSR_KVM_ASYNC_PF_INT */ #define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) +/* MSR_KVM_MIGRATION_CONTROL */ +#define KVM_MIGRATION_READY (1 << 0) + +/* KVM_HC_MAP_GPA_RANGE */ +#define KVM_MAP_GPA_RANGE_PAGE_SZ_4K 0 +#define KVM_MAP_GPA_RANGE_PAGE_SZ_2M (1 << 0) +#define KVM_MAP_GPA_RANGE_PAGE_SZ_1G (1 << 1) +#define KVM_MAP_GPA_RANGE_ENC_STAT(n) (n << 4) +#define KVM_MAP_GPA_RANGE_ENCRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(1) +#define KVM_MAP_GPA_RANGE_DECRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(0) /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE 1 diff --git a/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 1677208a411..94d41b202c9 100644 --- a/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -70,30 +70,6 @@ enum pvrdma_mtu { PVRDMA_MTU_4096 = 5, }; -static inline int pvrdma_mtu_enum_to_int(enum pvrdma_mtu mtu) -{ - switch (mtu) { - case PVRDMA_MTU_256: return 256; - case PVRDMA_MTU_512: return 512; - case PVRDMA_MTU_1024: return 1024; - case PVRDMA_MTU_2048: return 2048; - case PVRDMA_MTU_4096: return 4096; - default: return -1; - } -} - -static inline enum pvrdma_mtu pvrdma_mtu_int_to_enum(int mtu) -{ - switch (mtu) { - case 256: return PVRDMA_MTU_256; - case 512: return PVRDMA_MTU_512; - case 1024: return PVRDMA_MTU_1024; - case 2048: return PVRDMA_MTU_2048; - case 4096: - default: return PVRDMA_MTU_4096; - } -} - enum pvrdma_port_state { PVRDMA_PORT_NOP = 0, PVRDMA_PORT_DOWN = 1, @@ -138,17 +114,6 @@ enum pvrdma_port_width { PVRDMA_WIDTH_12X = 8, }; -static inline int pvrdma_width_enum_to_int(enum pvrdma_port_width width) -{ - switch (width) { - case PVRDMA_WIDTH_1X: return 1; - case PVRDMA_WIDTH_4X: return 4; - case PVRDMA_WIDTH_8X: return 8; - case PVRDMA_WIDTH_12X: return 12; - default: return -1; - } -} - enum pvrdma_port_speed { PVRDMA_SPEED_SDR = 1, PVRDMA_SPEED_DDR = 2, diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h index c47e19810c0..352b51fd0ac 100644 --- a/include/standard-headers/drm/drm_fourcc.h +++ b/include/standard-headers/drm/drm_fourcc.h @@ -167,6 +167,13 @@ extern "C" { #define DRM_FORMAT_RGBA1010102 fourcc_code('R', 'A', '3', '0') /* [31:0] R:G:B:A 10:10:10:2 little endian */ #define DRM_FORMAT_BGRA1010102 fourcc_code('B', 'A', '3', '0') /* [31:0] B:G:R:A 10:10:10:2 little endian */ +/* 64 bpp RGB */ +#define DRM_FORMAT_XRGB16161616 fourcc_code('X', 'R', '4', '8') /* [63:0] x:R:G:B 16:16:16:16 little endian */ 
+#define DRM_FORMAT_XBGR16161616 fourcc_code('X', 'B', '4', '8') /* [63:0] x:B:G:R 16:16:16:16 little endian */ + +#define DRM_FORMAT_ARGB16161616 fourcc_code('A', 'R', '4', '8') /* [63:0] A:R:G:B 16:16:16:16 little endian */ +#define DRM_FORMAT_ABGR16161616 fourcc_code('A', 'B', '4', '8') /* [63:0] A:B:G:R 16:16:16:16 little endian */ + /* * Floating point 64bpp RGB * IEEE 754-2008 binary16 half-precision float @@ -526,6 +533,25 @@ extern "C" { */ #define I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS fourcc_mod_code(INTEL, 7) +/* + * Intel Color Control Surface with Clear Color (CCS) for Gen-12 render + * compression. + * + * The main surface is Y-tiled and is at plane index 0 whereas CCS is linear + * and at index 1. The clear color is stored at index 2, and the pitch should + * be ignored. The clear color structure is 256 bits. The first 128 bits + * represents Raw Clear Color Red, Green, Blue and Alpha color each represented + * by 32 bits. The raw clear color is consumed by the 3d engine and generates + * the converted clear color of size 64 bits. The first 32 bits store the Lower + * Converted Clear Color value and the next 32 bits store the Higher Converted + * Clear Color value when applicable. The Converted Clear Color values are + * consumed by the DE. The last 64 bits are used to store Color Discard Enable + * and Depth Clear Value Valid which are ignored by the DE. A CCS cache line + * corresponds to an area of 4x1 tiles in the main surface. The main surface + * pitch is required to be a multiple of 4 tile widths. + */ +#define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC fourcc_mod_code(INTEL, 8) + /* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks * @@ -1035,9 +1061,9 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) * Not all combinations are valid, and different SoCs may support different * combinations of layout and options. */ -#define __fourcc_mod_amlogic_layout_mask 0xf +#define __fourcc_mod_amlogic_layout_mask 0xff #define __fourcc_mod_amlogic_options_shift 8 -#define __fourcc_mod_amlogic_options_mask 0xf +#define __fourcc_mod_amlogic_options_mask 0xff #define DRM_FORMAT_MOD_AMLOGIC_FBC(__layout, __options) \ fourcc_mod_code(AMLOGIC, \ diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index 8bfd01d230d..053d3fafdf3 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -26,6 +26,14 @@ * have the same layout for 32-bit and 64-bit userland. */ +/* Note on reserved space. + * Reserved fields must not be accessed directly by user space because + * they may be replaced by a different field in the future. They must + * be initialized to zero before making the request, e.g. via memset + * of the entire structure or implicitly by not being set in a structure + * initializer. + */ + /** * struct ethtool_cmd - DEPRECATED, link control and status * This structure is DEPRECATED, please use struct ethtool_link_settings. @@ -67,6 +75,7 @@ * and other link features that the link partner advertised * through autonegotiation; 0 if unknown or not applicable. * Read-only. + * @reserved: Reserved for future use; see the note on reserved space. * * The link speed in Mbps is split between @speed and @speed_hi. Use * the ethtool_cmd_speed() and ethtool_cmd_speed_set() functions to @@ -155,6 +164,7 @@ static inline uint32_t ethtool_cmd_speed(const struct ethtool_cmd *ep) * @bus_info: Device bus address. 
This should match the dev_name() * string for the underlying bus device, if there is one. May be * an empty string. + * @reserved2: Reserved for future use; see the note on reserved space. * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and * %ETHTOOL_SPFLAGS commands; also the number of strings in the * %ETH_SS_PRIV_FLAGS set @@ -223,7 +233,7 @@ enum tunable_id { ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */ /* * Add your fresh new tunable attribute above and remember to update - * tunable_strings[] in net/core/ethtool.c + * tunable_strings[] in net/ethtool/common.c */ __ETHTOOL_TUNABLE_COUNT, }; @@ -287,7 +297,7 @@ enum phy_tunable_id { ETHTOOL_PHY_EDPD, /* * Add your fresh new phy tunable attribute above and remember to update - * phy_tunable_strings[] in net/core/ethtool.c + * phy_tunable_strings[] in net/ethtool/common.c */ __ETHTOOL_PHY_TUNABLE_COUNT, }; @@ -356,6 +366,7 @@ struct ethtool_eeprom { * @tx_lpi_timer: Time in microseconds the interface delays prior to asserting * its tx lpi (after reaching 'idle' state). Effective only when eee * was negotiated and tx_lpi_enabled was set. + * @reserved: Reserved for future use; see the note on reserved space. */ struct ethtool_eee { uint32_t cmd; @@ -374,6 +385,7 @@ struct ethtool_eee { * @cmd: %ETHTOOL_GMODULEINFO * @type: Standard the module information conforms to %ETH_MODULE_SFF_xxxx * @eeprom_len: Length of the eeprom + * @reserved: Reserved for future use; see the note on reserved space. * * This structure is used to return the information to * properly size memory for a subsequent call to %ETHTOOL_GMODULEEEPROM. @@ -579,9 +591,7 @@ struct ethtool_pauseparam { uint32_t tx_pause; }; -/** - * enum ethtool_link_ext_state - link extended state - */ +/* Link extended state */ enum ethtool_link_ext_state { ETHTOOL_LINK_EXT_STATE_AUTONEG, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, @@ -595,10 +605,7 @@ enum ethtool_link_ext_state { ETHTOOL_LINK_EXT_STATE_OVERHEAT, }; -/** - * enum ethtool_link_ext_substate_autoneg - more information in addition to - * ETHTOOL_LINK_EXT_STATE_AUTONEG. - */ +/* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. */ enum ethtool_link_ext_substate_autoneg { ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED = 1, ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED, @@ -608,9 +615,7 @@ enum ethtool_link_ext_substate_autoneg { ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD, }; -/** - * enum ethtool_link_ext_substate_link_training - more information in addition to - * ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. +/* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. */ enum ethtool_link_ext_substate_link_training { ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED = 1, @@ -619,9 +624,7 @@ enum ethtool_link_ext_substate_link_training { ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT, }; -/** - * enum ethtool_link_ext_substate_logical_mismatch - more information in addition - * to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. +/* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. */ enum ethtool_link_ext_substate_link_logical_mismatch { ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK = 1, @@ -631,19 +634,14 @@ enum ethtool_link_ext_substate_link_logical_mismatch { ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED, }; -/** - * enum ethtool_link_ext_substate_bad_signal_integrity - more information in - * addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. 
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. */ enum ethtool_link_ext_substate_bad_signal_integrity { ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1, ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE, }; -/** - * enum ethtool_link_ext_substate_cable_issue - more information in - * addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. - */ +/* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. */ enum ethtool_link_ext_substate_cable_issue { ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE = 1, ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE, @@ -661,6 +659,7 @@ enum ethtool_link_ext_substate_cable_issue { * now deprecated * @ETH_SS_FEATURES: Device feature names * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names + * @ETH_SS_TUNABLES: tunable names * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS * @ETH_SS_PHY_TUNABLES: PHY tunable names * @ETH_SS_LINK_MODES: link mode names @@ -670,6 +669,13 @@ enum ethtool_link_ext_substate_cable_issue { * @ETH_SS_TS_TX_TYPES: timestamping Tx types * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types + * @ETH_SS_STATS_STD: standardized stats + * @ETH_SS_STATS_ETH_PHY: names of IEEE 802.3 PHY statistics + * @ETH_SS_STATS_ETH_MAC: names of IEEE 802.3 MAC statistics + * @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics + * @ETH_SS_STATS_RMON: names of RMON statistics + * + * @ETH_SS_COUNT: number of defined string sets */ enum ethtool_stringset { ETH_SS_TEST = 0, @@ -688,6 +694,11 @@ enum ethtool_stringset { ETH_SS_TS_TX_TYPES, ETH_SS_TS_RX_FILTERS, ETH_SS_UDP_TUNNEL_TYPES, + ETH_SS_STATS_STD, + ETH_SS_STATS_ETH_PHY, + ETH_SS_STATS_ETH_MAC, + ETH_SS_STATS_ETH_CTRL, + ETH_SS_STATS_RMON, /* add new constants above here */ ETH_SS_COUNT @@ -715,6 +726,7 @@ struct ethtool_gstrings { /** * struct ethtool_sset_info - string set information * @cmd: Command number = %ETHTOOL_GSSET_INFO + * @reserved: Reserved for future use; see the note on reserved space. * @sset_mask: On entry, a bitmask of string sets to query, with bits * numbered according to &enum ethtool_stringset. On return, a * bitmask of those string sets queried that are supported. @@ -759,6 +771,7 @@ enum ethtool_test_flags { * @flags: A bitmask of flags from &enum ethtool_test_flags. Some * flags may be set by the user on entry; others may be set by * the driver on return. + * @reserved: Reserved for future use; see the note on reserved space. * @len: On return, the number of test results * @data: Array of test results * @@ -959,6 +972,7 @@ union ethtool_flow_union { * @vlan_etype: VLAN EtherType * @vlan_tci: VLAN tag control information * @data: user defined data + * @padding: Reserved for future use; see the note on reserved space. * * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT * is set in &struct ethtool_rx_flow_spec @flow_type. @@ -1134,7 +1148,8 @@ struct ethtool_rxfh_indir { * hardware hash key. * @hfunc: Defines the current RSS hash function used by HW (or to be set to). * Valid values are one of the %ETH_RSS_HASH_*. - * @rsvd: Reserved for future extensions. + * @rsvd8: Reserved for future use; see the note on reserved space. + * @rsvd32: Reserved for future use; see the note on reserved space. * @rss_config: RX ring/queue index for each hash value i.e., indirection table * of @indir_size uint32_t elements, followed by hash key of @key_size * bytes. 
@@ -1302,7 +1317,9 @@ struct ethtool_sfeatures { * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags * @phc_index: device index of the associated PHC, or -1 if there is none * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values + * @tx_reserved: Reserved for future use; see the note on reserved space. * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values + * @rx_reserved: Reserved for future use; see the note on reserved space. * * The bits in the 'tx_types' and 'rx_filters' fields correspond to * the 'hwtstamp_tx_types' and 'hwtstamp_rx_filters' enumeration values, @@ -1376,15 +1393,33 @@ struct ethtool_per_queue_op { }; /** - * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters + * struct ethtool_fecparam - Ethernet Forward Error Correction parameters * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM - * @active_fec: FEC mode which is active on porte - * @fec: Bitmask of supported/configured FEC modes - * @rsvd: Reserved for future extensions. i.e FEC bypass feature. + * @active_fec: FEC mode which is active on the port, single bit set, GET only. + * @fec: Bitmask of configured FEC modes. + * @reserved: Reserved for future extensions, ignore on GET, write 0 for SET. * - * Drivers should reject a non-zero setting of @autoneg when - * autoneogotiation is disabled (or not supported) for the link. + * Note that @reserved was never validated on input and ethtool user space + * left it uninitialized when calling SET. Hence going forward it can only be + * used to return a value to userspace with GET. + * + * FEC modes supported by the device can be read via %ETHTOOL_GLINKSETTINGS. + * FEC settings are configured by link autonegotiation whenever it's enabled. + * With autoneg on %ETHTOOL_GFECPARAM can be used to read the current mode. + * + * When autoneg is disabled %ETHTOOL_SFECPARAM controls the FEC settings. + * It is recommended that drivers only accept a single bit set in @fec. + * When multiple bits are set in @fec drivers may pick mode in an implementation + * dependent way. Drivers should reject mixing %ETHTOOL_FEC_AUTO_BIT with other + * FEC modes, because it's unclear whether in this case other modes constrain + * AUTO or are independent choices. + * Drivers must reject SET requests if they support none of the requested modes. + * + * If device does not support FEC drivers may use %ETHTOOL_FEC_NONE instead + * of returning %EOPNOTSUPP from %ETHTOOL_GFECPARAM. * + * See enum ethtool_fec_config_bits for definition of valid bits for both + * @fec and @active_fec. */ struct ethtool_fecparam { uint32_t cmd; @@ -1396,11 +1431,16 @@ struct ethtool_fecparam { /** * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration - * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported - * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver - * @ETHTOOL_FEC_OFF: No FEC Mode - * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode - * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode + * @ETHTOOL_FEC_NONE_BIT: FEC mode configuration is not supported. Should not + * be used together with other bits. GET only. + * @ETHTOOL_FEC_AUTO_BIT: Select default/best FEC mode automatically, usually + * based link mode and SFP parameters read from module's + * EEPROM. This bit does _not_ mean autonegotiation. 
+ * @ETHTOOL_FEC_OFF_BIT: No FEC Mode + * @ETHTOOL_FEC_RS_BIT: Reed-Solomon FEC Mode + * @ETHTOOL_FEC_BASER_BIT: Base-R/Reed-Solomon FEC Mode + * @ETHTOOL_FEC_LLRS_BIT: Low Latency Reed Solomon FEC Mode (25G/50G Ethernet + * Consortium) */ enum ethtool_fec_config_bits { ETHTOOL_FEC_NONE_BIT, @@ -1958,6 +1998,11 @@ enum ethtool_reset_flags { * autonegotiation; 0 if unknown or not applicable. Read-only. * @transceiver: Used to distinguish different possible PHY types, * reported consistently by PHYLIB. Read-only. + * @master_slave_cfg: Master/slave port mode. + * @master_slave_state: Master/slave port state. + * @reserved: Reserved for future use; see the note on reserved space. + * @reserved1: Reserved for future use; see the note on reserved space. + * @link_mode_masks: Variable length bitmaps. * * If autonegotiation is disabled, the speed and @duplex represent the * fixed link mode and are writable if the driver supports multiple diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h index 950d7edb7ef..cce105bfbab 100644 --- a/include/standard-headers/linux/fuse.h +++ b/include/standard-headers/linux/fuse.h @@ -179,6 +179,8 @@ * 7.33 * - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID, FATTR_KILL_SUIDGID * - add FUSE_OPEN_KILL_SUIDGID + * - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT + * - add FUSE_SETXATTR_ACL_KILL_SGID */ #ifndef _LINUX_FUSE_H @@ -326,6 +328,7 @@ struct fuse_file_lock { * does not have CAP_FSETID. Additionally upon * write/truncate sgid is killed only if file has group * execute permission. (Same as Linux VFS behavior). + * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -356,6 +359,7 @@ struct fuse_file_lock { #define FUSE_MAP_ALIGNMENT (1 << 26) #define FUSE_SUBMOUNTS (1 << 27) #define FUSE_HANDLE_KILLPRIV_V2 (1 << 28) +#define FUSE_SETXATTR_EXT (1 << 29) /** * CUSE INIT request/reply flags @@ -447,6 +451,12 @@ struct fuse_file_lock { */ #define FUSE_OPEN_KILL_SUIDGID (1 << 0) +/** + * setxattr flags + * FUSE_SETXATTR_ACL_KILL_SGID: Clear SGID when system.posix_acl_access is set + */ +#define FUSE_SETXATTR_ACL_KILL_SGID (1 << 0) + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -677,9 +687,13 @@ struct fuse_fsync_in { uint32_t padding; }; +#define FUSE_COMPAT_SETXATTR_IN_SIZE 8 + struct fuse_setxattr_in { uint32_t size; uint32_t flags; + uint32_t setxattr_flags; + uint32_t padding; }; struct fuse_getxattr_in { @@ -899,7 +913,8 @@ struct fuse_notify_retrieve_in { }; /* Device ioctls: */ -#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) +#define FUSE_DEV_IOC_MAGIC 229 +#define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) struct fuse_lseek_in { uint64_t fh; diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index c403b9cb0d4..b5e86b40abd 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -611,6 +611,7 @@ #define KEY_VOICECOMMAND 0x246 /* Listening Voice Command */ #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ +#define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ diff --git a/include/standard-headers/linux/input.h 
b/include/standard-headers/linux/input.h index f89c986190d..7822c241784 100644 --- a/include/standard-headers/linux/input.h +++ b/include/standard-headers/linux/input.h @@ -81,7 +81,7 @@ struct input_id { * in units per radian. * When INPUT_PROP_ACCELEROMETER is set the resolution changes. * The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in - * in units per g (units/g) and in units per degree per second + * units per g (units/g) and in units per degree per second * (units/deg/s) for rotational axes (ABS_RX, ABS_RY, ABS_RZ). */ struct input_absinfo { diff --git a/include/standard-headers/linux/udmabuf.h b/include/standard-headers/linux/udmabuf.h new file mode 100644 index 00000000000..e19eb5b5ce7 --- /dev/null +++ b/include/standard-headers/linux/udmabuf.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_UDMABUF_H +#define _LINUX_UDMABUF_H + +#include "standard-headers/linux/types.h" + +#define UDMABUF_FLAGS_CLOEXEC 0x01 + +struct udmabuf_create { + uint32_t memfd; + uint32_t flags; + uint64_t offset; + uint64_t size; +}; + +struct udmabuf_create_item { + uint32_t memfd; + uint32_t __pad; + uint64_t offset; + uint64_t size; +}; + +struct udmabuf_create_list { + uint32_t flags; + uint32_t count; + struct udmabuf_create_item list[]; +}; + +#define UDMABUF_CREATE _IOW('u', 0x42, struct udmabuf_create) +#define UDMABUF_CREATE_LIST _IOW('u', 0x43, struct udmabuf_create_list) + +#endif /* _LINUX_UDMABUF_H */ diff --git a/include/standard-headers/linux/virtio_bt.h b/include/standard-headers/linux/virtio_bt.h new file mode 100644 index 00000000000..245e1eff4b9 --- /dev/null +++ b/include/standard-headers/linux/virtio_bt.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#ifndef _LINUX_VIRTIO_BT_H +#define _LINUX_VIRTIO_BT_H + +#include "standard-headers/linux/virtio_types.h" + +/* Feature bits */ +#define VIRTIO_BT_F_VND_HCI 0 /* Indicates vendor command support */ +#define VIRTIO_BT_F_MSFT_EXT 1 /* Indicates MSFT vendor support */ +#define VIRTIO_BT_F_AOSP_EXT 2 /* Indicates AOSP vendor support */ + +enum virtio_bt_config_type { + VIRTIO_BT_CONFIG_TYPE_PRIMARY = 0, + VIRTIO_BT_CONFIG_TYPE_AMP = 1, +}; + +enum virtio_bt_config_vendor { + VIRTIO_BT_CONFIG_VENDOR_NONE = 0, + VIRTIO_BT_CONFIG_VENDOR_ZEPHYR = 1, + VIRTIO_BT_CONFIG_VENDOR_INTEL = 2, + VIRTIO_BT_CONFIG_VENDOR_REALTEK = 3, +}; + +struct virtio_bt_config { + uint8_t type; + uint16_t vendor; + uint16_t msft_opcode; +} QEMU_PACKED; + +#endif /* _LINUX_VIRTIO_BT_H */ diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h index bc1c0621f5e..4fe842c3a3a 100644 --- a/include/standard-headers/linux/virtio_ids.h +++ b/include/standard-headers/linux/virtio_ids.h @@ -51,8 +51,10 @@ #define VIRTIO_ID_PSTORE 22 /* virtio pstore device */ #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ #define VIRTIO_ID_MEM 24 /* virtio mem */ +#define VIRTIO_ID_SOUND 25 /* virtio sound */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_BT 40 /* virtio bluetooth */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/standard-headers/linux/virtio_snd.h b/include/standard-headers/linux/virtio_snd.h new file mode 100644 index 00000000000..1af96b9fc61 --- /dev/null +++ b/include/standard-headers/linux/virtio_snd.h @@ -0,0 +1,334 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (C) 2021 OpenSynergy GmbH + */ 
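For the udmabuf interface imported above, the usual host-side flow is to create a size-sealed memfd and hand it to /dev/udmabuf, which returns a dma-buf file descriptor. The sketch below follows that flow using the regular kernel uapi header rather than the QEMU standard-headers copy; it is illustrative only, with minimal error handling and an arbitrary buffer size.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/udmabuf.h>

int main(void)
{
    size_t size = 4 * getpagesize();
    int devfd = open("/dev/udmabuf", O_RDWR);
    int memfd = memfd_create("udmabuf-backing", MFD_ALLOW_SEALING);
    struct udmabuf_create create = { 0 };
    int buffd;

    /* the backing memfd must be sized and must no longer be able to shrink */
    if (ftruncate(memfd, size) < 0 ||
        fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK) < 0) {
        perror("memfd setup");
        return 1;
    }

    create.memfd  = memfd;
    create.flags  = UDMABUF_FLAGS_CLOEXEC;
    create.offset = 0;
    create.size   = size;

    /* on success the ioctl returns a new dma-buf file descriptor */
    buffd = ioctl(devfd, UDMABUF_CREATE, &create);
    if (buffd < 0) {
        perror("UDMABUF_CREATE");
        return 1;
    }
    printf("dma-buf fd %d for %zu bytes\n", buffd, size);

    close(buffd);
    close(memfd);
    close(devfd);
    return 0;
}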
+#ifndef VIRTIO_SND_IF_H +#define VIRTIO_SND_IF_H + +#include "standard-headers/linux/virtio_types.h" + +/******************************************************************************* + * CONFIGURATION SPACE + */ +struct virtio_snd_config { + /* # of available physical jacks */ + uint32_t jacks; + /* # of available PCM streams */ + uint32_t streams; + /* # of available channel maps */ + uint32_t chmaps; +}; + +enum { + /* device virtqueue indexes */ + VIRTIO_SND_VQ_CONTROL = 0, + VIRTIO_SND_VQ_EVENT, + VIRTIO_SND_VQ_TX, + VIRTIO_SND_VQ_RX, + /* # of device virtqueues */ + VIRTIO_SND_VQ_MAX +}; + +/******************************************************************************* + * COMMON DEFINITIONS + */ + +/* supported dataflow directions */ +enum { + VIRTIO_SND_D_OUTPUT = 0, + VIRTIO_SND_D_INPUT +}; + +enum { + /* jack control request types */ + VIRTIO_SND_R_JACK_INFO = 1, + VIRTIO_SND_R_JACK_REMAP, + + /* PCM control request types */ + VIRTIO_SND_R_PCM_INFO = 0x0100, + VIRTIO_SND_R_PCM_SET_PARAMS, + VIRTIO_SND_R_PCM_PREPARE, + VIRTIO_SND_R_PCM_RELEASE, + VIRTIO_SND_R_PCM_START, + VIRTIO_SND_R_PCM_STOP, + + /* channel map control request types */ + VIRTIO_SND_R_CHMAP_INFO = 0x0200, + + /* jack event types */ + VIRTIO_SND_EVT_JACK_CONNECTED = 0x1000, + VIRTIO_SND_EVT_JACK_DISCONNECTED, + + /* PCM event types */ + VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED = 0x1100, + VIRTIO_SND_EVT_PCM_XRUN, + + /* common status codes */ + VIRTIO_SND_S_OK = 0x8000, + VIRTIO_SND_S_BAD_MSG, + VIRTIO_SND_S_NOT_SUPP, + VIRTIO_SND_S_IO_ERR +}; + +/* common header */ +struct virtio_snd_hdr { + uint32_t code; +}; + +/* event notification */ +struct virtio_snd_event { + /* VIRTIO_SND_EVT_XXX */ + struct virtio_snd_hdr hdr; + /* optional event data */ + uint32_t data; +}; + +/* common control request to query an item information */ +struct virtio_snd_query_info { + /* VIRTIO_SND_R_XXX_INFO */ + struct virtio_snd_hdr hdr; + /* item start identifier */ + uint32_t start_id; + /* item count to query */ + uint32_t count; + /* item information size in bytes */ + uint32_t size; +}; + +/* common item information header */ +struct virtio_snd_info { + /* function group node id (High Definition Audio Specification 7.1.2) */ + uint32_t hda_fn_nid; +}; + +/******************************************************************************* + * JACK CONTROL MESSAGES + */ +struct virtio_snd_jack_hdr { + /* VIRTIO_SND_R_JACK_XXX */ + struct virtio_snd_hdr hdr; + /* 0 ... virtio_snd_config::jacks - 1 */ + uint32_t jack_id; +}; + +/* supported jack features */ +enum { + VIRTIO_SND_JACK_F_REMAP = 0 +}; + +struct virtio_snd_jack_info { + /* common header */ + struct virtio_snd_info hdr; + /* supported feature bit map (1 << VIRTIO_SND_JACK_F_XXX) */ + uint32_t features; + /* pin configuration (High Definition Audio Specification 7.3.3.31) */ + uint32_t hda_reg_defconf; + /* pin capabilities (High Definition Audio Specification 7.3.4.9) */ + uint32_t hda_reg_caps; + /* current jack connection status (0: disconnected, 1: connected) */ + uint8_t connected; + + uint8_t padding[7]; +}; + +/* jack remapping control request */ +struct virtio_snd_jack_remap { + /* .code = VIRTIO_SND_R_JACK_REMAP */ + struct virtio_snd_jack_hdr hdr; + /* selected association number */ + uint32_t association; + /* selected sequence number */ + uint32_t sequence; +}; + +/******************************************************************************* + * PCM CONTROL MESSAGES + */ +struct virtio_snd_pcm_hdr { + /* VIRTIO_SND_R_PCM_XXX */ + struct virtio_snd_hdr hdr; + /* 0 ... 
virtio_snd_config::streams - 1 */ + uint32_t stream_id; +}; + +/* supported PCM stream features */ +enum { + VIRTIO_SND_PCM_F_SHMEM_HOST = 0, + VIRTIO_SND_PCM_F_SHMEM_GUEST, + VIRTIO_SND_PCM_F_MSG_POLLING, + VIRTIO_SND_PCM_F_EVT_SHMEM_PERIODS, + VIRTIO_SND_PCM_F_EVT_XRUNS +}; + +/* supported PCM sample formats */ +enum { + /* analog formats (width / physical width) */ + VIRTIO_SND_PCM_FMT_IMA_ADPCM = 0, /* 4 / 4 bits */ + VIRTIO_SND_PCM_FMT_MU_LAW, /* 8 / 8 bits */ + VIRTIO_SND_PCM_FMT_A_LAW, /* 8 / 8 bits */ + VIRTIO_SND_PCM_FMT_S8, /* 8 / 8 bits */ + VIRTIO_SND_PCM_FMT_U8, /* 8 / 8 bits */ + VIRTIO_SND_PCM_FMT_S16, /* 16 / 16 bits */ + VIRTIO_SND_PCM_FMT_U16, /* 16 / 16 bits */ + VIRTIO_SND_PCM_FMT_S18_3, /* 18 / 24 bits */ + VIRTIO_SND_PCM_FMT_U18_3, /* 18 / 24 bits */ + VIRTIO_SND_PCM_FMT_S20_3, /* 20 / 24 bits */ + VIRTIO_SND_PCM_FMT_U20_3, /* 20 / 24 bits */ + VIRTIO_SND_PCM_FMT_S24_3, /* 24 / 24 bits */ + VIRTIO_SND_PCM_FMT_U24_3, /* 24 / 24 bits */ + VIRTIO_SND_PCM_FMT_S20, /* 20 / 32 bits */ + VIRTIO_SND_PCM_FMT_U20, /* 20 / 32 bits */ + VIRTIO_SND_PCM_FMT_S24, /* 24 / 32 bits */ + VIRTIO_SND_PCM_FMT_U24, /* 24 / 32 bits */ + VIRTIO_SND_PCM_FMT_S32, /* 32 / 32 bits */ + VIRTIO_SND_PCM_FMT_U32, /* 32 / 32 bits */ + VIRTIO_SND_PCM_FMT_FLOAT, /* 32 / 32 bits */ + VIRTIO_SND_PCM_FMT_FLOAT64, /* 64 / 64 bits */ + /* digital formats (width / physical width) */ + VIRTIO_SND_PCM_FMT_DSD_U8, /* 8 / 8 bits */ + VIRTIO_SND_PCM_FMT_DSD_U16, /* 16 / 16 bits */ + VIRTIO_SND_PCM_FMT_DSD_U32, /* 32 / 32 bits */ + VIRTIO_SND_PCM_FMT_IEC958_SUBFRAME /* 32 / 32 bits */ +}; + +/* supported PCM frame rates */ +enum { + VIRTIO_SND_PCM_RATE_5512 = 0, + VIRTIO_SND_PCM_RATE_8000, + VIRTIO_SND_PCM_RATE_11025, + VIRTIO_SND_PCM_RATE_16000, + VIRTIO_SND_PCM_RATE_22050, + VIRTIO_SND_PCM_RATE_32000, + VIRTIO_SND_PCM_RATE_44100, + VIRTIO_SND_PCM_RATE_48000, + VIRTIO_SND_PCM_RATE_64000, + VIRTIO_SND_PCM_RATE_88200, + VIRTIO_SND_PCM_RATE_96000, + VIRTIO_SND_PCM_RATE_176400, + VIRTIO_SND_PCM_RATE_192000, + VIRTIO_SND_PCM_RATE_384000 +}; + +struct virtio_snd_pcm_info { + /* common header */ + struct virtio_snd_info hdr; + /* supported feature bit map (1 << VIRTIO_SND_PCM_F_XXX) */ + uint32_t features; + /* supported sample format bit map (1 << VIRTIO_SND_PCM_FMT_XXX) */ + uint64_t formats; + /* supported frame rate bit map (1 << VIRTIO_SND_PCM_RATE_XXX) */ + uint64_t rates; + /* dataflow direction (VIRTIO_SND_D_XXX) */ + uint8_t direction; + /* minimum # of supported channels */ + uint8_t channels_min; + /* maximum # of supported channels */ + uint8_t channels_max; + + uint8_t padding[5]; +}; + +/* set PCM stream format */ +struct virtio_snd_pcm_set_params { + /* .code = VIRTIO_SND_R_PCM_SET_PARAMS */ + struct virtio_snd_pcm_hdr hdr; + /* size of the hardware buffer */ + uint32_t buffer_bytes; + /* size of the hardware period */ + uint32_t period_bytes; + /* selected feature bit map (1 << VIRTIO_SND_PCM_F_XXX) */ + uint32_t features; + /* selected # of channels */ + uint8_t channels; + /* selected sample format (VIRTIO_SND_PCM_FMT_XXX) */ + uint8_t format; + /* selected frame rate (VIRTIO_SND_PCM_RATE_XXX) */ + uint8_t rate; + + uint8_t padding; +}; + +/******************************************************************************* + * PCM I/O MESSAGES + */ + +/* I/O request header */ +struct virtio_snd_pcm_xfer { + /* 0 ... 
virtio_snd_config::streams - 1 */ + uint32_t stream_id; +}; + +/* I/O request status */ +struct virtio_snd_pcm_status { + /* VIRTIO_SND_S_XXX */ + uint32_t status; + /* current device latency */ + uint32_t latency_bytes; +}; + +/******************************************************************************* + * CHANNEL MAP CONTROL MESSAGES + */ +struct virtio_snd_chmap_hdr { + /* VIRTIO_SND_R_CHMAP_XXX */ + struct virtio_snd_hdr hdr; + /* 0 ... virtio_snd_config::chmaps - 1 */ + uint32_t chmap_id; +}; + +/* standard channel position definition */ +enum { + VIRTIO_SND_CHMAP_NONE = 0, /* undefined */ + VIRTIO_SND_CHMAP_NA, /* silent */ + VIRTIO_SND_CHMAP_MONO, /* mono stream */ + VIRTIO_SND_CHMAP_FL, /* front left */ + VIRTIO_SND_CHMAP_FR, /* front right */ + VIRTIO_SND_CHMAP_RL, /* rear left */ + VIRTIO_SND_CHMAP_RR, /* rear right */ + VIRTIO_SND_CHMAP_FC, /* front center */ + VIRTIO_SND_CHMAP_LFE, /* low frequency (LFE) */ + VIRTIO_SND_CHMAP_SL, /* side left */ + VIRTIO_SND_CHMAP_SR, /* side right */ + VIRTIO_SND_CHMAP_RC, /* rear center */ + VIRTIO_SND_CHMAP_FLC, /* front left center */ + VIRTIO_SND_CHMAP_FRC, /* front right center */ + VIRTIO_SND_CHMAP_RLC, /* rear left center */ + VIRTIO_SND_CHMAP_RRC, /* rear right center */ + VIRTIO_SND_CHMAP_FLW, /* front left wide */ + VIRTIO_SND_CHMAP_FRW, /* front right wide */ + VIRTIO_SND_CHMAP_FLH, /* front left high */ + VIRTIO_SND_CHMAP_FCH, /* front center high */ + VIRTIO_SND_CHMAP_FRH, /* front right high */ + VIRTIO_SND_CHMAP_TC, /* top center */ + VIRTIO_SND_CHMAP_TFL, /* top front left */ + VIRTIO_SND_CHMAP_TFR, /* top front right */ + VIRTIO_SND_CHMAP_TFC, /* top front center */ + VIRTIO_SND_CHMAP_TRL, /* top rear left */ + VIRTIO_SND_CHMAP_TRR, /* top rear right */ + VIRTIO_SND_CHMAP_TRC, /* top rear center */ + VIRTIO_SND_CHMAP_TFLC, /* top front left center */ + VIRTIO_SND_CHMAP_TFRC, /* top front right center */ + VIRTIO_SND_CHMAP_TSL, /* top side left */ + VIRTIO_SND_CHMAP_TSR, /* top side right */ + VIRTIO_SND_CHMAP_LLFE, /* left LFE */ + VIRTIO_SND_CHMAP_RLFE, /* right LFE */ + VIRTIO_SND_CHMAP_BC, /* bottom center */ + VIRTIO_SND_CHMAP_BLC, /* bottom left center */ + VIRTIO_SND_CHMAP_BRC /* bottom right center */ +}; + +/* maximum possible number of channels */ +#define VIRTIO_SND_CHMAP_MAX_SIZE 18 + +struct virtio_snd_chmap_info { + /* common header */ + struct virtio_snd_info hdr; + /* dataflow direction (VIRTIO_SND_D_XXX) */ + uint8_t direction; + /* # of valid channel position values */ + uint8_t channels; + /* channel position values (VIRTIO_SND_CHMAP_XXX) */ + uint8_t positions[VIRTIO_SND_CHMAP_MAX_SIZE]; +}; + +#endif /* VIRTIO_SND_IF_H */ diff --git a/include/standard-headers/linux/virtio_vsock.h b/include/standard-headers/linux/virtio_vsock.h index be443211ce9..3a23488e421 100644 --- a/include/standard-headers/linux/virtio_vsock.h +++ b/include/standard-headers/linux/virtio_vsock.h @@ -38,6 +38,9 @@ #include "standard-headers/linux/virtio_ids.h" #include "standard-headers/linux/virtio_config.h" +/* The feature bitmap for virtio vsock */ +#define VIRTIO_VSOCK_F_SEQPACKET 1 /* SOCK_SEQPACKET supported */ + struct virtio_vsock_config { uint64_t guest_cid; } QEMU_PACKED; @@ -65,6 +68,7 @@ struct virtio_vsock_hdr { enum virtio_vsock_type { VIRTIO_VSOCK_TYPE_STREAM = 1, + VIRTIO_VSOCK_TYPE_SEQPACKET = 2, }; enum virtio_vsock_op { @@ -91,4 +95,9 @@ enum virtio_vsock_shutdown { VIRTIO_VSOCK_SHUTDOWN_SEND = 2, }; +/* VIRTIO_VSOCK_OP_RW flags values */ +enum virtio_vsock_rw { + VIRTIO_VSOCK_SEQ_EOR = 1, +}; + #endif /* 
_LINUX_VIRTIO_VSOCK_H */ diff --git a/include/standard-headers/rdma/vmw_pvrdma-abi.h b/include/standard-headers/rdma/vmw_pvrdma-abi.h index 0989426a3f5..c30182a7ae7 100644 --- a/include/standard-headers/rdma/vmw_pvrdma-abi.h +++ b/include/standard-headers/rdma/vmw_pvrdma-abi.h @@ -133,6 +133,13 @@ enum pvrdma_wc_flags { PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_NETWORK_HDR_TYPE, }; +enum pvrdma_network_type { + PVRDMA_NETWORK_IB, + PVRDMA_NETWORK_ROCE_V1 = PVRDMA_NETWORK_IB, + PVRDMA_NETWORK_IPV4, + PVRDMA_NETWORK_IPV6 +}; + struct pvrdma_alloc_ucontext_resp { uint32_t qp_tab_size; uint32_t reserved; diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h index 16da2796962..70c579560ad 100644 --- a/include/sysemu/arch_init.h +++ b/include/sysemu/arch_init.h @@ -9,7 +9,6 @@ enum { QEMU_ARCH_CRIS = (1 << 2), QEMU_ARCH_I386 = (1 << 3), QEMU_ARCH_M68K = (1 << 4), - QEMU_ARCH_LM32 = (1 << 5), QEMU_ARCH_MICROBLAZE = (1 << 6), QEMU_ARCH_MIPS = (1 << 7), QEMU_ARCH_PPC = (1 << 8), @@ -18,30 +17,15 @@ enum { QEMU_ARCH_SPARC = (1 << 11), QEMU_ARCH_XTENSA = (1 << 12), QEMU_ARCH_OPENRISC = (1 << 13), - QEMU_ARCH_UNICORE32 = (1 << 14), - QEMU_ARCH_MOXIE = (1 << 15), QEMU_ARCH_TRICORE = (1 << 16), QEMU_ARCH_NIOS2 = (1 << 17), QEMU_ARCH_HPPA = (1 << 18), QEMU_ARCH_RISCV = (1 << 19), QEMU_ARCH_RX = (1 << 20), QEMU_ARCH_AVR = (1 << 21), - - QEMU_ARCH_NONE = (1 << 31), + QEMU_ARCH_HEXAGON = (1 << 22), }; extern const uint32_t arch_type; -int kvm_available(void); -int xen_available(void); - -/* default virtio transport per architecture */ -#define QEMU_ARCH_VIRTIO_PCI (QEMU_ARCH_ALPHA | QEMU_ARCH_ARM | \ - QEMU_ARCH_HPPA | QEMU_ARCH_I386 | \ - QEMU_ARCH_MIPS | QEMU_ARCH_PPC | \ - QEMU_ARCH_RISCV | QEMU_ARCH_SH4 | \ - QEMU_ARCH_SPARC | QEMU_ARCH_XTENSA) -#define QEMU_ARCH_VIRTIO_CCW (QEMU_ARCH_S390X) -#define QEMU_ARCH_VIRTIO_MMIO (QEMU_ARCH_M68K) - #endif diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 880e9032930..e5e1524f065 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -66,6 +66,10 @@ typedef struct BlockDevOps { * Runs when the backend's last drain request ends. */ void (*drained_end)(void *opaque); + /* + * Is the device still busy? 
+ */ + bool (*drained_poll)(void *opaque); } BlockDevOps; /* This struct is embedded in (the private) BlockBackend struct and contains @@ -98,6 +102,7 @@ BlockBackend *blk_by_public(BlockBackendPublic *public); BlockDriverState *blk_bs(BlockBackend *blk); void blk_remove_bs(BlockBackend *blk); int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); +int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp); bool bdrv_has_blk(BlockDriverState *bs); bool bdrv_is_root_node(BlockDriverState *bs); int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, @@ -121,38 +126,42 @@ BlockBackend *blk_by_dev(void *dev); BlockBackend *blk_by_qdev_id(const char *id, Error **errp); void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, + int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, - unsigned int bytes, + int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, + int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, - unsigned int bytes, void *buf, + int64_t bytes, void *buf, BdrvRequestFlags flags) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + assert(bytes <= SIZE_MAX); + return blk_co_preadv(blk, offset, bytes, &qiov, flags); } static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, - unsigned int bytes, void *buf, + int64_t bytes, void *buf, BdrvRequestFlags flags) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + assert(bytes <= SIZE_MAX); + return blk_co_pwritev(blk, offset, bytes, &qiov, flags); } int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int bytes, BdrvRequestFlags flags); + int64_t bytes, BdrvRequestFlags flags); BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int bytes, BdrvRequestFlags flags, + int64_t bytes, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags); int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes); @@ -169,15 +178,16 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *blk_aio_flush(BlockBackend *blk, BlockCompletionFunc *cb, void *opaque); -BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int bytes, +BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes, BlockCompletionFunc *cb, void *opaque); void blk_aio_cancel(BlockAIOCB *acb); void blk_aio_cancel_async(BlockAIOCB *acb); int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque); -int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes); -int blk_co_flush(BlockBackend *blk); +int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, + int64_t bytes); +int coroutine_fn blk_co_flush(BlockBackend *blk); int blk_flush(BlockBackend *blk); int blk_commit_all(void); void blk_inc_in_flight(BlockBackend *blk); @@ -204,7 +214,9 @@ void blk_eject(BlockBackend *blk, bool eject_flag); int blk_get_flags(BlockBackend *blk); uint32_t 
blk_get_request_alignment(BlockBackend *blk); uint32_t blk_get_max_transfer(BlockBackend *blk); +uint64_t blk_get_max_hw_transfer(BlockBackend *blk); int blk_get_max_iov(BlockBackend *blk); +int blk_get_max_hw_iov(BlockBackend *blk); void blk_set_guest_block_size(BlockBackend *blk, int align); void *blk_try_blockalign(BlockBackend *blk, size_t size); void *blk_blockalign(BlockBackend *blk, size_t size); @@ -236,12 +248,12 @@ int blk_get_open_flags_from_root_state(BlockBackend *blk); void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, BlockCompletionFunc *cb, void *opaque); int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int bytes, BdrvRequestFlags flags); + int64_t bytes, BdrvRequestFlags flags); int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, - int bytes); + int64_t bytes); int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); -int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes); +int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int64_t pos, int size); int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size); @@ -262,7 +274,7 @@ void blk_unregister_buf(BlockBackend *blk, void *host); int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, BlockBackend *blk_out, int64_t off_out, - int bytes, BdrvRequestFlags read_flags, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags); const BdrvChild *blk_root(BlockBackend *blk); diff --git a/include/sysemu/device_tree.h b/include/sysemu/device_tree.h index 8a2fe556225..ef060a97590 100644 --- a/include/sysemu/device_tree.h +++ b/include/sysemu/device_tree.h @@ -121,6 +121,7 @@ uint32_t qemu_fdt_get_phandle(void *fdt, const char *path); uint32_t qemu_fdt_alloc_phandle(void *fdt); int qemu_fdt_nop_node(void *fdt, const char *node_path); int qemu_fdt_add_subnode(void *fdt, const char *name); +int qemu_fdt_add_path(void *fdt, const char *path); #define qemu_fdt_setprop_cells(fdt, node_path, property, ...) 
\ do { \ diff --git a/include/sysemu/hax.h b/include/sysemu/hax.h index 12fb54f9902..247f0661d12 100644 --- a/include/sysemu/hax.h +++ b/include/sysemu/hax.h @@ -24,6 +24,8 @@ int hax_sync_vcpus(void); +#ifdef NEED_CPU_H + #ifdef CONFIG_HAX int hax_enabled(void); @@ -34,4 +36,6 @@ int hax_enabled(void); #endif /* CONFIG_HAX */ +#endif /* NEED_CPU_H */ + #endif /* QEMU_HAX_H */ diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h index 31d7343cb21..3f1af140bd3 100644 --- a/include/sysemu/hostmem.h +++ b/include/sysemu/hostmem.h @@ -65,7 +65,7 @@ struct HostMemoryBackend { uint64_t size; bool merge, dump, use_canonical_path; bool cheri_tags; - bool prealloc, is_mapped, share; + bool prealloc, is_mapped, share, reserve; uint32_t prealloc_threads; DECLARE_BITMAP(host_nodes, MAX_NODES + 1); HostMemPolicy policy; diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h index c98636bc812..bb70082e458 100644 --- a/include/sysemu/hvf.h +++ b/include/sysemu/hvf.h @@ -16,6 +16,8 @@ #include "qemu/accel.h" #include "qom/object.h" +#ifdef NEED_CPU_H + #ifdef CONFIG_HVF uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, int reg); @@ -26,6 +28,8 @@ extern bool hvf_allowed; #define hvf_get_supported_cpuid(func, idx, reg) 0 #endif /* !CONFIG_HVF */ +#endif /* NEED_CPU_H */ + #define TYPE_HVF_ACCEL ACCEL_CLASS_NAME("hvf") typedef struct HVFState HVFState; diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h new file mode 100644 index 00000000000..6545f7cd613 --- /dev/null +++ b/include/sysemu/hvf_int.h @@ -0,0 +1,68 @@ +/* + * QEMU Hypervisor.framework (HVF) support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +/* header to be included in HVF-specific code */ + +#ifndef HVF_INT_H +#define HVF_INT_H + +#ifdef __aarch64__ +#include +#else +#include +#endif + +/* hvf_slot flags */ +#define HVF_SLOT_LOG (1 << 0) + +typedef struct hvf_slot { + uint64_t start; + uint64_t size; + uint8_t *mem; + int slot_id; + uint32_t flags; + MemoryRegion *region; +} hvf_slot; + +typedef struct hvf_vcpu_caps { + uint64_t vmx_cap_pinbased; + uint64_t vmx_cap_procbased; + uint64_t vmx_cap_procbased2; + uint64_t vmx_cap_entry; + uint64_t vmx_cap_exit; + uint64_t vmx_cap_preemption_timer; +} hvf_vcpu_caps; + +struct HVFState { + AccelState parent; + hvf_slot slots[32]; + int num_slots; + + hvf_vcpu_caps *hvf_caps; + uint64_t vtimer_offset; +}; +extern HVFState *hvf_state; + +struct hvf_vcpu_state { + uint64_t fd; + void *exit; + bool vtimer_masked; + sigset_t unblock_ipi_mask; +}; + +void assert_hvf_ok(hv_return_t ret); +int hvf_arch_init(void); +int hvf_arch_init_vcpu(CPUState *cpu); +void hvf_arch_vcpu_destroy(CPUState *cpu); +int hvf_vcpu_exec(CPUState *); +hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t); +int hvf_put_registers(CPUState *); +int hvf_get_registers(CPUState *); +void hvf_kick_vcpu_thread(CPUState *cpu); + +#endif diff --git a/include/sysemu/hw_accel.h b/include/sysemu/hw_accel.h index 61672f9b322..01b5ebf442a 100644 --- a/include/sysemu/hw_accel.h +++ b/include/sysemu/hw_accel.h @@ -16,6 +16,7 @@ #include "sysemu/kvm.h" #include "sysemu/hvf.h" #include "sysemu/whpx.h" +#include "sysemu/nvmm.h" void cpu_synchronize_state(CPUState *cpu); void cpu_synchronize_post_reset(CPUState *cpu); diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h index f177142f16c..7f714bd1368 100644 --- a/include/sysemu/iothread.h +++ b/include/sysemu/iothread.h @@ -37,6 +37,9 @@ struct 
IOThread { int64_t poll_max_ns; int64_t poll_grow; int64_t poll_shrink; + + /* AioContext AIO engine parameters */ + int64_t aio_max_batch; }; typedef struct IOThread IOThread; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index a1ab1ee12d3..7b22aeb6ae1 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -547,4 +547,5 @@ bool kvm_cpu_check_are_resettable(void); bool kvm_arch_cpu_check_are_resettable(void); +bool kvm_dirty_ring_enabled(void); #endif diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index ccb8869f01b..1f5487d9b74 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -23,18 +23,21 @@ typedef struct KVMSlot int old_flags; /* Dirty bitmap cache for the slot */ unsigned long *dirty_bmap; + unsigned long dirty_bmap_size; + /* Cache of the address space ID */ + int as_id; + /* Cache of the offset in ram address space */ + ram_addr_t ram_start_offset; } KVMSlot; typedef struct KVMMemoryListener { MemoryListener listener; - /* Protects the slots and all inside them */ - QemuMutex slots_lock; KVMSlot *slots; int as_id; } KVMMemoryListener; void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, - AddressSpace *as, int as_id); + AddressSpace *as, int as_id, const char *name); void kvm_set_max_memslot_size(hwaddr max_slot_size); diff --git a/include/sysemu/nvmm.h b/include/sysemu/nvmm.h new file mode 100644 index 00000000000..833670fccbe --- /dev/null +++ b/include/sysemu/nvmm.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2018-2019 Maxime Villard, All rights reserved. + * + * NetBSD Virtual Machine Monitor (NVMM) accelerator support. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_NVMM_H +#define QEMU_NVMM_H + +#ifdef NEED_CPU_H + +#ifdef CONFIG_NVMM + +int nvmm_enabled(void); + +#else /* CONFIG_NVMM */ + +#define nvmm_enabled() (0) + +#endif /* CONFIG_NVMM */ + +#endif /* NEED_CPU_H */ + +#endif /* QEMU_NVMM_H */ diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h index 629c8c648b7..2edf33658a4 100644 --- a/include/sysemu/os-posix.h +++ b/include/sysemu/os-posix.h @@ -38,6 +38,10 @@ #include #endif +#ifdef __cplusplus +extern "C" { +#endif + void os_set_line_buffering(void); void os_set_proc_name(const char *s); void os_setup_signal_handling(void); @@ -92,4 +96,8 @@ static inline void qemu_funlockfile(FILE *f) funlockfile(f); } +#ifdef __cplusplus +} +#endif + #endif diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h index 5346d51e890..43f569b5c21 100644 --- a/include/sysemu/os-win32.h +++ b/include/sysemu/os-win32.h @@ -30,6 +30,10 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif + #if defined(_WIN64) /* On w64, setjmp is implemented by _setjmp which needs a second parameter. * If this parameter is NULL, longjump does no stack unwinding. 
@@ -194,4 +198,8 @@ ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags); ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, struct sockaddr *addr, socklen_t *addrlen); +#ifdef __cplusplus +} +#endif + #endif diff --git a/include/sysemu/sev.h b/include/sysemu/sev.h deleted file mode 100644 index 94d821d737c..00000000000 --- a/include/sysemu/sev.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * QEMU Secure Encrypted Virutualization (SEV) support - * - * Copyright: Advanced Micro Devices, 2016-2018 - * - * Authors: - * Brijesh Singh - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef QEMU_SEV_H -#define QEMU_SEV_H - -#include "sysemu/kvm.h" - -bool sev_enabled(void); -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); -int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); -int sev_inject_launch_secret(const char *hdr, const char *secret, - uint64_t gpa, Error **errp); - -int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); -void sev_es_set_reset_vector(CPUState *cpu); - -#endif diff --git a/include/sysemu/tcg.h b/include/sysemu/tcg.h index 00349fb18a7..53352450ff6 100644 --- a/include/sysemu/tcg.h +++ b/include/sysemu/tcg.h @@ -8,8 +8,6 @@ #ifndef SYSEMU_TCG_H #define SYSEMU_TCG_H -void tcg_exec_init(unsigned long tb_size, int splitwx); - #ifdef CONFIG_TCG extern bool tcg_allowed; #define tcg_enabled() (tcg_allowed) diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h index 1a85564e479..68b2206463c 100644 --- a/include/sysemu/tpm.h +++ b/include/sysemu/tpm.h @@ -15,6 +15,8 @@ #include "qapi/qapi-types-tpm.h" #include "qom/object.h" +#ifdef CONFIG_TPM + int tpm_config_parse(QemuOptsList *opts_list, const char *optarg); int tpm_init(void); void tpm_cleanup(void); @@ -73,4 +75,11 @@ static inline TPMVersion tpm_get_version(TPMIf *ti) return TPM_IF_GET_CLASS(ti)->get_version(ti); } +#else /* CONFIG_TPM */ + +#define tpm_init() (0) +#define tpm_cleanup() + +#endif /* CONFIG_TPM */ + #endif /* QEMU_TPM_H */ diff --git a/include/sysemu/tpm_backend.h b/include/sysemu/tpm_backend.h index 6f078f5f482..8fd3269c117 100644 --- a/include/sysemu/tpm_backend.h +++ b/include/sysemu/tpm_backend.h @@ -18,6 +18,8 @@ #include "sysemu/tpm.h" #include "qapi/error.h" +#ifdef CONFIG_TPM + #define TYPE_TPM_BACKEND "tpm-backend" OBJECT_DECLARE_TYPE(TPMBackend, TPMBackendClass, TPM_BACKEND) @@ -209,4 +211,6 @@ TPMInfo *tpm_backend_query_tpm(TPMBackend *s); TPMBackend *qemu_find_tpm_be(const char *id); -#endif +#endif /* CONFIG_TPM */ + +#endif /* TPM_BACKEND_H */ diff --git a/include/sysemu/watchdog.h b/include/sysemu/watchdog.h index a08d16380d7..d2d4901dbbe 100644 --- a/include/sysemu/watchdog.h +++ b/include/sysemu/watchdog.h @@ -37,7 +37,6 @@ typedef struct WatchdogTimerModel WatchdogTimerModel; /* in hw/watchdog.c */ int select_watchdog(const char *p); -int select_watchdog_action(const char *action); WatchdogAction get_watchdog_action(void); void watchdog_add_model(WatchdogTimerModel *model); void watchdog_perform_action(void); diff --git a/include/sysemu/whpx.h b/include/sysemu/whpx.h index 8ca1c1c4ac7..2889fa2278b 100644 --- a/include/sysemu/whpx.h +++ b/include/sysemu/whpx.h @@ -13,6 +13,8 @@ #ifndef QEMU_WHPX_H #define QEMU_WHPX_H +#ifdef NEED_CPU_H + #ifdef CONFIG_WHPX int whpx_enabled(void); @@ -25,4 +27,6 @@ bool whpx_apic_in_platform(void); #endif /* CONFIG_WHPX */ +#endif /* NEED_CPU_H */ + #endif /* QEMU_WHPX_H */ diff --git 
a/include/tcg/tcg-cond.h b/include/tcg/tcg-cond.h new file mode 100644 index 00000000000..3819aa2d0ed --- /dev/null +++ b/include/tcg/tcg-cond.h @@ -0,0 +1,133 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef TCG_COND_H +#define TCG_COND_H + +/* + * Conditions. Note that these are laid out for easy manipulation by + * the functions below: + * bit 0 is used for inverting; + * bit 1 is signed, + * bit 2 is unsigned, + * bit 3 is used with bit 0 for swapping signed/unsigned. + */ +typedef enum { + /* non-signed */ + TCG_COND_NEVER = 0 | 0 | 0 | 0, + TCG_COND_ALWAYS = 0 | 0 | 0 | 1, + TCG_COND_EQ = 8 | 0 | 0 | 0, + TCG_COND_NE = 8 | 0 | 0 | 1, + /* signed */ + TCG_COND_LT = 0 | 0 | 2 | 0, + TCG_COND_GE = 0 | 0 | 2 | 1, + TCG_COND_LE = 8 | 0 | 2 | 0, + TCG_COND_GT = 8 | 0 | 2 | 1, + /* unsigned */ + TCG_COND_LTU = 0 | 4 | 0 | 0, + TCG_COND_GEU = 0 | 4 | 0 | 1, + TCG_COND_LEU = 8 | 4 | 0 | 0, + TCG_COND_GTU = 8 | 4 | 0 | 1, +} TCGCond; + +/* Invert the sense of the comparison. */ +static inline TCGCond tcg_invert_cond(TCGCond c) +{ + return (TCGCond)(c ^ 1); +} + +/* Swap the operands in a comparison. */ +static inline TCGCond tcg_swap_cond(TCGCond c) +{ + return c & 6 ? (TCGCond)(c ^ 9) : c; +} + +/* Create an "unsigned" version of a "signed" comparison. */ +static inline TCGCond tcg_unsigned_cond(TCGCond c) +{ + return c & 2 ? (TCGCond)(c ^ 6) : c; +} + +/* Create a "signed" version of an "unsigned" comparison. */ +static inline TCGCond tcg_signed_cond(TCGCond c) +{ + return c & 4 ? (TCGCond)(c ^ 6) : c; +} + +/* Must a comparison be considered unsigned? */ +static inline bool is_unsigned_cond(TCGCond c) +{ + return (c & 4) != 0; +} + +/* + * Create a "high" version of a double-word comparison. + * This removes equality from a LTE or GTE comparison. 
+ */ +static inline TCGCond tcg_high_cond(TCGCond c) +{ + switch (c) { + case TCG_COND_GE: + case TCG_COND_LE: + case TCG_COND_GEU: + case TCG_COND_LEU: + return (TCGCond)(c ^ 8); + default: + return c; + } +} + +static inline const char *tcg_cond_string(TCGCond c) +{ + switch (c) { + case TCG_COND_NEVER: + return "NEVER"; + case TCG_COND_ALWAYS: + return "ALWAYS"; + case TCG_COND_EQ: + return "=="; + case TCG_COND_NE: + return "!="; + case TCG_COND_LT: + return "<_s"; + case TCG_COND_LTU: + return "<_u"; + case TCG_COND_GE: + return ">=_s"; + case TCG_COND_GEU: + return ">=_u"; + case TCG_COND_LE: + return "<=_s"; + case TCG_COND_LEU: + return "<=_u"; + case TCG_COND_GT: + return ">_s"; + case TCG_COND_GTU: + return ">_u"; + default: + return "?"; + } +} + +#endif /* TCG_COND_H */ diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h new file mode 100644 index 00000000000..bf40942de4a --- /dev/null +++ b/include/tcg/tcg-ldst.h @@ -0,0 +1,79 @@ +/* + * Memory helpers that will be used by TCG generated code. + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef TCG_LDST_H +#define TCG_LDST_H 1 + +#ifdef CONFIG_SOFTMMU + +/* Value zero-extended to tcg register size. */ +tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); + +/* Value sign-extended to tcg register size. 
*/ +tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); + +void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, + MemOpIdx oi, uintptr_t retaddr); +void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + MemOpIdx oi, uintptr_t retaddr); +void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + MemOpIdx oi, uintptr_t retaddr); +void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr); +void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + MemOpIdx oi, uintptr_t retaddr); +void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + MemOpIdx oi, uintptr_t retaddr); +void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr); + +#else + +void QEMU_NORETURN helper_unaligned_ld(CPUArchState *env, target_ulong addr); +void QEMU_NORETURN helper_unaligned_st(CPUArchState *env, target_ulong addr); + +#endif /* CONFIG_SOFTMMU */ +#endif /* TCG_LDST_H */ diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h index c69a7de984b..da55fed8704 100644 --- a/include/tcg/tcg-op-gvec.h +++ b/include/tcg/tcg-op-gvec.h @@ -401,4 +401,47 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); +/* 32-bit vector operations. 
*/ +void tcg_gen_vec_add8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); +void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); + +void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); +void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); + +void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); + +#if TARGET_LONG_BITS == 64 +#define tcg_gen_vec_add8_tl tcg_gen_vec_add8_i64 +#define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i64 +#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64 +#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64 +#define tcg_gen_vec_add32_tl tcg_gen_vec_add32_i64 +#define tcg_gen_vec_sub32_tl tcg_gen_vec_sub32_i64 +#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i64 +#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i64 +#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i64 +#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i64 +#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i64 +#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i64 + +#else +#define tcg_gen_vec_add8_tl tcg_gen_vec_add8_i32 +#define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i32 +#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32 +#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32 +#define tcg_gen_vec_add32_tl tcg_gen_add_i32 +#define tcg_gen_vec_sub32_tl tcg_gen_sub_i32 +#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i32 +#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i32 +#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i32 +#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i32 +#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i32 +#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i32 +#endif + #endif diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h index e3a2a45ec58..3c790c96659 100644 --- a/include/tcg/tcg-op.h +++ b/include/tcg/tcg-op.h @@ -330,7 +330,7 @@ void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg); -void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags); void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); @@ -338,6 +338,9 @@ void tcg_gen_umin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_umax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_abs_i32(TCGv_i32, TCGv_i32); +/* Replicate a value of size @vece from @in to all the lanes in @out */ +void tcg_gen_dup_i32(unsigned vece, TCGv_i32 out, TCGv_i32 in); + static inline void tcg_gen_discard_i32(TCGv_i32 arg) { tcg_gen_op1_i32(INDEX_op_discard, arg); @@ -530,8 +533,8 @@ void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg); -void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg); -void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags); +void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags); void tcg_gen_bswap64_i64(TCGv_i64 ret, 
TCGv_i64 arg); void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); @@ -539,6 +542,9 @@ void tcg_gen_umin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_umax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_abs_i64(TCGv_i64, TCGv_i64); +/* Replicate a value of size @vece from @in to all the lanes in @out */ +void tcg_gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in); + #if TCG_TARGET_REG_BITS == 64 static inline void tcg_gen_discard_i64(TCGv_i64 arg) { @@ -848,7 +854,6 @@ static inline void tcg_gen_plugin_cb_end(void) #if TARGET_LONG_BITS == 32 #define tcg_temp_new() tcg_temp_new_i32() -#define tcg_global_reg_new tcg_global_reg_new_i32 #define tcg_global_mem_new tcg_global_mem_new_i32 #define tcg_temp_local_new() tcg_temp_local_new_i32() #define tcg_temp_free tcg_temp_free_i32 @@ -860,7 +865,6 @@ static inline void tcg_gen_plugin_cb_end(void) #define tcg_gen_qemu_st_tl_with_checked_addr tcg_gen_qemu_st_i32_with_checked_addr #else #define tcg_temp_new() tcg_temp_new_i64() -#define tcg_global_reg_new tcg_global_reg_new_i64 #define tcg_global_mem_new tcg_global_mem_new_i64 #define tcg_temp_local_new() tcg_temp_local_new_i64() #define tcg_temp_free tcg_temp_free_i64 @@ -1135,6 +1139,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_sextract_tl tcg_gen_sextract_i64 #define tcg_gen_extract2_tl tcg_gen_extract2_i64 #define tcg_const_tl tcg_const_i64 +#define tcg_constant_tl tcg_constant_i64 #define tcg_const_local_tl tcg_const_local_i64 #define tcg_gen_movcond_tl tcg_gen_movcond_i64 #define tcg_gen_add2_tl tcg_gen_add2_i64 @@ -1168,6 +1173,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i64 #define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i64 #define tcg_gen_dup_tl_vec tcg_gen_dup_i64_vec +#define tcg_gen_dup_tl tcg_gen_dup_i64 #else #define tcg_gen_movi_tl tcg_gen_movi_i32 #define tcg_gen_mov_tl tcg_gen_mov_i32 @@ -1227,7 +1233,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_ext32u_tl tcg_gen_mov_i32 #define tcg_gen_ext32s_tl tcg_gen_mov_i32 #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32 -#define tcg_gen_bswap32_tl tcg_gen_bswap32_i32 +#define tcg_gen_bswap32_tl(D, S, F) tcg_gen_bswap32_i32(D, S) #define tcg_gen_bswap_tl tcg_gen_bswap32_i32 #define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64 #define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32 @@ -1252,6 +1258,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_sextract_tl tcg_gen_sextract_i32 #define tcg_gen_extract2_tl tcg_gen_extract2_i32 #define tcg_const_tl tcg_const_i32 +#define tcg_constant_tl tcg_constant_i32 #define tcg_const_local_tl tcg_const_local_i32 #define tcg_gen_movcond_tl tcg_gen_movcond_i32 #define tcg_gen_add2_tl tcg_gen_add2_i32 @@ -1285,6 +1292,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i32 #define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i32 #define tcg_gen_dup_tl_vec tcg_gen_dup_i32_vec +#define tcg_gen_dup_tl tcg_gen_dup_i32 #endif static inline void tcg_gen_mov_cap_checked(TCGv_cap_checked_ptr ret, diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 14ece2c1993..71d3703867a 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -97,8 +97,8 @@ 
DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32)) DEF(ext16s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16s_i32)) DEF(ext8u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8u_i32)) DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32)) -DEF(bswap16_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap16_i32)) -DEF(bswap32_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap32_i32)) +DEF(bswap16_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap16_i32)) +DEF(bswap32_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap32_i32)) DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32)) DEF(neg_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_neg_i32)) DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32)) @@ -166,9 +166,9 @@ DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64)) DEF(ext8u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8u_i64)) DEF(ext16u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16u_i64)) DEF(ext32u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32u_i64)) -DEF(bswap16_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64)) -DEF(bswap32_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64)) -DEF(bswap64_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64)) +DEF(bswap16_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64)) +DEF(bswap32_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64)) +DEF(bswap64_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64)) DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64)) DEF(neg_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_neg_i64)) DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64)) @@ -195,8 +195,7 @@ DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT) DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END) DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END) -DEF(goto_ptr, 0, 1, 0, - TCG_OPF_BB_EXIT | TCG_OPF_BB_END | IMPL(TCG_TARGET_HAS_goto_ptr)) +DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END) DEF(plugin_cb_start, 0, 0, 3, TCG_OPF_NOT_PRESENT) DEF(plugin_cb_end, 0, 0, 0, TCG_OPF_NOT_PRESENT) @@ -278,8 +277,8 @@ DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT) #ifdef TCG_TARGET_INTERPRETER /* These opcodes are only for use between the tci generator and interpreter. */ -DEF(tci_movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT) -DEF(tci_movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) +DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) #endif #undef TLADDR_ARGS diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 685e71cd371..4c16a3331a4 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -27,13 +27,13 @@ #include "cpu.h" #include "exec/memop.h" -#include "exec/tb-context.h" +#include "exec/memopidx.h" #include "qemu/bitops.h" #include "qemu/plugin.h" #include "qemu/queue.h" #include "tcg/tcg-mo.h" #include "tcg-target.h" -#include "qemu/int128.h" +#include "tcg/tcg-cond.h" /* XXX: make safe guess about sizes */ #define MAX_OP_PER_INSTR 266 @@ -53,6 +53,7 @@ #define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS)) #define CPU_TEMP_BUF_NLONGS 128 +#define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long)) /* Default target word size to pointer size. */ #ifndef TCG_TARGET_REG_BITS @@ -414,105 +415,17 @@ typedef struct TCGv_cap_checked_ptr_tl_d *TCGv_cap_checked_ptr; /* Used to align parameters. See the comment before tcgv_i32_temp. */ #define TCG_CALL_DUMMY_ARG ((TCGArg)0) -/* Conditions. 
Note that these are laid out for easy manipulation by - the functions below: - bit 0 is used for inverting; - bit 1 is signed, - bit 2 is unsigned, - bit 3 is used with bit 0 for swapping signed/unsigned. */ -typedef enum { - /* non-signed */ - TCG_COND_NEVER = 0 | 0 | 0 | 0, - TCG_COND_ALWAYS = 0 | 0 | 0 | 1, - TCG_COND_EQ = 8 | 0 | 0 | 0, - TCG_COND_NE = 8 | 0 | 0 | 1, - /* signed */ - TCG_COND_LT = 0 | 0 | 2 | 0, - TCG_COND_GE = 0 | 0 | 2 | 1, - TCG_COND_LE = 8 | 0 | 2 | 0, - TCG_COND_GT = 8 | 0 | 2 | 1, - /* unsigned */ - TCG_COND_LTU = 0 | 4 | 0 | 0, - TCG_COND_GEU = 0 | 4 | 0 | 1, - TCG_COND_LEU = 8 | 4 | 0 | 0, - TCG_COND_GTU = 8 | 4 | 0 | 1, -} TCGCond; - -static inline const char *tcg_cond_string(TCGCond c) -{ - switch (c) { - case TCG_COND_NEVER: - return "NEVER"; - case TCG_COND_ALWAYS: - return "ALWAYS"; - case TCG_COND_EQ: - return "=="; - case TCG_COND_NE: - return "!="; - case TCG_COND_LT: - return "<_s"; - case TCG_COND_LTU: - return "<_u"; - case TCG_COND_GE: - return ">=_s"; - case TCG_COND_GEU: - return ">=_u"; - case TCG_COND_LE: - return "<=_s"; - case TCG_COND_LEU: - return "<=_u"; - case TCG_COND_GT: - return ">_s"; - case TCG_COND_GTU: - return ">_u"; - default: - return "?"; - } -} -/* Invert the sense of the comparison. */ -static inline TCGCond tcg_invert_cond(TCGCond c) -{ - return (TCGCond)(c ^ 1); -} - -/* Swap the operands in a comparison. */ -static inline TCGCond tcg_swap_cond(TCGCond c) -{ - return c & 6 ? (TCGCond)(c ^ 9) : c; -} - -/* Create an "unsigned" version of a "signed" comparison. */ -static inline TCGCond tcg_unsigned_cond(TCGCond c) -{ - return c & 2 ? (TCGCond)(c ^ 6) : c; -} - -/* Create a "signed" version of an "unsigned" comparison. */ -static inline TCGCond tcg_signed_cond(TCGCond c) -{ - return c & 4 ? (TCGCond)(c ^ 6) : c; -} - -/* Must a comparison be considered unsigned? */ -static inline bool is_unsigned_cond(TCGCond c) -{ - return (c & 4) != 0; -} - -/* Create a "high" version of a double-word comparison. - This removes equality from a LTE or GTE comparison. */ -static inline TCGCond tcg_high_cond(TCGCond c) -{ - switch (c) { - case TCG_COND_GE: - case TCG_COND_LE: - case TCG_COND_GEU: - case TCG_COND_LEU: - return (TCGCond)(c ^ 8); - default: - return c; - } -} +/* + * Flags for the bswap opcodes. + * If IZ, the input is zero-extended, otherwise unknown. + * If OZ or OS, the output is zero- or sign-extended respectively, + * otherwise the high bits are undefined. + */ +enum { + TCG_BSWAP_IZ = 1, + TCG_BSWAP_OZ = 2, + TCG_BSWAP_OS = 4, +}; typedef enum TCGTempVal { TEMP_VAL_DEAD, @@ -586,9 +499,6 @@ typedef struct TCGOp { /* Next and previous opcodes. */ QTAILQ_ENTRY(TCGOp) link; -#ifdef CONFIG_PLUGIN - QSIMPLEQ_ENTRY(TCGOp) plugin_link; -#endif /* Arguments for the opcode. */ TCGArg args[MAX_OPC_PARAM]; @@ -673,8 +583,6 @@ struct TCGContext { /* Threshold to flush the translated code buffer. 
*/ void *code_gen_highwater; - size_t tb_phys_invalidate_count; - /* Track which vCPU triggers events */ CPUState *cpu; /* *_trans */ @@ -700,9 +608,6 @@ struct TCGContext { /* descriptor of the instruction being translated */ struct qemu_plugin_insn *plugin_insn; - - /* list to quickly access the injected ops */ - QSIMPLEQ_HEAD(, TCGOp) plugin_ops; #endif GHashTable *const_table[TCG_TYPE_COUNT]; @@ -728,7 +633,6 @@ static inline bool temp_readonly(TCGTemp *ts) return ts->kind >= TEMP_FIXED; } -extern TCGContext tcg_init_ctx; extern __thread TCGContext *tcg_ctx; extern const void *tcg_code_gen_epilogue; extern uintptr_t tcg_splitwx_diff; @@ -743,16 +647,7 @@ extern TCGv _pc_is_current; extern TCGv_i32 cpu_rvfi_available_fields; #endif -static inline bool in_code_gen_buffer(const void *p) -{ - const TCGContext *s = &tcg_init_ctx; - /* - * Much like it is valid to have a pointer to the byte past the - * end of an array (so long as you don't dereference it), allow - * a pointer to the byte past the end of the code gen buffer. - */ - return (size_t)(p - s->code_gen_buffer) <= s->code_gen_buffer_size; -} +bool in_code_gen_buffer(const void *p); #ifdef CONFIG_DEBUG_TCG const void *tcg_splitwx_to_rx(void *rw); @@ -926,8 +821,6 @@ void *tcg_malloc_internal(TCGContext *s, int size); void tcg_pool_reset(TCGContext *s); TranslationBlock *tcg_tb_alloc(TCGContext *s); -void tcg_region_init(void); -void tb_destroy(TranslationBlock *tb); void tcg_region_reset_all(void); size_t tcg_code_size(void); @@ -935,7 +828,6 @@ size_t tcg_code_capacity(void); void tcg_tb_insert(TranslationBlock *tb); void tcg_tb_remove(TranslationBlock *tb); -size_t tcg_tb_phys_invalidate_count(void); TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr); void tcg_tb_foreach(GTraverseFunc func, gpointer user_data); size_t tcg_nb_tbs(void); @@ -959,7 +851,7 @@ static inline void *tcg_malloc(int size) } } -void tcg_context_init(TCGContext *s); +void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus); void tcg_register_thread(void); void tcg_prologue_init(TCGContext *s); void tcg_func_start(TCGContext *s); @@ -1066,8 +958,8 @@ int tcg_check_temp_count(void); #endif int64_t tcg_cpu_exec_time(void); -void tcg_dump_info(void); -void tcg_dump_op_count(void); +void tcg_dump_info(GString *buf); +void tcg_dump_op_count(GString *buf); #define TCG_CT_CONST 1 /* any constant of register size */ @@ -1135,6 +1027,16 @@ void tcg_op_remove(TCGContext *s, TCGOp *op); TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc); TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc); +/** + * tcg_remove_ops_after: + * @op: target operation + * + * Discard any opcodes emitted since @op. Expected usage is to save + * a starting point with tcg_last_op(), speculatively emit opcodes, + * then decide whether or not to keep those opcodes after the fact. + */ +void tcg_remove_ops_after(TCGOp *op); + void tcg_optimize(TCGContext *s); /* Allocate a new temporary and initialize it with a constant. */ @@ -1149,7 +1051,8 @@ TCGv_vec tcg_const_ones_vec_matching(TCGv_vec); /* * Locate or create a read-only temporary that is a constant. - * This kind of temporary need not and should not be freed. + * This kind of temporary need not be freed, but for convenience + * will be silently ignored by tcg_temp_free_*. 
*/ TCGTemp *tcg_constant_internal(TCGType type, int64_t val); @@ -1259,44 +1162,6 @@ static inline size_t tcg_current_code_size(TCGContext *s) return tcg_ptr_byte_diff(s->code_ptr, s->code_buf); } -/* Combine the MemOp and mmu_idx parameters into a single value. */ -typedef uint32_t TCGMemOpIdx; - -/** - * make_memop_idx - * @op: memory operation - * @idx: mmu index - * - * Encode these values into a single parameter. - */ -static inline TCGMemOpIdx make_memop_idx(MemOp op, unsigned idx) -{ - tcg_debug_assert(idx <= 15); - return (op << 4) | idx; -} - -/** - * get_memop - * @oi: combined op/idx parameter - * - * Extract the memory operation from the combined value. - */ -static inline MemOp get_memop(TCGMemOpIdx oi) -{ - return oi >> 4; -} - -/** - * get_mmuidx - * @oi: combined op/idx parameter - * - * Extract the mmu index from the combined value. - */ -static inline unsigned get_mmuidx(TCGMemOpIdx oi) -{ - return oi & 15; -} - /** * tcg_qemu_tb_exec: * @env: pointer to CPUArchState for the CPU @@ -1384,172 +1249,18 @@ uint64_t dup_const(unsigned vece, uint64_t c); : (qemu_build_not_reached_always(), 0)) \ : dup_const(VECE, C)) - -/* - * Memory helpers that will be used by TCG generated code. - */ -#ifdef CONFIG_SOFTMMU -/* Value zero-extended to tcg register size. */ -tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); - -/* Value sign-extended to tcg register size. */ -tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); - -void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - TCGMemOpIdx oi, uintptr_t retaddr); - -/* Temporary aliases until backends are converted. 
*/ -#ifdef TARGET_WORDS_BIGENDIAN -# define helper_ret_ldsw_mmu helper_be_ldsw_mmu -# define helper_ret_lduw_mmu helper_be_lduw_mmu -# define helper_ret_ldsl_mmu helper_be_ldsl_mmu -# define helper_ret_ldul_mmu helper_be_ldul_mmu -# define helper_ret_ldl_mmu helper_be_ldul_mmu -# define helper_ret_ldq_mmu helper_be_ldq_mmu -# define helper_ret_stw_mmu helper_be_stw_mmu -# define helper_ret_stl_mmu helper_be_stl_mmu -# define helper_ret_stq_mmu helper_be_stq_mmu -#else -# define helper_ret_ldsw_mmu helper_le_ldsw_mmu -# define helper_ret_lduw_mmu helper_le_lduw_mmu -# define helper_ret_ldsl_mmu helper_le_ldsl_mmu -# define helper_ret_ldul_mmu helper_le_ldul_mmu -# define helper_ret_ldl_mmu helper_le_ldul_mmu -# define helper_ret_ldq_mmu helper_le_ldq_mmu -# define helper_ret_stw_mmu helper_le_stw_mmu -# define helper_ret_stl_mmu helper_le_stl_mmu -# define helper_ret_stq_mmu helper_le_stq_mmu -#endif - -uint32_t helper_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr, - uint32_t cmpv, uint32_t newv, - TCGMemOpIdx oi, uintptr_t retaddr); -uint32_t helper_atomic_cmpxchgw_le_mmu(CPUArchState *env, target_ulong addr, - uint32_t cmpv, uint32_t newv, - TCGMemOpIdx oi, uintptr_t retaddr); -uint32_t helper_atomic_cmpxchgl_le_mmu(CPUArchState *env, target_ulong addr, - uint32_t cmpv, uint32_t newv, - TCGMemOpIdx oi, uintptr_t retaddr); -uint64_t helper_atomic_cmpxchgq_le_mmu(CPUArchState *env, target_ulong addr, - uint64_t cmpv, uint64_t newv, - TCGMemOpIdx oi, uintptr_t retaddr); -uint32_t helper_atomic_cmpxchgw_be_mmu(CPUArchState *env, target_ulong addr, - uint32_t cmpv, uint32_t newv, - TCGMemOpIdx oi, uintptr_t retaddr); -uint32_t helper_atomic_cmpxchgl_be_mmu(CPUArchState *env, target_ulong addr, - uint32_t cmpv, uint32_t newv, - TCGMemOpIdx oi, uintptr_t retaddr); -uint64_t helper_atomic_cmpxchgq_be_mmu(CPUArchState *env, target_ulong addr, - uint64_t cmpv, uint64_t newv, - TCGMemOpIdx oi, uintptr_t retaddr); - -#define GEN_ATOMIC_HELPER(NAME, TYPE, SUFFIX) \ -TYPE helper_atomic_ ## NAME ## SUFFIX ## _mmu \ - (CPUArchState *env, target_ulong addr, TYPE val, \ - TCGMemOpIdx oi, uintptr_t retaddr); - -#ifdef CONFIG_ATOMIC64 -#define GEN_ATOMIC_HELPER_ALL(NAME) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, b) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) \ - GEN_ATOMIC_HELPER(NAME, uint64_t, q_le) \ - GEN_ATOMIC_HELPER(NAME, uint64_t, q_be) +#if TARGET_LONG_BITS == 64 +# define dup_const_tl dup_const #else -#define GEN_ATOMIC_HELPER_ALL(NAME) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, b) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) +# define dup_const_tl(VECE, C) \ + (__builtin_constant_p(VECE) \ + ? ( (VECE) == MO_8 ? 0x01010101ul * (uint8_t)(C) \ + : (VECE) == MO_16 ? 0x00010001ul * (uint16_t)(C) \ + : (VECE) == MO_32 ? 
0x00000001ul * (uint32_t)(C) \ + : (qemu_build_not_reached_always(), 0)) \ + : (target_long)dup_const(VECE, C)) #endif -GEN_ATOMIC_HELPER_ALL(fetch_add) -GEN_ATOMIC_HELPER_ALL(fetch_sub) -GEN_ATOMIC_HELPER_ALL(fetch_and) -GEN_ATOMIC_HELPER_ALL(fetch_or) -GEN_ATOMIC_HELPER_ALL(fetch_xor) -GEN_ATOMIC_HELPER_ALL(fetch_smin) -GEN_ATOMIC_HELPER_ALL(fetch_umin) -GEN_ATOMIC_HELPER_ALL(fetch_smax) -GEN_ATOMIC_HELPER_ALL(fetch_umax) - -GEN_ATOMIC_HELPER_ALL(add_fetch) -GEN_ATOMIC_HELPER_ALL(sub_fetch) -GEN_ATOMIC_HELPER_ALL(and_fetch) -GEN_ATOMIC_HELPER_ALL(or_fetch) -GEN_ATOMIC_HELPER_ALL(xor_fetch) -GEN_ATOMIC_HELPER_ALL(smin_fetch) -GEN_ATOMIC_HELPER_ALL(umin_fetch) -GEN_ATOMIC_HELPER_ALL(smax_fetch) -GEN_ATOMIC_HELPER_ALL(umax_fetch) - -GEN_ATOMIC_HELPER_ALL(xchg) - -#undef GEN_ATOMIC_HELPER_ALL -#undef GEN_ATOMIC_HELPER -#endif /* CONFIG_SOFTMMU */ - -/* - * These aren't really a "proper" helpers because TCG cannot manage Int128. - * However, use the same format as the others, for use by the backends. - * - * The cmpxchg functions are only defined if HAVE_CMPXCHG128; - * the ld/st functions are only defined if HAVE_ATOMIC128, - * as defined by . - */ -Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, - Int128 cmpv, Int128 newv, - TCGMemOpIdx oi, uintptr_t retaddr); -Int128 helper_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr, - Int128 cmpv, Int128 newv, - TCGMemOpIdx oi, uintptr_t retaddr); - -Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -Int128 helper_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, - TCGMemOpIdx oi, uintptr_t retaddr); - #ifdef CONFIG_DEBUG_TCG void tcg_assert_listed_vecop(TCGOpcode); #else diff --git a/include/ui/clipboard.h b/include/ui/clipboard.h new file mode 100644 index 00000000000..6298986b15c --- /dev/null +++ b/include/ui/clipboard.h @@ -0,0 +1,226 @@ +#ifndef QEMU_CLIPBOARD_H +#define QEMU_CLIPBOARD_H + +#include "qemu/notify.h" + +/** + * DOC: Introduction + * + * The header ``ui/clipboard.h`` declares the qemu clipboard interface. + * + * All qemu elements which want use the clipboard can register as + * clipboard peer. Subsequently they can set the clipboard content + * and get notifications for clipboard updates. + * + * Typical users are user interfaces (gtk), remote access protocols + * (vnc) and devices talking to the guest (vdagent). + * + * Even though the design allows different data types only plain text + * is supported for now. + */ + +typedef enum QemuClipboardType QemuClipboardType; +typedef enum QemuClipboardSelection QemuClipboardSelection; +typedef struct QemuClipboardPeer QemuClipboardPeer; +typedef struct QemuClipboardInfo QemuClipboardInfo; + +/** + * enum QemuClipboardType + * + * @QEMU_CLIPBOARD_TYPE_TEXT: text/plain; charset=utf-8 + * @QEMU_CLIPBOARD_TYPE__COUNT: type count. + */ +enum QemuClipboardType { + QEMU_CLIPBOARD_TYPE_TEXT, + QEMU_CLIPBOARD_TYPE__COUNT, +}; + +/* same as VD_AGENT_CLIPBOARD_SELECTION_* */ +/** + * enum QemuClipboardSelection + * + * @QEMU_CLIPBOARD_SELECTION_CLIPBOARD: clipboard (explitcit cut+paste). + * @QEMU_CLIPBOARD_SELECTION_PRIMARY: primary selection (select + middle mouse button). + * @QEMU_CLIPBOARD_SELECTION_SECONDARY: secondary selection (dunno). 
+ * @QEMU_CLIPBOARD_SELECTION__COUNT: selection count. + */ +enum QemuClipboardSelection { + QEMU_CLIPBOARD_SELECTION_CLIPBOARD, + QEMU_CLIPBOARD_SELECTION_PRIMARY, + QEMU_CLIPBOARD_SELECTION_SECONDARY, + QEMU_CLIPBOARD_SELECTION__COUNT, +}; + +/** + * struct QemuClipboardPeer + * + * @name: peer name. + * @update: notifier for clipboard updates. + * @request: callback for clipboard data requests. + * + * Clipboard peer description. + */ +struct QemuClipboardPeer { + const char *name; + Notifier update; + void (*request)(QemuClipboardInfo *info, + QemuClipboardType type); +}; + +/** + * struct QemuClipboardInfo + * + * @refcount: reference counter. + * @owner: clipboard owner. + * @selection: clipboard selection. + * @types: clipboard data array (one entry per type). + * + * Clipboard content data and metadata. + */ +struct QemuClipboardInfo { + uint32_t refcount; + QemuClipboardPeer *owner; + QemuClipboardSelection selection; + struct { + bool available; + bool requested; + size_t size; + void *data; + } types[QEMU_CLIPBOARD_TYPE__COUNT]; +}; + +/** + * qemu_clipboard_peer_register + * + * @peer: peer information. + * + * Register clipboard peer. Registering is needed for both active + * (set+grab clipboard) and passive (watch clipboard for updates) + * interaction with the qemu clipboard. + */ +void qemu_clipboard_peer_register(QemuClipboardPeer *peer); + +/** + * qemu_clipboard_peer_unregister + * + * @peer: peer information. + * + * Unregister clipboard peer. + */ +void qemu_clipboard_peer_unregister(QemuClipboardPeer *peer); + +/** + * qemu_clipboard_peer_owns + * + * @peer: peer information. + * @selection: clipboard selection. + * + * Return TRUE if the peer owns the clipboard. + */ +bool qemu_clipboard_peer_owns(QemuClipboardPeer *peer, + QemuClipboardSelection selection); + +/** + * qemu_clipboard_peer_release + * + * @peer: peer information. + * @selection: clipboard selection. + * + * If the peer owns the clipboard, release it. + */ +void qemu_clipboard_peer_release(QemuClipboardPeer *peer, + QemuClipboardSelection selection); + +/** + * qemu_clipboard_info + * + * @selection: clipboard selection. + * + * Return the current clipboard data & owner informations. + */ +QemuClipboardInfo *qemu_clipboard_info(QemuClipboardSelection selection); + +/** + * qemu_clipboard_info_new + * + * @owner: clipboard owner. + * @selection: clipboard selection. + * + * Allocate a new QemuClipboardInfo and initialize it with the given + * @owner and @selection. + * + * QemuClipboardInfo is a reference-counted struct. The new struct is + * returned with a reference already taken (i.e. reference count is + * one). + */ +QemuClipboardInfo *qemu_clipboard_info_new(QemuClipboardPeer *owner, + QemuClipboardSelection selection); +/** + * qemu_clipboard_info_ref + * + * @info: clipboard info. + * + * Increase @info reference count. + */ +QemuClipboardInfo *qemu_clipboard_info_ref(QemuClipboardInfo *info); + +/** + * qemu_clipboard_info_unref + * + * @info: clipboard info. + * + * Decrease @info reference count. When the count goes down to zero + * free the @info struct itself and all clipboard data. + */ +void qemu_clipboard_info_unref(QemuClipboardInfo *info); + +/** + * qemu_clipboard_update + * + * @info: clipboard info. + * + * Update the qemu clipboard. Notify all registered peers (including + * the clipboard owner) that the qemu clipboard has been updated. 
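Taken together, the declarations above are all a peer needs: register once, watch the update notifier, answer request callbacks, and publish data with qemu_clipboard_set_data(). A minimal sketch of such a peer, assuming the update notifier is invoked with the updated QemuClipboardInfo as its data argument; the example_* names are placeholders rather than part of the API.

#include "qemu/osdep.h"
#include "ui/clipboard.h"

static QemuClipboardPeer example_peer;

/* Another peer updated the clipboard; ask for the text when available. */
static void example_update(Notifier *notifier, void *data)
{
    QemuClipboardInfo *info = data;

    if (info->owner != &example_peer &&
        info->types[QEMU_CLIPBOARD_TYPE_TEXT].available) {
        qemu_clipboard_request(info, QEMU_CLIPBOARD_TYPE_TEXT);
    }
}

/* Another peer asked for data we advertised; hand it over. */
static void example_request(QemuClipboardInfo *info, QemuClipboardType type)
{
    if (type == QEMU_CLIPBOARD_TYPE_TEXT) {
        qemu_clipboard_set_data(&example_peer, info, type,
                                strlen("hello"), "hello", true);
    }
}

static void example_clipboard_init(void)
{
    example_peer.name = "example";
    example_peer.update.notify = example_update;
    example_peer.request = example_request;
    qemu_clipboard_peer_register(&example_peer);
}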
+ * + * This is used for both new completely clipboard content and for + * clipboard data updates in response to qemu_clipboard_request() + * calls. + */ +void qemu_clipboard_update(QemuClipboardInfo *info); + +/** + * qemu_clipboard_request + * + * @info: clipboard info. + * @type: clipboard data type. + * + * Request clipboard content. Typically the clipboard owner only + * advertises the available data types and provides the actual data + * only on request. + */ +void qemu_clipboard_request(QemuClipboardInfo *info, + QemuClipboardType type); + +/** + * qemu_clipboard_set_data + * + * @peer: clipboard peer. + * @info: clipboard info. + * @type: clipboard data type. + * @size: data size. + * @data: data blob. + * @update: notify peers about the update. + * + * Set clipboard content for the given @type. This function will make + * a copy of the content data and store that. + */ +void qemu_clipboard_set_data(QemuClipboardPeer *peer, + QemuClipboardInfo *info, + QemuClipboardType type, + uint32_t size, + const void *data, + bool update); + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(QemuClipboardInfo, qemu_clipboard_info_unref) + +#endif /* QEMU_CLIPBOARD_H */ diff --git a/include/ui/console.h b/include/ui/console.h index ca3c7af6a6c..6d678924f6f 100644 --- a/include/ui/console.h +++ b/include/ui/console.h @@ -167,7 +167,15 @@ typedef struct QemuDmaBuf { uint32_t fourcc; uint64_t modifier; uint32_t texture; + uint32_t x; + uint32_t y; + uint32_t scanout_width; + uint32_t scanout_height; bool y0_top; + void *sync; + int fence_fd; + bool allow_fences; + bool draw_submitted; } QemuDmaBuf; typedef struct DisplayState DisplayState; @@ -471,4 +479,9 @@ bool vnc_display_reload_certs(const char *id, Error **errp); /* input.c */ int index_from_key(const char *key, size_t key_length); +#ifdef CONFIG_LINUX +/* udmabuf.c */ +int udmabuf_fd(void); +#endif + #endif diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h index f1bf8f97fc3..2fb6e0dd6b8 100644 --- a/include/ui/egl-helpers.h +++ b/include/ui/egl-helpers.h @@ -19,6 +19,7 @@ typedef struct egl_fb { GLuint texture; GLuint framebuffer; bool delete_texture; + QemuDmaBuf *dmabuf; } egl_fb; void egl_fb_destroy(egl_fb *fb); @@ -45,6 +46,8 @@ int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc, void egl_dmabuf_import_texture(QemuDmaBuf *dmabuf); void egl_dmabuf_release_texture(QemuDmaBuf *dmabuf); +void egl_dmabuf_create_sync(QemuDmaBuf *dmabuf); +void egl_dmabuf_create_fence(QemuDmaBuf *dmabuf); #endif diff --git a/include/ui/gtk.h b/include/ui/gtk.h index 5ae0ad60a60..7d22affd381 100644 --- a/include/ui/gtk.h +++ b/include/ui/gtk.h @@ -18,11 +18,18 @@ #include #endif +#include "ui/clipboard.h" +#include "ui/console.h" #include "ui/kbd-state.h" #if defined(CONFIG_OPENGL) #include "ui/egl-helpers.h" #include "ui/egl-context.h" #endif +#ifdef CONFIG_VTE +#include "qemu/fifo8.h" +#endif + +#define MAX_VCS 10 typedef struct GtkDisplayState GtkDisplayState; @@ -58,6 +65,7 @@ typedef struct VirtualVteConsole { GtkWidget *scrollbar; GtkWidget *terminal; Chardev *chr; + Fifo8 out_fifo; bool echo; } VirtualVteConsole; #endif @@ -83,11 +91,71 @@ typedef struct VirtualConsole { }; } VirtualConsole; +struct GtkDisplayState { + GtkWidget *window; + + GtkWidget *menu_bar; + + GtkAccelGroup *accel_group; + + GtkWidget *machine_menu_item; + GtkWidget *machine_menu; + GtkWidget *pause_item; + GtkWidget *reset_item; + GtkWidget *powerdown_item; + GtkWidget *quit_item; + + GtkWidget *view_menu_item; + GtkWidget *view_menu; + GtkWidget 
*full_screen_item; + GtkWidget *copy_item; + GtkWidget *zoom_in_item; + GtkWidget *zoom_out_item; + GtkWidget *zoom_fixed_item; + GtkWidget *zoom_fit_item; + GtkWidget *grab_item; + GtkWidget *grab_on_hover_item; + + int nb_vcs; + VirtualConsole vc[MAX_VCS]; + + GtkWidget *show_tabs_item; + GtkWidget *untabify_item; + GtkWidget *show_menubar_item; + + GtkWidget *vbox; + GtkWidget *notebook; + int button_mask; + gboolean last_set; + int last_x; + int last_y; + int grab_x_root; + int grab_y_root; + VirtualConsole *kbd_owner; + VirtualConsole *ptr_owner; + + gboolean full_screen; + + GdkCursor *null_cursor; + Notifier mouse_mode_notifier; + gboolean free_scale; + + bool external_pause_update; + + QemuClipboardPeer cbpeer; + uint32_t cbpending[QEMU_CLIPBOARD_SELECTION__COUNT]; + GtkClipboard *gtkcb[QEMU_CLIPBOARD_SELECTION__COUNT]; + bool cbowner[QEMU_CLIPBOARD_SELECTION__COUNT]; + + DisplayOptions *opts; +}; + extern bool gtk_use_gl_area; /* ui/gtk.c */ void gd_update_windowsize(VirtualConsole *vc); int gd_monitor_update_interval(GtkWidget *widget); +void gd_hw_gl_flushed(void *vc); /* ui/gtk-egl.c */ void gd_egl_init(VirtualConsole *vc); @@ -114,8 +182,8 @@ void gd_egl_cursor_dmabuf(DisplayChangeListener *dcl, uint32_t hot_x, uint32_t hot_y); void gd_egl_cursor_position(DisplayChangeListener *dcl, uint32_t pos_x, uint32_t pos_y); -void gd_egl_release_dmabuf(DisplayChangeListener *dcl, - QemuDmaBuf *dmabuf); +void gd_egl_flush(DisplayChangeListener *dcl, + uint32_t x, uint32_t y, uint32_t w, uint32_t h); void gd_egl_scanout_flush(DisplayChangeListener *dcl, uint32_t x, uint32_t y, uint32_t w, uint32_t h); void gtk_egl_init(DisplayGLMode mode); @@ -150,4 +218,7 @@ void gtk_gl_area_init(void); int gd_gl_area_make_current(DisplayChangeListener *dcl, QEMUGLContext ctx); +/* gtk-clipboard.c */ +void gd_clipboard_init(GtkDisplayState *gd); + #endif /* UI_GTK_H */ diff --git a/include/ui/qemu-pixman.h b/include/ui/qemu-pixman.h index 87737a6f162..806ddcd7cda 100644 --- a/include/ui/qemu-pixman.h +++ b/include/ui/qemu-pixman.h @@ -62,6 +62,7 @@ typedef struct PixelFormat { PixelFormat qemu_pixelformat_from_pixman(pixman_format_code_t format); pixman_format_code_t qemu_default_pixman_format(int bpp, bool native_endian); pixman_format_code_t qemu_drm_format_to_pixman(uint32_t drm_format); +uint32_t qemu_pixman_to_drm_format(pixman_format_code_t pixman); int qemu_pixman_get_type(int rshift, int gshift, int bshift); pixman_format_code_t qemu_pixman_get_format(PixelFormat *pf); bool qemu_pixman_check_format(DisplayChangeListener *dcl, diff --git a/include/user/syscall-trace.h b/include/user/syscall-trace.h index 42e3b48b032..614cfacfa58 100644 --- a/include/user/syscall-trace.h +++ b/include/user/syscall-trace.h @@ -7,8 +7,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _SYSCALL_TRACE_H_ -#define _SYSCALL_TRACE_H_ +#ifndef SYSCALL_TRACE_H +#define SYSCALL_TRACE_H #include "trace/trace-root.h" diff --git a/io/channel-socket.c b/io/channel-socket.c index de259f7eed2..606ec97cf7c 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -487,15 +487,15 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)); -#ifdef MSG_CMSG_CLOEXEC - sflags |= MSG_CMSG_CLOEXEC; -#endif - msg.msg_iov = (struct iovec *)iov; msg.msg_iovlen = niov; if (fds && nfds) { msg.msg_control = control; msg.msg_controllen = sizeof(control); +#ifdef MSG_CMSG_CLOEXEC + sflags |= MSG_CMSG_CLOEXEC; +#endif + } retry: diff --git a/io/channel-websock.c 
b/io/channel-websock.c index 03c1f7cb62f..70889bb54da 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -177,15 +177,9 @@ qio_channel_websock_handshake_send_res(QIOChannelWebsock *ioc, static gchar *qio_channel_websock_date_str(void) { - struct tm tm; - time_t now = time(NULL); - char datebuf[128]; + g_autoptr(GDateTime) now = g_date_time_new_now_utc(); - gmtime_r(&now, &tm); - - strftime(datebuf, sizeof(datebuf), "%a, %d %b %Y %H:%M:%S GMT", &tm); - - return g_strdup(datebuf); + return g_date_time_format(now, "%a, %d %b %Y %H:%M:%S GMT"); } static void qio_channel_websock_handshake_send_res_err(QIOChannelWebsock *ioc, diff --git a/io/dns-resolver.c b/io/dns-resolver.c index 743a0efc876..53b0e8407a9 100644 --- a/io/dns-resolver.c +++ b/io/dns-resolver.c @@ -122,6 +122,10 @@ static int qio_dns_resolver_lookup_sync_inet(QIODNSResolver *resolver, .ipv4 = iaddr->ipv4, .has_ipv6 = iaddr->has_ipv6, .ipv6 = iaddr->ipv6, +#ifdef HAVE_IPPROTO_MPTCP + .has_mptcp = iaddr->has_mptcp, + .mptcp = iaddr->mptcp, +#endif }; (*addrs)[i] = newaddr; diff --git a/io/net-listener.c b/io/net-listener.c index 46c2643d005..1c984d69c69 100644 --- a/io/net-listener.c +++ b/io/net-listener.c @@ -292,6 +292,9 @@ static void qio_net_listener_finalize(Object *obj) QIONetListener *listener = QIO_NET_LISTENER(obj); size_t i; + if (listener->io_notify) { + listener->io_notify(listener->io_data); + } qio_net_listener_disconnect(listener); for (i = 0; i < listener->nsioc; i++) { diff --git a/io/trace-events b/io/trace-events index d7bc70b9666..c5e814eb446 100644 --- a/io/trace-events +++ b/io/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # task.c qio_task_new(void *task, void *source, void *func, void *opaque) "Task new task=%p source=%p func=%p opaque=%p" diff --git a/iothread.c b/iothread.c index 7f086387be9..0f98af0f2aa 100644 --- a/iothread.c +++ b/iothread.c @@ -39,13 +39,6 @@ DECLARE_CLASS_CHECKERS(IOThreadClass, IOTHREAD, #define IOTHREAD_POLL_MAX_NS_DEFAULT 0ULL #endif -static __thread IOThread *my_iothread; - -AioContext *qemu_get_current_aio_context(void) -{ - return my_iothread ? my_iothread->ctx : qemu_get_aio_context(); -} - static void *iothread_run(void *opaque) { IOThread *iothread = opaque; @@ -56,7 +49,7 @@ static void *iothread_run(void *opaque) * in this new thread uses glib. 
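The qio_channel_websock_date_str() rewrite above swaps the manual gmtime_r()/strftime() sequence for GLib's GDateTime, dropping the fixed-size stack buffer along the way. A standalone sketch of the same pattern, using only public GLib calls; http_date_now() is just an illustrative name.

#include <glib.h>

/* Format the current UTC time the way HTTP response headers expect it. */
static gchar *http_date_now(void)
{
    g_autoptr(GDateTime) now = g_date_time_new_now_utc();

    return g_date_time_format(now, "%a, %d %b %Y %H:%M:%S GMT");
}

int main(void)
{
    g_autofree gchar *date = http_date_now();

    g_print("%s\n", date);
    return 0;
}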
*/ g_main_context_push_thread_default(iothread->worker_context); - my_iothread = iothread; + qemu_set_current_aio_context(iothread->ctx); iothread->thread_id = qemu_get_thread_id(); qemu_sem_post(&iothread->init_done_sem); @@ -159,6 +152,24 @@ static void iothread_init_gcontext(IOThread *iothread) iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE); } +static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) +{ + ERRP_GUARD(); + + aio_context_set_poll_params(iothread->ctx, + iothread->poll_max_ns, + iothread->poll_grow, + iothread->poll_shrink, + errp); + if (*errp) { + return; + } + + aio_context_set_aio_params(iothread->ctx, + iothread->aio_max_batch, + errp); +} + static void iothread_complete(UserCreatable *obj, Error **errp) { Error *local_error = NULL; @@ -178,11 +189,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp) */ iothread_init_gcontext(iothread); - aio_context_set_poll_params(iothread->ctx, - iothread->poll_max_ns, - iothread->poll_grow, - iothread->poll_shrink, - &local_error); + iothread_set_aio_context_params(iothread, &local_error); if (local_error) { error_propagate(errp, local_error); aio_context_unref(iothread->ctx); @@ -208,48 +215,70 @@ static void iothread_complete(UserCreatable *obj, Error **errp) typedef struct { const char *name; ptrdiff_t offset; /* field's byte offset in IOThread struct */ -} PollParamInfo; +} IOThreadParamInfo; -static PollParamInfo poll_max_ns_info = { +static IOThreadParamInfo poll_max_ns_info = { "poll-max-ns", offsetof(IOThread, poll_max_ns), }; -static PollParamInfo poll_grow_info = { +static IOThreadParamInfo poll_grow_info = { "poll-grow", offsetof(IOThread, poll_grow), }; -static PollParamInfo poll_shrink_info = { +static IOThreadParamInfo poll_shrink_info = { "poll-shrink", offsetof(IOThread, poll_shrink), }; +static IOThreadParamInfo aio_max_batch_info = { + "aio-max-batch", offsetof(IOThread, aio_max_batch), +}; -static void iothread_get_poll_param(Object *obj, Visitor *v, - const char *name, void *opaque, Error **errp) +static void iothread_get_param(Object *obj, Visitor *v, + const char *name, IOThreadParamInfo *info, Error **errp) { IOThread *iothread = IOTHREAD(obj); - PollParamInfo *info = opaque; int64_t *field = (void *)iothread + info->offset; visit_type_int64(v, name, field, errp); } -static void iothread_set_poll_param(Object *obj, Visitor *v, - const char *name, void *opaque, Error **errp) +static bool iothread_set_param(Object *obj, Visitor *v, + const char *name, IOThreadParamInfo *info, Error **errp) { IOThread *iothread = IOTHREAD(obj); - PollParamInfo *info = opaque; int64_t *field = (void *)iothread + info->offset; int64_t value; if (!visit_type_int64(v, name, &value, errp)) { - return; + return false; } if (value < 0) { error_setg(errp, "%s value must be in range [0, %" PRId64 "]", info->name, INT64_MAX); - return; + return false; } *field = value; + return true; +} + +static void iothread_get_poll_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + IOThreadParamInfo *info = opaque; + + iothread_get_param(obj, v, name, info, errp); +} + +static void iothread_set_poll_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + IOThread *iothread = IOTHREAD(obj); + IOThreadParamInfo *info = opaque; + + if (!iothread_set_param(obj, v, name, info, errp)) { + return; + } + if (iothread->ctx) { aio_context_set_poll_params(iothread->ctx, iothread->poll_max_ns, @@ -259,6 +288,31 @@ static void 
iothread_set_poll_param(Object *obj, Visitor *v, } } +static void iothread_get_aio_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + IOThreadParamInfo *info = opaque; + + iothread_get_param(obj, v, name, info, errp); +} + +static void iothread_set_aio_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + IOThread *iothread = IOTHREAD(obj); + IOThreadParamInfo *info = opaque; + + if (!iothread_set_param(obj, v, name, info, errp)) { + return; + } + + if (iothread->ctx) { + aio_context_set_aio_params(iothread->ctx, + iothread->aio_max_batch, + errp); + } +} + static void iothread_class_init(ObjectClass *klass, void *class_data) { UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); @@ -276,6 +330,10 @@ static void iothread_class_init(ObjectClass *klass, void *class_data) iothread_get_poll_param, iothread_set_poll_param, NULL, &poll_shrink_info); + object_class_property_add(klass, "aio-max-batch", "int", + iothread_get_aio_param, + iothread_set_aio_param, + NULL, &aio_max_batch_info); } static const TypeInfo iothread_info = { @@ -325,6 +383,7 @@ static int query_one_iothread(Object *object, void *opaque) info->poll_max_ns = iothread->poll_max_ns; info->poll_grow = iothread->poll_grow; info->poll_shrink = iothread->poll_shrink; + info->aio_max_batch = iothread->aio_max_batch; QAPI_LIST_APPEND(*tail, info); return 0; diff --git a/job-qmp.c b/job-qmp.c index 34c4da094f2..829a28aa70e 100644 --- a/job-qmp.c +++ b/job-qmp.c @@ -144,16 +144,20 @@ void qmp_job_dismiss(const char *id, Error **errp) static JobInfo *job_query_single(Job *job, Error **errp) { JobInfo *info; + uint64_t progress_current; + uint64_t progress_total; assert(!job_is_internal(job)); + progress_get_snapshot(&job->progress, &progress_current, + &progress_total); info = g_new(JobInfo, 1); *info = (JobInfo) { .id = g_strdup(job->id), .type = job_type(job), .status = job->status, - .current_progress = job->progress.current, - .total_progress = job->progress.total, + .current_progress = progress_current, + .total_progress = progress_total, .has_error = !!job->err, .error = job->err ? \ g_strdup(error_get_pretty(job->err)) : NULL, diff --git a/job.c b/job.c index 4aff13d95ab..dbfa67bb0a3 100644 --- a/job.c +++ b/job.c @@ -216,6 +216,13 @@ const char *job_type_str(const Job *job) } bool job_is_cancelled(Job *job) +{ + /* force_cancel may be true only if cancelled is true, too */ + assert(job->cancelled || !job->force_cancel); + return job->force_cancel; +} + +bool job_cancel_requested(Job *job) { return job->cancelled; } @@ -339,6 +346,8 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn, job->cb = cb; job->opaque = opaque; + progress_init(&job->progress); + notifier_list_init(&job->on_finalize_cancelled); notifier_list_init(&job->on_finalize_completed); notifier_list_init(&job->on_pending); @@ -382,6 +391,7 @@ void job_unref(Job *job) QLIST_REMOVE(job, job_list); + progress_destroy(&job->progress); error_free(job->err); g_free(job->id); g_free(job); @@ -716,8 +726,12 @@ static int job_finalize_single(Job *job) static void job_cancel_async(Job *job, bool force) { if (job->driver->cancel) { - job->driver->cancel(job); + force = job->driver->cancel(job, force); + } else { + /* No .cancel() means the job will behave as if force-cancelled */ + force = true; } + if (job->user_paused) { /* Do not call job_enter here, the caller will handle it. 
*/ if (job->driver->user_resume) { @@ -727,14 +741,23 @@ static void job_cancel_async(Job *job, bool force) assert(job->pause_count > 0); job->pause_count--; } - job->cancelled = true; - /* To prevent 'force == false' overriding a previous 'force == true' */ - job->force_cancel |= force; + + /* + * Ignore soft cancel requests after the job is already done + * (We will still invoke job->driver->cancel() above, but if the + * job driver supports soft cancelling and the job is done, that + * should be a no-op, too. We still call it so it can override + * @force.) + */ + if (force || !job->deferred_to_main_loop) { + job->cancelled = true; + /* To prevent 'force == false' overriding a previous 'force == true' */ + job->force_cancel |= force; + } } static void job_completed_txn_abort(Job *job) { - AioContext *outer_ctx = job->aio_context; AioContext *ctx; JobTxn *txn = job->txn; Job *other_job; @@ -748,10 +771,14 @@ static void job_completed_txn_abort(Job *job) txn->aborting = true; job_txn_ref(txn); - /* We can only hold the single job's AioContext lock while calling + /* + * We can only hold the single job's AioContext lock while calling * job_finalize_single() because the finalization callbacks can involve - * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. */ - aio_context_release(outer_ctx); + * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. + * Note that the job's AioContext may change when it is finalized. + */ + job_ref(job); + aio_context_release(job->aio_context); /* Other jobs are effectively cancelled by us, set the status for * them; this job, however, may or may not be cancelled, depending @@ -760,23 +787,37 @@ static void job_completed_txn_abort(Job *job) if (other_job != job) { ctx = other_job->aio_context; aio_context_acquire(ctx); - job_cancel_async(other_job, false); + /* + * This is a transaction: If one job failed, no result will matter. + * Therefore, pass force=true to terminate all other jobs as quickly + * as possible. + */ + job_cancel_async(other_job, true); aio_context_release(ctx); } } while (!QLIST_EMPTY(&txn->jobs)) { other_job = QLIST_FIRST(&txn->jobs); + /* + * The job's AioContext may change, so store it in @ctx so we + * release the same context that we have acquired before. + */ ctx = other_job->aio_context; aio_context_acquire(ctx); if (!job_is_completed(other_job)) { - assert(job_is_cancelled(other_job)); + assert(job_cancel_requested(other_job)); job_finish_sync(other_job, NULL, NULL); } job_finalize_single(other_job); aio_context_release(ctx); } - aio_context_acquire(outer_ctx); + /* + * Use job_ref()/job_unref() so we can read the AioContext here + * even if the job went away during job_finalize_single(). + */ + aio_context_acquire(job->aio_context); + job_unref(job); job_txn_unref(txn); } @@ -939,7 +980,19 @@ void job_cancel(Job *job, bool force) if (!job_started(job)) { job_completed(job); } else if (job->deferred_to_main_loop) { - job_completed_txn_abort(job); + /* + * job_cancel_async() ignores soft-cancel requests for jobs + * that are already done (i.e. deferred to the main loop). We + * have to check again whether the job is really cancelled. + * (job_cancel_requested() and job_is_cancelled() are equivalent + * here, because job_cancel_async() will make soft-cancel + * requests no-ops when deferred_to_main_loop is true. We + * choose to call job_is_cancelled() to show that we invoke + * job_completed_txn_abort() only for force-cancelled jobs.) 
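After this change the two predicates split cleanly: job_cancel_requested() stays true for any cancel request, while job_is_cancelled() reports only force-cancel. A sketch of how a job's main loop might use the pair under that split; more_work_left(), do_one_unit_of_work() and finish_gracefully() are hypothetical placeholders for a driver's real logic.

#include "qemu/osdep.h"
#include "qemu/job.h"

/* Hypothetical helpers standing in for a concrete job driver. */
bool more_work_left(Job *job);
void do_one_unit_of_work(Job *job);
void finish_gracefully(Job *job);

static int coroutine_fn example_job_run(Job *job, Error **errp)
{
    /* Force-cancel aborts the loop; a soft cancel request does not. */
    while (!job_is_cancelled(job) && more_work_left(job)) {
        do_one_unit_of_work(job);
        job_pause_point(job);
    }

    if (job_cancel_requested(job) && !job_is_cancelled(job)) {
        /* Soft cancel: wind the job down gracefully instead of aborting. */
        finish_gracefully(job);
    }
    return 0;
}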
+ */ + if (job_is_cancelled(job)) { + job_completed_txn_abort(job); + } } else { job_enter(job); } @@ -961,9 +1014,21 @@ static void job_cancel_err(Job *job, Error **errp) job_cancel(job, false); } -int job_cancel_sync(Job *job) +/** + * Same as job_cancel_err(), but force-cancel. + */ +static void job_force_cancel_err(Job *job, Error **errp) +{ + job_cancel(job, true); +} + +int job_cancel_sync(Job *job, bool force) { - return job_finish_sync(job, &job_cancel_err, NULL); + if (force) { + return job_finish_sync(job, &job_force_cancel_err, NULL); + } else { + return job_finish_sync(job, &job_cancel_err, NULL); + } } void job_cancel_sync_all(void) @@ -974,7 +1039,7 @@ void job_cancel_sync_all(void) while ((job = job_next(NULL))) { aio_context = job->aio_context; aio_context_acquire(aio_context); - job_cancel_sync(job); + job_cancel_sync(job, true); aio_context_release(aio_context); } } @@ -991,7 +1056,7 @@ void job_complete(Job *job, Error **errp) if (job_apply_verb(job, JOB_VERB_COMPLETE, errp)) { return; } - if (job_is_cancelled(job) || !job->driver->complete) { + if (job_cancel_requested(job) || !job->driver->complete) { error_setg(errp, "The active block job '%s' cannot be completed", job->id); return; diff --git a/libdecnumber/decContext.c b/libdecnumber/decContext.c index 7d97a65ac56..1956edf0a7a 100644 --- a/libdecnumber/decContext.c +++ b/libdecnumber/decContext.c @@ -53,12 +53,13 @@ static const Flag *mfctop=(Flag *)&mfcone; /* -> top byte */ const uByte DECSTICKYTAB[10]={1,1,2,3,4,6,6,7,8,9}; /* used if sticky */ /* ------------------------------------------------------------------ */ -/* Powers of ten (powers[n]==10**n, 0<=n<=9) */ +/* Powers of ten (powers[n]==10**n, 0<=n<=19) */ /* ------------------------------------------------------------------ */ -const uLong DECPOWERS[19] = {1, 10, 100, 1000, 10000, 100000, 1000000, +const uLong DECPOWERS[20] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000ULL, 100000000000ULL, 1000000000000ULL, 10000000000000ULL, 100000000000000ULL, 1000000000000000ULL, - 10000000000000000ULL, 100000000000000000ULL, 1000000000000000000ULL, }; + 10000000000000000ULL, 100000000000000000ULL, 1000000000000000000ULL, + 10000000000000000000ULL,}; /* ------------------------------------------------------------------ */ /* decContextClearStatus -- clear bits in current status */ diff --git a/libdecnumber/decNumber.c b/libdecnumber/decNumber.c index 1ffe458ad83..31282adafdc 100644 --- a/libdecnumber/decNumber.c +++ b/libdecnumber/decNumber.c @@ -167,6 +167,7 @@ /* ------------------------------------------------------------------ */ #include "qemu/osdep.h" +#include "qemu/host-utils.h" #include "libdecnumber/dconfig.h" #include "libdecnumber/decNumber.h" #include "libdecnumber/decNumberLocal.h" @@ -263,6 +264,7 @@ static decNumber * decTrim(decNumber *, decContext *, Flag, Int *); static Int decUnitAddSub(const Unit *, Int, const Unit *, Int, Int, Unit *, Int); static Int decUnitCompare(const Unit *, Int, const Unit *, Int, Int); +static bool mulUInt128ByPowOf10(uLong *, uLong *, uInt); #if !DECSUBSET /* decFinish == decFinalize when no subset arithmetic needed */ @@ -462,6 +464,41 @@ decNumber *decNumberFromUInt64(decNumber *dn, uint64_t uin) return dn; } /* decNumberFromUInt64 */ +decNumber *decNumberFromInt128(decNumber *dn, uint64_t lo, int64_t hi) +{ + uint64_t unsig_hi = hi; + if (hi < 0) { + if (lo == 0) { + unsig_hi = -unsig_hi; + } else { + unsig_hi = ~unsig_hi; + lo = -lo; + } + } + + decNumberFromUInt128(dn, lo, 
unsig_hi); + if (hi < 0) { + dn->bits = DECNEG; /* sign needed */ + } + return dn; +} /* decNumberFromInt128 */ + +decNumber *decNumberFromUInt128(decNumber *dn, uint64_t lo, uint64_t hi) +{ + uint64_t rem; + Unit *up; /* work pointer */ + decNumberZero(dn); /* clean */ + if (lo == 0 && hi == 0) { + return dn; /* [or decGetDigits bad call] */ + } + for (up = dn->lsu; hi > 0 || lo > 0; up++) { + rem = divu128(&lo, &hi, DECDPUNMAX + 1); + *up = (Unit)rem; + } + dn->digits = decGetDigits(dn->lsu, up - dn->lsu); + return dn; +} /* decNumberFromUInt128 */ + /* ------------------------------------------------------------------ */ /* to-int64 -- conversion to int64 */ /* */ @@ -506,6 +543,68 @@ int64_t decNumberIntegralToInt64(const decNumber *dn, decContext *set) return 0; } /* decNumberIntegralToInt64 */ +/* ------------------------------------------------------------------ */ +/* decNumberIntegralToInt128 -- conversion to int128 */ +/* */ +/* dn is the decNumber to convert. dn is assumed to have been */ +/* rounded to a floating point integer value. */ +/* set is the context for reporting errors */ +/* returns the converted decNumber via plow and phigh */ +/* */ +/* Invalid is set if the decNumber is a NaN, Infinite or is out of */ +/* range for a signed 128 bit integer. */ +/* ------------------------------------------------------------------ */ + +void decNumberIntegralToInt128(const decNumber *dn, decContext *set, + uint64_t *plow, uint64_t *phigh) +{ + int d; /* work */ + const Unit *up; /* .. */ + uint64_t lo = 0, hi = 0; + + if (decNumberIsSpecial(dn) || (dn->exponent < 0) || + (dn->digits + dn->exponent > 39)) { + goto Invalid; + } + + up = dn->lsu; /* -> lsu */ + + for (d = (dn->digits - 1) / DECDPUN; d >= 0; d--) { + if (mulu128(&lo, &hi, DECDPUNMAX + 1)) { + /* overflow */ + goto Invalid; + } + if (uadd64_overflow(lo, up[d], &lo)) { + if (uadd64_overflow(hi, 1, &hi)) { + /* overflow */ + goto Invalid; + } + } + } + + if (mulUInt128ByPowOf10(&lo, &hi, dn->exponent)) { + /* overflow */ + goto Invalid; + } + + if (decNumberIsNegative(dn)) { + if (lo == 0) { + *phigh = -hi; + *plow = 0; + } else { + *phigh = ~hi; + *plow = -lo; + } + } else { + *plow = lo; + *phigh = hi; + } + + return; + +Invalid: + decContextSetStatus(set, DEC_Invalid_operation); +} /* decNumberIntegralToInt128 */ /* ------------------------------------------------------------------ */ /* to-scientific-string -- conversion to numeric string */ @@ -7849,6 +7948,38 @@ static Int decGetDigits(Unit *uar, Int len) { return digits; } /* decGetDigits */ +/* ------------------------------------------------------------------ */ +/* mulUInt128ByPowOf10 -- multiply a 128-bit unsigned integer by a */ +/* power of 10. */ +/* */ +/* The 128-bit factor composed of plow and phigh is multiplied */ +/* by 10^exp. */ +/* */ +/* plow pointer to the low 64 bits of the first factor */ +/* phigh pointer to the high 64 bits of the first factor */ +/* exp the exponent of the power of 10 of the second factor */ +/* */ +/* If the result fits in 128 bits, returns false and the */ +/* multiplication result through plow and phigh. */ +/* Otherwise, returns true. 
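With decNumberFromInt128() and decNumberIntegralToInt128() in place, a 128-bit integer can be round-tripped through decNumber form. A minimal sketch of that round trip, assuming it is built inside the QEMU tree and that DECNUMDIGITS is defined large enough (39 digits) before the decNumber headers are included.

#include "qemu/osdep.h"

#define DECNUMDIGITS 39                 /* room for any 128-bit integer */
#include "libdecnumber/dconfig.h"
#include "libdecnumber/decContext.h"
#include "libdecnumber/decNumber.h"

/* Convert a signed 128-bit value to decNumber and back again. */
static bool roundtrip_int128(uint64_t lo, int64_t hi,
                             uint64_t *out_lo, uint64_t *out_hi)
{
    decContext set;
    decNumber dn;

    decContextDefault(&set, DEC_INIT_DECIMAL128);
    decNumberFromInt128(&dn, lo, hi);
    decNumberIntegralToInt128(&dn, &set, out_lo, out_hi);

    /* Out-of-range or non-integer input raises DEC_Invalid_operation. */
    return !(set.status & DEC_Invalid_operation);
}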
*/ +/* ------------------------------------------------------------------ */ +static bool mulUInt128ByPowOf10(uLong *plow, uLong *phigh, uInt pow10) +{ + while (pow10 >= ARRAY_SIZE(powers)) { + if (mulu128(plow, phigh, powers[ARRAY_SIZE(powers) - 1])) { + /* Overflow */ + return true; + } + pow10 -= ARRAY_SIZE(powers) - 1; + } + + if (pow10 > 0) { + return mulu128(plow, phigh, powers[pow10]); + } else { + return false; + } +} + #if DECTRACE | DECCHECK /* ------------------------------------------------------------------ */ /* decNumberShow -- display a number [debug aid] */ diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index b6a0eaa32ae..3d2ce9912dc 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -184,6 +184,17 @@ struct kvm_vcpu_events { __u32 reserved[12]; }; +struct kvm_arm_copy_mte_tags { + __u64 guest_ipa; + __u64 length; + void *addr; + __u64 flags; + __u64 reserved[2]; +}; + +#define KVM_ARM_TAGS_TO_GUEST 0 +#define KVM_ARM_TAGS_FROM_GUEST 1 + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h index f94f65d429b..1567a3294c3 100644 --- a/linux-headers/asm-generic/mman-common.h +++ b/linux-headers/asm-generic/mman-common.h @@ -72,6 +72,9 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h index 72875291778..f211961ce1d 100644 --- a/linux-headers/asm-generic/unistd.h +++ b/linux-headers/asm-generic/unistd.h @@ -861,9 +861,20 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2) __SYSCALL(__NR_process_madvise, sys_process_madvise) #define __NR_epoll_pwait2 441 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) +#define __NR_mount_setattr 442 +__SYSCALL(__NR_mount_setattr, sys_mount_setattr) +#define __NR_quotactl_fd 443 +__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd) + +#define __NR_landlock_create_ruleset 444 +__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) +#define __NR_landlock_add_rule 445 +__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) +#define __NR_landlock_restrict_self 446 +__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) #undef __NR_syscalls -#define __NR_syscalls 442 +#define __NR_syscalls 447 /* * 32 bit systems traditionally used different diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h index 57dc2ac4f8b..40b210c65a5 100644 --- a/linux-headers/asm-mips/mman.h +++ b/linux-headers/asm-mips/mman.h @@ -98,6 +98,9 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h index 59e53b6e076..09cd297698e 100644 --- a/linux-headers/asm-mips/unistd_n32.h +++ b/linux-headers/asm-mips/unistd_n32.h @@ -1,376 +1,380 @@ -#ifndef _ASM_MIPS_UNISTD_N32_H -#define _ASM_MIPS_UNISTD_N32_H 
+#ifndef _ASM_UNISTD_N32_H +#define _ASM_UNISTD_N32_H -#define __NR_read (__NR_Linux + 0) -#define __NR_write (__NR_Linux + 1) -#define __NR_open (__NR_Linux + 2) -#define __NR_close (__NR_Linux + 3) -#define __NR_stat (__NR_Linux + 4) -#define __NR_fstat (__NR_Linux + 5) -#define __NR_lstat (__NR_Linux + 6) -#define __NR_poll (__NR_Linux + 7) -#define __NR_lseek (__NR_Linux + 8) -#define __NR_mmap (__NR_Linux + 9) -#define __NR_mprotect (__NR_Linux + 10) -#define __NR_munmap (__NR_Linux + 11) -#define __NR_brk (__NR_Linux + 12) -#define __NR_rt_sigaction (__NR_Linux + 13) -#define __NR_rt_sigprocmask (__NR_Linux + 14) -#define __NR_ioctl (__NR_Linux + 15) -#define __NR_pread64 (__NR_Linux + 16) -#define __NR_pwrite64 (__NR_Linux + 17) -#define __NR_readv (__NR_Linux + 18) -#define __NR_writev (__NR_Linux + 19) -#define __NR_access (__NR_Linux + 20) -#define __NR_pipe (__NR_Linux + 21) -#define __NR__newselect (__NR_Linux + 22) -#define __NR_sched_yield (__NR_Linux + 23) -#define __NR_mremap (__NR_Linux + 24) -#define __NR_msync (__NR_Linux + 25) -#define __NR_mincore (__NR_Linux + 26) -#define __NR_madvise (__NR_Linux + 27) -#define __NR_shmget (__NR_Linux + 28) -#define __NR_shmat (__NR_Linux + 29) -#define __NR_shmctl (__NR_Linux + 30) -#define __NR_dup (__NR_Linux + 31) -#define __NR_dup2 (__NR_Linux + 32) -#define __NR_pause (__NR_Linux + 33) -#define __NR_nanosleep (__NR_Linux + 34) -#define __NR_getitimer (__NR_Linux + 35) -#define __NR_setitimer (__NR_Linux + 36) -#define __NR_alarm (__NR_Linux + 37) -#define __NR_getpid (__NR_Linux + 38) -#define __NR_sendfile (__NR_Linux + 39) -#define __NR_socket (__NR_Linux + 40) -#define __NR_connect (__NR_Linux + 41) -#define __NR_accept (__NR_Linux + 42) -#define __NR_sendto (__NR_Linux + 43) -#define __NR_recvfrom (__NR_Linux + 44) -#define __NR_sendmsg (__NR_Linux + 45) -#define __NR_recvmsg (__NR_Linux + 46) -#define __NR_shutdown (__NR_Linux + 47) -#define __NR_bind (__NR_Linux + 48) -#define __NR_listen (__NR_Linux + 49) -#define __NR_getsockname (__NR_Linux + 50) -#define __NR_getpeername (__NR_Linux + 51) -#define __NR_socketpair (__NR_Linux + 52) -#define __NR_setsockopt (__NR_Linux + 53) -#define __NR_getsockopt (__NR_Linux + 54) -#define __NR_clone (__NR_Linux + 55) -#define __NR_fork (__NR_Linux + 56) -#define __NR_execve (__NR_Linux + 57) -#define __NR_exit (__NR_Linux + 58) -#define __NR_wait4 (__NR_Linux + 59) -#define __NR_kill (__NR_Linux + 60) -#define __NR_uname (__NR_Linux + 61) -#define __NR_semget (__NR_Linux + 62) -#define __NR_semop (__NR_Linux + 63) -#define __NR_semctl (__NR_Linux + 64) -#define __NR_shmdt (__NR_Linux + 65) -#define __NR_msgget (__NR_Linux + 66) -#define __NR_msgsnd (__NR_Linux + 67) -#define __NR_msgrcv (__NR_Linux + 68) -#define __NR_msgctl (__NR_Linux + 69) -#define __NR_fcntl (__NR_Linux + 70) -#define __NR_flock (__NR_Linux + 71) -#define __NR_fsync (__NR_Linux + 72) -#define __NR_fdatasync (__NR_Linux + 73) -#define __NR_truncate (__NR_Linux + 74) -#define __NR_ftruncate (__NR_Linux + 75) -#define __NR_getdents (__NR_Linux + 76) -#define __NR_getcwd (__NR_Linux + 77) -#define __NR_chdir (__NR_Linux + 78) -#define __NR_fchdir (__NR_Linux + 79) -#define __NR_rename (__NR_Linux + 80) -#define __NR_mkdir (__NR_Linux + 81) -#define __NR_rmdir (__NR_Linux + 82) -#define __NR_creat (__NR_Linux + 83) -#define __NR_link (__NR_Linux + 84) -#define __NR_unlink (__NR_Linux + 85) -#define __NR_symlink (__NR_Linux + 86) -#define __NR_readlink (__NR_Linux + 87) -#define __NR_chmod (__NR_Linux + 88) -#define 
__NR_fchmod (__NR_Linux + 89) -#define __NR_chown (__NR_Linux + 90) -#define __NR_fchown (__NR_Linux + 91) -#define __NR_lchown (__NR_Linux + 92) -#define __NR_umask (__NR_Linux + 93) -#define __NR_gettimeofday (__NR_Linux + 94) -#define __NR_getrlimit (__NR_Linux + 95) -#define __NR_getrusage (__NR_Linux + 96) -#define __NR_sysinfo (__NR_Linux + 97) -#define __NR_times (__NR_Linux + 98) -#define __NR_ptrace (__NR_Linux + 99) -#define __NR_getuid (__NR_Linux + 100) -#define __NR_syslog (__NR_Linux + 101) -#define __NR_getgid (__NR_Linux + 102) -#define __NR_setuid (__NR_Linux + 103) -#define __NR_setgid (__NR_Linux + 104) -#define __NR_geteuid (__NR_Linux + 105) -#define __NR_getegid (__NR_Linux + 106) -#define __NR_setpgid (__NR_Linux + 107) -#define __NR_getppid (__NR_Linux + 108) -#define __NR_getpgrp (__NR_Linux + 109) -#define __NR_setsid (__NR_Linux + 110) -#define __NR_setreuid (__NR_Linux + 111) -#define __NR_setregid (__NR_Linux + 112) -#define __NR_getgroups (__NR_Linux + 113) -#define __NR_setgroups (__NR_Linux + 114) -#define __NR_setresuid (__NR_Linux + 115) -#define __NR_getresuid (__NR_Linux + 116) -#define __NR_setresgid (__NR_Linux + 117) -#define __NR_getresgid (__NR_Linux + 118) -#define __NR_getpgid (__NR_Linux + 119) -#define __NR_setfsuid (__NR_Linux + 120) -#define __NR_setfsgid (__NR_Linux + 121) -#define __NR_getsid (__NR_Linux + 122) -#define __NR_capget (__NR_Linux + 123) -#define __NR_capset (__NR_Linux + 124) -#define __NR_rt_sigpending (__NR_Linux + 125) -#define __NR_rt_sigtimedwait (__NR_Linux + 126) -#define __NR_rt_sigqueueinfo (__NR_Linux + 127) -#define __NR_rt_sigsuspend (__NR_Linux + 128) -#define __NR_sigaltstack (__NR_Linux + 129) -#define __NR_utime (__NR_Linux + 130) -#define __NR_mknod (__NR_Linux + 131) -#define __NR_personality (__NR_Linux + 132) -#define __NR_ustat (__NR_Linux + 133) -#define __NR_statfs (__NR_Linux + 134) -#define __NR_fstatfs (__NR_Linux + 135) -#define __NR_sysfs (__NR_Linux + 136) -#define __NR_getpriority (__NR_Linux + 137) -#define __NR_setpriority (__NR_Linux + 138) -#define __NR_sched_setparam (__NR_Linux + 139) -#define __NR_sched_getparam (__NR_Linux + 140) -#define __NR_sched_setscheduler (__NR_Linux + 141) -#define __NR_sched_getscheduler (__NR_Linux + 142) -#define __NR_sched_get_priority_max (__NR_Linux + 143) -#define __NR_sched_get_priority_min (__NR_Linux + 144) -#define __NR_sched_rr_get_interval (__NR_Linux + 145) -#define __NR_mlock (__NR_Linux + 146) -#define __NR_munlock (__NR_Linux + 147) -#define __NR_mlockall (__NR_Linux + 148) -#define __NR_munlockall (__NR_Linux + 149) -#define __NR_vhangup (__NR_Linux + 150) -#define __NR_pivot_root (__NR_Linux + 151) -#define __NR__sysctl (__NR_Linux + 152) -#define __NR_prctl (__NR_Linux + 153) -#define __NR_adjtimex (__NR_Linux + 154) -#define __NR_setrlimit (__NR_Linux + 155) -#define __NR_chroot (__NR_Linux + 156) -#define __NR_sync (__NR_Linux + 157) -#define __NR_acct (__NR_Linux + 158) -#define __NR_settimeofday (__NR_Linux + 159) -#define __NR_mount (__NR_Linux + 160) -#define __NR_umount2 (__NR_Linux + 161) -#define __NR_swapon (__NR_Linux + 162) -#define __NR_swapoff (__NR_Linux + 163) -#define __NR_reboot (__NR_Linux + 164) -#define __NR_sethostname (__NR_Linux + 165) -#define __NR_setdomainname (__NR_Linux + 166) -#define __NR_create_module (__NR_Linux + 167) -#define __NR_init_module (__NR_Linux + 168) -#define __NR_delete_module (__NR_Linux + 169) -#define __NR_get_kernel_syms (__NR_Linux + 170) -#define __NR_query_module (__NR_Linux + 171) -#define 
__NR_quotactl (__NR_Linux + 172) -#define __NR_nfsservctl (__NR_Linux + 173) -#define __NR_getpmsg (__NR_Linux + 174) -#define __NR_putpmsg (__NR_Linux + 175) -#define __NR_afs_syscall (__NR_Linux + 176) -#define __NR_reserved177 (__NR_Linux + 177) -#define __NR_gettid (__NR_Linux + 178) -#define __NR_readahead (__NR_Linux + 179) -#define __NR_setxattr (__NR_Linux + 180) -#define __NR_lsetxattr (__NR_Linux + 181) -#define __NR_fsetxattr (__NR_Linux + 182) -#define __NR_getxattr (__NR_Linux + 183) -#define __NR_lgetxattr (__NR_Linux + 184) -#define __NR_fgetxattr (__NR_Linux + 185) -#define __NR_listxattr (__NR_Linux + 186) -#define __NR_llistxattr (__NR_Linux + 187) -#define __NR_flistxattr (__NR_Linux + 188) -#define __NR_removexattr (__NR_Linux + 189) -#define __NR_lremovexattr (__NR_Linux + 190) -#define __NR_fremovexattr (__NR_Linux + 191) -#define __NR_tkill (__NR_Linux + 192) -#define __NR_reserved193 (__NR_Linux + 193) -#define __NR_futex (__NR_Linux + 194) -#define __NR_sched_setaffinity (__NR_Linux + 195) -#define __NR_sched_getaffinity (__NR_Linux + 196) -#define __NR_cacheflush (__NR_Linux + 197) -#define __NR_cachectl (__NR_Linux + 198) -#define __NR_sysmips (__NR_Linux + 199) -#define __NR_io_setup (__NR_Linux + 200) -#define __NR_io_destroy (__NR_Linux + 201) -#define __NR_io_getevents (__NR_Linux + 202) -#define __NR_io_submit (__NR_Linux + 203) -#define __NR_io_cancel (__NR_Linux + 204) -#define __NR_exit_group (__NR_Linux + 205) -#define __NR_lookup_dcookie (__NR_Linux + 206) -#define __NR_epoll_create (__NR_Linux + 207) -#define __NR_epoll_ctl (__NR_Linux + 208) -#define __NR_epoll_wait (__NR_Linux + 209) -#define __NR_remap_file_pages (__NR_Linux + 210) -#define __NR_rt_sigreturn (__NR_Linux + 211) -#define __NR_fcntl64 (__NR_Linux + 212) -#define __NR_set_tid_address (__NR_Linux + 213) -#define __NR_restart_syscall (__NR_Linux + 214) -#define __NR_semtimedop (__NR_Linux + 215) -#define __NR_fadvise64 (__NR_Linux + 216) -#define __NR_statfs64 (__NR_Linux + 217) -#define __NR_fstatfs64 (__NR_Linux + 218) -#define __NR_sendfile64 (__NR_Linux + 219) -#define __NR_timer_create (__NR_Linux + 220) -#define __NR_timer_settime (__NR_Linux + 221) -#define __NR_timer_gettime (__NR_Linux + 222) -#define __NR_timer_getoverrun (__NR_Linux + 223) -#define __NR_timer_delete (__NR_Linux + 224) -#define __NR_clock_settime (__NR_Linux + 225) -#define __NR_clock_gettime (__NR_Linux + 226) -#define __NR_clock_getres (__NR_Linux + 227) -#define __NR_clock_nanosleep (__NR_Linux + 228) -#define __NR_tgkill (__NR_Linux + 229) -#define __NR_utimes (__NR_Linux + 230) -#define __NR_mbind (__NR_Linux + 231) -#define __NR_get_mempolicy (__NR_Linux + 232) -#define __NR_set_mempolicy (__NR_Linux + 233) -#define __NR_mq_open (__NR_Linux + 234) -#define __NR_mq_unlink (__NR_Linux + 235) -#define __NR_mq_timedsend (__NR_Linux + 236) -#define __NR_mq_timedreceive (__NR_Linux + 237) -#define __NR_mq_notify (__NR_Linux + 238) -#define __NR_mq_getsetattr (__NR_Linux + 239) -#define __NR_vserver (__NR_Linux + 240) -#define __NR_waitid (__NR_Linux + 241) -#define __NR_add_key (__NR_Linux + 243) -#define __NR_request_key (__NR_Linux + 244) -#define __NR_keyctl (__NR_Linux + 245) -#define __NR_set_thread_area (__NR_Linux + 246) -#define __NR_inotify_init (__NR_Linux + 247) -#define __NR_inotify_add_watch (__NR_Linux + 248) -#define __NR_inotify_rm_watch (__NR_Linux + 249) -#define __NR_migrate_pages (__NR_Linux + 250) -#define __NR_openat (__NR_Linux + 251) -#define __NR_mkdirat (__NR_Linux + 252) -#define 
__NR_mknodat (__NR_Linux + 253) -#define __NR_fchownat (__NR_Linux + 254) -#define __NR_futimesat (__NR_Linux + 255) -#define __NR_newfstatat (__NR_Linux + 256) -#define __NR_unlinkat (__NR_Linux + 257) -#define __NR_renameat (__NR_Linux + 258) -#define __NR_linkat (__NR_Linux + 259) -#define __NR_symlinkat (__NR_Linux + 260) -#define __NR_readlinkat (__NR_Linux + 261) -#define __NR_fchmodat (__NR_Linux + 262) -#define __NR_faccessat (__NR_Linux + 263) -#define __NR_pselect6 (__NR_Linux + 264) -#define __NR_ppoll (__NR_Linux + 265) -#define __NR_unshare (__NR_Linux + 266) -#define __NR_splice (__NR_Linux + 267) -#define __NR_sync_file_range (__NR_Linux + 268) -#define __NR_tee (__NR_Linux + 269) -#define __NR_vmsplice (__NR_Linux + 270) -#define __NR_move_pages (__NR_Linux + 271) -#define __NR_set_robust_list (__NR_Linux + 272) -#define __NR_get_robust_list (__NR_Linux + 273) -#define __NR_kexec_load (__NR_Linux + 274) -#define __NR_getcpu (__NR_Linux + 275) -#define __NR_epoll_pwait (__NR_Linux + 276) -#define __NR_ioprio_set (__NR_Linux + 277) -#define __NR_ioprio_get (__NR_Linux + 278) -#define __NR_utimensat (__NR_Linux + 279) -#define __NR_signalfd (__NR_Linux + 280) -#define __NR_timerfd (__NR_Linux + 281) -#define __NR_eventfd (__NR_Linux + 282) -#define __NR_fallocate (__NR_Linux + 283) -#define __NR_timerfd_create (__NR_Linux + 284) -#define __NR_timerfd_gettime (__NR_Linux + 285) -#define __NR_timerfd_settime (__NR_Linux + 286) -#define __NR_signalfd4 (__NR_Linux + 287) -#define __NR_eventfd2 (__NR_Linux + 288) -#define __NR_epoll_create1 (__NR_Linux + 289) -#define __NR_dup3 (__NR_Linux + 290) -#define __NR_pipe2 (__NR_Linux + 291) -#define __NR_inotify_init1 (__NR_Linux + 292) -#define __NR_preadv (__NR_Linux + 293) -#define __NR_pwritev (__NR_Linux + 294) -#define __NR_rt_tgsigqueueinfo (__NR_Linux + 295) -#define __NR_perf_event_open (__NR_Linux + 296) -#define __NR_accept4 (__NR_Linux + 297) -#define __NR_recvmmsg (__NR_Linux + 298) -#define __NR_getdents64 (__NR_Linux + 299) -#define __NR_fanotify_init (__NR_Linux + 300) -#define __NR_fanotify_mark (__NR_Linux + 301) -#define __NR_prlimit64 (__NR_Linux + 302) -#define __NR_name_to_handle_at (__NR_Linux + 303) -#define __NR_open_by_handle_at (__NR_Linux + 304) -#define __NR_clock_adjtime (__NR_Linux + 305) -#define __NR_syncfs (__NR_Linux + 306) -#define __NR_sendmmsg (__NR_Linux + 307) -#define __NR_setns (__NR_Linux + 308) -#define __NR_process_vm_readv (__NR_Linux + 309) -#define __NR_process_vm_writev (__NR_Linux + 310) -#define __NR_kcmp (__NR_Linux + 311) -#define __NR_finit_module (__NR_Linux + 312) -#define __NR_sched_setattr (__NR_Linux + 313) -#define __NR_sched_getattr (__NR_Linux + 314) -#define __NR_renameat2 (__NR_Linux + 315) -#define __NR_seccomp (__NR_Linux + 316) -#define __NR_getrandom (__NR_Linux + 317) -#define __NR_memfd_create (__NR_Linux + 318) -#define __NR_bpf (__NR_Linux + 319) -#define __NR_execveat (__NR_Linux + 320) -#define __NR_userfaultfd (__NR_Linux + 321) -#define __NR_membarrier (__NR_Linux + 322) -#define __NR_mlock2 (__NR_Linux + 323) -#define __NR_copy_file_range (__NR_Linux + 324) -#define __NR_preadv2 (__NR_Linux + 325) -#define __NR_pwritev2 (__NR_Linux + 326) -#define __NR_pkey_mprotect (__NR_Linux + 327) -#define __NR_pkey_alloc (__NR_Linux + 328) -#define __NR_pkey_free (__NR_Linux + 329) -#define __NR_statx (__NR_Linux + 330) -#define __NR_rseq (__NR_Linux + 331) -#define __NR_io_pgetevents (__NR_Linux + 332) -#define __NR_clock_gettime64 (__NR_Linux + 403) -#define 
__NR_clock_settime64 (__NR_Linux + 404) -#define __NR_clock_adjtime64 (__NR_Linux + 405) -#define __NR_clock_getres_time64 (__NR_Linux + 406) -#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) -#define __NR_timer_gettime64 (__NR_Linux + 408) -#define __NR_timer_settime64 (__NR_Linux + 409) -#define __NR_timerfd_gettime64 (__NR_Linux + 410) -#define __NR_timerfd_settime64 (__NR_Linux + 411) -#define __NR_utimensat_time64 (__NR_Linux + 412) -#define __NR_pselect6_time64 (__NR_Linux + 413) -#define __NR_ppoll_time64 (__NR_Linux + 414) -#define __NR_io_pgetevents_time64 (__NR_Linux + 416) -#define __NR_recvmmsg_time64 (__NR_Linux + 417) -#define __NR_mq_timedsend_time64 (__NR_Linux + 418) -#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) -#define __NR_semtimedop_time64 (__NR_Linux + 420) -#define __NR_rt_sigtimedwait_time64 (__NR_Linux + 421) -#define __NR_futex_time64 (__NR_Linux + 422) -#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) -#define __NR_pidfd_send_signal (__NR_Linux + 424) -#define __NR_io_uring_setup (__NR_Linux + 425) -#define __NR_io_uring_enter (__NR_Linux + 426) -#define __NR_io_uring_register (__NR_Linux + 427) -#define __NR_open_tree (__NR_Linux + 428) -#define __NR_move_mount (__NR_Linux + 429) -#define __NR_fsopen (__NR_Linux + 430) -#define __NR_fsconfig (__NR_Linux + 431) -#define __NR_fsmount (__NR_Linux + 432) -#define __NR_fspick (__NR_Linux + 433) -#define __NR_pidfd_open (__NR_Linux + 434) -#define __NR_clone3 (__NR_Linux + 435) -#define __NR_close_range (__NR_Linux + 436) -#define __NR_openat2 (__NR_Linux + 437) -#define __NR_pidfd_getfd (__NR_Linux + 438) -#define __NR_faccessat2 (__NR_Linux + 439) -#define __NR_process_madvise (__NR_Linux + 440) -#define __NR_epoll_pwait2 (__NR_Linux + 441) +#define __NR_read (__NR_Linux + 0) +#define __NR_write (__NR_Linux + 1) +#define __NR_open (__NR_Linux + 2) +#define __NR_close (__NR_Linux + 3) +#define __NR_stat (__NR_Linux + 4) +#define __NR_fstat (__NR_Linux + 5) +#define __NR_lstat (__NR_Linux + 6) +#define __NR_poll (__NR_Linux + 7) +#define __NR_lseek (__NR_Linux + 8) +#define __NR_mmap (__NR_Linux + 9) +#define __NR_mprotect (__NR_Linux + 10) +#define __NR_munmap (__NR_Linux + 11) +#define __NR_brk (__NR_Linux + 12) +#define __NR_rt_sigaction (__NR_Linux + 13) +#define __NR_rt_sigprocmask (__NR_Linux + 14) +#define __NR_ioctl (__NR_Linux + 15) +#define __NR_pread64 (__NR_Linux + 16) +#define __NR_pwrite64 (__NR_Linux + 17) +#define __NR_readv (__NR_Linux + 18) +#define __NR_writev (__NR_Linux + 19) +#define __NR_access (__NR_Linux + 20) +#define __NR_pipe (__NR_Linux + 21) +#define __NR__newselect (__NR_Linux + 22) +#define __NR_sched_yield (__NR_Linux + 23) +#define __NR_mremap (__NR_Linux + 24) +#define __NR_msync (__NR_Linux + 25) +#define __NR_mincore (__NR_Linux + 26) +#define __NR_madvise (__NR_Linux + 27) +#define __NR_shmget (__NR_Linux + 28) +#define __NR_shmat (__NR_Linux + 29) +#define __NR_shmctl (__NR_Linux + 30) +#define __NR_dup (__NR_Linux + 31) +#define __NR_dup2 (__NR_Linux + 32) +#define __NR_pause (__NR_Linux + 33) +#define __NR_nanosleep (__NR_Linux + 34) +#define __NR_getitimer (__NR_Linux + 35) +#define __NR_setitimer (__NR_Linux + 36) +#define __NR_alarm (__NR_Linux + 37) +#define __NR_getpid (__NR_Linux + 38) +#define __NR_sendfile (__NR_Linux + 39) +#define __NR_socket (__NR_Linux + 40) +#define __NR_connect (__NR_Linux + 41) +#define __NR_accept (__NR_Linux + 42) +#define __NR_sendto (__NR_Linux + 43) +#define __NR_recvfrom (__NR_Linux + 44) +#define __NR_sendmsg 
(__NR_Linux + 45) +#define __NR_recvmsg (__NR_Linux + 46) +#define __NR_shutdown (__NR_Linux + 47) +#define __NR_bind (__NR_Linux + 48) +#define __NR_listen (__NR_Linux + 49) +#define __NR_getsockname (__NR_Linux + 50) +#define __NR_getpeername (__NR_Linux + 51) +#define __NR_socketpair (__NR_Linux + 52) +#define __NR_setsockopt (__NR_Linux + 53) +#define __NR_getsockopt (__NR_Linux + 54) +#define __NR_clone (__NR_Linux + 55) +#define __NR_fork (__NR_Linux + 56) +#define __NR_execve (__NR_Linux + 57) +#define __NR_exit (__NR_Linux + 58) +#define __NR_wait4 (__NR_Linux + 59) +#define __NR_kill (__NR_Linux + 60) +#define __NR_uname (__NR_Linux + 61) +#define __NR_semget (__NR_Linux + 62) +#define __NR_semop (__NR_Linux + 63) +#define __NR_semctl (__NR_Linux + 64) +#define __NR_shmdt (__NR_Linux + 65) +#define __NR_msgget (__NR_Linux + 66) +#define __NR_msgsnd (__NR_Linux + 67) +#define __NR_msgrcv (__NR_Linux + 68) +#define __NR_msgctl (__NR_Linux + 69) +#define __NR_fcntl (__NR_Linux + 70) +#define __NR_flock (__NR_Linux + 71) +#define __NR_fsync (__NR_Linux + 72) +#define __NR_fdatasync (__NR_Linux + 73) +#define __NR_truncate (__NR_Linux + 74) +#define __NR_ftruncate (__NR_Linux + 75) +#define __NR_getdents (__NR_Linux + 76) +#define __NR_getcwd (__NR_Linux + 77) +#define __NR_chdir (__NR_Linux + 78) +#define __NR_fchdir (__NR_Linux + 79) +#define __NR_rename (__NR_Linux + 80) +#define __NR_mkdir (__NR_Linux + 81) +#define __NR_rmdir (__NR_Linux + 82) +#define __NR_creat (__NR_Linux + 83) +#define __NR_link (__NR_Linux + 84) +#define __NR_unlink (__NR_Linux + 85) +#define __NR_symlink (__NR_Linux + 86) +#define __NR_readlink (__NR_Linux + 87) +#define __NR_chmod (__NR_Linux + 88) +#define __NR_fchmod (__NR_Linux + 89) +#define __NR_chown (__NR_Linux + 90) +#define __NR_fchown (__NR_Linux + 91) +#define __NR_lchown (__NR_Linux + 92) +#define __NR_umask (__NR_Linux + 93) +#define __NR_gettimeofday (__NR_Linux + 94) +#define __NR_getrlimit (__NR_Linux + 95) +#define __NR_getrusage (__NR_Linux + 96) +#define __NR_sysinfo (__NR_Linux + 97) +#define __NR_times (__NR_Linux + 98) +#define __NR_ptrace (__NR_Linux + 99) +#define __NR_getuid (__NR_Linux + 100) +#define __NR_syslog (__NR_Linux + 101) +#define __NR_getgid (__NR_Linux + 102) +#define __NR_setuid (__NR_Linux + 103) +#define __NR_setgid (__NR_Linux + 104) +#define __NR_geteuid (__NR_Linux + 105) +#define __NR_getegid (__NR_Linux + 106) +#define __NR_setpgid (__NR_Linux + 107) +#define __NR_getppid (__NR_Linux + 108) +#define __NR_getpgrp (__NR_Linux + 109) +#define __NR_setsid (__NR_Linux + 110) +#define __NR_setreuid (__NR_Linux + 111) +#define __NR_setregid (__NR_Linux + 112) +#define __NR_getgroups (__NR_Linux + 113) +#define __NR_setgroups (__NR_Linux + 114) +#define __NR_setresuid (__NR_Linux + 115) +#define __NR_getresuid (__NR_Linux + 116) +#define __NR_setresgid (__NR_Linux + 117) +#define __NR_getresgid (__NR_Linux + 118) +#define __NR_getpgid (__NR_Linux + 119) +#define __NR_setfsuid (__NR_Linux + 120) +#define __NR_setfsgid (__NR_Linux + 121) +#define __NR_getsid (__NR_Linux + 122) +#define __NR_capget (__NR_Linux + 123) +#define __NR_capset (__NR_Linux + 124) +#define __NR_rt_sigpending (__NR_Linux + 125) +#define __NR_rt_sigtimedwait (__NR_Linux + 126) +#define __NR_rt_sigqueueinfo (__NR_Linux + 127) +#define __NR_rt_sigsuspend (__NR_Linux + 128) +#define __NR_sigaltstack (__NR_Linux + 129) +#define __NR_utime (__NR_Linux + 130) +#define __NR_mknod (__NR_Linux + 131) +#define __NR_personality (__NR_Linux + 132) +#define 
__NR_ustat (__NR_Linux + 133) +#define __NR_statfs (__NR_Linux + 134) +#define __NR_fstatfs (__NR_Linux + 135) +#define __NR_sysfs (__NR_Linux + 136) +#define __NR_getpriority (__NR_Linux + 137) +#define __NR_setpriority (__NR_Linux + 138) +#define __NR_sched_setparam (__NR_Linux + 139) +#define __NR_sched_getparam (__NR_Linux + 140) +#define __NR_sched_setscheduler (__NR_Linux + 141) +#define __NR_sched_getscheduler (__NR_Linux + 142) +#define __NR_sched_get_priority_max (__NR_Linux + 143) +#define __NR_sched_get_priority_min (__NR_Linux + 144) +#define __NR_sched_rr_get_interval (__NR_Linux + 145) +#define __NR_mlock (__NR_Linux + 146) +#define __NR_munlock (__NR_Linux + 147) +#define __NR_mlockall (__NR_Linux + 148) +#define __NR_munlockall (__NR_Linux + 149) +#define __NR_vhangup (__NR_Linux + 150) +#define __NR_pivot_root (__NR_Linux + 151) +#define __NR__sysctl (__NR_Linux + 152) +#define __NR_prctl (__NR_Linux + 153) +#define __NR_adjtimex (__NR_Linux + 154) +#define __NR_setrlimit (__NR_Linux + 155) +#define __NR_chroot (__NR_Linux + 156) +#define __NR_sync (__NR_Linux + 157) +#define __NR_acct (__NR_Linux + 158) +#define __NR_settimeofday (__NR_Linux + 159) +#define __NR_mount (__NR_Linux + 160) +#define __NR_umount2 (__NR_Linux + 161) +#define __NR_swapon (__NR_Linux + 162) +#define __NR_swapoff (__NR_Linux + 163) +#define __NR_reboot (__NR_Linux + 164) +#define __NR_sethostname (__NR_Linux + 165) +#define __NR_setdomainname (__NR_Linux + 166) +#define __NR_create_module (__NR_Linux + 167) +#define __NR_init_module (__NR_Linux + 168) +#define __NR_delete_module (__NR_Linux + 169) +#define __NR_get_kernel_syms (__NR_Linux + 170) +#define __NR_query_module (__NR_Linux + 171) +#define __NR_quotactl (__NR_Linux + 172) +#define __NR_nfsservctl (__NR_Linux + 173) +#define __NR_getpmsg (__NR_Linux + 174) +#define __NR_putpmsg (__NR_Linux + 175) +#define __NR_afs_syscall (__NR_Linux + 176) +#define __NR_reserved177 (__NR_Linux + 177) +#define __NR_gettid (__NR_Linux + 178) +#define __NR_readahead (__NR_Linux + 179) +#define __NR_setxattr (__NR_Linux + 180) +#define __NR_lsetxattr (__NR_Linux + 181) +#define __NR_fsetxattr (__NR_Linux + 182) +#define __NR_getxattr (__NR_Linux + 183) +#define __NR_lgetxattr (__NR_Linux + 184) +#define __NR_fgetxattr (__NR_Linux + 185) +#define __NR_listxattr (__NR_Linux + 186) +#define __NR_llistxattr (__NR_Linux + 187) +#define __NR_flistxattr (__NR_Linux + 188) +#define __NR_removexattr (__NR_Linux + 189) +#define __NR_lremovexattr (__NR_Linux + 190) +#define __NR_fremovexattr (__NR_Linux + 191) +#define __NR_tkill (__NR_Linux + 192) +#define __NR_reserved193 (__NR_Linux + 193) +#define __NR_futex (__NR_Linux + 194) +#define __NR_sched_setaffinity (__NR_Linux + 195) +#define __NR_sched_getaffinity (__NR_Linux + 196) +#define __NR_cacheflush (__NR_Linux + 197) +#define __NR_cachectl (__NR_Linux + 198) +#define __NR_sysmips (__NR_Linux + 199) +#define __NR_io_setup (__NR_Linux + 200) +#define __NR_io_destroy (__NR_Linux + 201) +#define __NR_io_getevents (__NR_Linux + 202) +#define __NR_io_submit (__NR_Linux + 203) +#define __NR_io_cancel (__NR_Linux + 204) +#define __NR_exit_group (__NR_Linux + 205) +#define __NR_lookup_dcookie (__NR_Linux + 206) +#define __NR_epoll_create (__NR_Linux + 207) +#define __NR_epoll_ctl (__NR_Linux + 208) +#define __NR_epoll_wait (__NR_Linux + 209) +#define __NR_remap_file_pages (__NR_Linux + 210) +#define __NR_rt_sigreturn (__NR_Linux + 211) +#define __NR_fcntl64 (__NR_Linux + 212) +#define __NR_set_tid_address (__NR_Linux + 
213) +#define __NR_restart_syscall (__NR_Linux + 214) +#define __NR_semtimedop (__NR_Linux + 215) +#define __NR_fadvise64 (__NR_Linux + 216) +#define __NR_statfs64 (__NR_Linux + 217) +#define __NR_fstatfs64 (__NR_Linux + 218) +#define __NR_sendfile64 (__NR_Linux + 219) +#define __NR_timer_create (__NR_Linux + 220) +#define __NR_timer_settime (__NR_Linux + 221) +#define __NR_timer_gettime (__NR_Linux + 222) +#define __NR_timer_getoverrun (__NR_Linux + 223) +#define __NR_timer_delete (__NR_Linux + 224) +#define __NR_clock_settime (__NR_Linux + 225) +#define __NR_clock_gettime (__NR_Linux + 226) +#define __NR_clock_getres (__NR_Linux + 227) +#define __NR_clock_nanosleep (__NR_Linux + 228) +#define __NR_tgkill (__NR_Linux + 229) +#define __NR_utimes (__NR_Linux + 230) +#define __NR_mbind (__NR_Linux + 231) +#define __NR_get_mempolicy (__NR_Linux + 232) +#define __NR_set_mempolicy (__NR_Linux + 233) +#define __NR_mq_open (__NR_Linux + 234) +#define __NR_mq_unlink (__NR_Linux + 235) +#define __NR_mq_timedsend (__NR_Linux + 236) +#define __NR_mq_timedreceive (__NR_Linux + 237) +#define __NR_mq_notify (__NR_Linux + 238) +#define __NR_mq_getsetattr (__NR_Linux + 239) +#define __NR_vserver (__NR_Linux + 240) +#define __NR_waitid (__NR_Linux + 241) +#define __NR_add_key (__NR_Linux + 243) +#define __NR_request_key (__NR_Linux + 244) +#define __NR_keyctl (__NR_Linux + 245) +#define __NR_set_thread_area (__NR_Linux + 246) +#define __NR_inotify_init (__NR_Linux + 247) +#define __NR_inotify_add_watch (__NR_Linux + 248) +#define __NR_inotify_rm_watch (__NR_Linux + 249) +#define __NR_migrate_pages (__NR_Linux + 250) +#define __NR_openat (__NR_Linux + 251) +#define __NR_mkdirat (__NR_Linux + 252) +#define __NR_mknodat (__NR_Linux + 253) +#define __NR_fchownat (__NR_Linux + 254) +#define __NR_futimesat (__NR_Linux + 255) +#define __NR_newfstatat (__NR_Linux + 256) +#define __NR_unlinkat (__NR_Linux + 257) +#define __NR_renameat (__NR_Linux + 258) +#define __NR_linkat (__NR_Linux + 259) +#define __NR_symlinkat (__NR_Linux + 260) +#define __NR_readlinkat (__NR_Linux + 261) +#define __NR_fchmodat (__NR_Linux + 262) +#define __NR_faccessat (__NR_Linux + 263) +#define __NR_pselect6 (__NR_Linux + 264) +#define __NR_ppoll (__NR_Linux + 265) +#define __NR_unshare (__NR_Linux + 266) +#define __NR_splice (__NR_Linux + 267) +#define __NR_sync_file_range (__NR_Linux + 268) +#define __NR_tee (__NR_Linux + 269) +#define __NR_vmsplice (__NR_Linux + 270) +#define __NR_move_pages (__NR_Linux + 271) +#define __NR_set_robust_list (__NR_Linux + 272) +#define __NR_get_robust_list (__NR_Linux + 273) +#define __NR_kexec_load (__NR_Linux + 274) +#define __NR_getcpu (__NR_Linux + 275) +#define __NR_epoll_pwait (__NR_Linux + 276) +#define __NR_ioprio_set (__NR_Linux + 277) +#define __NR_ioprio_get (__NR_Linux + 278) +#define __NR_utimensat (__NR_Linux + 279) +#define __NR_signalfd (__NR_Linux + 280) +#define __NR_timerfd (__NR_Linux + 281) +#define __NR_eventfd (__NR_Linux + 282) +#define __NR_fallocate (__NR_Linux + 283) +#define __NR_timerfd_create (__NR_Linux + 284) +#define __NR_timerfd_gettime (__NR_Linux + 285) +#define __NR_timerfd_settime (__NR_Linux + 286) +#define __NR_signalfd4 (__NR_Linux + 287) +#define __NR_eventfd2 (__NR_Linux + 288) +#define __NR_epoll_create1 (__NR_Linux + 289) +#define __NR_dup3 (__NR_Linux + 290) +#define __NR_pipe2 (__NR_Linux + 291) +#define __NR_inotify_init1 (__NR_Linux + 292) +#define __NR_preadv (__NR_Linux + 293) +#define __NR_pwritev (__NR_Linux + 294) +#define __NR_rt_tgsigqueueinfo 
(__NR_Linux + 295) +#define __NR_perf_event_open (__NR_Linux + 296) +#define __NR_accept4 (__NR_Linux + 297) +#define __NR_recvmmsg (__NR_Linux + 298) +#define __NR_getdents64 (__NR_Linux + 299) +#define __NR_fanotify_init (__NR_Linux + 300) +#define __NR_fanotify_mark (__NR_Linux + 301) +#define __NR_prlimit64 (__NR_Linux + 302) +#define __NR_name_to_handle_at (__NR_Linux + 303) +#define __NR_open_by_handle_at (__NR_Linux + 304) +#define __NR_clock_adjtime (__NR_Linux + 305) +#define __NR_syncfs (__NR_Linux + 306) +#define __NR_sendmmsg (__NR_Linux + 307) +#define __NR_setns (__NR_Linux + 308) +#define __NR_process_vm_readv (__NR_Linux + 309) +#define __NR_process_vm_writev (__NR_Linux + 310) +#define __NR_kcmp (__NR_Linux + 311) +#define __NR_finit_module (__NR_Linux + 312) +#define __NR_sched_setattr (__NR_Linux + 313) +#define __NR_sched_getattr (__NR_Linux + 314) +#define __NR_renameat2 (__NR_Linux + 315) +#define __NR_seccomp (__NR_Linux + 316) +#define __NR_getrandom (__NR_Linux + 317) +#define __NR_memfd_create (__NR_Linux + 318) +#define __NR_bpf (__NR_Linux + 319) +#define __NR_execveat (__NR_Linux + 320) +#define __NR_userfaultfd (__NR_Linux + 321) +#define __NR_membarrier (__NR_Linux + 322) +#define __NR_mlock2 (__NR_Linux + 323) +#define __NR_copy_file_range (__NR_Linux + 324) +#define __NR_preadv2 (__NR_Linux + 325) +#define __NR_pwritev2 (__NR_Linux + 326) +#define __NR_pkey_mprotect (__NR_Linux + 327) +#define __NR_pkey_alloc (__NR_Linux + 328) +#define __NR_pkey_free (__NR_Linux + 329) +#define __NR_statx (__NR_Linux + 330) +#define __NR_rseq (__NR_Linux + 331) +#define __NR_io_pgetevents (__NR_Linux + 332) +#define __NR_clock_gettime64 (__NR_Linux + 403) +#define __NR_clock_settime64 (__NR_Linux + 404) +#define __NR_clock_adjtime64 (__NR_Linux + 405) +#define __NR_clock_getres_time64 (__NR_Linux + 406) +#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) +#define __NR_timer_gettime64 (__NR_Linux + 408) +#define __NR_timer_settime64 (__NR_Linux + 409) +#define __NR_timerfd_gettime64 (__NR_Linux + 410) +#define __NR_timerfd_settime64 (__NR_Linux + 411) +#define __NR_utimensat_time64 (__NR_Linux + 412) +#define __NR_pselect6_time64 (__NR_Linux + 413) +#define __NR_ppoll_time64 (__NR_Linux + 414) +#define __NR_io_pgetevents_time64 (__NR_Linux + 416) +#define __NR_recvmmsg_time64 (__NR_Linux + 417) +#define __NR_mq_timedsend_time64 (__NR_Linux + 418) +#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) +#define __NR_semtimedop_time64 (__NR_Linux + 420) +#define __NR_rt_sigtimedwait_time64 (__NR_Linux + 421) +#define __NR_futex_time64 (__NR_Linux + 422) +#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) +#define __NR_pidfd_send_signal (__NR_Linux + 424) +#define __NR_io_uring_setup (__NR_Linux + 425) +#define __NR_io_uring_enter (__NR_Linux + 426) +#define __NR_io_uring_register (__NR_Linux + 427) +#define __NR_open_tree (__NR_Linux + 428) +#define __NR_move_mount (__NR_Linux + 429) +#define __NR_fsopen (__NR_Linux + 430) +#define __NR_fsconfig (__NR_Linux + 431) +#define __NR_fsmount (__NR_Linux + 432) +#define __NR_fspick (__NR_Linux + 433) +#define __NR_pidfd_open (__NR_Linux + 434) +#define __NR_clone3 (__NR_Linux + 435) +#define __NR_close_range (__NR_Linux + 436) +#define __NR_openat2 (__NR_Linux + 437) +#define __NR_pidfd_getfd (__NR_Linux + 438) +#define __NR_faccessat2 (__NR_Linux + 439) +#define __NR_process_madvise (__NR_Linux + 440) +#define __NR_epoll_pwait2 (__NR_Linux + 441) +#define __NR_mount_setattr (__NR_Linux + 442) +#define __NR_quotactl_fd 
(__NR_Linux + 443) +#define __NR_landlock_create_ruleset (__NR_Linux + 444) +#define __NR_landlock_add_rule (__NR_Linux + 445) +#define __NR_landlock_restrict_self (__NR_Linux + 446) - -#endif /* _ASM_MIPS_UNISTD_N32_H */ +#endif /* _ASM_UNISTD_N32_H */ diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h index 683558a7f8a..780e0cead66 100644 --- a/linux-headers/asm-mips/unistd_n64.h +++ b/linux-headers/asm-mips/unistd_n64.h @@ -1,352 +1,356 @@ -#ifndef _ASM_MIPS_UNISTD_N64_H -#define _ASM_MIPS_UNISTD_N64_H +#ifndef _ASM_UNISTD_N64_H +#define _ASM_UNISTD_N64_H -#define __NR_read (__NR_Linux + 0) -#define __NR_write (__NR_Linux + 1) -#define __NR_open (__NR_Linux + 2) -#define __NR_close (__NR_Linux + 3) -#define __NR_stat (__NR_Linux + 4) -#define __NR_fstat (__NR_Linux + 5) -#define __NR_lstat (__NR_Linux + 6) -#define __NR_poll (__NR_Linux + 7) -#define __NR_lseek (__NR_Linux + 8) -#define __NR_mmap (__NR_Linux + 9) -#define __NR_mprotect (__NR_Linux + 10) -#define __NR_munmap (__NR_Linux + 11) -#define __NR_brk (__NR_Linux + 12) -#define __NR_rt_sigaction (__NR_Linux + 13) -#define __NR_rt_sigprocmask (__NR_Linux + 14) -#define __NR_ioctl (__NR_Linux + 15) -#define __NR_pread64 (__NR_Linux + 16) -#define __NR_pwrite64 (__NR_Linux + 17) -#define __NR_readv (__NR_Linux + 18) -#define __NR_writev (__NR_Linux + 19) -#define __NR_access (__NR_Linux + 20) -#define __NR_pipe (__NR_Linux + 21) -#define __NR__newselect (__NR_Linux + 22) -#define __NR_sched_yield (__NR_Linux + 23) -#define __NR_mremap (__NR_Linux + 24) -#define __NR_msync (__NR_Linux + 25) -#define __NR_mincore (__NR_Linux + 26) -#define __NR_madvise (__NR_Linux + 27) -#define __NR_shmget (__NR_Linux + 28) -#define __NR_shmat (__NR_Linux + 29) -#define __NR_shmctl (__NR_Linux + 30) -#define __NR_dup (__NR_Linux + 31) -#define __NR_dup2 (__NR_Linux + 32) -#define __NR_pause (__NR_Linux + 33) -#define __NR_nanosleep (__NR_Linux + 34) -#define __NR_getitimer (__NR_Linux + 35) -#define __NR_setitimer (__NR_Linux + 36) -#define __NR_alarm (__NR_Linux + 37) -#define __NR_getpid (__NR_Linux + 38) -#define __NR_sendfile (__NR_Linux + 39) -#define __NR_socket (__NR_Linux + 40) -#define __NR_connect (__NR_Linux + 41) -#define __NR_accept (__NR_Linux + 42) -#define __NR_sendto (__NR_Linux + 43) -#define __NR_recvfrom (__NR_Linux + 44) -#define __NR_sendmsg (__NR_Linux + 45) -#define __NR_recvmsg (__NR_Linux + 46) -#define __NR_shutdown (__NR_Linux + 47) -#define __NR_bind (__NR_Linux + 48) -#define __NR_listen (__NR_Linux + 49) -#define __NR_getsockname (__NR_Linux + 50) -#define __NR_getpeername (__NR_Linux + 51) -#define __NR_socketpair (__NR_Linux + 52) -#define __NR_setsockopt (__NR_Linux + 53) -#define __NR_getsockopt (__NR_Linux + 54) -#define __NR_clone (__NR_Linux + 55) -#define __NR_fork (__NR_Linux + 56) -#define __NR_execve (__NR_Linux + 57) -#define __NR_exit (__NR_Linux + 58) -#define __NR_wait4 (__NR_Linux + 59) -#define __NR_kill (__NR_Linux + 60) -#define __NR_uname (__NR_Linux + 61) -#define __NR_semget (__NR_Linux + 62) -#define __NR_semop (__NR_Linux + 63) -#define __NR_semctl (__NR_Linux + 64) -#define __NR_shmdt (__NR_Linux + 65) -#define __NR_msgget (__NR_Linux + 66) -#define __NR_msgsnd (__NR_Linux + 67) -#define __NR_msgrcv (__NR_Linux + 68) -#define __NR_msgctl (__NR_Linux + 69) -#define __NR_fcntl (__NR_Linux + 70) -#define __NR_flock (__NR_Linux + 71) -#define __NR_fsync (__NR_Linux + 72) -#define __NR_fdatasync (__NR_Linux + 73) -#define __NR_truncate (__NR_Linux + 74) -#define 
__NR_ftruncate (__NR_Linux + 75) -#define __NR_getdents (__NR_Linux + 76) -#define __NR_getcwd (__NR_Linux + 77) -#define __NR_chdir (__NR_Linux + 78) -#define __NR_fchdir (__NR_Linux + 79) -#define __NR_rename (__NR_Linux + 80) -#define __NR_mkdir (__NR_Linux + 81) -#define __NR_rmdir (__NR_Linux + 82) -#define __NR_creat (__NR_Linux + 83) -#define __NR_link (__NR_Linux + 84) -#define __NR_unlink (__NR_Linux + 85) -#define __NR_symlink (__NR_Linux + 86) -#define __NR_readlink (__NR_Linux + 87) -#define __NR_chmod (__NR_Linux + 88) -#define __NR_fchmod (__NR_Linux + 89) -#define __NR_chown (__NR_Linux + 90) -#define __NR_fchown (__NR_Linux + 91) -#define __NR_lchown (__NR_Linux + 92) -#define __NR_umask (__NR_Linux + 93) -#define __NR_gettimeofday (__NR_Linux + 94) -#define __NR_getrlimit (__NR_Linux + 95) -#define __NR_getrusage (__NR_Linux + 96) -#define __NR_sysinfo (__NR_Linux + 97) -#define __NR_times (__NR_Linux + 98) -#define __NR_ptrace (__NR_Linux + 99) -#define __NR_getuid (__NR_Linux + 100) -#define __NR_syslog (__NR_Linux + 101) -#define __NR_getgid (__NR_Linux + 102) -#define __NR_setuid (__NR_Linux + 103) -#define __NR_setgid (__NR_Linux + 104) -#define __NR_geteuid (__NR_Linux + 105) -#define __NR_getegid (__NR_Linux + 106) -#define __NR_setpgid (__NR_Linux + 107) -#define __NR_getppid (__NR_Linux + 108) -#define __NR_getpgrp (__NR_Linux + 109) -#define __NR_setsid (__NR_Linux + 110) -#define __NR_setreuid (__NR_Linux + 111) -#define __NR_setregid (__NR_Linux + 112) -#define __NR_getgroups (__NR_Linux + 113) -#define __NR_setgroups (__NR_Linux + 114) -#define __NR_setresuid (__NR_Linux + 115) -#define __NR_getresuid (__NR_Linux + 116) -#define __NR_setresgid (__NR_Linux + 117) -#define __NR_getresgid (__NR_Linux + 118) -#define __NR_getpgid (__NR_Linux + 119) -#define __NR_setfsuid (__NR_Linux + 120) -#define __NR_setfsgid (__NR_Linux + 121) -#define __NR_getsid (__NR_Linux + 122) -#define __NR_capget (__NR_Linux + 123) -#define __NR_capset (__NR_Linux + 124) -#define __NR_rt_sigpending (__NR_Linux + 125) -#define __NR_rt_sigtimedwait (__NR_Linux + 126) -#define __NR_rt_sigqueueinfo (__NR_Linux + 127) -#define __NR_rt_sigsuspend (__NR_Linux + 128) -#define __NR_sigaltstack (__NR_Linux + 129) -#define __NR_utime (__NR_Linux + 130) -#define __NR_mknod (__NR_Linux + 131) -#define __NR_personality (__NR_Linux + 132) -#define __NR_ustat (__NR_Linux + 133) -#define __NR_statfs (__NR_Linux + 134) -#define __NR_fstatfs (__NR_Linux + 135) -#define __NR_sysfs (__NR_Linux + 136) -#define __NR_getpriority (__NR_Linux + 137) -#define __NR_setpriority (__NR_Linux + 138) -#define __NR_sched_setparam (__NR_Linux + 139) -#define __NR_sched_getparam (__NR_Linux + 140) -#define __NR_sched_setscheduler (__NR_Linux + 141) -#define __NR_sched_getscheduler (__NR_Linux + 142) -#define __NR_sched_get_priority_max (__NR_Linux + 143) -#define __NR_sched_get_priority_min (__NR_Linux + 144) -#define __NR_sched_rr_get_interval (__NR_Linux + 145) -#define __NR_mlock (__NR_Linux + 146) -#define __NR_munlock (__NR_Linux + 147) -#define __NR_mlockall (__NR_Linux + 148) -#define __NR_munlockall (__NR_Linux + 149) -#define __NR_vhangup (__NR_Linux + 150) -#define __NR_pivot_root (__NR_Linux + 151) -#define __NR__sysctl (__NR_Linux + 152) -#define __NR_prctl (__NR_Linux + 153) -#define __NR_adjtimex (__NR_Linux + 154) -#define __NR_setrlimit (__NR_Linux + 155) -#define __NR_chroot (__NR_Linux + 156) -#define __NR_sync (__NR_Linux + 157) -#define __NR_acct (__NR_Linux + 158) -#define __NR_settimeofday 
(__NR_Linux + 159) -#define __NR_mount (__NR_Linux + 160) -#define __NR_umount2 (__NR_Linux + 161) -#define __NR_swapon (__NR_Linux + 162) -#define __NR_swapoff (__NR_Linux + 163) -#define __NR_reboot (__NR_Linux + 164) -#define __NR_sethostname (__NR_Linux + 165) -#define __NR_setdomainname (__NR_Linux + 166) -#define __NR_create_module (__NR_Linux + 167) -#define __NR_init_module (__NR_Linux + 168) -#define __NR_delete_module (__NR_Linux + 169) -#define __NR_get_kernel_syms (__NR_Linux + 170) -#define __NR_query_module (__NR_Linux + 171) -#define __NR_quotactl (__NR_Linux + 172) -#define __NR_nfsservctl (__NR_Linux + 173) -#define __NR_getpmsg (__NR_Linux + 174) -#define __NR_putpmsg (__NR_Linux + 175) -#define __NR_afs_syscall (__NR_Linux + 176) -#define __NR_reserved177 (__NR_Linux + 177) -#define __NR_gettid (__NR_Linux + 178) -#define __NR_readahead (__NR_Linux + 179) -#define __NR_setxattr (__NR_Linux + 180) -#define __NR_lsetxattr (__NR_Linux + 181) -#define __NR_fsetxattr (__NR_Linux + 182) -#define __NR_getxattr (__NR_Linux + 183) -#define __NR_lgetxattr (__NR_Linux + 184) -#define __NR_fgetxattr (__NR_Linux + 185) -#define __NR_listxattr (__NR_Linux + 186) -#define __NR_llistxattr (__NR_Linux + 187) -#define __NR_flistxattr (__NR_Linux + 188) -#define __NR_removexattr (__NR_Linux + 189) -#define __NR_lremovexattr (__NR_Linux + 190) -#define __NR_fremovexattr (__NR_Linux + 191) -#define __NR_tkill (__NR_Linux + 192) -#define __NR_reserved193 (__NR_Linux + 193) -#define __NR_futex (__NR_Linux + 194) -#define __NR_sched_setaffinity (__NR_Linux + 195) -#define __NR_sched_getaffinity (__NR_Linux + 196) -#define __NR_cacheflush (__NR_Linux + 197) -#define __NR_cachectl (__NR_Linux + 198) -#define __NR_sysmips (__NR_Linux + 199) -#define __NR_io_setup (__NR_Linux + 200) -#define __NR_io_destroy (__NR_Linux + 201) -#define __NR_io_getevents (__NR_Linux + 202) -#define __NR_io_submit (__NR_Linux + 203) -#define __NR_io_cancel (__NR_Linux + 204) -#define __NR_exit_group (__NR_Linux + 205) -#define __NR_lookup_dcookie (__NR_Linux + 206) -#define __NR_epoll_create (__NR_Linux + 207) -#define __NR_epoll_ctl (__NR_Linux + 208) -#define __NR_epoll_wait (__NR_Linux + 209) -#define __NR_remap_file_pages (__NR_Linux + 210) -#define __NR_rt_sigreturn (__NR_Linux + 211) -#define __NR_set_tid_address (__NR_Linux + 212) -#define __NR_restart_syscall (__NR_Linux + 213) -#define __NR_semtimedop (__NR_Linux + 214) -#define __NR_fadvise64 (__NR_Linux + 215) -#define __NR_timer_create (__NR_Linux + 216) -#define __NR_timer_settime (__NR_Linux + 217) -#define __NR_timer_gettime (__NR_Linux + 218) -#define __NR_timer_getoverrun (__NR_Linux + 219) -#define __NR_timer_delete (__NR_Linux + 220) -#define __NR_clock_settime (__NR_Linux + 221) -#define __NR_clock_gettime (__NR_Linux + 222) -#define __NR_clock_getres (__NR_Linux + 223) -#define __NR_clock_nanosleep (__NR_Linux + 224) -#define __NR_tgkill (__NR_Linux + 225) -#define __NR_utimes (__NR_Linux + 226) -#define __NR_mbind (__NR_Linux + 227) -#define __NR_get_mempolicy (__NR_Linux + 228) -#define __NR_set_mempolicy (__NR_Linux + 229) -#define __NR_mq_open (__NR_Linux + 230) -#define __NR_mq_unlink (__NR_Linux + 231) -#define __NR_mq_timedsend (__NR_Linux + 232) -#define __NR_mq_timedreceive (__NR_Linux + 233) -#define __NR_mq_notify (__NR_Linux + 234) -#define __NR_mq_getsetattr (__NR_Linux + 235) -#define __NR_vserver (__NR_Linux + 236) -#define __NR_waitid (__NR_Linux + 237) -#define __NR_add_key (__NR_Linux + 239) -#define __NR_request_key (__NR_Linux 
+ 240) -#define __NR_keyctl (__NR_Linux + 241) -#define __NR_set_thread_area (__NR_Linux + 242) -#define __NR_inotify_init (__NR_Linux + 243) -#define __NR_inotify_add_watch (__NR_Linux + 244) -#define __NR_inotify_rm_watch (__NR_Linux + 245) -#define __NR_migrate_pages (__NR_Linux + 246) -#define __NR_openat (__NR_Linux + 247) -#define __NR_mkdirat (__NR_Linux + 248) -#define __NR_mknodat (__NR_Linux + 249) -#define __NR_fchownat (__NR_Linux + 250) -#define __NR_futimesat (__NR_Linux + 251) -#define __NR_newfstatat (__NR_Linux + 252) -#define __NR_unlinkat (__NR_Linux + 253) -#define __NR_renameat (__NR_Linux + 254) -#define __NR_linkat (__NR_Linux + 255) -#define __NR_symlinkat (__NR_Linux + 256) -#define __NR_readlinkat (__NR_Linux + 257) -#define __NR_fchmodat (__NR_Linux + 258) -#define __NR_faccessat (__NR_Linux + 259) -#define __NR_pselect6 (__NR_Linux + 260) -#define __NR_ppoll (__NR_Linux + 261) -#define __NR_unshare (__NR_Linux + 262) -#define __NR_splice (__NR_Linux + 263) -#define __NR_sync_file_range (__NR_Linux + 264) -#define __NR_tee (__NR_Linux + 265) -#define __NR_vmsplice (__NR_Linux + 266) -#define __NR_move_pages (__NR_Linux + 267) -#define __NR_set_robust_list (__NR_Linux + 268) -#define __NR_get_robust_list (__NR_Linux + 269) -#define __NR_kexec_load (__NR_Linux + 270) -#define __NR_getcpu (__NR_Linux + 271) -#define __NR_epoll_pwait (__NR_Linux + 272) -#define __NR_ioprio_set (__NR_Linux + 273) -#define __NR_ioprio_get (__NR_Linux + 274) -#define __NR_utimensat (__NR_Linux + 275) -#define __NR_signalfd (__NR_Linux + 276) -#define __NR_timerfd (__NR_Linux + 277) -#define __NR_eventfd (__NR_Linux + 278) -#define __NR_fallocate (__NR_Linux + 279) -#define __NR_timerfd_create (__NR_Linux + 280) -#define __NR_timerfd_gettime (__NR_Linux + 281) -#define __NR_timerfd_settime (__NR_Linux + 282) -#define __NR_signalfd4 (__NR_Linux + 283) -#define __NR_eventfd2 (__NR_Linux + 284) -#define __NR_epoll_create1 (__NR_Linux + 285) -#define __NR_dup3 (__NR_Linux + 286) -#define __NR_pipe2 (__NR_Linux + 287) -#define __NR_inotify_init1 (__NR_Linux + 288) -#define __NR_preadv (__NR_Linux + 289) -#define __NR_pwritev (__NR_Linux + 290) -#define __NR_rt_tgsigqueueinfo (__NR_Linux + 291) -#define __NR_perf_event_open (__NR_Linux + 292) -#define __NR_accept4 (__NR_Linux + 293) -#define __NR_recvmmsg (__NR_Linux + 294) -#define __NR_fanotify_init (__NR_Linux + 295) -#define __NR_fanotify_mark (__NR_Linux + 296) -#define __NR_prlimit64 (__NR_Linux + 297) -#define __NR_name_to_handle_at (__NR_Linux + 298) -#define __NR_open_by_handle_at (__NR_Linux + 299) -#define __NR_clock_adjtime (__NR_Linux + 300) -#define __NR_syncfs (__NR_Linux + 301) -#define __NR_sendmmsg (__NR_Linux + 302) -#define __NR_setns (__NR_Linux + 303) -#define __NR_process_vm_readv (__NR_Linux + 304) -#define __NR_process_vm_writev (__NR_Linux + 305) -#define __NR_kcmp (__NR_Linux + 306) -#define __NR_finit_module (__NR_Linux + 307) -#define __NR_getdents64 (__NR_Linux + 308) -#define __NR_sched_setattr (__NR_Linux + 309) -#define __NR_sched_getattr (__NR_Linux + 310) -#define __NR_renameat2 (__NR_Linux + 311) -#define __NR_seccomp (__NR_Linux + 312) -#define __NR_getrandom (__NR_Linux + 313) -#define __NR_memfd_create (__NR_Linux + 314) -#define __NR_bpf (__NR_Linux + 315) -#define __NR_execveat (__NR_Linux + 316) -#define __NR_userfaultfd (__NR_Linux + 317) -#define __NR_membarrier (__NR_Linux + 318) -#define __NR_mlock2 (__NR_Linux + 319) -#define __NR_copy_file_range (__NR_Linux + 320) -#define __NR_preadv2 
(__NR_Linux + 321) -#define __NR_pwritev2 (__NR_Linux + 322) -#define __NR_pkey_mprotect (__NR_Linux + 323) -#define __NR_pkey_alloc (__NR_Linux + 324) -#define __NR_pkey_free (__NR_Linux + 325) -#define __NR_statx (__NR_Linux + 326) -#define __NR_rseq (__NR_Linux + 327) -#define __NR_io_pgetevents (__NR_Linux + 328) -#define __NR_pidfd_send_signal (__NR_Linux + 424) -#define __NR_io_uring_setup (__NR_Linux + 425) -#define __NR_io_uring_enter (__NR_Linux + 426) -#define __NR_io_uring_register (__NR_Linux + 427) -#define __NR_open_tree (__NR_Linux + 428) -#define __NR_move_mount (__NR_Linux + 429) -#define __NR_fsopen (__NR_Linux + 430) -#define __NR_fsconfig (__NR_Linux + 431) -#define __NR_fsmount (__NR_Linux + 432) -#define __NR_fspick (__NR_Linux + 433) -#define __NR_pidfd_open (__NR_Linux + 434) -#define __NR_clone3 (__NR_Linux + 435) -#define __NR_close_range (__NR_Linux + 436) -#define __NR_openat2 (__NR_Linux + 437) -#define __NR_pidfd_getfd (__NR_Linux + 438) -#define __NR_faccessat2 (__NR_Linux + 439) -#define __NR_process_madvise (__NR_Linux + 440) -#define __NR_epoll_pwait2 (__NR_Linux + 441) +#define __NR_read (__NR_Linux + 0) +#define __NR_write (__NR_Linux + 1) +#define __NR_open (__NR_Linux + 2) +#define __NR_close (__NR_Linux + 3) +#define __NR_stat (__NR_Linux + 4) +#define __NR_fstat (__NR_Linux + 5) +#define __NR_lstat (__NR_Linux + 6) +#define __NR_poll (__NR_Linux + 7) +#define __NR_lseek (__NR_Linux + 8) +#define __NR_mmap (__NR_Linux + 9) +#define __NR_mprotect (__NR_Linux + 10) +#define __NR_munmap (__NR_Linux + 11) +#define __NR_brk (__NR_Linux + 12) +#define __NR_rt_sigaction (__NR_Linux + 13) +#define __NR_rt_sigprocmask (__NR_Linux + 14) +#define __NR_ioctl (__NR_Linux + 15) +#define __NR_pread64 (__NR_Linux + 16) +#define __NR_pwrite64 (__NR_Linux + 17) +#define __NR_readv (__NR_Linux + 18) +#define __NR_writev (__NR_Linux + 19) +#define __NR_access (__NR_Linux + 20) +#define __NR_pipe (__NR_Linux + 21) +#define __NR__newselect (__NR_Linux + 22) +#define __NR_sched_yield (__NR_Linux + 23) +#define __NR_mremap (__NR_Linux + 24) +#define __NR_msync (__NR_Linux + 25) +#define __NR_mincore (__NR_Linux + 26) +#define __NR_madvise (__NR_Linux + 27) +#define __NR_shmget (__NR_Linux + 28) +#define __NR_shmat (__NR_Linux + 29) +#define __NR_shmctl (__NR_Linux + 30) +#define __NR_dup (__NR_Linux + 31) +#define __NR_dup2 (__NR_Linux + 32) +#define __NR_pause (__NR_Linux + 33) +#define __NR_nanosleep (__NR_Linux + 34) +#define __NR_getitimer (__NR_Linux + 35) +#define __NR_setitimer (__NR_Linux + 36) +#define __NR_alarm (__NR_Linux + 37) +#define __NR_getpid (__NR_Linux + 38) +#define __NR_sendfile (__NR_Linux + 39) +#define __NR_socket (__NR_Linux + 40) +#define __NR_connect (__NR_Linux + 41) +#define __NR_accept (__NR_Linux + 42) +#define __NR_sendto (__NR_Linux + 43) +#define __NR_recvfrom (__NR_Linux + 44) +#define __NR_sendmsg (__NR_Linux + 45) +#define __NR_recvmsg (__NR_Linux + 46) +#define __NR_shutdown (__NR_Linux + 47) +#define __NR_bind (__NR_Linux + 48) +#define __NR_listen (__NR_Linux + 49) +#define __NR_getsockname (__NR_Linux + 50) +#define __NR_getpeername (__NR_Linux + 51) +#define __NR_socketpair (__NR_Linux + 52) +#define __NR_setsockopt (__NR_Linux + 53) +#define __NR_getsockopt (__NR_Linux + 54) +#define __NR_clone (__NR_Linux + 55) +#define __NR_fork (__NR_Linux + 56) +#define __NR_execve (__NR_Linux + 57) +#define __NR_exit (__NR_Linux + 58) +#define __NR_wait4 (__NR_Linux + 59) +#define __NR_kill (__NR_Linux + 60) +#define __NR_uname (__NR_Linux + 
61) +#define __NR_semget (__NR_Linux + 62) +#define __NR_semop (__NR_Linux + 63) +#define __NR_semctl (__NR_Linux + 64) +#define __NR_shmdt (__NR_Linux + 65) +#define __NR_msgget (__NR_Linux + 66) +#define __NR_msgsnd (__NR_Linux + 67) +#define __NR_msgrcv (__NR_Linux + 68) +#define __NR_msgctl (__NR_Linux + 69) +#define __NR_fcntl (__NR_Linux + 70) +#define __NR_flock (__NR_Linux + 71) +#define __NR_fsync (__NR_Linux + 72) +#define __NR_fdatasync (__NR_Linux + 73) +#define __NR_truncate (__NR_Linux + 74) +#define __NR_ftruncate (__NR_Linux + 75) +#define __NR_getdents (__NR_Linux + 76) +#define __NR_getcwd (__NR_Linux + 77) +#define __NR_chdir (__NR_Linux + 78) +#define __NR_fchdir (__NR_Linux + 79) +#define __NR_rename (__NR_Linux + 80) +#define __NR_mkdir (__NR_Linux + 81) +#define __NR_rmdir (__NR_Linux + 82) +#define __NR_creat (__NR_Linux + 83) +#define __NR_link (__NR_Linux + 84) +#define __NR_unlink (__NR_Linux + 85) +#define __NR_symlink (__NR_Linux + 86) +#define __NR_readlink (__NR_Linux + 87) +#define __NR_chmod (__NR_Linux + 88) +#define __NR_fchmod (__NR_Linux + 89) +#define __NR_chown (__NR_Linux + 90) +#define __NR_fchown (__NR_Linux + 91) +#define __NR_lchown (__NR_Linux + 92) +#define __NR_umask (__NR_Linux + 93) +#define __NR_gettimeofday (__NR_Linux + 94) +#define __NR_getrlimit (__NR_Linux + 95) +#define __NR_getrusage (__NR_Linux + 96) +#define __NR_sysinfo (__NR_Linux + 97) +#define __NR_times (__NR_Linux + 98) +#define __NR_ptrace (__NR_Linux + 99) +#define __NR_getuid (__NR_Linux + 100) +#define __NR_syslog (__NR_Linux + 101) +#define __NR_getgid (__NR_Linux + 102) +#define __NR_setuid (__NR_Linux + 103) +#define __NR_setgid (__NR_Linux + 104) +#define __NR_geteuid (__NR_Linux + 105) +#define __NR_getegid (__NR_Linux + 106) +#define __NR_setpgid (__NR_Linux + 107) +#define __NR_getppid (__NR_Linux + 108) +#define __NR_getpgrp (__NR_Linux + 109) +#define __NR_setsid (__NR_Linux + 110) +#define __NR_setreuid (__NR_Linux + 111) +#define __NR_setregid (__NR_Linux + 112) +#define __NR_getgroups (__NR_Linux + 113) +#define __NR_setgroups (__NR_Linux + 114) +#define __NR_setresuid (__NR_Linux + 115) +#define __NR_getresuid (__NR_Linux + 116) +#define __NR_setresgid (__NR_Linux + 117) +#define __NR_getresgid (__NR_Linux + 118) +#define __NR_getpgid (__NR_Linux + 119) +#define __NR_setfsuid (__NR_Linux + 120) +#define __NR_setfsgid (__NR_Linux + 121) +#define __NR_getsid (__NR_Linux + 122) +#define __NR_capget (__NR_Linux + 123) +#define __NR_capset (__NR_Linux + 124) +#define __NR_rt_sigpending (__NR_Linux + 125) +#define __NR_rt_sigtimedwait (__NR_Linux + 126) +#define __NR_rt_sigqueueinfo (__NR_Linux + 127) +#define __NR_rt_sigsuspend (__NR_Linux + 128) +#define __NR_sigaltstack (__NR_Linux + 129) +#define __NR_utime (__NR_Linux + 130) +#define __NR_mknod (__NR_Linux + 131) +#define __NR_personality (__NR_Linux + 132) +#define __NR_ustat (__NR_Linux + 133) +#define __NR_statfs (__NR_Linux + 134) +#define __NR_fstatfs (__NR_Linux + 135) +#define __NR_sysfs (__NR_Linux + 136) +#define __NR_getpriority (__NR_Linux + 137) +#define __NR_setpriority (__NR_Linux + 138) +#define __NR_sched_setparam (__NR_Linux + 139) +#define __NR_sched_getparam (__NR_Linux + 140) +#define __NR_sched_setscheduler (__NR_Linux + 141) +#define __NR_sched_getscheduler (__NR_Linux + 142) +#define __NR_sched_get_priority_max (__NR_Linux + 143) +#define __NR_sched_get_priority_min (__NR_Linux + 144) +#define __NR_sched_rr_get_interval (__NR_Linux + 145) +#define __NR_mlock (__NR_Linux + 146) +#define 
__NR_munlock (__NR_Linux + 147) +#define __NR_mlockall (__NR_Linux + 148) +#define __NR_munlockall (__NR_Linux + 149) +#define __NR_vhangup (__NR_Linux + 150) +#define __NR_pivot_root (__NR_Linux + 151) +#define __NR__sysctl (__NR_Linux + 152) +#define __NR_prctl (__NR_Linux + 153) +#define __NR_adjtimex (__NR_Linux + 154) +#define __NR_setrlimit (__NR_Linux + 155) +#define __NR_chroot (__NR_Linux + 156) +#define __NR_sync (__NR_Linux + 157) +#define __NR_acct (__NR_Linux + 158) +#define __NR_settimeofday (__NR_Linux + 159) +#define __NR_mount (__NR_Linux + 160) +#define __NR_umount2 (__NR_Linux + 161) +#define __NR_swapon (__NR_Linux + 162) +#define __NR_swapoff (__NR_Linux + 163) +#define __NR_reboot (__NR_Linux + 164) +#define __NR_sethostname (__NR_Linux + 165) +#define __NR_setdomainname (__NR_Linux + 166) +#define __NR_create_module (__NR_Linux + 167) +#define __NR_init_module (__NR_Linux + 168) +#define __NR_delete_module (__NR_Linux + 169) +#define __NR_get_kernel_syms (__NR_Linux + 170) +#define __NR_query_module (__NR_Linux + 171) +#define __NR_quotactl (__NR_Linux + 172) +#define __NR_nfsservctl (__NR_Linux + 173) +#define __NR_getpmsg (__NR_Linux + 174) +#define __NR_putpmsg (__NR_Linux + 175) +#define __NR_afs_syscall (__NR_Linux + 176) +#define __NR_reserved177 (__NR_Linux + 177) +#define __NR_gettid (__NR_Linux + 178) +#define __NR_readahead (__NR_Linux + 179) +#define __NR_setxattr (__NR_Linux + 180) +#define __NR_lsetxattr (__NR_Linux + 181) +#define __NR_fsetxattr (__NR_Linux + 182) +#define __NR_getxattr (__NR_Linux + 183) +#define __NR_lgetxattr (__NR_Linux + 184) +#define __NR_fgetxattr (__NR_Linux + 185) +#define __NR_listxattr (__NR_Linux + 186) +#define __NR_llistxattr (__NR_Linux + 187) +#define __NR_flistxattr (__NR_Linux + 188) +#define __NR_removexattr (__NR_Linux + 189) +#define __NR_lremovexattr (__NR_Linux + 190) +#define __NR_fremovexattr (__NR_Linux + 191) +#define __NR_tkill (__NR_Linux + 192) +#define __NR_reserved193 (__NR_Linux + 193) +#define __NR_futex (__NR_Linux + 194) +#define __NR_sched_setaffinity (__NR_Linux + 195) +#define __NR_sched_getaffinity (__NR_Linux + 196) +#define __NR_cacheflush (__NR_Linux + 197) +#define __NR_cachectl (__NR_Linux + 198) +#define __NR_sysmips (__NR_Linux + 199) +#define __NR_io_setup (__NR_Linux + 200) +#define __NR_io_destroy (__NR_Linux + 201) +#define __NR_io_getevents (__NR_Linux + 202) +#define __NR_io_submit (__NR_Linux + 203) +#define __NR_io_cancel (__NR_Linux + 204) +#define __NR_exit_group (__NR_Linux + 205) +#define __NR_lookup_dcookie (__NR_Linux + 206) +#define __NR_epoll_create (__NR_Linux + 207) +#define __NR_epoll_ctl (__NR_Linux + 208) +#define __NR_epoll_wait (__NR_Linux + 209) +#define __NR_remap_file_pages (__NR_Linux + 210) +#define __NR_rt_sigreturn (__NR_Linux + 211) +#define __NR_set_tid_address (__NR_Linux + 212) +#define __NR_restart_syscall (__NR_Linux + 213) +#define __NR_semtimedop (__NR_Linux + 214) +#define __NR_fadvise64 (__NR_Linux + 215) +#define __NR_timer_create (__NR_Linux + 216) +#define __NR_timer_settime (__NR_Linux + 217) +#define __NR_timer_gettime (__NR_Linux + 218) +#define __NR_timer_getoverrun (__NR_Linux + 219) +#define __NR_timer_delete (__NR_Linux + 220) +#define __NR_clock_settime (__NR_Linux + 221) +#define __NR_clock_gettime (__NR_Linux + 222) +#define __NR_clock_getres (__NR_Linux + 223) +#define __NR_clock_nanosleep (__NR_Linux + 224) +#define __NR_tgkill (__NR_Linux + 225) +#define __NR_utimes (__NR_Linux + 226) +#define __NR_mbind (__NR_Linux + 227) +#define 
__NR_get_mempolicy (__NR_Linux + 228) +#define __NR_set_mempolicy (__NR_Linux + 229) +#define __NR_mq_open (__NR_Linux + 230) +#define __NR_mq_unlink (__NR_Linux + 231) +#define __NR_mq_timedsend (__NR_Linux + 232) +#define __NR_mq_timedreceive (__NR_Linux + 233) +#define __NR_mq_notify (__NR_Linux + 234) +#define __NR_mq_getsetattr (__NR_Linux + 235) +#define __NR_vserver (__NR_Linux + 236) +#define __NR_waitid (__NR_Linux + 237) +#define __NR_add_key (__NR_Linux + 239) +#define __NR_request_key (__NR_Linux + 240) +#define __NR_keyctl (__NR_Linux + 241) +#define __NR_set_thread_area (__NR_Linux + 242) +#define __NR_inotify_init (__NR_Linux + 243) +#define __NR_inotify_add_watch (__NR_Linux + 244) +#define __NR_inotify_rm_watch (__NR_Linux + 245) +#define __NR_migrate_pages (__NR_Linux + 246) +#define __NR_openat (__NR_Linux + 247) +#define __NR_mkdirat (__NR_Linux + 248) +#define __NR_mknodat (__NR_Linux + 249) +#define __NR_fchownat (__NR_Linux + 250) +#define __NR_futimesat (__NR_Linux + 251) +#define __NR_newfstatat (__NR_Linux + 252) +#define __NR_unlinkat (__NR_Linux + 253) +#define __NR_renameat (__NR_Linux + 254) +#define __NR_linkat (__NR_Linux + 255) +#define __NR_symlinkat (__NR_Linux + 256) +#define __NR_readlinkat (__NR_Linux + 257) +#define __NR_fchmodat (__NR_Linux + 258) +#define __NR_faccessat (__NR_Linux + 259) +#define __NR_pselect6 (__NR_Linux + 260) +#define __NR_ppoll (__NR_Linux + 261) +#define __NR_unshare (__NR_Linux + 262) +#define __NR_splice (__NR_Linux + 263) +#define __NR_sync_file_range (__NR_Linux + 264) +#define __NR_tee (__NR_Linux + 265) +#define __NR_vmsplice (__NR_Linux + 266) +#define __NR_move_pages (__NR_Linux + 267) +#define __NR_set_robust_list (__NR_Linux + 268) +#define __NR_get_robust_list (__NR_Linux + 269) +#define __NR_kexec_load (__NR_Linux + 270) +#define __NR_getcpu (__NR_Linux + 271) +#define __NR_epoll_pwait (__NR_Linux + 272) +#define __NR_ioprio_set (__NR_Linux + 273) +#define __NR_ioprio_get (__NR_Linux + 274) +#define __NR_utimensat (__NR_Linux + 275) +#define __NR_signalfd (__NR_Linux + 276) +#define __NR_timerfd (__NR_Linux + 277) +#define __NR_eventfd (__NR_Linux + 278) +#define __NR_fallocate (__NR_Linux + 279) +#define __NR_timerfd_create (__NR_Linux + 280) +#define __NR_timerfd_gettime (__NR_Linux + 281) +#define __NR_timerfd_settime (__NR_Linux + 282) +#define __NR_signalfd4 (__NR_Linux + 283) +#define __NR_eventfd2 (__NR_Linux + 284) +#define __NR_epoll_create1 (__NR_Linux + 285) +#define __NR_dup3 (__NR_Linux + 286) +#define __NR_pipe2 (__NR_Linux + 287) +#define __NR_inotify_init1 (__NR_Linux + 288) +#define __NR_preadv (__NR_Linux + 289) +#define __NR_pwritev (__NR_Linux + 290) +#define __NR_rt_tgsigqueueinfo (__NR_Linux + 291) +#define __NR_perf_event_open (__NR_Linux + 292) +#define __NR_accept4 (__NR_Linux + 293) +#define __NR_recvmmsg (__NR_Linux + 294) +#define __NR_fanotify_init (__NR_Linux + 295) +#define __NR_fanotify_mark (__NR_Linux + 296) +#define __NR_prlimit64 (__NR_Linux + 297) +#define __NR_name_to_handle_at (__NR_Linux + 298) +#define __NR_open_by_handle_at (__NR_Linux + 299) +#define __NR_clock_adjtime (__NR_Linux + 300) +#define __NR_syncfs (__NR_Linux + 301) +#define __NR_sendmmsg (__NR_Linux + 302) +#define __NR_setns (__NR_Linux + 303) +#define __NR_process_vm_readv (__NR_Linux + 304) +#define __NR_process_vm_writev (__NR_Linux + 305) +#define __NR_kcmp (__NR_Linux + 306) +#define __NR_finit_module (__NR_Linux + 307) +#define __NR_getdents64 (__NR_Linux + 308) +#define __NR_sched_setattr (__NR_Linux + 
309) +#define __NR_sched_getattr (__NR_Linux + 310) +#define __NR_renameat2 (__NR_Linux + 311) +#define __NR_seccomp (__NR_Linux + 312) +#define __NR_getrandom (__NR_Linux + 313) +#define __NR_memfd_create (__NR_Linux + 314) +#define __NR_bpf (__NR_Linux + 315) +#define __NR_execveat (__NR_Linux + 316) +#define __NR_userfaultfd (__NR_Linux + 317) +#define __NR_membarrier (__NR_Linux + 318) +#define __NR_mlock2 (__NR_Linux + 319) +#define __NR_copy_file_range (__NR_Linux + 320) +#define __NR_preadv2 (__NR_Linux + 321) +#define __NR_pwritev2 (__NR_Linux + 322) +#define __NR_pkey_mprotect (__NR_Linux + 323) +#define __NR_pkey_alloc (__NR_Linux + 324) +#define __NR_pkey_free (__NR_Linux + 325) +#define __NR_statx (__NR_Linux + 326) +#define __NR_rseq (__NR_Linux + 327) +#define __NR_io_pgetevents (__NR_Linux + 328) +#define __NR_pidfd_send_signal (__NR_Linux + 424) +#define __NR_io_uring_setup (__NR_Linux + 425) +#define __NR_io_uring_enter (__NR_Linux + 426) +#define __NR_io_uring_register (__NR_Linux + 427) +#define __NR_open_tree (__NR_Linux + 428) +#define __NR_move_mount (__NR_Linux + 429) +#define __NR_fsopen (__NR_Linux + 430) +#define __NR_fsconfig (__NR_Linux + 431) +#define __NR_fsmount (__NR_Linux + 432) +#define __NR_fspick (__NR_Linux + 433) +#define __NR_pidfd_open (__NR_Linux + 434) +#define __NR_clone3 (__NR_Linux + 435) +#define __NR_close_range (__NR_Linux + 436) +#define __NR_openat2 (__NR_Linux + 437) +#define __NR_pidfd_getfd (__NR_Linux + 438) +#define __NR_faccessat2 (__NR_Linux + 439) +#define __NR_process_madvise (__NR_Linux + 440) +#define __NR_epoll_pwait2 (__NR_Linux + 441) +#define __NR_mount_setattr (__NR_Linux + 442) +#define __NR_quotactl_fd (__NR_Linux + 443) +#define __NR_landlock_create_ruleset (__NR_Linux + 444) +#define __NR_landlock_add_rule (__NR_Linux + 445) +#define __NR_landlock_restrict_self (__NR_Linux + 446) - -#endif /* _ASM_MIPS_UNISTD_N64_H */ +#endif /* _ASM_UNISTD_N64_H */ diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h index ca6a7e5c0b9..06a2b3b55e6 100644 --- a/linux-headers/asm-mips/unistd_o32.h +++ b/linux-headers/asm-mips/unistd_o32.h @@ -1,422 +1,426 @@ -#ifndef _ASM_MIPS_UNISTD_O32_H -#define _ASM_MIPS_UNISTD_O32_H +#ifndef _ASM_UNISTD_O32_H +#define _ASM_UNISTD_O32_H -#define __NR_syscall (__NR_Linux + 0) -#define __NR_exit (__NR_Linux + 1) -#define __NR_fork (__NR_Linux + 2) -#define __NR_read (__NR_Linux + 3) -#define __NR_write (__NR_Linux + 4) -#define __NR_open (__NR_Linux + 5) -#define __NR_close (__NR_Linux + 6) -#define __NR_waitpid (__NR_Linux + 7) -#define __NR_creat (__NR_Linux + 8) -#define __NR_link (__NR_Linux + 9) -#define __NR_unlink (__NR_Linux + 10) -#define __NR_execve (__NR_Linux + 11) -#define __NR_chdir (__NR_Linux + 12) -#define __NR_time (__NR_Linux + 13) -#define __NR_mknod (__NR_Linux + 14) -#define __NR_chmod (__NR_Linux + 15) -#define __NR_lchown (__NR_Linux + 16) -#define __NR_break (__NR_Linux + 17) -#define __NR_unused18 (__NR_Linux + 18) -#define __NR_lseek (__NR_Linux + 19) -#define __NR_getpid (__NR_Linux + 20) -#define __NR_mount (__NR_Linux + 21) -#define __NR_umount (__NR_Linux + 22) -#define __NR_setuid (__NR_Linux + 23) -#define __NR_getuid (__NR_Linux + 24) -#define __NR_stime (__NR_Linux + 25) -#define __NR_ptrace (__NR_Linux + 26) -#define __NR_alarm (__NR_Linux + 27) -#define __NR_unused28 (__NR_Linux + 28) -#define __NR_pause (__NR_Linux + 29) -#define __NR_utime (__NR_Linux + 30) -#define __NR_stty (__NR_Linux + 31) -#define __NR_gtty (__NR_Linux + 32) 
-#define __NR_access (__NR_Linux + 33) -#define __NR_nice (__NR_Linux + 34) -#define __NR_ftime (__NR_Linux + 35) -#define __NR_sync (__NR_Linux + 36) -#define __NR_kill (__NR_Linux + 37) -#define __NR_rename (__NR_Linux + 38) -#define __NR_mkdir (__NR_Linux + 39) -#define __NR_rmdir (__NR_Linux + 40) -#define __NR_dup (__NR_Linux + 41) -#define __NR_pipe (__NR_Linux + 42) -#define __NR_times (__NR_Linux + 43) -#define __NR_prof (__NR_Linux + 44) -#define __NR_brk (__NR_Linux + 45) -#define __NR_setgid (__NR_Linux + 46) -#define __NR_getgid (__NR_Linux + 47) -#define __NR_signal (__NR_Linux + 48) -#define __NR_geteuid (__NR_Linux + 49) -#define __NR_getegid (__NR_Linux + 50) -#define __NR_acct (__NR_Linux + 51) -#define __NR_umount2 (__NR_Linux + 52) -#define __NR_lock (__NR_Linux + 53) -#define __NR_ioctl (__NR_Linux + 54) -#define __NR_fcntl (__NR_Linux + 55) -#define __NR_mpx (__NR_Linux + 56) -#define __NR_setpgid (__NR_Linux + 57) -#define __NR_ulimit (__NR_Linux + 58) -#define __NR_unused59 (__NR_Linux + 59) -#define __NR_umask (__NR_Linux + 60) -#define __NR_chroot (__NR_Linux + 61) -#define __NR_ustat (__NR_Linux + 62) -#define __NR_dup2 (__NR_Linux + 63) -#define __NR_getppid (__NR_Linux + 64) -#define __NR_getpgrp (__NR_Linux + 65) -#define __NR_setsid (__NR_Linux + 66) -#define __NR_sigaction (__NR_Linux + 67) -#define __NR_sgetmask (__NR_Linux + 68) -#define __NR_ssetmask (__NR_Linux + 69) -#define __NR_setreuid (__NR_Linux + 70) -#define __NR_setregid (__NR_Linux + 71) -#define __NR_sigsuspend (__NR_Linux + 72) -#define __NR_sigpending (__NR_Linux + 73) -#define __NR_sethostname (__NR_Linux + 74) -#define __NR_setrlimit (__NR_Linux + 75) -#define __NR_getrlimit (__NR_Linux + 76) -#define __NR_getrusage (__NR_Linux + 77) -#define __NR_gettimeofday (__NR_Linux + 78) -#define __NR_settimeofday (__NR_Linux + 79) -#define __NR_getgroups (__NR_Linux + 80) -#define __NR_setgroups (__NR_Linux + 81) -#define __NR_reserved82 (__NR_Linux + 82) -#define __NR_symlink (__NR_Linux + 83) -#define __NR_unused84 (__NR_Linux + 84) -#define __NR_readlink (__NR_Linux + 85) -#define __NR_uselib (__NR_Linux + 86) -#define __NR_swapon (__NR_Linux + 87) -#define __NR_reboot (__NR_Linux + 88) -#define __NR_readdir (__NR_Linux + 89) -#define __NR_mmap (__NR_Linux + 90) -#define __NR_munmap (__NR_Linux + 91) -#define __NR_truncate (__NR_Linux + 92) -#define __NR_ftruncate (__NR_Linux + 93) -#define __NR_fchmod (__NR_Linux + 94) -#define __NR_fchown (__NR_Linux + 95) -#define __NR_getpriority (__NR_Linux + 96) -#define __NR_setpriority (__NR_Linux + 97) -#define __NR_profil (__NR_Linux + 98) -#define __NR_statfs (__NR_Linux + 99) -#define __NR_fstatfs (__NR_Linux + 100) -#define __NR_ioperm (__NR_Linux + 101) -#define __NR_socketcall (__NR_Linux + 102) -#define __NR_syslog (__NR_Linux + 103) -#define __NR_setitimer (__NR_Linux + 104) -#define __NR_getitimer (__NR_Linux + 105) -#define __NR_stat (__NR_Linux + 106) -#define __NR_lstat (__NR_Linux + 107) -#define __NR_fstat (__NR_Linux + 108) -#define __NR_unused109 (__NR_Linux + 109) -#define __NR_iopl (__NR_Linux + 110) -#define __NR_vhangup (__NR_Linux + 111) -#define __NR_idle (__NR_Linux + 112) -#define __NR_vm86 (__NR_Linux + 113) -#define __NR_wait4 (__NR_Linux + 114) -#define __NR_swapoff (__NR_Linux + 115) -#define __NR_sysinfo (__NR_Linux + 116) -#define __NR_ipc (__NR_Linux + 117) -#define __NR_fsync (__NR_Linux + 118) -#define __NR_sigreturn (__NR_Linux + 119) -#define __NR_clone (__NR_Linux + 120) -#define __NR_setdomainname (__NR_Linux + 121) 
-#define __NR_uname (__NR_Linux + 122) -#define __NR_modify_ldt (__NR_Linux + 123) -#define __NR_adjtimex (__NR_Linux + 124) -#define __NR_mprotect (__NR_Linux + 125) -#define __NR_sigprocmask (__NR_Linux + 126) -#define __NR_create_module (__NR_Linux + 127) -#define __NR_init_module (__NR_Linux + 128) -#define __NR_delete_module (__NR_Linux + 129) -#define __NR_get_kernel_syms (__NR_Linux + 130) -#define __NR_quotactl (__NR_Linux + 131) -#define __NR_getpgid (__NR_Linux + 132) -#define __NR_fchdir (__NR_Linux + 133) -#define __NR_bdflush (__NR_Linux + 134) -#define __NR_sysfs (__NR_Linux + 135) -#define __NR_personality (__NR_Linux + 136) -#define __NR_afs_syscall (__NR_Linux + 137) -#define __NR_setfsuid (__NR_Linux + 138) -#define __NR_setfsgid (__NR_Linux + 139) -#define __NR__llseek (__NR_Linux + 140) -#define __NR_getdents (__NR_Linux + 141) -#define __NR__newselect (__NR_Linux + 142) -#define __NR_flock (__NR_Linux + 143) -#define __NR_msync (__NR_Linux + 144) -#define __NR_readv (__NR_Linux + 145) -#define __NR_writev (__NR_Linux + 146) -#define __NR_cacheflush (__NR_Linux + 147) -#define __NR_cachectl (__NR_Linux + 148) -#define __NR_sysmips (__NR_Linux + 149) -#define __NR_unused150 (__NR_Linux + 150) -#define __NR_getsid (__NR_Linux + 151) -#define __NR_fdatasync (__NR_Linux + 152) -#define __NR__sysctl (__NR_Linux + 153) -#define __NR_mlock (__NR_Linux + 154) -#define __NR_munlock (__NR_Linux + 155) -#define __NR_mlockall (__NR_Linux + 156) -#define __NR_munlockall (__NR_Linux + 157) -#define __NR_sched_setparam (__NR_Linux + 158) -#define __NR_sched_getparam (__NR_Linux + 159) -#define __NR_sched_setscheduler (__NR_Linux + 160) -#define __NR_sched_getscheduler (__NR_Linux + 161) -#define __NR_sched_yield (__NR_Linux + 162) -#define __NR_sched_get_priority_max (__NR_Linux + 163) -#define __NR_sched_get_priority_min (__NR_Linux + 164) -#define __NR_sched_rr_get_interval (__NR_Linux + 165) -#define __NR_nanosleep (__NR_Linux + 166) -#define __NR_mremap (__NR_Linux + 167) -#define __NR_accept (__NR_Linux + 168) -#define __NR_bind (__NR_Linux + 169) -#define __NR_connect (__NR_Linux + 170) -#define __NR_getpeername (__NR_Linux + 171) -#define __NR_getsockname (__NR_Linux + 172) -#define __NR_getsockopt (__NR_Linux + 173) -#define __NR_listen (__NR_Linux + 174) -#define __NR_recv (__NR_Linux + 175) -#define __NR_recvfrom (__NR_Linux + 176) -#define __NR_recvmsg (__NR_Linux + 177) -#define __NR_send (__NR_Linux + 178) -#define __NR_sendmsg (__NR_Linux + 179) -#define __NR_sendto (__NR_Linux + 180) -#define __NR_setsockopt (__NR_Linux + 181) -#define __NR_shutdown (__NR_Linux + 182) -#define __NR_socket (__NR_Linux + 183) -#define __NR_socketpair (__NR_Linux + 184) -#define __NR_setresuid (__NR_Linux + 185) -#define __NR_getresuid (__NR_Linux + 186) -#define __NR_query_module (__NR_Linux + 187) -#define __NR_poll (__NR_Linux + 188) -#define __NR_nfsservctl (__NR_Linux + 189) -#define __NR_setresgid (__NR_Linux + 190) -#define __NR_getresgid (__NR_Linux + 191) -#define __NR_prctl (__NR_Linux + 192) -#define __NR_rt_sigreturn (__NR_Linux + 193) -#define __NR_rt_sigaction (__NR_Linux + 194) -#define __NR_rt_sigprocmask (__NR_Linux + 195) -#define __NR_rt_sigpending (__NR_Linux + 196) -#define __NR_rt_sigtimedwait (__NR_Linux + 197) -#define __NR_rt_sigqueueinfo (__NR_Linux + 198) -#define __NR_rt_sigsuspend (__NR_Linux + 199) -#define __NR_pread64 (__NR_Linux + 200) -#define __NR_pwrite64 (__NR_Linux + 201) -#define __NR_chown (__NR_Linux + 202) -#define __NR_getcwd (__NR_Linux + 203) 
-#define __NR_capget (__NR_Linux + 204) -#define __NR_capset (__NR_Linux + 205) -#define __NR_sigaltstack (__NR_Linux + 206) -#define __NR_sendfile (__NR_Linux + 207) -#define __NR_getpmsg (__NR_Linux + 208) -#define __NR_putpmsg (__NR_Linux + 209) -#define __NR_mmap2 (__NR_Linux + 210) -#define __NR_truncate64 (__NR_Linux + 211) -#define __NR_ftruncate64 (__NR_Linux + 212) -#define __NR_stat64 (__NR_Linux + 213) -#define __NR_lstat64 (__NR_Linux + 214) -#define __NR_fstat64 (__NR_Linux + 215) -#define __NR_pivot_root (__NR_Linux + 216) -#define __NR_mincore (__NR_Linux + 217) -#define __NR_madvise (__NR_Linux + 218) -#define __NR_getdents64 (__NR_Linux + 219) -#define __NR_fcntl64 (__NR_Linux + 220) -#define __NR_reserved221 (__NR_Linux + 221) -#define __NR_gettid (__NR_Linux + 222) -#define __NR_readahead (__NR_Linux + 223) -#define __NR_setxattr (__NR_Linux + 224) -#define __NR_lsetxattr (__NR_Linux + 225) -#define __NR_fsetxattr (__NR_Linux + 226) -#define __NR_getxattr (__NR_Linux + 227) -#define __NR_lgetxattr (__NR_Linux + 228) -#define __NR_fgetxattr (__NR_Linux + 229) -#define __NR_listxattr (__NR_Linux + 230) -#define __NR_llistxattr (__NR_Linux + 231) -#define __NR_flistxattr (__NR_Linux + 232) -#define __NR_removexattr (__NR_Linux + 233) -#define __NR_lremovexattr (__NR_Linux + 234) -#define __NR_fremovexattr (__NR_Linux + 235) -#define __NR_tkill (__NR_Linux + 236) -#define __NR_sendfile64 (__NR_Linux + 237) -#define __NR_futex (__NR_Linux + 238) -#define __NR_sched_setaffinity (__NR_Linux + 239) -#define __NR_sched_getaffinity (__NR_Linux + 240) -#define __NR_io_setup (__NR_Linux + 241) -#define __NR_io_destroy (__NR_Linux + 242) -#define __NR_io_getevents (__NR_Linux + 243) -#define __NR_io_submit (__NR_Linux + 244) -#define __NR_io_cancel (__NR_Linux + 245) -#define __NR_exit_group (__NR_Linux + 246) -#define __NR_lookup_dcookie (__NR_Linux + 247) -#define __NR_epoll_create (__NR_Linux + 248) -#define __NR_epoll_ctl (__NR_Linux + 249) -#define __NR_epoll_wait (__NR_Linux + 250) -#define __NR_remap_file_pages (__NR_Linux + 251) -#define __NR_set_tid_address (__NR_Linux + 252) -#define __NR_restart_syscall (__NR_Linux + 253) -#define __NR_fadvise64 (__NR_Linux + 254) -#define __NR_statfs64 (__NR_Linux + 255) -#define __NR_fstatfs64 (__NR_Linux + 256) -#define __NR_timer_create (__NR_Linux + 257) -#define __NR_timer_settime (__NR_Linux + 258) -#define __NR_timer_gettime (__NR_Linux + 259) -#define __NR_timer_getoverrun (__NR_Linux + 260) -#define __NR_timer_delete (__NR_Linux + 261) -#define __NR_clock_settime (__NR_Linux + 262) -#define __NR_clock_gettime (__NR_Linux + 263) -#define __NR_clock_getres (__NR_Linux + 264) -#define __NR_clock_nanosleep (__NR_Linux + 265) -#define __NR_tgkill (__NR_Linux + 266) -#define __NR_utimes (__NR_Linux + 267) -#define __NR_mbind (__NR_Linux + 268) -#define __NR_get_mempolicy (__NR_Linux + 269) -#define __NR_set_mempolicy (__NR_Linux + 270) -#define __NR_mq_open (__NR_Linux + 271) -#define __NR_mq_unlink (__NR_Linux + 272) -#define __NR_mq_timedsend (__NR_Linux + 273) -#define __NR_mq_timedreceive (__NR_Linux + 274) -#define __NR_mq_notify (__NR_Linux + 275) -#define __NR_mq_getsetattr (__NR_Linux + 276) -#define __NR_vserver (__NR_Linux + 277) -#define __NR_waitid (__NR_Linux + 278) -#define __NR_add_key (__NR_Linux + 280) -#define __NR_request_key (__NR_Linux + 281) -#define __NR_keyctl (__NR_Linux + 282) -#define __NR_set_thread_area (__NR_Linux + 283) -#define __NR_inotify_init (__NR_Linux + 284) -#define __NR_inotify_add_watch 
(__NR_Linux + 285) -#define __NR_inotify_rm_watch (__NR_Linux + 286) -#define __NR_migrate_pages (__NR_Linux + 287) -#define __NR_openat (__NR_Linux + 288) -#define __NR_mkdirat (__NR_Linux + 289) -#define __NR_mknodat (__NR_Linux + 290) -#define __NR_fchownat (__NR_Linux + 291) -#define __NR_futimesat (__NR_Linux + 292) -#define __NR_fstatat64 (__NR_Linux + 293) -#define __NR_unlinkat (__NR_Linux + 294) -#define __NR_renameat (__NR_Linux + 295) -#define __NR_linkat (__NR_Linux + 296) -#define __NR_symlinkat (__NR_Linux + 297) -#define __NR_readlinkat (__NR_Linux + 298) -#define __NR_fchmodat (__NR_Linux + 299) -#define __NR_faccessat (__NR_Linux + 300) -#define __NR_pselect6 (__NR_Linux + 301) -#define __NR_ppoll (__NR_Linux + 302) -#define __NR_unshare (__NR_Linux + 303) -#define __NR_splice (__NR_Linux + 304) -#define __NR_sync_file_range (__NR_Linux + 305) -#define __NR_tee (__NR_Linux + 306) -#define __NR_vmsplice (__NR_Linux + 307) -#define __NR_move_pages (__NR_Linux + 308) -#define __NR_set_robust_list (__NR_Linux + 309) -#define __NR_get_robust_list (__NR_Linux + 310) -#define __NR_kexec_load (__NR_Linux + 311) -#define __NR_getcpu (__NR_Linux + 312) -#define __NR_epoll_pwait (__NR_Linux + 313) -#define __NR_ioprio_set (__NR_Linux + 314) -#define __NR_ioprio_get (__NR_Linux + 315) -#define __NR_utimensat (__NR_Linux + 316) -#define __NR_signalfd (__NR_Linux + 317) -#define __NR_timerfd (__NR_Linux + 318) -#define __NR_eventfd (__NR_Linux + 319) -#define __NR_fallocate (__NR_Linux + 320) -#define __NR_timerfd_create (__NR_Linux + 321) -#define __NR_timerfd_gettime (__NR_Linux + 322) -#define __NR_timerfd_settime (__NR_Linux + 323) -#define __NR_signalfd4 (__NR_Linux + 324) -#define __NR_eventfd2 (__NR_Linux + 325) -#define __NR_epoll_create1 (__NR_Linux + 326) -#define __NR_dup3 (__NR_Linux + 327) -#define __NR_pipe2 (__NR_Linux + 328) -#define __NR_inotify_init1 (__NR_Linux + 329) -#define __NR_preadv (__NR_Linux + 330) -#define __NR_pwritev (__NR_Linux + 331) -#define __NR_rt_tgsigqueueinfo (__NR_Linux + 332) -#define __NR_perf_event_open (__NR_Linux + 333) -#define __NR_accept4 (__NR_Linux + 334) -#define __NR_recvmmsg (__NR_Linux + 335) -#define __NR_fanotify_init (__NR_Linux + 336) -#define __NR_fanotify_mark (__NR_Linux + 337) -#define __NR_prlimit64 (__NR_Linux + 338) -#define __NR_name_to_handle_at (__NR_Linux + 339) -#define __NR_open_by_handle_at (__NR_Linux + 340) -#define __NR_clock_adjtime (__NR_Linux + 341) -#define __NR_syncfs (__NR_Linux + 342) -#define __NR_sendmmsg (__NR_Linux + 343) -#define __NR_setns (__NR_Linux + 344) -#define __NR_process_vm_readv (__NR_Linux + 345) -#define __NR_process_vm_writev (__NR_Linux + 346) -#define __NR_kcmp (__NR_Linux + 347) -#define __NR_finit_module (__NR_Linux + 348) -#define __NR_sched_setattr (__NR_Linux + 349) -#define __NR_sched_getattr (__NR_Linux + 350) -#define __NR_renameat2 (__NR_Linux + 351) -#define __NR_seccomp (__NR_Linux + 352) -#define __NR_getrandom (__NR_Linux + 353) -#define __NR_memfd_create (__NR_Linux + 354) -#define __NR_bpf (__NR_Linux + 355) -#define __NR_execveat (__NR_Linux + 356) -#define __NR_userfaultfd (__NR_Linux + 357) -#define __NR_membarrier (__NR_Linux + 358) -#define __NR_mlock2 (__NR_Linux + 359) -#define __NR_copy_file_range (__NR_Linux + 360) -#define __NR_preadv2 (__NR_Linux + 361) -#define __NR_pwritev2 (__NR_Linux + 362) -#define __NR_pkey_mprotect (__NR_Linux + 363) -#define __NR_pkey_alloc (__NR_Linux + 364) -#define __NR_pkey_free (__NR_Linux + 365) -#define __NR_statx (__NR_Linux + 
366) -#define __NR_rseq (__NR_Linux + 367) -#define __NR_io_pgetevents (__NR_Linux + 368) -#define __NR_semget (__NR_Linux + 393) -#define __NR_semctl (__NR_Linux + 394) -#define __NR_shmget (__NR_Linux + 395) -#define __NR_shmctl (__NR_Linux + 396) -#define __NR_shmat (__NR_Linux + 397) -#define __NR_shmdt (__NR_Linux + 398) -#define __NR_msgget (__NR_Linux + 399) -#define __NR_msgsnd (__NR_Linux + 400) -#define __NR_msgrcv (__NR_Linux + 401) -#define __NR_msgctl (__NR_Linux + 402) -#define __NR_clock_gettime64 (__NR_Linux + 403) -#define __NR_clock_settime64 (__NR_Linux + 404) -#define __NR_clock_adjtime64 (__NR_Linux + 405) -#define __NR_clock_getres_time64 (__NR_Linux + 406) -#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) -#define __NR_timer_gettime64 (__NR_Linux + 408) -#define __NR_timer_settime64 (__NR_Linux + 409) -#define __NR_timerfd_gettime64 (__NR_Linux + 410) -#define __NR_timerfd_settime64 (__NR_Linux + 411) -#define __NR_utimensat_time64 (__NR_Linux + 412) -#define __NR_pselect6_time64 (__NR_Linux + 413) -#define __NR_ppoll_time64 (__NR_Linux + 414) -#define __NR_io_pgetevents_time64 (__NR_Linux + 416) -#define __NR_recvmmsg_time64 (__NR_Linux + 417) -#define __NR_mq_timedsend_time64 (__NR_Linux + 418) -#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) -#define __NR_semtimedop_time64 (__NR_Linux + 420) -#define __NR_rt_sigtimedwait_time64 (__NR_Linux + 421) -#define __NR_futex_time64 (__NR_Linux + 422) -#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) -#define __NR_pidfd_send_signal (__NR_Linux + 424) -#define __NR_io_uring_setup (__NR_Linux + 425) -#define __NR_io_uring_enter (__NR_Linux + 426) -#define __NR_io_uring_register (__NR_Linux + 427) -#define __NR_open_tree (__NR_Linux + 428) -#define __NR_move_mount (__NR_Linux + 429) -#define __NR_fsopen (__NR_Linux + 430) -#define __NR_fsconfig (__NR_Linux + 431) -#define __NR_fsmount (__NR_Linux + 432) -#define __NR_fspick (__NR_Linux + 433) -#define __NR_pidfd_open (__NR_Linux + 434) -#define __NR_clone3 (__NR_Linux + 435) -#define __NR_close_range (__NR_Linux + 436) -#define __NR_openat2 (__NR_Linux + 437) -#define __NR_pidfd_getfd (__NR_Linux + 438) -#define __NR_faccessat2 (__NR_Linux + 439) -#define __NR_process_madvise (__NR_Linux + 440) -#define __NR_epoll_pwait2 (__NR_Linux + 441) +#define __NR_syscall (__NR_Linux + 0) +#define __NR_exit (__NR_Linux + 1) +#define __NR_fork (__NR_Linux + 2) +#define __NR_read (__NR_Linux + 3) +#define __NR_write (__NR_Linux + 4) +#define __NR_open (__NR_Linux + 5) +#define __NR_close (__NR_Linux + 6) +#define __NR_waitpid (__NR_Linux + 7) +#define __NR_creat (__NR_Linux + 8) +#define __NR_link (__NR_Linux + 9) +#define __NR_unlink (__NR_Linux + 10) +#define __NR_execve (__NR_Linux + 11) +#define __NR_chdir (__NR_Linux + 12) +#define __NR_time (__NR_Linux + 13) +#define __NR_mknod (__NR_Linux + 14) +#define __NR_chmod (__NR_Linux + 15) +#define __NR_lchown (__NR_Linux + 16) +#define __NR_break (__NR_Linux + 17) +#define __NR_unused18 (__NR_Linux + 18) +#define __NR_lseek (__NR_Linux + 19) +#define __NR_getpid (__NR_Linux + 20) +#define __NR_mount (__NR_Linux + 21) +#define __NR_umount (__NR_Linux + 22) +#define __NR_setuid (__NR_Linux + 23) +#define __NR_getuid (__NR_Linux + 24) +#define __NR_stime (__NR_Linux + 25) +#define __NR_ptrace (__NR_Linux + 26) +#define __NR_alarm (__NR_Linux + 27) +#define __NR_unused28 (__NR_Linux + 28) +#define __NR_pause (__NR_Linux + 29) +#define __NR_utime (__NR_Linux + 30) +#define __NR_stty (__NR_Linux + 31) +#define __NR_gtty 
(__NR_Linux + 32) +#define __NR_access (__NR_Linux + 33) +#define __NR_nice (__NR_Linux + 34) +#define __NR_ftime (__NR_Linux + 35) +#define __NR_sync (__NR_Linux + 36) +#define __NR_kill (__NR_Linux + 37) +#define __NR_rename (__NR_Linux + 38) +#define __NR_mkdir (__NR_Linux + 39) +#define __NR_rmdir (__NR_Linux + 40) +#define __NR_dup (__NR_Linux + 41) +#define __NR_pipe (__NR_Linux + 42) +#define __NR_times (__NR_Linux + 43) +#define __NR_prof (__NR_Linux + 44) +#define __NR_brk (__NR_Linux + 45) +#define __NR_setgid (__NR_Linux + 46) +#define __NR_getgid (__NR_Linux + 47) +#define __NR_signal (__NR_Linux + 48) +#define __NR_geteuid (__NR_Linux + 49) +#define __NR_getegid (__NR_Linux + 50) +#define __NR_acct (__NR_Linux + 51) +#define __NR_umount2 (__NR_Linux + 52) +#define __NR_lock (__NR_Linux + 53) +#define __NR_ioctl (__NR_Linux + 54) +#define __NR_fcntl (__NR_Linux + 55) +#define __NR_mpx (__NR_Linux + 56) +#define __NR_setpgid (__NR_Linux + 57) +#define __NR_ulimit (__NR_Linux + 58) +#define __NR_unused59 (__NR_Linux + 59) +#define __NR_umask (__NR_Linux + 60) +#define __NR_chroot (__NR_Linux + 61) +#define __NR_ustat (__NR_Linux + 62) +#define __NR_dup2 (__NR_Linux + 63) +#define __NR_getppid (__NR_Linux + 64) +#define __NR_getpgrp (__NR_Linux + 65) +#define __NR_setsid (__NR_Linux + 66) +#define __NR_sigaction (__NR_Linux + 67) +#define __NR_sgetmask (__NR_Linux + 68) +#define __NR_ssetmask (__NR_Linux + 69) +#define __NR_setreuid (__NR_Linux + 70) +#define __NR_setregid (__NR_Linux + 71) +#define __NR_sigsuspend (__NR_Linux + 72) +#define __NR_sigpending (__NR_Linux + 73) +#define __NR_sethostname (__NR_Linux + 74) +#define __NR_setrlimit (__NR_Linux + 75) +#define __NR_getrlimit (__NR_Linux + 76) +#define __NR_getrusage (__NR_Linux + 77) +#define __NR_gettimeofday (__NR_Linux + 78) +#define __NR_settimeofday (__NR_Linux + 79) +#define __NR_getgroups (__NR_Linux + 80) +#define __NR_setgroups (__NR_Linux + 81) +#define __NR_reserved82 (__NR_Linux + 82) +#define __NR_symlink (__NR_Linux + 83) +#define __NR_unused84 (__NR_Linux + 84) +#define __NR_readlink (__NR_Linux + 85) +#define __NR_uselib (__NR_Linux + 86) +#define __NR_swapon (__NR_Linux + 87) +#define __NR_reboot (__NR_Linux + 88) +#define __NR_readdir (__NR_Linux + 89) +#define __NR_mmap (__NR_Linux + 90) +#define __NR_munmap (__NR_Linux + 91) +#define __NR_truncate (__NR_Linux + 92) +#define __NR_ftruncate (__NR_Linux + 93) +#define __NR_fchmod (__NR_Linux + 94) +#define __NR_fchown (__NR_Linux + 95) +#define __NR_getpriority (__NR_Linux + 96) +#define __NR_setpriority (__NR_Linux + 97) +#define __NR_profil (__NR_Linux + 98) +#define __NR_statfs (__NR_Linux + 99) +#define __NR_fstatfs (__NR_Linux + 100) +#define __NR_ioperm (__NR_Linux + 101) +#define __NR_socketcall (__NR_Linux + 102) +#define __NR_syslog (__NR_Linux + 103) +#define __NR_setitimer (__NR_Linux + 104) +#define __NR_getitimer (__NR_Linux + 105) +#define __NR_stat (__NR_Linux + 106) +#define __NR_lstat (__NR_Linux + 107) +#define __NR_fstat (__NR_Linux + 108) +#define __NR_unused109 (__NR_Linux + 109) +#define __NR_iopl (__NR_Linux + 110) +#define __NR_vhangup (__NR_Linux + 111) +#define __NR_idle (__NR_Linux + 112) +#define __NR_vm86 (__NR_Linux + 113) +#define __NR_wait4 (__NR_Linux + 114) +#define __NR_swapoff (__NR_Linux + 115) +#define __NR_sysinfo (__NR_Linux + 116) +#define __NR_ipc (__NR_Linux + 117) +#define __NR_fsync (__NR_Linux + 118) +#define __NR_sigreturn (__NR_Linux + 119) +#define __NR_clone (__NR_Linux + 120) +#define __NR_setdomainname 
(__NR_Linux + 121) +#define __NR_uname (__NR_Linux + 122) +#define __NR_modify_ldt (__NR_Linux + 123) +#define __NR_adjtimex (__NR_Linux + 124) +#define __NR_mprotect (__NR_Linux + 125) +#define __NR_sigprocmask (__NR_Linux + 126) +#define __NR_create_module (__NR_Linux + 127) +#define __NR_init_module (__NR_Linux + 128) +#define __NR_delete_module (__NR_Linux + 129) +#define __NR_get_kernel_syms (__NR_Linux + 130) +#define __NR_quotactl (__NR_Linux + 131) +#define __NR_getpgid (__NR_Linux + 132) +#define __NR_fchdir (__NR_Linux + 133) +#define __NR_bdflush (__NR_Linux + 134) +#define __NR_sysfs (__NR_Linux + 135) +#define __NR_personality (__NR_Linux + 136) +#define __NR_afs_syscall (__NR_Linux + 137) +#define __NR_setfsuid (__NR_Linux + 138) +#define __NR_setfsgid (__NR_Linux + 139) +#define __NR__llseek (__NR_Linux + 140) +#define __NR_getdents (__NR_Linux + 141) +#define __NR__newselect (__NR_Linux + 142) +#define __NR_flock (__NR_Linux + 143) +#define __NR_msync (__NR_Linux + 144) +#define __NR_readv (__NR_Linux + 145) +#define __NR_writev (__NR_Linux + 146) +#define __NR_cacheflush (__NR_Linux + 147) +#define __NR_cachectl (__NR_Linux + 148) +#define __NR_sysmips (__NR_Linux + 149) +#define __NR_unused150 (__NR_Linux + 150) +#define __NR_getsid (__NR_Linux + 151) +#define __NR_fdatasync (__NR_Linux + 152) +#define __NR__sysctl (__NR_Linux + 153) +#define __NR_mlock (__NR_Linux + 154) +#define __NR_munlock (__NR_Linux + 155) +#define __NR_mlockall (__NR_Linux + 156) +#define __NR_munlockall (__NR_Linux + 157) +#define __NR_sched_setparam (__NR_Linux + 158) +#define __NR_sched_getparam (__NR_Linux + 159) +#define __NR_sched_setscheduler (__NR_Linux + 160) +#define __NR_sched_getscheduler (__NR_Linux + 161) +#define __NR_sched_yield (__NR_Linux + 162) +#define __NR_sched_get_priority_max (__NR_Linux + 163) +#define __NR_sched_get_priority_min (__NR_Linux + 164) +#define __NR_sched_rr_get_interval (__NR_Linux + 165) +#define __NR_nanosleep (__NR_Linux + 166) +#define __NR_mremap (__NR_Linux + 167) +#define __NR_accept (__NR_Linux + 168) +#define __NR_bind (__NR_Linux + 169) +#define __NR_connect (__NR_Linux + 170) +#define __NR_getpeername (__NR_Linux + 171) +#define __NR_getsockname (__NR_Linux + 172) +#define __NR_getsockopt (__NR_Linux + 173) +#define __NR_listen (__NR_Linux + 174) +#define __NR_recv (__NR_Linux + 175) +#define __NR_recvfrom (__NR_Linux + 176) +#define __NR_recvmsg (__NR_Linux + 177) +#define __NR_send (__NR_Linux + 178) +#define __NR_sendmsg (__NR_Linux + 179) +#define __NR_sendto (__NR_Linux + 180) +#define __NR_setsockopt (__NR_Linux + 181) +#define __NR_shutdown (__NR_Linux + 182) +#define __NR_socket (__NR_Linux + 183) +#define __NR_socketpair (__NR_Linux + 184) +#define __NR_setresuid (__NR_Linux + 185) +#define __NR_getresuid (__NR_Linux + 186) +#define __NR_query_module (__NR_Linux + 187) +#define __NR_poll (__NR_Linux + 188) +#define __NR_nfsservctl (__NR_Linux + 189) +#define __NR_setresgid (__NR_Linux + 190) +#define __NR_getresgid (__NR_Linux + 191) +#define __NR_prctl (__NR_Linux + 192) +#define __NR_rt_sigreturn (__NR_Linux + 193) +#define __NR_rt_sigaction (__NR_Linux + 194) +#define __NR_rt_sigprocmask (__NR_Linux + 195) +#define __NR_rt_sigpending (__NR_Linux + 196) +#define __NR_rt_sigtimedwait (__NR_Linux + 197) +#define __NR_rt_sigqueueinfo (__NR_Linux + 198) +#define __NR_rt_sigsuspend (__NR_Linux + 199) +#define __NR_pread64 (__NR_Linux + 200) +#define __NR_pwrite64 (__NR_Linux + 201) +#define __NR_chown (__NR_Linux + 202) +#define __NR_getcwd 
(__NR_Linux + 203) +#define __NR_capget (__NR_Linux + 204) +#define __NR_capset (__NR_Linux + 205) +#define __NR_sigaltstack (__NR_Linux + 206) +#define __NR_sendfile (__NR_Linux + 207) +#define __NR_getpmsg (__NR_Linux + 208) +#define __NR_putpmsg (__NR_Linux + 209) +#define __NR_mmap2 (__NR_Linux + 210) +#define __NR_truncate64 (__NR_Linux + 211) +#define __NR_ftruncate64 (__NR_Linux + 212) +#define __NR_stat64 (__NR_Linux + 213) +#define __NR_lstat64 (__NR_Linux + 214) +#define __NR_fstat64 (__NR_Linux + 215) +#define __NR_pivot_root (__NR_Linux + 216) +#define __NR_mincore (__NR_Linux + 217) +#define __NR_madvise (__NR_Linux + 218) +#define __NR_getdents64 (__NR_Linux + 219) +#define __NR_fcntl64 (__NR_Linux + 220) +#define __NR_reserved221 (__NR_Linux + 221) +#define __NR_gettid (__NR_Linux + 222) +#define __NR_readahead (__NR_Linux + 223) +#define __NR_setxattr (__NR_Linux + 224) +#define __NR_lsetxattr (__NR_Linux + 225) +#define __NR_fsetxattr (__NR_Linux + 226) +#define __NR_getxattr (__NR_Linux + 227) +#define __NR_lgetxattr (__NR_Linux + 228) +#define __NR_fgetxattr (__NR_Linux + 229) +#define __NR_listxattr (__NR_Linux + 230) +#define __NR_llistxattr (__NR_Linux + 231) +#define __NR_flistxattr (__NR_Linux + 232) +#define __NR_removexattr (__NR_Linux + 233) +#define __NR_lremovexattr (__NR_Linux + 234) +#define __NR_fremovexattr (__NR_Linux + 235) +#define __NR_tkill (__NR_Linux + 236) +#define __NR_sendfile64 (__NR_Linux + 237) +#define __NR_futex (__NR_Linux + 238) +#define __NR_sched_setaffinity (__NR_Linux + 239) +#define __NR_sched_getaffinity (__NR_Linux + 240) +#define __NR_io_setup (__NR_Linux + 241) +#define __NR_io_destroy (__NR_Linux + 242) +#define __NR_io_getevents (__NR_Linux + 243) +#define __NR_io_submit (__NR_Linux + 244) +#define __NR_io_cancel (__NR_Linux + 245) +#define __NR_exit_group (__NR_Linux + 246) +#define __NR_lookup_dcookie (__NR_Linux + 247) +#define __NR_epoll_create (__NR_Linux + 248) +#define __NR_epoll_ctl (__NR_Linux + 249) +#define __NR_epoll_wait (__NR_Linux + 250) +#define __NR_remap_file_pages (__NR_Linux + 251) +#define __NR_set_tid_address (__NR_Linux + 252) +#define __NR_restart_syscall (__NR_Linux + 253) +#define __NR_fadvise64 (__NR_Linux + 254) +#define __NR_statfs64 (__NR_Linux + 255) +#define __NR_fstatfs64 (__NR_Linux + 256) +#define __NR_timer_create (__NR_Linux + 257) +#define __NR_timer_settime (__NR_Linux + 258) +#define __NR_timer_gettime (__NR_Linux + 259) +#define __NR_timer_getoverrun (__NR_Linux + 260) +#define __NR_timer_delete (__NR_Linux + 261) +#define __NR_clock_settime (__NR_Linux + 262) +#define __NR_clock_gettime (__NR_Linux + 263) +#define __NR_clock_getres (__NR_Linux + 264) +#define __NR_clock_nanosleep (__NR_Linux + 265) +#define __NR_tgkill (__NR_Linux + 266) +#define __NR_utimes (__NR_Linux + 267) +#define __NR_mbind (__NR_Linux + 268) +#define __NR_get_mempolicy (__NR_Linux + 269) +#define __NR_set_mempolicy (__NR_Linux + 270) +#define __NR_mq_open (__NR_Linux + 271) +#define __NR_mq_unlink (__NR_Linux + 272) +#define __NR_mq_timedsend (__NR_Linux + 273) +#define __NR_mq_timedreceive (__NR_Linux + 274) +#define __NR_mq_notify (__NR_Linux + 275) +#define __NR_mq_getsetattr (__NR_Linux + 276) +#define __NR_vserver (__NR_Linux + 277) +#define __NR_waitid (__NR_Linux + 278) +#define __NR_add_key (__NR_Linux + 280) +#define __NR_request_key (__NR_Linux + 281) +#define __NR_keyctl (__NR_Linux + 282) +#define __NR_set_thread_area (__NR_Linux + 283) +#define __NR_inotify_init (__NR_Linux + 284) +#define 
__NR_inotify_add_watch (__NR_Linux + 285) +#define __NR_inotify_rm_watch (__NR_Linux + 286) +#define __NR_migrate_pages (__NR_Linux + 287) +#define __NR_openat (__NR_Linux + 288) +#define __NR_mkdirat (__NR_Linux + 289) +#define __NR_mknodat (__NR_Linux + 290) +#define __NR_fchownat (__NR_Linux + 291) +#define __NR_futimesat (__NR_Linux + 292) +#define __NR_fstatat64 (__NR_Linux + 293) +#define __NR_unlinkat (__NR_Linux + 294) +#define __NR_renameat (__NR_Linux + 295) +#define __NR_linkat (__NR_Linux + 296) +#define __NR_symlinkat (__NR_Linux + 297) +#define __NR_readlinkat (__NR_Linux + 298) +#define __NR_fchmodat (__NR_Linux + 299) +#define __NR_faccessat (__NR_Linux + 300) +#define __NR_pselect6 (__NR_Linux + 301) +#define __NR_ppoll (__NR_Linux + 302) +#define __NR_unshare (__NR_Linux + 303) +#define __NR_splice (__NR_Linux + 304) +#define __NR_sync_file_range (__NR_Linux + 305) +#define __NR_tee (__NR_Linux + 306) +#define __NR_vmsplice (__NR_Linux + 307) +#define __NR_move_pages (__NR_Linux + 308) +#define __NR_set_robust_list (__NR_Linux + 309) +#define __NR_get_robust_list (__NR_Linux + 310) +#define __NR_kexec_load (__NR_Linux + 311) +#define __NR_getcpu (__NR_Linux + 312) +#define __NR_epoll_pwait (__NR_Linux + 313) +#define __NR_ioprio_set (__NR_Linux + 314) +#define __NR_ioprio_get (__NR_Linux + 315) +#define __NR_utimensat (__NR_Linux + 316) +#define __NR_signalfd (__NR_Linux + 317) +#define __NR_timerfd (__NR_Linux + 318) +#define __NR_eventfd (__NR_Linux + 319) +#define __NR_fallocate (__NR_Linux + 320) +#define __NR_timerfd_create (__NR_Linux + 321) +#define __NR_timerfd_gettime (__NR_Linux + 322) +#define __NR_timerfd_settime (__NR_Linux + 323) +#define __NR_signalfd4 (__NR_Linux + 324) +#define __NR_eventfd2 (__NR_Linux + 325) +#define __NR_epoll_create1 (__NR_Linux + 326) +#define __NR_dup3 (__NR_Linux + 327) +#define __NR_pipe2 (__NR_Linux + 328) +#define __NR_inotify_init1 (__NR_Linux + 329) +#define __NR_preadv (__NR_Linux + 330) +#define __NR_pwritev (__NR_Linux + 331) +#define __NR_rt_tgsigqueueinfo (__NR_Linux + 332) +#define __NR_perf_event_open (__NR_Linux + 333) +#define __NR_accept4 (__NR_Linux + 334) +#define __NR_recvmmsg (__NR_Linux + 335) +#define __NR_fanotify_init (__NR_Linux + 336) +#define __NR_fanotify_mark (__NR_Linux + 337) +#define __NR_prlimit64 (__NR_Linux + 338) +#define __NR_name_to_handle_at (__NR_Linux + 339) +#define __NR_open_by_handle_at (__NR_Linux + 340) +#define __NR_clock_adjtime (__NR_Linux + 341) +#define __NR_syncfs (__NR_Linux + 342) +#define __NR_sendmmsg (__NR_Linux + 343) +#define __NR_setns (__NR_Linux + 344) +#define __NR_process_vm_readv (__NR_Linux + 345) +#define __NR_process_vm_writev (__NR_Linux + 346) +#define __NR_kcmp (__NR_Linux + 347) +#define __NR_finit_module (__NR_Linux + 348) +#define __NR_sched_setattr (__NR_Linux + 349) +#define __NR_sched_getattr (__NR_Linux + 350) +#define __NR_renameat2 (__NR_Linux + 351) +#define __NR_seccomp (__NR_Linux + 352) +#define __NR_getrandom (__NR_Linux + 353) +#define __NR_memfd_create (__NR_Linux + 354) +#define __NR_bpf (__NR_Linux + 355) +#define __NR_execveat (__NR_Linux + 356) +#define __NR_userfaultfd (__NR_Linux + 357) +#define __NR_membarrier (__NR_Linux + 358) +#define __NR_mlock2 (__NR_Linux + 359) +#define __NR_copy_file_range (__NR_Linux + 360) +#define __NR_preadv2 (__NR_Linux + 361) +#define __NR_pwritev2 (__NR_Linux + 362) +#define __NR_pkey_mprotect (__NR_Linux + 363) +#define __NR_pkey_alloc (__NR_Linux + 364) +#define __NR_pkey_free (__NR_Linux + 365) +#define 
__NR_statx (__NR_Linux + 366) +#define __NR_rseq (__NR_Linux + 367) +#define __NR_io_pgetevents (__NR_Linux + 368) +#define __NR_semget (__NR_Linux + 393) +#define __NR_semctl (__NR_Linux + 394) +#define __NR_shmget (__NR_Linux + 395) +#define __NR_shmctl (__NR_Linux + 396) +#define __NR_shmat (__NR_Linux + 397) +#define __NR_shmdt (__NR_Linux + 398) +#define __NR_msgget (__NR_Linux + 399) +#define __NR_msgsnd (__NR_Linux + 400) +#define __NR_msgrcv (__NR_Linux + 401) +#define __NR_msgctl (__NR_Linux + 402) +#define __NR_clock_gettime64 (__NR_Linux + 403) +#define __NR_clock_settime64 (__NR_Linux + 404) +#define __NR_clock_adjtime64 (__NR_Linux + 405) +#define __NR_clock_getres_time64 (__NR_Linux + 406) +#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) +#define __NR_timer_gettime64 (__NR_Linux + 408) +#define __NR_timer_settime64 (__NR_Linux + 409) +#define __NR_timerfd_gettime64 (__NR_Linux + 410) +#define __NR_timerfd_settime64 (__NR_Linux + 411) +#define __NR_utimensat_time64 (__NR_Linux + 412) +#define __NR_pselect6_time64 (__NR_Linux + 413) +#define __NR_ppoll_time64 (__NR_Linux + 414) +#define __NR_io_pgetevents_time64 (__NR_Linux + 416) +#define __NR_recvmmsg_time64 (__NR_Linux + 417) +#define __NR_mq_timedsend_time64 (__NR_Linux + 418) +#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) +#define __NR_semtimedop_time64 (__NR_Linux + 420) +#define __NR_rt_sigtimedwait_time64 (__NR_Linux + 421) +#define __NR_futex_time64 (__NR_Linux + 422) +#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) +#define __NR_pidfd_send_signal (__NR_Linux + 424) +#define __NR_io_uring_setup (__NR_Linux + 425) +#define __NR_io_uring_enter (__NR_Linux + 426) +#define __NR_io_uring_register (__NR_Linux + 427) +#define __NR_open_tree (__NR_Linux + 428) +#define __NR_move_mount (__NR_Linux + 429) +#define __NR_fsopen (__NR_Linux + 430) +#define __NR_fsconfig (__NR_Linux + 431) +#define __NR_fsmount (__NR_Linux + 432) +#define __NR_fspick (__NR_Linux + 433) +#define __NR_pidfd_open (__NR_Linux + 434) +#define __NR_clone3 (__NR_Linux + 435) +#define __NR_close_range (__NR_Linux + 436) +#define __NR_openat2 (__NR_Linux + 437) +#define __NR_pidfd_getfd (__NR_Linux + 438) +#define __NR_faccessat2 (__NR_Linux + 439) +#define __NR_process_madvise (__NR_Linux + 440) +#define __NR_epoll_pwait2 (__NR_Linux + 441) +#define __NR_mount_setattr (__NR_Linux + 442) +#define __NR_quotactl_fd (__NR_Linux + 443) +#define __NR_landlock_create_ruleset (__NR_Linux + 444) +#define __NR_landlock_add_rule (__NR_Linux + 445) +#define __NR_landlock_restrict_self (__NR_Linux + 446) - -#endif /* _ASM_MIPS_UNISTD_O32_H */ +#endif /* _ASM_UNISTD_O32_H */ diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index c3af3f324c5..9f18fa090f1 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) #define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) +#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) +#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h index 4624c900436..cd5a8a41b26 100644 --- a/linux-headers/asm-powerpc/unistd_32.h +++ 
b/linux-headers/asm-powerpc/unistd_32.h @@ -1,429 +1,434 @@ -#ifndef _ASM_POWERPC_UNISTD_32_H -#define _ASM_POWERPC_UNISTD_32_H +#ifndef _ASM_UNISTD_32_H +#define _ASM_UNISTD_32_H -#define __NR_restart_syscall 0 -#define __NR_exit 1 -#define __NR_fork 2 -#define __NR_read 3 -#define __NR_write 4 -#define __NR_open 5 -#define __NR_close 6 -#define __NR_waitpid 7 -#define __NR_creat 8 -#define __NR_link 9 -#define __NR_unlink 10 -#define __NR_execve 11 -#define __NR_chdir 12 -#define __NR_time 13 -#define __NR_mknod 14 -#define __NR_chmod 15 -#define __NR_lchown 16 -#define __NR_break 17 -#define __NR_oldstat 18 -#define __NR_lseek 19 -#define __NR_getpid 20 -#define __NR_mount 21 -#define __NR_umount 22 -#define __NR_setuid 23 -#define __NR_getuid 24 -#define __NR_stime 25 -#define __NR_ptrace 26 -#define __NR_alarm 27 -#define __NR_oldfstat 28 -#define __NR_pause 29 -#define __NR_utime 30 -#define __NR_stty 31 -#define __NR_gtty 32 -#define __NR_access 33 -#define __NR_nice 34 -#define __NR_ftime 35 -#define __NR_sync 36 -#define __NR_kill 37 -#define __NR_rename 38 -#define __NR_mkdir 39 -#define __NR_rmdir 40 -#define __NR_dup 41 -#define __NR_pipe 42 -#define __NR_times 43 -#define __NR_prof 44 -#define __NR_brk 45 -#define __NR_setgid 46 -#define __NR_getgid 47 -#define __NR_signal 48 -#define __NR_geteuid 49 -#define __NR_getegid 50 -#define __NR_acct 51 -#define __NR_umount2 52 -#define __NR_lock 53 -#define __NR_ioctl 54 -#define __NR_fcntl 55 -#define __NR_mpx 56 -#define __NR_setpgid 57 -#define __NR_ulimit 58 -#define __NR_oldolduname 59 -#define __NR_umask 60 -#define __NR_chroot 61 -#define __NR_ustat 62 -#define __NR_dup2 63 -#define __NR_getppid 64 -#define __NR_getpgrp 65 -#define __NR_setsid 66 -#define __NR_sigaction 67 -#define __NR_sgetmask 68 -#define __NR_ssetmask 69 -#define __NR_setreuid 70 -#define __NR_setregid 71 -#define __NR_sigsuspend 72 -#define __NR_sigpending 73 -#define __NR_sethostname 74 -#define __NR_setrlimit 75 -#define __NR_getrlimit 76 -#define __NR_getrusage 77 -#define __NR_gettimeofday 78 -#define __NR_settimeofday 79 -#define __NR_getgroups 80 -#define __NR_setgroups 81 -#define __NR_select 82 -#define __NR_symlink 83 -#define __NR_oldlstat 84 -#define __NR_readlink 85 -#define __NR_uselib 86 -#define __NR_swapon 87 -#define __NR_reboot 88 -#define __NR_readdir 89 -#define __NR_mmap 90 -#define __NR_munmap 91 -#define __NR_truncate 92 -#define __NR_ftruncate 93 -#define __NR_fchmod 94 -#define __NR_fchown 95 -#define __NR_getpriority 96 -#define __NR_setpriority 97 -#define __NR_profil 98 -#define __NR_statfs 99 -#define __NR_fstatfs 100 -#define __NR_ioperm 101 -#define __NR_socketcall 102 -#define __NR_syslog 103 -#define __NR_setitimer 104 -#define __NR_getitimer 105 -#define __NR_stat 106 -#define __NR_lstat 107 -#define __NR_fstat 108 -#define __NR_olduname 109 -#define __NR_iopl 110 -#define __NR_vhangup 111 -#define __NR_idle 112 -#define __NR_vm86 113 -#define __NR_wait4 114 -#define __NR_swapoff 115 -#define __NR_sysinfo 116 -#define __NR_ipc 117 -#define __NR_fsync 118 -#define __NR_sigreturn 119 -#define __NR_clone 120 -#define __NR_setdomainname 121 -#define __NR_uname 122 -#define __NR_modify_ldt 123 -#define __NR_adjtimex 124 -#define __NR_mprotect 125 -#define __NR_sigprocmask 126 -#define __NR_create_module 127 -#define __NR_init_module 128 -#define __NR_delete_module 129 -#define __NR_get_kernel_syms 130 -#define __NR_quotactl 131 -#define __NR_getpgid 132 -#define __NR_fchdir 133 -#define __NR_bdflush 134 -#define __NR_sysfs 135 
-#define __NR_personality 136 -#define __NR_afs_syscall 137 -#define __NR_setfsuid 138 -#define __NR_setfsgid 139 -#define __NR__llseek 140 -#define __NR_getdents 141 -#define __NR__newselect 142 -#define __NR_flock 143 -#define __NR_msync 144 -#define __NR_readv 145 -#define __NR_writev 146 -#define __NR_getsid 147 -#define __NR_fdatasync 148 -#define __NR__sysctl 149 -#define __NR_mlock 150 -#define __NR_munlock 151 -#define __NR_mlockall 152 -#define __NR_munlockall 153 -#define __NR_sched_setparam 154 -#define __NR_sched_getparam 155 -#define __NR_sched_setscheduler 156 -#define __NR_sched_getscheduler 157 -#define __NR_sched_yield 158 -#define __NR_sched_get_priority_max 159 -#define __NR_sched_get_priority_min 160 -#define __NR_sched_rr_get_interval 161 -#define __NR_nanosleep 162 -#define __NR_mremap 163 -#define __NR_setresuid 164 -#define __NR_getresuid 165 -#define __NR_query_module 166 -#define __NR_poll 167 -#define __NR_nfsservctl 168 -#define __NR_setresgid 169 -#define __NR_getresgid 170 -#define __NR_prctl 171 -#define __NR_rt_sigreturn 172 -#define __NR_rt_sigaction 173 -#define __NR_rt_sigprocmask 174 -#define __NR_rt_sigpending 175 -#define __NR_rt_sigtimedwait 176 -#define __NR_rt_sigqueueinfo 177 -#define __NR_rt_sigsuspend 178 -#define __NR_pread64 179 -#define __NR_pwrite64 180 -#define __NR_chown 181 -#define __NR_getcwd 182 -#define __NR_capget 183 -#define __NR_capset 184 -#define __NR_sigaltstack 185 -#define __NR_sendfile 186 -#define __NR_getpmsg 187 -#define __NR_putpmsg 188 -#define __NR_vfork 189 -#define __NR_ugetrlimit 190 -#define __NR_readahead 191 -#define __NR_mmap2 192 -#define __NR_truncate64 193 -#define __NR_ftruncate64 194 -#define __NR_stat64 195 -#define __NR_lstat64 196 -#define __NR_fstat64 197 -#define __NR_pciconfig_read 198 -#define __NR_pciconfig_write 199 -#define __NR_pciconfig_iobase 200 -#define __NR_multiplexer 201 -#define __NR_getdents64 202 -#define __NR_pivot_root 203 -#define __NR_fcntl64 204 -#define __NR_madvise 205 -#define __NR_mincore 206 -#define __NR_gettid 207 -#define __NR_tkill 208 -#define __NR_setxattr 209 -#define __NR_lsetxattr 210 -#define __NR_fsetxattr 211 -#define __NR_getxattr 212 -#define __NR_lgetxattr 213 -#define __NR_fgetxattr 214 -#define __NR_listxattr 215 -#define __NR_llistxattr 216 -#define __NR_flistxattr 217 -#define __NR_removexattr 218 -#define __NR_lremovexattr 219 -#define __NR_fremovexattr 220 -#define __NR_futex 221 -#define __NR_sched_setaffinity 222 -#define __NR_sched_getaffinity 223 -#define __NR_tuxcall 225 -#define __NR_sendfile64 226 -#define __NR_io_setup 227 -#define __NR_io_destroy 228 -#define __NR_io_getevents 229 -#define __NR_io_submit 230 -#define __NR_io_cancel 231 -#define __NR_set_tid_address 232 -#define __NR_fadvise64 233 -#define __NR_exit_group 234 -#define __NR_lookup_dcookie 235 -#define __NR_epoll_create 236 -#define __NR_epoll_ctl 237 -#define __NR_epoll_wait 238 -#define __NR_remap_file_pages 239 -#define __NR_timer_create 240 -#define __NR_timer_settime 241 -#define __NR_timer_gettime 242 -#define __NR_timer_getoverrun 243 -#define __NR_timer_delete 244 -#define __NR_clock_settime 245 -#define __NR_clock_gettime 246 -#define __NR_clock_getres 247 -#define __NR_clock_nanosleep 248 -#define __NR_swapcontext 249 -#define __NR_tgkill 250 -#define __NR_utimes 251 -#define __NR_statfs64 252 -#define __NR_fstatfs64 253 -#define __NR_fadvise64_64 254 -#define __NR_rtas 255 -#define __NR_sys_debug_setcontext 256 -#define __NR_migrate_pages 258 -#define __NR_mbind 259 
-#define __NR_get_mempolicy 260 -#define __NR_set_mempolicy 261 -#define __NR_mq_open 262 -#define __NR_mq_unlink 263 -#define __NR_mq_timedsend 264 -#define __NR_mq_timedreceive 265 -#define __NR_mq_notify 266 -#define __NR_mq_getsetattr 267 -#define __NR_kexec_load 268 -#define __NR_add_key 269 -#define __NR_request_key 270 -#define __NR_keyctl 271 -#define __NR_waitid 272 -#define __NR_ioprio_set 273 -#define __NR_ioprio_get 274 -#define __NR_inotify_init 275 -#define __NR_inotify_add_watch 276 -#define __NR_inotify_rm_watch 277 -#define __NR_spu_run 278 -#define __NR_spu_create 279 -#define __NR_pselect6 280 -#define __NR_ppoll 281 -#define __NR_unshare 282 -#define __NR_splice 283 -#define __NR_tee 284 -#define __NR_vmsplice 285 -#define __NR_openat 286 -#define __NR_mkdirat 287 -#define __NR_mknodat 288 -#define __NR_fchownat 289 -#define __NR_futimesat 290 -#define __NR_fstatat64 291 -#define __NR_unlinkat 292 -#define __NR_renameat 293 -#define __NR_linkat 294 -#define __NR_symlinkat 295 -#define __NR_readlinkat 296 -#define __NR_fchmodat 297 -#define __NR_faccessat 298 -#define __NR_get_robust_list 299 -#define __NR_set_robust_list 300 -#define __NR_move_pages 301 -#define __NR_getcpu 302 -#define __NR_epoll_pwait 303 -#define __NR_utimensat 304 -#define __NR_signalfd 305 -#define __NR_timerfd_create 306 -#define __NR_eventfd 307 -#define __NR_sync_file_range2 308 -#define __NR_fallocate 309 -#define __NR_subpage_prot 310 -#define __NR_timerfd_settime 311 -#define __NR_timerfd_gettime 312 -#define __NR_signalfd4 313 -#define __NR_eventfd2 314 -#define __NR_epoll_create1 315 -#define __NR_dup3 316 -#define __NR_pipe2 317 -#define __NR_inotify_init1 318 -#define __NR_perf_event_open 319 -#define __NR_preadv 320 -#define __NR_pwritev 321 -#define __NR_rt_tgsigqueueinfo 322 -#define __NR_fanotify_init 323 -#define __NR_fanotify_mark 324 -#define __NR_prlimit64 325 -#define __NR_socket 326 -#define __NR_bind 327 -#define __NR_connect 328 -#define __NR_listen 329 -#define __NR_accept 330 -#define __NR_getsockname 331 -#define __NR_getpeername 332 -#define __NR_socketpair 333 -#define __NR_send 334 -#define __NR_sendto 335 -#define __NR_recv 336 -#define __NR_recvfrom 337 -#define __NR_shutdown 338 -#define __NR_setsockopt 339 -#define __NR_getsockopt 340 -#define __NR_sendmsg 341 -#define __NR_recvmsg 342 -#define __NR_recvmmsg 343 -#define __NR_accept4 344 -#define __NR_name_to_handle_at 345 -#define __NR_open_by_handle_at 346 -#define __NR_clock_adjtime 347 -#define __NR_syncfs 348 -#define __NR_sendmmsg 349 -#define __NR_setns 350 -#define __NR_process_vm_readv 351 -#define __NR_process_vm_writev 352 -#define __NR_finit_module 353 -#define __NR_kcmp 354 -#define __NR_sched_setattr 355 -#define __NR_sched_getattr 356 -#define __NR_renameat2 357 -#define __NR_seccomp 358 -#define __NR_getrandom 359 -#define __NR_memfd_create 360 -#define __NR_bpf 361 -#define __NR_execveat 362 -#define __NR_switch_endian 363 -#define __NR_userfaultfd 364 -#define __NR_membarrier 365 -#define __NR_mlock2 378 -#define __NR_copy_file_range 379 -#define __NR_preadv2 380 -#define __NR_pwritev2 381 -#define __NR_kexec_file_load 382 -#define __NR_statx 383 -#define __NR_pkey_alloc 384 -#define __NR_pkey_free 385 -#define __NR_pkey_mprotect 386 -#define __NR_rseq 387 -#define __NR_io_pgetevents 388 -#define __NR_semget 393 -#define __NR_semctl 394 -#define __NR_shmget 395 -#define __NR_shmctl 396 -#define __NR_shmat 397 -#define __NR_shmdt 398 -#define __NR_msgget 399 -#define __NR_msgsnd 400 -#define 
__NR_msgrcv 401 -#define __NR_msgctl 402 -#define __NR_clock_gettime64 403 -#define __NR_clock_settime64 404 -#define __NR_clock_adjtime64 405 -#define __NR_clock_getres_time64 406 -#define __NR_clock_nanosleep_time64 407 -#define __NR_timer_gettime64 408 -#define __NR_timer_settime64 409 -#define __NR_timerfd_gettime64 410 -#define __NR_timerfd_settime64 411 -#define __NR_utimensat_time64 412 -#define __NR_pselect6_time64 413 -#define __NR_ppoll_time64 414 -#define __NR_io_pgetevents_time64 416 -#define __NR_recvmmsg_time64 417 -#define __NR_mq_timedsend_time64 418 -#define __NR_mq_timedreceive_time64 419 -#define __NR_semtimedop_time64 420 -#define __NR_rt_sigtimedwait_time64 421 -#define __NR_futex_time64 422 -#define __NR_sched_rr_get_interval_time64 423 -#define __NR_pidfd_send_signal 424 -#define __NR_io_uring_setup 425 -#define __NR_io_uring_enter 426 -#define __NR_io_uring_register 427 -#define __NR_open_tree 428 -#define __NR_move_mount 429 -#define __NR_fsopen 430 -#define __NR_fsconfig 431 -#define __NR_fsmount 432 -#define __NR_fspick 433 -#define __NR_pidfd_open 434 -#define __NR_clone3 435 -#define __NR_close_range 436 -#define __NR_openat2 437 -#define __NR_pidfd_getfd 438 -#define __NR_faccessat2 439 -#define __NR_process_madvise 440 -#define __NR_epoll_pwait2 441 +#define __NR_restart_syscall 0 +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_waitpid 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lchown 16 +#define __NR_break 17 +#define __NR_oldstat 18 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_oldfstat 28 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_stty 31 +#define __NR_gtty 32 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_ftime 35 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_prof 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_signal 48 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_lock 53 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_mpx 56 +#define __NR_setpgid 57 +#define __NR_ulimit 58 +#define __NR_oldolduname 59 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sgetmask 68 +#define __NR_ssetmask 69 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_oldlstat 84 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define 
__NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_profil 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_ioperm 101 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_olduname 109 +#define __NR_iopl 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_vm86 113 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_modify_ldt 123 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR_getdents 141 +#define __NR__newselect 142 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_query_module 166 +#define __NR_poll 167 +#define __NR_nfsservctl 168 +#define __NR_setresgid 169 +#define __NR_getresgid 170 +#define __NR_prctl 171 +#define __NR_rt_sigreturn 172 +#define __NR_rt_sigaction 173 +#define __NR_rt_sigprocmask 174 +#define __NR_rt_sigpending 175 +#define __NR_rt_sigtimedwait 176 +#define __NR_rt_sigqueueinfo 177 +#define __NR_rt_sigsuspend 178 +#define __NR_pread64 179 +#define __NR_pwrite64 180 +#define __NR_chown 181 +#define __NR_getcwd 182 +#define __NR_capget 183 +#define __NR_capset 184 +#define __NR_sigaltstack 185 +#define __NR_sendfile 186 +#define __NR_getpmsg 187 +#define __NR_putpmsg 188 +#define __NR_vfork 189 +#define __NR_ugetrlimit 190 +#define __NR_readahead 191 +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_pciconfig_read 198 +#define __NR_pciconfig_write 199 +#define __NR_pciconfig_iobase 200 +#define __NR_multiplexer 201 +#define __NR_getdents64 202 +#define __NR_pivot_root 203 +#define __NR_fcntl64 204 +#define __NR_madvise 205 +#define __NR_mincore 206 +#define __NR_gettid 207 +#define __NR_tkill 208 +#define __NR_setxattr 209 +#define __NR_lsetxattr 210 +#define __NR_fsetxattr 211 +#define __NR_getxattr 212 +#define __NR_lgetxattr 213 +#define __NR_fgetxattr 214 +#define __NR_listxattr 215 +#define __NR_llistxattr 216 +#define __NR_flistxattr 217 +#define __NR_removexattr 218 +#define __NR_lremovexattr 219 +#define __NR_fremovexattr 
220 +#define __NR_futex 221 +#define __NR_sched_setaffinity 222 +#define __NR_sched_getaffinity 223 +#define __NR_tuxcall 225 +#define __NR_sendfile64 226 +#define __NR_io_setup 227 +#define __NR_io_destroy 228 +#define __NR_io_getevents 229 +#define __NR_io_submit 230 +#define __NR_io_cancel 231 +#define __NR_set_tid_address 232 +#define __NR_fadvise64 233 +#define __NR_exit_group 234 +#define __NR_lookup_dcookie 235 +#define __NR_epoll_create 236 +#define __NR_epoll_ctl 237 +#define __NR_epoll_wait 238 +#define __NR_remap_file_pages 239 +#define __NR_timer_create 240 +#define __NR_timer_settime 241 +#define __NR_timer_gettime 242 +#define __NR_timer_getoverrun 243 +#define __NR_timer_delete 244 +#define __NR_clock_settime 245 +#define __NR_clock_gettime 246 +#define __NR_clock_getres 247 +#define __NR_clock_nanosleep 248 +#define __NR_swapcontext 249 +#define __NR_tgkill 250 +#define __NR_utimes 251 +#define __NR_statfs64 252 +#define __NR_fstatfs64 253 +#define __NR_fadvise64_64 254 +#define __NR_rtas 255 +#define __NR_sys_debug_setcontext 256 +#define __NR_migrate_pages 258 +#define __NR_mbind 259 +#define __NR_get_mempolicy 260 +#define __NR_set_mempolicy 261 +#define __NR_mq_open 262 +#define __NR_mq_unlink 263 +#define __NR_mq_timedsend 264 +#define __NR_mq_timedreceive 265 +#define __NR_mq_notify 266 +#define __NR_mq_getsetattr 267 +#define __NR_kexec_load 268 +#define __NR_add_key 269 +#define __NR_request_key 270 +#define __NR_keyctl 271 +#define __NR_waitid 272 +#define __NR_ioprio_set 273 +#define __NR_ioprio_get 274 +#define __NR_inotify_init 275 +#define __NR_inotify_add_watch 276 +#define __NR_inotify_rm_watch 277 +#define __NR_spu_run 278 +#define __NR_spu_create 279 +#define __NR_pselect6 280 +#define __NR_ppoll 281 +#define __NR_unshare 282 +#define __NR_splice 283 +#define __NR_tee 284 +#define __NR_vmsplice 285 +#define __NR_openat 286 +#define __NR_mkdirat 287 +#define __NR_mknodat 288 +#define __NR_fchownat 289 +#define __NR_futimesat 290 +#define __NR_fstatat64 291 +#define __NR_unlinkat 292 +#define __NR_renameat 293 +#define __NR_linkat 294 +#define __NR_symlinkat 295 +#define __NR_readlinkat 296 +#define __NR_fchmodat 297 +#define __NR_faccessat 298 +#define __NR_get_robust_list 299 +#define __NR_set_robust_list 300 +#define __NR_move_pages 301 +#define __NR_getcpu 302 +#define __NR_epoll_pwait 303 +#define __NR_utimensat 304 +#define __NR_signalfd 305 +#define __NR_timerfd_create 306 +#define __NR_eventfd 307 +#define __NR_sync_file_range2 308 +#define __NR_fallocate 309 +#define __NR_subpage_prot 310 +#define __NR_timerfd_settime 311 +#define __NR_timerfd_gettime 312 +#define __NR_signalfd4 313 +#define __NR_eventfd2 314 +#define __NR_epoll_create1 315 +#define __NR_dup3 316 +#define __NR_pipe2 317 +#define __NR_inotify_init1 318 +#define __NR_perf_event_open 319 +#define __NR_preadv 320 +#define __NR_pwritev 321 +#define __NR_rt_tgsigqueueinfo 322 +#define __NR_fanotify_init 323 +#define __NR_fanotify_mark 324 +#define __NR_prlimit64 325 +#define __NR_socket 326 +#define __NR_bind 327 +#define __NR_connect 328 +#define __NR_listen 329 +#define __NR_accept 330 +#define __NR_getsockname 331 +#define __NR_getpeername 332 +#define __NR_socketpair 333 +#define __NR_send 334 +#define __NR_sendto 335 +#define __NR_recv 336 +#define __NR_recvfrom 337 +#define __NR_shutdown 338 +#define __NR_setsockopt 339 +#define __NR_getsockopt 340 +#define __NR_sendmsg 341 +#define __NR_recvmsg 342 +#define __NR_recvmmsg 343 +#define __NR_accept4 344 +#define __NR_name_to_handle_at 
345 +#define __NR_open_by_handle_at 346 +#define __NR_clock_adjtime 347 +#define __NR_syncfs 348 +#define __NR_sendmmsg 349 +#define __NR_setns 350 +#define __NR_process_vm_readv 351 +#define __NR_process_vm_writev 352 +#define __NR_finit_module 353 +#define __NR_kcmp 354 +#define __NR_sched_setattr 355 +#define __NR_sched_getattr 356 +#define __NR_renameat2 357 +#define __NR_seccomp 358 +#define __NR_getrandom 359 +#define __NR_memfd_create 360 +#define __NR_bpf 361 +#define __NR_execveat 362 +#define __NR_switch_endian 363 +#define __NR_userfaultfd 364 +#define __NR_membarrier 365 +#define __NR_mlock2 378 +#define __NR_copy_file_range 379 +#define __NR_preadv2 380 +#define __NR_pwritev2 381 +#define __NR_kexec_file_load 382 +#define __NR_statx 383 +#define __NR_pkey_alloc 384 +#define __NR_pkey_free 385 +#define __NR_pkey_mprotect 386 +#define __NR_rseq 387 +#define __NR_io_pgetevents 388 +#define __NR_semget 393 +#define __NR_semctl 394 +#define __NR_shmget 395 +#define __NR_shmctl 396 +#define __NR_shmat 397 +#define __NR_shmdt 398 +#define __NR_msgget 399 +#define __NR_msgsnd 400 +#define __NR_msgrcv 401 +#define __NR_msgctl 402 +#define __NR_clock_gettime64 403 +#define __NR_clock_settime64 404 +#define __NR_clock_adjtime64 405 +#define __NR_clock_getres_time64 406 +#define __NR_clock_nanosleep_time64 407 +#define __NR_timer_gettime64 408 +#define __NR_timer_settime64 409 +#define __NR_timerfd_gettime64 410 +#define __NR_timerfd_settime64 411 +#define __NR_utimensat_time64 412 +#define __NR_pselect6_time64 413 +#define __NR_ppoll_time64 414 +#define __NR_io_pgetevents_time64 416 +#define __NR_recvmmsg_time64 417 +#define __NR_mq_timedsend_time64 418 +#define __NR_mq_timedreceive_time64 419 +#define __NR_semtimedop_time64 420 +#define __NR_rt_sigtimedwait_time64 421 +#define __NR_futex_time64 422 +#define __NR_sched_rr_get_interval_time64 423 +#define __NR_pidfd_send_signal 424 +#define __NR_io_uring_setup 425 +#define __NR_io_uring_enter 426 +#define __NR_io_uring_register 427 +#define __NR_open_tree 428 +#define __NR_move_mount 429 +#define __NR_fsopen 430 +#define __NR_fsconfig 431 +#define __NR_fsmount 432 +#define __NR_fspick 433 +#define __NR_pidfd_open 434 +#define __NR_clone3 435 +#define __NR_close_range 436 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 +#define __NR_process_madvise 440 +#define __NR_epoll_pwait2 441 +#define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 +#define __NR_landlock_create_ruleset 444 +#define __NR_landlock_add_rule 445 +#define __NR_landlock_restrict_self 446 -#endif /* _ASM_POWERPC_UNISTD_32_H */ +#endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h index 7e851b30bb1..8458effa8d8 100644 --- a/linux-headers/asm-powerpc/unistd_64.h +++ b/linux-headers/asm-powerpc/unistd_64.h @@ -1,401 +1,406 @@ -#ifndef _ASM_POWERPC_UNISTD_64_H -#define _ASM_POWERPC_UNISTD_64_H +#ifndef _ASM_UNISTD_64_H +#define _ASM_UNISTD_64_H -#define __NR_restart_syscall 0 -#define __NR_exit 1 -#define __NR_fork 2 -#define __NR_read 3 -#define __NR_write 4 -#define __NR_open 5 -#define __NR_close 6 -#define __NR_waitpid 7 -#define __NR_creat 8 -#define __NR_link 9 -#define __NR_unlink 10 -#define __NR_execve 11 -#define __NR_chdir 12 -#define __NR_time 13 -#define __NR_mknod 14 -#define __NR_chmod 15 -#define __NR_lchown 16 -#define __NR_break 17 -#define __NR_oldstat 18 -#define __NR_lseek 19 -#define __NR_getpid 20 -#define __NR_mount 21 -#define __NR_umount 22 
-#define __NR_setuid 23 -#define __NR_getuid 24 -#define __NR_stime 25 -#define __NR_ptrace 26 -#define __NR_alarm 27 -#define __NR_oldfstat 28 -#define __NR_pause 29 -#define __NR_utime 30 -#define __NR_stty 31 -#define __NR_gtty 32 -#define __NR_access 33 -#define __NR_nice 34 -#define __NR_ftime 35 -#define __NR_sync 36 -#define __NR_kill 37 -#define __NR_rename 38 -#define __NR_mkdir 39 -#define __NR_rmdir 40 -#define __NR_dup 41 -#define __NR_pipe 42 -#define __NR_times 43 -#define __NR_prof 44 -#define __NR_brk 45 -#define __NR_setgid 46 -#define __NR_getgid 47 -#define __NR_signal 48 -#define __NR_geteuid 49 -#define __NR_getegid 50 -#define __NR_acct 51 -#define __NR_umount2 52 -#define __NR_lock 53 -#define __NR_ioctl 54 -#define __NR_fcntl 55 -#define __NR_mpx 56 -#define __NR_setpgid 57 -#define __NR_ulimit 58 -#define __NR_oldolduname 59 -#define __NR_umask 60 -#define __NR_chroot 61 -#define __NR_ustat 62 -#define __NR_dup2 63 -#define __NR_getppid 64 -#define __NR_getpgrp 65 -#define __NR_setsid 66 -#define __NR_sigaction 67 -#define __NR_sgetmask 68 -#define __NR_ssetmask 69 -#define __NR_setreuid 70 -#define __NR_setregid 71 -#define __NR_sigsuspend 72 -#define __NR_sigpending 73 -#define __NR_sethostname 74 -#define __NR_setrlimit 75 -#define __NR_getrlimit 76 -#define __NR_getrusage 77 -#define __NR_gettimeofday 78 -#define __NR_settimeofday 79 -#define __NR_getgroups 80 -#define __NR_setgroups 81 -#define __NR_select 82 -#define __NR_symlink 83 -#define __NR_oldlstat 84 -#define __NR_readlink 85 -#define __NR_uselib 86 -#define __NR_swapon 87 -#define __NR_reboot 88 -#define __NR_readdir 89 -#define __NR_mmap 90 -#define __NR_munmap 91 -#define __NR_truncate 92 -#define __NR_ftruncate 93 -#define __NR_fchmod 94 -#define __NR_fchown 95 -#define __NR_getpriority 96 -#define __NR_setpriority 97 -#define __NR_profil 98 -#define __NR_statfs 99 -#define __NR_fstatfs 100 -#define __NR_ioperm 101 -#define __NR_socketcall 102 -#define __NR_syslog 103 -#define __NR_setitimer 104 -#define __NR_getitimer 105 -#define __NR_stat 106 -#define __NR_lstat 107 -#define __NR_fstat 108 -#define __NR_olduname 109 -#define __NR_iopl 110 -#define __NR_vhangup 111 -#define __NR_idle 112 -#define __NR_vm86 113 -#define __NR_wait4 114 -#define __NR_swapoff 115 -#define __NR_sysinfo 116 -#define __NR_ipc 117 -#define __NR_fsync 118 -#define __NR_sigreturn 119 -#define __NR_clone 120 -#define __NR_setdomainname 121 -#define __NR_uname 122 -#define __NR_modify_ldt 123 -#define __NR_adjtimex 124 -#define __NR_mprotect 125 -#define __NR_sigprocmask 126 -#define __NR_create_module 127 -#define __NR_init_module 128 -#define __NR_delete_module 129 -#define __NR_get_kernel_syms 130 -#define __NR_quotactl 131 -#define __NR_getpgid 132 -#define __NR_fchdir 133 -#define __NR_bdflush 134 -#define __NR_sysfs 135 -#define __NR_personality 136 -#define __NR_afs_syscall 137 -#define __NR_setfsuid 138 -#define __NR_setfsgid 139 -#define __NR__llseek 140 -#define __NR_getdents 141 -#define __NR__newselect 142 -#define __NR_flock 143 -#define __NR_msync 144 -#define __NR_readv 145 -#define __NR_writev 146 -#define __NR_getsid 147 -#define __NR_fdatasync 148 -#define __NR__sysctl 149 -#define __NR_mlock 150 -#define __NR_munlock 151 -#define __NR_mlockall 152 -#define __NR_munlockall 153 -#define __NR_sched_setparam 154 -#define __NR_sched_getparam 155 -#define __NR_sched_setscheduler 156 -#define __NR_sched_getscheduler 157 -#define __NR_sched_yield 158 -#define __NR_sched_get_priority_max 159 -#define 
__NR_sched_get_priority_min 160 -#define __NR_sched_rr_get_interval 161 -#define __NR_nanosleep 162 -#define __NR_mremap 163 -#define __NR_setresuid 164 -#define __NR_getresuid 165 -#define __NR_query_module 166 -#define __NR_poll 167 -#define __NR_nfsservctl 168 -#define __NR_setresgid 169 -#define __NR_getresgid 170 -#define __NR_prctl 171 -#define __NR_rt_sigreturn 172 -#define __NR_rt_sigaction 173 -#define __NR_rt_sigprocmask 174 -#define __NR_rt_sigpending 175 -#define __NR_rt_sigtimedwait 176 -#define __NR_rt_sigqueueinfo 177 -#define __NR_rt_sigsuspend 178 -#define __NR_pread64 179 -#define __NR_pwrite64 180 -#define __NR_chown 181 -#define __NR_getcwd 182 -#define __NR_capget 183 -#define __NR_capset 184 -#define __NR_sigaltstack 185 -#define __NR_sendfile 186 -#define __NR_getpmsg 187 -#define __NR_putpmsg 188 -#define __NR_vfork 189 -#define __NR_ugetrlimit 190 -#define __NR_readahead 191 -#define __NR_pciconfig_read 198 -#define __NR_pciconfig_write 199 -#define __NR_pciconfig_iobase 200 -#define __NR_multiplexer 201 -#define __NR_getdents64 202 -#define __NR_pivot_root 203 -#define __NR_madvise 205 -#define __NR_mincore 206 -#define __NR_gettid 207 -#define __NR_tkill 208 -#define __NR_setxattr 209 -#define __NR_lsetxattr 210 -#define __NR_fsetxattr 211 -#define __NR_getxattr 212 -#define __NR_lgetxattr 213 -#define __NR_fgetxattr 214 -#define __NR_listxattr 215 -#define __NR_llistxattr 216 -#define __NR_flistxattr 217 -#define __NR_removexattr 218 -#define __NR_lremovexattr 219 -#define __NR_fremovexattr 220 -#define __NR_futex 221 -#define __NR_sched_setaffinity 222 -#define __NR_sched_getaffinity 223 -#define __NR_tuxcall 225 -#define __NR_io_setup 227 -#define __NR_io_destroy 228 -#define __NR_io_getevents 229 -#define __NR_io_submit 230 -#define __NR_io_cancel 231 -#define __NR_set_tid_address 232 -#define __NR_fadvise64 233 -#define __NR_exit_group 234 -#define __NR_lookup_dcookie 235 -#define __NR_epoll_create 236 -#define __NR_epoll_ctl 237 -#define __NR_epoll_wait 238 -#define __NR_remap_file_pages 239 -#define __NR_timer_create 240 -#define __NR_timer_settime 241 -#define __NR_timer_gettime 242 -#define __NR_timer_getoverrun 243 -#define __NR_timer_delete 244 -#define __NR_clock_settime 245 -#define __NR_clock_gettime 246 -#define __NR_clock_getres 247 -#define __NR_clock_nanosleep 248 -#define __NR_swapcontext 249 -#define __NR_tgkill 250 -#define __NR_utimes 251 -#define __NR_statfs64 252 -#define __NR_fstatfs64 253 -#define __NR_rtas 255 -#define __NR_sys_debug_setcontext 256 -#define __NR_migrate_pages 258 -#define __NR_mbind 259 -#define __NR_get_mempolicy 260 -#define __NR_set_mempolicy 261 -#define __NR_mq_open 262 -#define __NR_mq_unlink 263 -#define __NR_mq_timedsend 264 -#define __NR_mq_timedreceive 265 -#define __NR_mq_notify 266 -#define __NR_mq_getsetattr 267 -#define __NR_kexec_load 268 -#define __NR_add_key 269 -#define __NR_request_key 270 -#define __NR_keyctl 271 -#define __NR_waitid 272 -#define __NR_ioprio_set 273 -#define __NR_ioprio_get 274 -#define __NR_inotify_init 275 -#define __NR_inotify_add_watch 276 -#define __NR_inotify_rm_watch 277 -#define __NR_spu_run 278 -#define __NR_spu_create 279 -#define __NR_pselect6 280 -#define __NR_ppoll 281 -#define __NR_unshare 282 -#define __NR_splice 283 -#define __NR_tee 284 -#define __NR_vmsplice 285 -#define __NR_openat 286 -#define __NR_mkdirat 287 -#define __NR_mknodat 288 -#define __NR_fchownat 289 -#define __NR_futimesat 290 -#define __NR_newfstatat 291 -#define __NR_unlinkat 292 -#define 
__NR_renameat 293 -#define __NR_linkat 294 -#define __NR_symlinkat 295 -#define __NR_readlinkat 296 -#define __NR_fchmodat 297 -#define __NR_faccessat 298 -#define __NR_get_robust_list 299 -#define __NR_set_robust_list 300 -#define __NR_move_pages 301 -#define __NR_getcpu 302 -#define __NR_epoll_pwait 303 -#define __NR_utimensat 304 -#define __NR_signalfd 305 -#define __NR_timerfd_create 306 -#define __NR_eventfd 307 -#define __NR_sync_file_range2 308 -#define __NR_fallocate 309 -#define __NR_subpage_prot 310 -#define __NR_timerfd_settime 311 -#define __NR_timerfd_gettime 312 -#define __NR_signalfd4 313 -#define __NR_eventfd2 314 -#define __NR_epoll_create1 315 -#define __NR_dup3 316 -#define __NR_pipe2 317 -#define __NR_inotify_init1 318 -#define __NR_perf_event_open 319 -#define __NR_preadv 320 -#define __NR_pwritev 321 -#define __NR_rt_tgsigqueueinfo 322 -#define __NR_fanotify_init 323 -#define __NR_fanotify_mark 324 -#define __NR_prlimit64 325 -#define __NR_socket 326 -#define __NR_bind 327 -#define __NR_connect 328 -#define __NR_listen 329 -#define __NR_accept 330 -#define __NR_getsockname 331 -#define __NR_getpeername 332 -#define __NR_socketpair 333 -#define __NR_send 334 -#define __NR_sendto 335 -#define __NR_recv 336 -#define __NR_recvfrom 337 -#define __NR_shutdown 338 -#define __NR_setsockopt 339 -#define __NR_getsockopt 340 -#define __NR_sendmsg 341 -#define __NR_recvmsg 342 -#define __NR_recvmmsg 343 -#define __NR_accept4 344 -#define __NR_name_to_handle_at 345 -#define __NR_open_by_handle_at 346 -#define __NR_clock_adjtime 347 -#define __NR_syncfs 348 -#define __NR_sendmmsg 349 -#define __NR_setns 350 -#define __NR_process_vm_readv 351 -#define __NR_process_vm_writev 352 -#define __NR_finit_module 353 -#define __NR_kcmp 354 -#define __NR_sched_setattr 355 -#define __NR_sched_getattr 356 -#define __NR_renameat2 357 -#define __NR_seccomp 358 -#define __NR_getrandom 359 -#define __NR_memfd_create 360 -#define __NR_bpf 361 -#define __NR_execveat 362 -#define __NR_switch_endian 363 -#define __NR_userfaultfd 364 -#define __NR_membarrier 365 -#define __NR_mlock2 378 -#define __NR_copy_file_range 379 -#define __NR_preadv2 380 -#define __NR_pwritev2 381 -#define __NR_kexec_file_load 382 -#define __NR_statx 383 -#define __NR_pkey_alloc 384 -#define __NR_pkey_free 385 -#define __NR_pkey_mprotect 386 -#define __NR_rseq 387 -#define __NR_io_pgetevents 388 -#define __NR_semtimedop 392 -#define __NR_semget 393 -#define __NR_semctl 394 -#define __NR_shmget 395 -#define __NR_shmctl 396 -#define __NR_shmat 397 -#define __NR_shmdt 398 -#define __NR_msgget 399 -#define __NR_msgsnd 400 -#define __NR_msgrcv 401 -#define __NR_msgctl 402 -#define __NR_pidfd_send_signal 424 -#define __NR_io_uring_setup 425 -#define __NR_io_uring_enter 426 -#define __NR_io_uring_register 427 -#define __NR_open_tree 428 -#define __NR_move_mount 429 -#define __NR_fsopen 430 -#define __NR_fsconfig 431 -#define __NR_fsmount 432 -#define __NR_fspick 433 -#define __NR_pidfd_open 434 -#define __NR_clone3 435 -#define __NR_close_range 436 -#define __NR_openat2 437 -#define __NR_pidfd_getfd 438 -#define __NR_faccessat2 439 -#define __NR_process_madvise 440 -#define __NR_epoll_pwait2 441 +#define __NR_restart_syscall 0 +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_waitpid 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 
+#define __NR_chmod 15 +#define __NR_lchown 16 +#define __NR_break 17 +#define __NR_oldstat 18 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_oldfstat 28 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_stty 31 +#define __NR_gtty 32 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_ftime 35 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_prof 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_signal 48 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_lock 53 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_mpx 56 +#define __NR_setpgid 57 +#define __NR_ulimit 58 +#define __NR_oldolduname 59 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sgetmask 68 +#define __NR_ssetmask 69 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_oldlstat 84 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_profil 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_ioperm 101 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_olduname 109 +#define __NR_iopl 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_vm86 113 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_modify_ldt 123 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR_getdents 141 +#define __NR__newselect 142 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define 
__NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_query_module 166 +#define __NR_poll 167 +#define __NR_nfsservctl 168 +#define __NR_setresgid 169 +#define __NR_getresgid 170 +#define __NR_prctl 171 +#define __NR_rt_sigreturn 172 +#define __NR_rt_sigaction 173 +#define __NR_rt_sigprocmask 174 +#define __NR_rt_sigpending 175 +#define __NR_rt_sigtimedwait 176 +#define __NR_rt_sigqueueinfo 177 +#define __NR_rt_sigsuspend 178 +#define __NR_pread64 179 +#define __NR_pwrite64 180 +#define __NR_chown 181 +#define __NR_getcwd 182 +#define __NR_capget 183 +#define __NR_capset 184 +#define __NR_sigaltstack 185 +#define __NR_sendfile 186 +#define __NR_getpmsg 187 +#define __NR_putpmsg 188 +#define __NR_vfork 189 +#define __NR_ugetrlimit 190 +#define __NR_readahead 191 +#define __NR_pciconfig_read 198 +#define __NR_pciconfig_write 199 +#define __NR_pciconfig_iobase 200 +#define __NR_multiplexer 201 +#define __NR_getdents64 202 +#define __NR_pivot_root 203 +#define __NR_madvise 205 +#define __NR_mincore 206 +#define __NR_gettid 207 +#define __NR_tkill 208 +#define __NR_setxattr 209 +#define __NR_lsetxattr 210 +#define __NR_fsetxattr 211 +#define __NR_getxattr 212 +#define __NR_lgetxattr 213 +#define __NR_fgetxattr 214 +#define __NR_listxattr 215 +#define __NR_llistxattr 216 +#define __NR_flistxattr 217 +#define __NR_removexattr 218 +#define __NR_lremovexattr 219 +#define __NR_fremovexattr 220 +#define __NR_futex 221 +#define __NR_sched_setaffinity 222 +#define __NR_sched_getaffinity 223 +#define __NR_tuxcall 225 +#define __NR_io_setup 227 +#define __NR_io_destroy 228 +#define __NR_io_getevents 229 +#define __NR_io_submit 230 +#define __NR_io_cancel 231 +#define __NR_set_tid_address 232 +#define __NR_fadvise64 233 +#define __NR_exit_group 234 +#define __NR_lookup_dcookie 235 +#define __NR_epoll_create 236 +#define __NR_epoll_ctl 237 +#define __NR_epoll_wait 238 +#define __NR_remap_file_pages 239 +#define __NR_timer_create 240 +#define __NR_timer_settime 241 +#define __NR_timer_gettime 242 +#define __NR_timer_getoverrun 243 +#define __NR_timer_delete 244 +#define __NR_clock_settime 245 +#define __NR_clock_gettime 246 +#define __NR_clock_getres 247 +#define __NR_clock_nanosleep 248 +#define __NR_swapcontext 249 +#define __NR_tgkill 250 +#define __NR_utimes 251 +#define __NR_statfs64 252 +#define __NR_fstatfs64 253 +#define __NR_rtas 255 +#define __NR_sys_debug_setcontext 256 +#define __NR_migrate_pages 258 +#define __NR_mbind 259 +#define __NR_get_mempolicy 260 +#define __NR_set_mempolicy 261 +#define __NR_mq_open 262 +#define __NR_mq_unlink 263 +#define __NR_mq_timedsend 264 +#define __NR_mq_timedreceive 265 +#define __NR_mq_notify 266 +#define __NR_mq_getsetattr 267 +#define __NR_kexec_load 268 +#define __NR_add_key 269 +#define __NR_request_key 270 +#define __NR_keyctl 271 +#define __NR_waitid 272 +#define __NR_ioprio_set 273 +#define __NR_ioprio_get 274 +#define __NR_inotify_init 275 +#define __NR_inotify_add_watch 276 +#define __NR_inotify_rm_watch 277 +#define __NR_spu_run 278 +#define __NR_spu_create 279 +#define __NR_pselect6 280 +#define __NR_ppoll 281 +#define __NR_unshare 282 +#define __NR_splice 283 +#define __NR_tee 284 +#define __NR_vmsplice 285 +#define __NR_openat 286 
+#define __NR_mkdirat 287 +#define __NR_mknodat 288 +#define __NR_fchownat 289 +#define __NR_futimesat 290 +#define __NR_newfstatat 291 +#define __NR_unlinkat 292 +#define __NR_renameat 293 +#define __NR_linkat 294 +#define __NR_symlinkat 295 +#define __NR_readlinkat 296 +#define __NR_fchmodat 297 +#define __NR_faccessat 298 +#define __NR_get_robust_list 299 +#define __NR_set_robust_list 300 +#define __NR_move_pages 301 +#define __NR_getcpu 302 +#define __NR_epoll_pwait 303 +#define __NR_utimensat 304 +#define __NR_signalfd 305 +#define __NR_timerfd_create 306 +#define __NR_eventfd 307 +#define __NR_sync_file_range2 308 +#define __NR_fallocate 309 +#define __NR_subpage_prot 310 +#define __NR_timerfd_settime 311 +#define __NR_timerfd_gettime 312 +#define __NR_signalfd4 313 +#define __NR_eventfd2 314 +#define __NR_epoll_create1 315 +#define __NR_dup3 316 +#define __NR_pipe2 317 +#define __NR_inotify_init1 318 +#define __NR_perf_event_open 319 +#define __NR_preadv 320 +#define __NR_pwritev 321 +#define __NR_rt_tgsigqueueinfo 322 +#define __NR_fanotify_init 323 +#define __NR_fanotify_mark 324 +#define __NR_prlimit64 325 +#define __NR_socket 326 +#define __NR_bind 327 +#define __NR_connect 328 +#define __NR_listen 329 +#define __NR_accept 330 +#define __NR_getsockname 331 +#define __NR_getpeername 332 +#define __NR_socketpair 333 +#define __NR_send 334 +#define __NR_sendto 335 +#define __NR_recv 336 +#define __NR_recvfrom 337 +#define __NR_shutdown 338 +#define __NR_setsockopt 339 +#define __NR_getsockopt 340 +#define __NR_sendmsg 341 +#define __NR_recvmsg 342 +#define __NR_recvmmsg 343 +#define __NR_accept4 344 +#define __NR_name_to_handle_at 345 +#define __NR_open_by_handle_at 346 +#define __NR_clock_adjtime 347 +#define __NR_syncfs 348 +#define __NR_sendmmsg 349 +#define __NR_setns 350 +#define __NR_process_vm_readv 351 +#define __NR_process_vm_writev 352 +#define __NR_finit_module 353 +#define __NR_kcmp 354 +#define __NR_sched_setattr 355 +#define __NR_sched_getattr 356 +#define __NR_renameat2 357 +#define __NR_seccomp 358 +#define __NR_getrandom 359 +#define __NR_memfd_create 360 +#define __NR_bpf 361 +#define __NR_execveat 362 +#define __NR_switch_endian 363 +#define __NR_userfaultfd 364 +#define __NR_membarrier 365 +#define __NR_mlock2 378 +#define __NR_copy_file_range 379 +#define __NR_preadv2 380 +#define __NR_pwritev2 381 +#define __NR_kexec_file_load 382 +#define __NR_statx 383 +#define __NR_pkey_alloc 384 +#define __NR_pkey_free 385 +#define __NR_pkey_mprotect 386 +#define __NR_rseq 387 +#define __NR_io_pgetevents 388 +#define __NR_semtimedop 392 +#define __NR_semget 393 +#define __NR_semctl 394 +#define __NR_shmget 395 +#define __NR_shmctl 396 +#define __NR_shmat 397 +#define __NR_shmdt 398 +#define __NR_msgget 399 +#define __NR_msgsnd 400 +#define __NR_msgrcv 401 +#define __NR_msgctl 402 +#define __NR_pidfd_send_signal 424 +#define __NR_io_uring_setup 425 +#define __NR_io_uring_enter 426 +#define __NR_io_uring_register 427 +#define __NR_open_tree 428 +#define __NR_move_mount 429 +#define __NR_fsopen 430 +#define __NR_fsconfig 431 +#define __NR_fsmount 432 +#define __NR_fspick 433 +#define __NR_pidfd_open 434 +#define __NR_clone3 435 +#define __NR_close_range 436 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 +#define __NR_process_madvise 440 +#define __NR_epoll_pwait2 441 +#define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 +#define __NR_landlock_create_ruleset 444 +#define __NR_landlock_add_rule 445 +#define 
__NR_landlock_restrict_self 446 -#endif /* _ASM_POWERPC_UNISTD_64_H */ +#endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h index c94d2c3a22d..0c3cd299e42 100644 --- a/linux-headers/asm-s390/unistd_32.h +++ b/linux-headers/asm-s390/unistd_32.h @@ -414,5 +414,10 @@ #define __NR_faccessat2 439 #define __NR_process_madvise 440 #define __NR_epoll_pwait2 441 +#define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 +#define __NR_landlock_create_ruleset 444 +#define __NR_landlock_add_rule 445 +#define __NR_landlock_restrict_self 446 #endif /* _ASM_S390_UNISTD_32_H */ diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h index 984a06b7ebe..8dfc08b5e62 100644 --- a/linux-headers/asm-s390/unistd_64.h +++ b/linux-headers/asm-s390/unistd_64.h @@ -362,5 +362,10 @@ #define __NR_faccessat2 439 #define __NR_process_madvise 440 #define __NR_epoll_pwait2 441 +#define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 +#define __NR_landlock_create_ruleset 444 +#define __NR_landlock_add_rule 445 +#define __NR_landlock_restrict_self 446 #endif /* _ASM_S390_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 8e76d3701db..a6c327f8ad9 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -112,6 +112,7 @@ struct kvm_ioapic_state { #define KVM_NR_IRQCHIPS 3 #define KVM_RUN_X86_SMM (1 << 0) +#define KVM_RUN_X86_BUS_LOCK (1 << 1) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { @@ -158,6 +159,19 @@ struct kvm_sregs { __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; }; +struct kvm_sregs2 { + /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */ + struct kvm_segment cs, ds, es, fs, gs, ss; + struct kvm_segment tr, ldt; + struct kvm_dtable gdt, idt; + __u64 cr0, cr2, cr3, cr4, cr8; + __u64 efer; + __u64 apic_base; + __u64 flags; + __u64 pdptrs[4]; +}; +#define KVM_SREGS2_FLAGS_PDPTRS_VALID 1 + /* for KVM_GET_FPU and KVM_SET_FPU */ struct kvm_fpu { __u8 fpr[8][16]; @@ -436,6 +450,8 @@ struct kvm_vmx_nested_state_hdr { __u16 flags; } smm; + __u16 pad; + __u32 flags; __u64 preemption_timer_deadline; }; diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index 18fb99dfa28..66e96c0c685 100644 --- a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_UNISTD_32_H -#define _ASM_X86_UNISTD_32_H 1 +#ifndef _ASM_UNISTD_32_H +#define _ASM_UNISTD_32_H #define __NR_restart_syscall 0 #define __NR_exit 1 @@ -432,6 +432,11 @@ #define __NR_faccessat2 439 #define __NR_process_madvise 440 #define __NR_epoll_pwait2 441 +#define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 +#define __NR_landlock_create_ruleset 444 +#define __NR_landlock_add_rule 445 +#define __NR_landlock_restrict_self 446 -#endif /* _ASM_X86_UNISTD_32_H */ +#endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index bde959328d6..b8ff6f14ee8 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_UNISTD_64_H -#define _ASM_X86_UNISTD_64_H 1 +#ifndef _ASM_UNISTD_64_H +#define _ASM_UNISTD_64_H #define __NR_read 0 #define __NR_write 1 @@ -354,6 +354,11 @@ #define __NR_faccessat2 439 #define __NR_process_madvise 440 #define __NR_epoll_pwait2 441 +#define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 +#define __NR_landlock_create_ruleset 444 +#define __NR_landlock_add_rule 445 +#define 
__NR_landlock_restrict_self 446 -#endif /* _ASM_X86_UNISTD_64_H */ +#endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index 4ff6b17d3bb..06a1097c15e 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_UNISTD_X32_H -#define _ASM_X86_UNISTD_X32_H 1 +#ifndef _ASM_UNISTD_X32_H +#define _ASM_UNISTD_X32_H #define __NR_read (__X32_SYSCALL_BIT + 0) #define __NR_write (__X32_SYSCALL_BIT + 1) @@ -307,6 +307,11 @@ #define __NR_faccessat2 (__X32_SYSCALL_BIT + 439) #define __NR_process_madvise (__X32_SYSCALL_BIT + 440) #define __NR_epoll_pwait2 (__X32_SYSCALL_BIT + 441) +#define __NR_mount_setattr (__X32_SYSCALL_BIT + 442) +#define __NR_quotactl_fd (__X32_SYSCALL_BIT + 443) +#define __NR_landlock_create_ruleset (__X32_SYSCALL_BIT + 444) +#define __NR_landlock_add_rule (__X32_SYSCALL_BIT + 445) +#define __NR_landlock_restrict_self (__X32_SYSCALL_BIT + 446) #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) #define __NR_ioctl (__X32_SYSCALL_BIT + 514) @@ -345,4 +350,4 @@ #define __NR_pwritev2 (__X32_SYSCALL_BIT + 547) -#endif /* _ASM_X86_UNISTD_X32_H */ +#endif /* _ASM_UNISTD_X32_H */ diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 020b62a619a..bcaf66cc4d2 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -8,6 +8,7 @@ * Note: you must update KVM_API_VERSION if you change this interface. */ +#include #include #include @@ -216,6 +217,20 @@ struct kvm_hyperv_exit { } u; }; +struct kvm_xen_exit { +#define KVM_EXIT_XEN_HCALL 1 + __u32 type; + union { + struct { + __u32 longmode; + __u32 cpl; + __u64 input; + __u64 result; + __u64 params[6]; + } hcall; + } u; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -251,6 +266,9 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_DIRTY_RING_FULL 31 +#define KVM_EXIT_AP_RESET_HOLD 32 +#define KVM_EXIT_X86_BUS_LOCK 33 +#define KVM_EXIT_XEN 34 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -262,6 +280,9 @@ struct kvm_hyperv_exit { /* Encounter unexpected vm-exit reason */ #define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4 +/* Flags that describe what fields in emulation_failure hold valid data. */ +#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ @@ -365,6 +386,25 @@ struct kvm_run { __u32 ndata; __u64 data[16]; } internal; + /* + * KVM_INTERNAL_ERROR_EMULATION + * + * "struct emulation_failure" is an overlay of "struct internal" + * that is used for the KVM_INTERNAL_ERROR_EMULATION sub-type of + * KVM_EXIT_INTERNAL_ERROR. Note, unlike other internal error + * sub-types, this struct is ABI! It also needs to be backwards + * compatible with "struct internal". Take special care that + * "ndata" is correct, that new fields are enumerated in "flags", + * and that each flag enumerates fields that are 64-bit aligned + * and sized (so that ndata+internal.data[] is valid/accurate). + */ + struct { + __u32 suberror; + __u32 ndata; + __u64 flags; + __u8 insn_size; + __u8 insn_bytes[15]; + } emulation_failure; /* KVM_EXIT_OSI */ struct { __u64 gprs[32]; @@ -427,6 +467,8 @@ struct kvm_run { __u32 index; /* kernel -> user */ __u64 data; /* kernel <-> user */ } msr; + /* KVM_EXIT_XEN */ + struct kvm_xen_exit xen; /* Fix the size of the union. 
*/ char padding[256]; }; @@ -573,6 +615,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_CHECK_STOP 6 #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 +#define KVM_MP_STATE_AP_RESET_HOLD 9 struct kvm_mp_state { __u32 mp_state; @@ -1056,6 +1099,19 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #define KVM_CAP_SYS_HYPERV_CPUID 191 #define KVM_CAP_DIRTY_LOG_RING 192 +#define KVM_CAP_X86_BUS_LOCK_EXIT 193 +#define KVM_CAP_PPC_DAWR1 194 +#define KVM_CAP_SET_GUEST_DEBUG2 195 +#define KVM_CAP_SGX_ATTRIBUTE 196 +#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 +#define KVM_CAP_PTP_KVM 198 +#define KVM_CAP_HYPERV_ENFORCE_CPUID 199 +#define KVM_CAP_SREGS2 200 +#define KVM_CAP_EXIT_HYPERCALL 201 +#define KVM_CAP_PPC_RPT_INVALIDATE 202 +#define KVM_CAP_BINARY_STATS_FD 203 +#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 +#define KVM_CAP_ARM_MTE 205 #ifdef KVM_CAP_IRQ_ROUTING @@ -1129,6 +1185,11 @@ struct kvm_x86_mce { #endif #ifdef KVM_CAP_XEN_HVM +#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; @@ -1396,6 +1457,7 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_PMU_EVENT_FILTER */ #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) #define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) +#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) @@ -1563,6 +1625,60 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_DIRTY_LOG_RING */ #define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) +/* Per-VM Xen attributes */ +#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) +#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) + +struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u8 long_mode; + __u8 vector; + struct { + __u64 gfn; + } shared_info; + __u64 pad[8]; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 + +/* Per-vCPU Xen attributes */ +#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) +#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) + +#define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) +#define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) + +struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 gpa; + __u64 pad[8]; + struct { + __u64 state; + __u64 state_entry_time; + __u64 time_running; + __u64 time_runnable; + __u64 time_blocked; + __u64 time_offline; + } runstate; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1591,6 +1707,10 @@ enum sev_cmd_id { KVM_SEV_DBG_ENCRYPT, /* Guest certificates commands */ KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, + /* Guest 
Migration Extension */ + KVM_SEV_SEND_CANCEL, KVM_SEV_NR_MAX, }; @@ -1643,6 +1763,51 @@ struct kvm_sev_dbg { __u32 len; }; +struct kvm_sev_attestation_report { + __u8 mnonce[16]; + __u64 uaddr; + __u32 len; +}; + +struct kvm_sev_send_start { + __u32 policy; + __u64 pdh_cert_uaddr; + __u32 pdh_cert_len; + __u64 plat_certs_uaddr; + __u32 plat_certs_len; + __u64 amd_certs_uaddr; + __u32 amd_certs_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_send_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; + __u64 pdh_uaddr; + __u32 pdh_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_receive_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) @@ -1748,8 +1913,8 @@ struct kvm_hyperv_eventfd { * conversion after harvesting an entry. Also, it must not skip any * dirty bits, so that dirty bits are always harvested in sequence. */ -#define KVM_DIRTY_GFN_F_DIRTY BIT(0) -#define KVM_DIRTY_GFN_F_RESET BIT(1) +#define KVM_DIRTY_GFN_F_DIRTY _BITUL(0) +#define KVM_DIRTY_GFN_F_RESET _BITUL(1) #define KVM_DIRTY_GFN_F_MASK 0x3 /* @@ -1764,4 +1929,79 @@ struct kvm_dirty_gfn { __u64 offset; }; +#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) +#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + +/** + * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. + * @flags: Some extra information for header, always 0 for now. + * @name_size: The size in bytes of the memory which contains statistics + * name string including trailing '\0'. The memory is allocated + * at the send of statistics descriptor. + * @num_desc: The number of statistics the vm or vcpu has. + * @id_offset: The offset of the vm/vcpu stats' id string in the file pointed + * by vm/vcpu stats fd. + * @desc_offset: The offset of the vm/vcpu stats' descriptor block in the file + * pointd by vm/vcpu stats fd. + * @data_offset: The offset of the vm/vcpu stats' data block in the file + * pointed by vm/vcpu stats fd. + * + * This is the header userspace needs to read from stats fd before any other + * readings. It is used by userspace to discover all the information about the + * vm/vcpu's binary statistics. + * Userspace reads this header from the start of the vm/vcpu's stats fd. 
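
The dirty-ring flags above (KVM_DIRTY_GFN_F_DIRTY / KVM_DIRTY_GFN_F_RESET, now spelled with _BITUL) describe a simple harvest protocol: the kernel publishes an entry with the DIRTY flag set, userspace collects entries strictly in sequence, marks each with the RESET flag, and finally issues KVM_RESET_DIRTY_RINGS. A minimal, hypothetical harvesting loop is sketched below; it assumes struct kvm_dirty_gfn also carries "flags" and "slot" members (only "offset" is visible in this hunk).

    /*
     * Hypothetical harvesting loop. Assumes struct kvm_dirty_gfn also carries
     * "flags" and "slot" members (only "offset" is visible in the hunk above),
     * and omits error handling and the memory barriers a real VMM would need.
     */
    #include <stdint.h>
    #include <linux/kvm.h>

    static uint32_t harvest_dirty_ring(struct kvm_dirty_gfn *ring, uint32_t nentries,
                                       uint32_t fetch,
                                       void (*mark_dirty)(uint32_t slot, uint64_t offset))
    {
        for (;;) {
            struct kvm_dirty_gfn *e = &ring[fetch % nentries];

            if (!(e->flags & KVM_DIRTY_GFN_F_DIRTY)) {
                break;                              /* no more published entries */
            }
            mark_dirty(e->slot, e->offset);         /* record the dirty page */
            e->flags |= KVM_DIRTY_GFN_F_RESET;      /* hand the slot back to KVM */
            fetch++;
        }
        /* The caller later issues KVM_RESET_DIRTY_RINGS on the VM fd. */
        return fetch;
    }
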
+ */ +struct kvm_stats_header { + __u32 flags; + __u32 name_size; + __u32 num_desc; + __u32 id_offset; + __u32 desc_offset; + __u32 data_offset; +}; + +#define KVM_STATS_TYPE_SHIFT 0 +#define KVM_STATS_TYPE_MASK (0xF << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK + +#define KVM_STATS_UNIT_SHIFT 4 +#define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_NONE (0x0 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES + +#define KVM_STATS_BASE_SHIFT 8 +#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_POW10 (0x0 << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_POW2 (0x1 << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_MAX KVM_STATS_BASE_POW2 + +/** + * struct kvm_stats_desc - Descriptor of a KVM statistics. + * @flags: Annotations of the stats, like type, unit, etc. + * @exponent: Used together with @flags to determine the unit. + * @size: The number of data items for this stats. + * Every data item is of type __u64. + * @offset: The offset of the stats to the start of stat structure in + * struture kvm or kvm_vcpu. + * @unused: Unused field for future usage. Always 0 for now. + * @name: The name string for the stats. Its size is indicated by the + * &kvm_stats_header->name_size. + */ +struct kvm_stats_desc { + __u32 flags; + __s16 exponent; + __u16 size; + __u32 offset; + __u32 unused; + char name[]; +}; + +#define KVM_GET_STATS_FD _IO(KVMIO, 0xce) + #endif /* __LINUX_KVM_H */ diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index 1ba9a9feeb8..8479af5f4c7 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -19,15 +19,20 @@ * means the userland is reading). 
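
KVM_GET_STATS_FD returns a file descriptor laid out exactly as the two structures above describe: a kvm_stats_header at offset 0, an id string at id_offset, a table of kvm_stats_desc entries at desc_offset (each occupying sizeof(struct kvm_stats_desc) + name_size bytes), and __u64 data items at data_offset. A rough, hypothetical reader follows; it assumes each descriptor's offset is relative to data_offset, skips all error handling, and only prints the first of d->size items.

    /*
     * Hypothetical reader for the binary stats fd. Assumes each descriptor's
     * "offset" is relative to header.data_offset and skips all error handling;
     * only the first of d->size data items is printed.
     */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static void dump_vm_stats(int vm_fd)
    {
        int fd = ioctl(vm_fd, KVM_GET_STATS_FD, NULL);
        struct kvm_stats_header h;

        pread(fd, &h, sizeof(h), 0);

        size_t desc_sz = sizeof(struct kvm_stats_desc) + h.name_size;
        char *descs = malloc(desc_sz * h.num_desc);

        pread(fd, descs, desc_sz * h.num_desc, h.desc_offset);

        for (uint32_t i = 0; i < h.num_desc; i++) {
            struct kvm_stats_desc *d = (struct kvm_stats_desc *)(descs + i * desc_sz);
            uint64_t val;

            pread(fd, &val, sizeof(val), h.data_offset + d->offset);
            printf("%s = %llu (type %#x, unit %#x)\n", d->name,
                   (unsigned long long)val,
                   d->flags & KVM_STATS_TYPE_MASK,
                   d->flags & KVM_STATS_UNIT_MASK);
        }
        free(descs);
        close(fd);
    }
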
*/ #define UFFD_API ((__u64)0xAA) +#define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \ + UFFDIO_REGISTER_MODE_WP | \ + UFFDIO_REGISTER_MODE_MINOR) #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ UFFD_FEATURE_EVENT_FORK | \ UFFD_FEATURE_EVENT_REMAP | \ - UFFD_FEATURE_EVENT_REMOVE | \ + UFFD_FEATURE_EVENT_REMOVE | \ UFFD_FEATURE_EVENT_UNMAP | \ UFFD_FEATURE_MISSING_HUGETLBFS | \ UFFD_FEATURE_MISSING_SHMEM | \ UFFD_FEATURE_SIGBUS | \ - UFFD_FEATURE_THREAD_ID) + UFFD_FEATURE_THREAD_ID | \ + UFFD_FEATURE_MINOR_HUGETLBFS | \ + UFFD_FEATURE_MINOR_SHMEM) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -36,10 +41,12 @@ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ (__u64)1 << _UFFDIO_ZEROPAGE | \ - (__u64)1 << _UFFDIO_WRITEPROTECT) + (__u64)1 << _UFFDIO_WRITEPROTECT | \ + (__u64)1 << _UFFDIO_CONTINUE) #define UFFD_API_RANGE_IOCTLS_BASIC \ ((__u64)1 << _UFFDIO_WAKE | \ - (__u64)1 << _UFFDIO_COPY) + (__u64)1 << _UFFDIO_COPY | \ + (__u64)1 << _UFFDIO_CONTINUE) /* * Valid ioctl command number range with this API is from 0x00 to @@ -55,6 +62,7 @@ #define _UFFDIO_COPY (0x03) #define _UFFDIO_ZEROPAGE (0x04) #define _UFFDIO_WRITEPROTECT (0x06) +#define _UFFDIO_CONTINUE (0x07) #define _UFFDIO_API (0x3F) /* userfaultfd ioctl ids */ @@ -73,6 +81,8 @@ struct uffdio_zeropage) #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ struct uffdio_writeprotect) +#define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ + struct uffdio_continue) /* read() structure */ struct uffd_msg { @@ -127,6 +137,7 @@ struct uffd_msg { /* flags for UFFD_EVENT_PAGEFAULT */ #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ #define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */ +#define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */ struct uffdio_api { /* userland asks for an API number and the features to enable */ @@ -171,6 +182,13 @@ struct uffdio_api { * * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will * be returned, if feature is not requested 0 will be returned. + * + * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults + * can be intercepted (via REGISTER_MODE_MINOR) for + * hugetlbfs-backed pages. + * + * UFFD_FEATURE_MINOR_SHMEM indicates the same support as + * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -181,6 +199,8 @@ struct uffdio_api { #define UFFD_FEATURE_EVENT_UNMAP (1<<6) #define UFFD_FEATURE_SIGBUS (1<<7) #define UFFD_FEATURE_THREAD_ID (1<<8) +#define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) +#define UFFD_FEATURE_MINOR_SHMEM (1<<10) __u64 features; __u64 ioctls; @@ -195,6 +215,7 @@ struct uffdio_register { struct uffdio_range range; #define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0) #define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1) +#define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2) __u64 mode; /* @@ -257,6 +278,18 @@ struct uffdio_writeprotect { __u64 mode; }; +struct uffdio_continue { + struct uffdio_range range; +#define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0) + __u64 mode; + + /* + * Fields below here are written by the ioctl and must be at the end: + * the copy_from_user will not read past here. + */ + __s64 mapped; +}; + /* * Flags for the userfaultfd(2) system call itself. 
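
With UFFDIO_REGISTER_MODE_MINOR, a fault on a page whose contents already exist in the backing file is reported with UFFD_PAGEFAULT_FLAG_MINOR; userspace fixes up the contents through another mapping of the file and then installs the existing page with UFFDIO_CONTINUE instead of UFFDIO_COPY. A minimal, hypothetical handler fragment is sketched below (populate_page() and page_size are placeholders for illustration).

    /*
     * Hypothetical minor-fault handler fragment, e.g. for postcopy with a
     * shmem/hugetlbfs backing file. "uffd" is assumed to have been registered
     * with UFFDIO_REGISTER_MODE_MINOR; populate_page() is a placeholder.
     */
    #include <stdint.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/userfaultfd.h>

    static void handle_minor_fault(int uffd, long page_size,
                                   void (*populate_page)(uint64_t addr))
    {
        struct uffd_msg msg;

        if (read(uffd, &msg, sizeof(msg)) != sizeof(msg) ||
            msg.event != UFFD_EVENT_PAGEFAULT ||
            !(msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR)) {
            return;
        }

        uint64_t addr = msg.arg.pagefault.address & ~((uint64_t)page_size - 1);

        /* Bring the backing page contents up to date first ... */
        populate_page(addr);

        /* ... then install the existing page and wake the faulting thread. */
        struct uffdio_continue cont = {
            .range = { .start = addr, .len = page_size },
            .mode = 0,
        };
        ioctl(uffd, UFFDIO_CONTINUE, &cont);
    }
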
*/ diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 609099e455c..e680594f27b 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -46,6 +46,12 @@ */ #define VFIO_NOIOMMU_IOMMU 8 +/* Supports VFIO_DMA_UNMAP_FLAG_ALL */ +#define VFIO_UNMAP_ALL 9 + +/* Supports the vaddr flag for DMA map and unmap */ +#define VFIO_UPDATE_VADDR 10 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between @@ -329,6 +335,8 @@ struct vfio_region_info_cap_type { /* 10de vendor PCI sub-types */ /* * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. + * + * Deprecated, region no longer provided */ #define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) @@ -336,6 +344,8 @@ struct vfio_region_info_cap_type { /* * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU * to do TLB invalidation on a GPU. + * + * Deprecated, region no longer provided */ #define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) @@ -635,6 +645,8 @@ struct vfio_device_migration_info { * Capability with compressed real address (aka SSA - small system address) * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing * and by the userspace to associate a NVLink bridge with a GPU. + * + * Deprecated, capability no longer provided */ #define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT 4 @@ -649,6 +661,8 @@ struct vfio_region_info_cap_nvlink2_ssatgt { * property in the device tree. The value is fixed in the hardware * and failing to provide the correct value results in the link * not working with no indication from the driver why. + * + * Deprecated, capability no longer provided */ #define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD 5 @@ -1074,12 +1088,22 @@ struct vfio_iommu_type1_info_dma_avail { * * Map process virtual addresses to IO virtual addresses using the * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required. + * + * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and + * unblock translation of host virtual addresses in the iova range. The vaddr + * must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR. To + * maintain memory consistency within the user application, the updated vaddr + * must address the same memory object as originally mapped. Failure to do so + * will result in user memory corruption and/or device misbehavior. iova and + * size must match those in the original MAP_DMA call. Protection is not + * changed, and the READ & WRITE flags must be 0. */ struct vfio_iommu_type1_dma_map { __u32 argsz; __u32 flags; #define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */ #define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */ +#define VFIO_DMA_MAP_FLAG_VADDR (1 << 2) __u64 vaddr; /* Process virtual address */ __u64 iova; /* IO virtual address */ __u64 size; /* Size of mapping (bytes) */ @@ -1102,6 +1126,7 @@ struct vfio_bitmap { * field. No guarantee is made to the user that arbitrary unmaps of iova * or size different from those used in the original mapping call will * succeed. + * * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap * before unmapping IO virtual addresses. When this flag is set, the user must * provide a struct vfio_bitmap in data[]. User must provide zero-allocated @@ -1111,11 +1136,21 @@ struct vfio_bitmap { * indicates that the page at that offset from iova is dirty. 
A Bitmap of the * pages in the range of unmapped size is returned in the user-provided * vfio_bitmap.data. + * + * If flags & VFIO_DMA_UNMAP_FLAG_ALL, unmap all addresses. iova and size + * must be 0. This cannot be combined with the get-dirty-bitmap flag. + * + * If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host + * virtual addresses in the iova range. Tasks that attempt to translate an + * iova's vaddr will block. DMA to already-mapped pages continues. This + * cannot be combined with the get-dirty-bitmap flag. */ struct vfio_iommu_type1_dma_unmap { __u32 argsz; __u32 flags; #define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) +#define VFIO_DMA_UNMAP_FLAG_ALL (1 << 1) +#define VFIO_DMA_UNMAP_FLAG_VADDR (1 << 2) __u64 iova; /* IO virtual address */ __u64 size; /* Size of mapping (bytes) */ __u8 data[]; diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c index ee72a1c20f0..97e0728b679 100644 --- a/linux-user/aarch64/cpu_loop.c +++ b/linux-user/aarch64/cpu_loop.c @@ -20,7 +20,9 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu.h" +#include "user-internals.h" #include "cpu_loop-common.h" +#include "signal-common.h" #include "qemu/guest-random.h" #include "semihosting/common-semi.h" #include "target/arm/syndrome.h" @@ -77,9 +79,8 @@ void cpu_loop(CPUARMState *env) { CPUState *cs = env_cpu(env); - int trapnr, ec, fsc; + int trapnr, ec, fsc, si_code, si_signo; abi_long ret; - target_siginfo_t info; for (;;) { cpu_exec_start(cs); @@ -108,18 +109,10 @@ void cpu_loop(CPUARMState *env) /* just indicate that signals should be handled asap */ break; case EXCP_UDEF: - info.si_signo = TARGET_SIGILL; - info.si_errno = 0; - info.si_code = TARGET_ILL_ILLOPN; - info._sifields._sigfault._addr = env->pc; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->pc); break; case EXCP_PREFETCH_ABORT: case EXCP_DATA_ABORT: - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info._sifields._sigfault._addr = env->exception.vaddress; - /* We should only arrive here with EC in {DATAABORT, INSNABORT}. */ ec = syn_get_ec(env->exception.syndrome); assert(ec == EC_DATAABORT || ec == EC_INSNABORT); @@ -128,27 +121,30 @@ void cpu_loop(CPUARMState *env) fsc = extract32(env->exception.syndrome, 0, 6); switch (fsc) { case 0x04 ... 0x07: /* Translation fault, level {0-3} */ - info.si_code = TARGET_SEGV_MAPERR; + si_signo = TARGET_SIGSEGV; + si_code = TARGET_SEGV_MAPERR; break; case 0x09 ... 0x0b: /* Access flag fault, level {1-3} */ case 0x0d ... 
0x0f: /* Permission fault, level {1-3} */ - info.si_code = TARGET_SEGV_ACCERR; + si_signo = TARGET_SIGSEGV; + si_code = TARGET_SEGV_ACCERR; break; case 0x11: /* Synchronous Tag Check Fault */ - info.si_code = TARGET_SEGV_MTESERR; + si_signo = TARGET_SIGSEGV; + si_code = TARGET_SEGV_MTESERR; + break; + case 0x21: /* Alignment fault */ + si_signo = TARGET_SIGBUS; + si_code = TARGET_BUS_ADRALN; break; default: g_assert_not_reached(); } - - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + force_sig_fault(si_signo, si_code, env->exception.vaddress); break; case EXCP_DEBUG: case EXCP_BKPT: - info.si_signo = TARGET_SIGTRAP; - info.si_errno = 0; - info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->pc); break; case EXCP_SEMIHOST: env->xregs[0] = do_common_semihosting(cs); @@ -168,11 +164,7 @@ void cpu_loop(CPUARMState *env) /* Check for MTE asynchronous faults */ if (unlikely(env->cp15.tfsr_el[0])) { env->cp15.tfsr_el[0] = 0; - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info._sifields._sigfault._addr = 0; - info.si_code = TARGET_SEGV_MTEAERR; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MTEAERR, 0); } process_pending_signals(env); diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c index b591790c22e..29c52db3f13 100644 --- a/linux-user/aarch64/signal.c +++ b/linux-user/aarch64/signal.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" #include "qemu.h" +#include "user-internals.h" #include "signal-common.h" #include "linux-user/trace.h" @@ -108,7 +109,6 @@ struct target_rt_sigframe { struct target_rt_frame_record { uint64_t fp; uint64_t lr; - uint32_t tramp[2]; }; static void target_setup_general_frame(struct target_rt_sigframe *sf, @@ -460,9 +460,9 @@ static void target_setup_frame(int usig, struct target_sigaction *ka, layout.total_size = MAX(layout.total_size, sizeof(struct target_rt_sigframe)); - /* Reserve space for the return code. On a real system this would - * be within the VDSO. So, despite the name this is not a "real" - * record within the frame. + /* + * Reserve space for the standard frame unwind pair: fp, lr. + * Despite the name this is not a "real" record within the frame. */ fr_ofs = layout.total_size; layout.total_size += sizeof(struct target_rt_frame_record); @@ -495,15 +495,7 @@ static void target_setup_frame(int usig, struct target_sigaction *ka, if (ka->sa_flags & TARGET_SA_RESTORER) { return_addr = ka->sa_restorer; } else { - /* - * mov x8,#__NR_rt_sigreturn; svc #0 - * Since these are instructions they need to be put as little-endian - * regardless of target default or current CPU endianness. 
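
The force_sig_fault() calls introduced in these hunks wrap the target_siginfo_t/queue_signal() sequence they replace. A sketch of what such a helper looks like, reconstructed from the removed code, is shown below; the real helper presumably takes no env argument and derives the current CPU from thread-local state, so the explicit parameter here is only to keep the sketch readable on its own.

    /*
     * Sketch reconstructed from the removed code above; the real
     * force_sig_fault() presumably obtains the CPU from thread-local state
     * rather than taking an env parameter.
     */
    static void force_sig_fault_sketch(CPUArchState *env, int sig, int code,
                                       abi_ulong addr)
    {
        target_siginfo_t info;

        info.si_signo = sig;
        info.si_errno = 0;
        info.si_code = code;
        info._sifields._sigfault._addr = addr;
        queue_signal(env, sig, QEMU_SI_FAULT, &info);
    }
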
- */ - __put_user_e(0xd2801168, &fr->tramp[0], le); - __put_user_e(0xd4000001, &fr->tramp[1], le); - return_addr = frame_addr + fr_ofs - + offsetof(struct target_rt_frame_record, tramp); + return_addr = default_rt_sigreturn; } env->xregs[0] = usig; env->xregs[29] = frame_addr + fr_ofs; @@ -561,11 +553,7 @@ long do_rt_sigreturn(CPUARMState *env) goto badframe; } - if (do_sigaltstack(frame_addr + - offsetof(struct target_rt_sigframe, uc.tuc_stack), - 0, get_sp_from_cpustate(env)) == -EFAULT) { - goto badframe; - } + target_restore_altstack(&frame->uc.tuc_stack, env); unlock_user_struct(frame, frame_addr, 0); return -TARGET_QEMU_ESIGRETURN; @@ -580,3 +568,20 @@ long do_sigreturn(CPUARMState *env) { return do_rt_sigreturn(env); } + +void setup_sigtramp(abi_ulong sigtramp_page) +{ + uint32_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 8, 0); + assert(tramp != NULL); + + /* + * mov x8,#__NR_rt_sigreturn; svc #0 + * Since these are instructions they need to be put as little-endian + * regardless of target default or current CPU endianness. + */ + __put_user_e(0xd2801168, &tramp[0], le); + __put_user_e(0xd4000001, &tramp[1], le); + + default_rt_sigreturn = sigtramp_page; + unlock_user(tramp, sigtramp_page, 8); +} diff --git a/linux-user/aarch64/syscall_nr.h b/linux-user/aarch64/syscall_nr.h index 6fd5b331e78..12ef002d60f 100644 --- a/linux-user/aarch64/syscall_nr.h +++ b/linux-user/aarch64/syscall_nr.h @@ -302,6 +302,12 @@ #define TARGET_NR_openat2 437 #define TARGET_NR_pidfd_getfd 438 #define TARGET_NR_faccessat2 439 -#define TARGET_NR_syscalls 440 +#define TARGET_NR_process_madvise 440 +#define TARGET_NR_epoll_pwait2 441 +#define TARGET_NR_mount_setattr 442 +#define TARGET_NR_landlock_create_ruleset 444 +#define TARGET_NR_landlock_add_rule 445 +#define TARGET_NR_landlock_restrict_self 446 +#define TARGET_NR_syscalls 447 #endif /* LINUX_USER_AARCH64_SYSCALL_NR_H */ diff --git a/linux-user/aarch64/target_errno_defs.h b/linux-user/aarch64/target_errno_defs.h new file mode 100644 index 00000000000..461b5477284 --- /dev/null +++ b/linux-user/aarch64/target_errno_defs.h @@ -0,0 +1,7 @@ +#ifndef AARCH64_TARGET_ERRNO_DEFS_H +#define AARCH64_TARGET_ERRNO_DEFS_H + +/* Target uses generic errno */ +#include "../generic/target_errno_defs.h" + +#endif diff --git a/linux-user/aarch64/target_signal.h b/linux-user/aarch64/target_signal.h index 18013e1b235..7580d99403c 100644 --- a/linux-user/aarch64/target_signal.h +++ b/linux-user/aarch64/target_signal.h @@ -25,4 +25,6 @@ typedef struct target_sigaltstack { #define TARGET_SEGV_MTESERR 9 /* Synchronous ARM MTE exception */ #define TARGET_ARCH_HAS_SETUP_FRAME +#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1 + #endif /* AARCH64_TARGET_SIGNAL_H */ diff --git a/linux-user/alpha/cpu_loop.c b/linux-user/alpha/cpu_loop.c index 7ce2461a028..4029849d5cd 100644 --- a/linux-user/alpha/cpu_loop.c +++ b/linux-user/alpha/cpu_loop.c @@ -20,7 +20,9 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu.h" +#include "user-internals.h" #include "cpu_loop-common.h" +#include "signal-common.h" void cpu_loop(CPUAlphaState *env) { @@ -52,21 +54,6 @@ void cpu_loop(CPUAlphaState *env) fprintf(stderr, "External interrupt. Exit\n"); exit(EXIT_FAILURE); break; - case EXCP_MMFAULT: - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info.si_code = (page_get_flags(env->trap_arg0) & PAGE_VALID - ? 
TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR); - info._sifields._sigfault._addr = env->trap_arg0; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - break; - case EXCP_UNALIGN: - info.si_signo = TARGET_SIGBUS; - info.si_errno = 0; - info.si_code = TARGET_BUS_ADRALN; - info._sifields._sigfault._addr = env->trap_arg0; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - break; case EXCP_OPCDEC: do_sigill: info.si_signo = TARGET_SIGILL; diff --git a/linux-user/alpha/signal.c b/linux-user/alpha/signal.c index c5c27ce0841..bbe3dd175a7 100644 --- a/linux-user/alpha/signal.c +++ b/linux-user/alpha/signal.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" #include "qemu.h" +#include "user-internals.h" #include "signal-common.h" #include "linux-user/trace.h" @@ -54,13 +55,11 @@ struct target_ucontext { struct target_sigframe { struct target_sigcontext sc; - unsigned int retcode[3]; }; struct target_rt_sigframe { target_siginfo_t info; struct target_ucontext uc; - unsigned int retcode[3]; }; #define INSN_MOV_R30_R16 0x47fe0410 @@ -138,15 +137,10 @@ void setup_frame(int sig, struct target_sigaction *ka, setup_sigcontext(&frame->sc, env, frame_addr, set); - if (ka->sa_restorer) { - r26 = ka->sa_restorer; + if (ka->ka_restorer) { + r26 = ka->ka_restorer; } else { - __put_user(INSN_MOV_R30_R16, &frame->retcode[0]); - __put_user(INSN_LDI_R0 + TARGET_NR_sigreturn, - &frame->retcode[1]); - __put_user(INSN_CALLSYS, &frame->retcode[2]); - /* imb() */ - r26 = frame_addr + offsetof(struct target_sigframe, retcode); + r26 = default_sigreturn; } unlock_user_struct(frame, frame_addr, 1); @@ -192,15 +186,10 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]); } - if (ka->sa_restorer) { - r26 = ka->sa_restorer; + if (ka->ka_restorer) { + r26 = ka->ka_restorer; } else { - __put_user(INSN_MOV_R30_R16, &frame->retcode[0]); - __put_user(INSN_LDI_R0 + TARGET_NR_rt_sigreturn, - &frame->retcode[1]); - __put_user(INSN_CALLSYS, &frame->retcode[2]); - /* imb(); */ - r26 = frame_addr + offsetof(struct target_sigframe, retcode); + r26 = default_rt_sigreturn; } if (err) { @@ -257,11 +246,7 @@ long do_rt_sigreturn(CPUAlphaState *env) set_sigmask(&set); restore_sigcontext(env, &frame->uc.tuc_mcontext); - if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe, - uc.tuc_stack), - 0, env->ir[IR_SP]) == -EFAULT) { - goto badframe; - } + target_restore_altstack(&frame->uc.tuc_stack, env); unlock_user_struct(frame, frame_addr, 0); return -TARGET_QEMU_ESIGRETURN; @@ -272,3 +257,21 @@ long do_rt_sigreturn(CPUAlphaState *env) force_sig(TARGET_SIGSEGV); return -TARGET_QEMU_ESIGRETURN; } + +void setup_sigtramp(abi_ulong sigtramp_page) +{ + uint32_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 6 * 4, 0); + assert(tramp != NULL); + + default_sigreturn = sigtramp_page; + __put_user(INSN_MOV_R30_R16, &tramp[0]); + __put_user(INSN_LDI_R0 + TARGET_NR_sigreturn, &tramp[1]); + __put_user(INSN_CALLSYS, &tramp[2]); + + default_rt_sigreturn = sigtramp_page + 3 * 4; + __put_user(INSN_MOV_R30_R16, &tramp[3]); + __put_user(INSN_LDI_R0 + TARGET_NR_rt_sigreturn, &tramp[4]); + __put_user(INSN_CALLSYS, &tramp[5]); + + unlock_user(tramp, sigtramp_page, 6 * 4); +} diff --git a/linux-user/alpha/syscall.tbl b/linux-user/alpha/syscall.tbl index ec8bed9e7b7..3000a2e8ee2 100644 --- a/linux-user/alpha/syscall.tbl +++ b/linux-user/alpha/syscall.tbl @@ -479,3 +479,10 @@ 547 common openat2 sys_openat2 548 common pidfd_getfd sys_pidfd_getfd 549 common faccessat2 sys_faccessat2 +550 
common process_madvise sys_process_madvise +551 common epoll_pwait2 sys_epoll_pwait2 +552 common mount_setattr sys_mount_setattr +# 553 reserved for quotactl_path +554 common landlock_create_ruleset sys_landlock_create_ruleset +555 common landlock_add_rule sys_landlock_add_rule +556 common landlock_restrict_self sys_landlock_restrict_self diff --git a/linux-user/alpha/target_errno_defs.h b/linux-user/alpha/target_errno_defs.h new file mode 100644 index 00000000000..07924b13aaf --- /dev/null +++ b/linux-user/alpha/target_errno_defs.h @@ -0,0 +1,204 @@ +#ifndef ALPHA_TARGET_ERRNO_DEFS_H +#define ALPHA_TARGET_ERRNO_DEFS_H + +#include "../generic/target_errno_defs.h" + +/* + * Generic target errno overridden with definitions taken + * from asm-alpha/errno.h + */ +#undef TARGET_EWOULDBLOCK +#define TARGET_EWOULDBLOCK TARGET_EAGAIN +#undef TARGET_EDEADLK +#define TARGET_EDEADLK 11 +#undef TARGET_EAGAIN +#define TARGET_EAGAIN 35 +#undef TARGET_EINPROGRESS +#define TARGET_EINPROGRESS 36 +#undef TARGET_EALREADY +#define TARGET_EALREADY 37 +#undef TARGET_ENOTSOCK +#define TARGET_ENOTSOCK 38 +#undef TARGET_EDESTADDRREQ +#define TARGET_EDESTADDRREQ 39 +#undef TARGET_EMSGSIZE +#define TARGET_EMSGSIZE 40 +#undef TARGET_EPROTOTYPE +#define TARGET_EPROTOTYPE 41 +#undef TARGET_ENOPROTOOPT +#define TARGET_ENOPROTOOPT 42 +#undef TARGET_EPROTONOSUPPORT +#define TARGET_EPROTONOSUPPORT 43 +#undef TARGET_ESOCKTNOSUPPORT +#define TARGET_ESOCKTNOSUPPORT 44 +#undef TARGET_EOPNOTSUPP +#define TARGET_EOPNOTSUPP 45 +#undef TARGET_EPFNOSUPPORT +#define TARGET_EPFNOSUPPORT 46 +#undef TARGET_EAFNOSUPPORT +#define TARGET_EAFNOSUPPORT 47 +#undef TARGET_EADDRINUSE +#define TARGET_EADDRINUSE 48 +#undef TARGET_EADDRNOTAVAIL +#define TARGET_EADDRNOTAVAIL 49 +#undef TARGET_ENETDOWN +#define TARGET_ENETDOWN 50 +#undef TARGET_ENETUNREACH +#define TARGET_ENETUNREACH 51 +#undef TARGET_ENETRESET +#define TARGET_ENETRESET 52 +#undef TARGET_ECONNABORTED +#define TARGET_ECONNABORTED 53 +#undef TARGET_ECONNRESET +#define TARGET_ECONNRESET 54 +#undef TARGET_ENOBUFS +#define TARGET_ENOBUFS 55 +#undef TARGET_EISCONN +#define TARGET_EISCONN 56 +#undef TARGET_ENOTCONN +#define TARGET_ENOTCONN 57 +#undef TARGET_ESHUTDOWN +#define TARGET_ESHUTDOWN 58 +#undef TARGET_ETOOMANYREFS +#define TARGET_ETOOMANYREFS 59 +#undef TARGET_ETIMEDOUT +#define TARGET_ETIMEDOUT 60 +#undef TARGET_ECONNREFUSED +#define TARGET_ECONNREFUSED 61 +#undef TARGET_ELOOP +#define TARGET_ELOOP 62 +#undef TARGET_ENAMETOOLONG +#define TARGET_ENAMETOOLONG 63 +#undef TARGET_EHOSTDOWN +#define TARGET_EHOSTDOWN 64 +#undef TARGET_EHOSTUNREACH +#define TARGET_EHOSTUNREACH 65 +#undef TARGET_ENOTEMPTY +#define TARGET_ENOTEMPTY 66 +/* Unused 67 */ +#undef TARGET_EUSERS +#define TARGET_EUSERS 68 +#undef TARGET_EDQUOT +#define TARGET_EDQUOT 69 +#undef TARGET_ESTALE +#define TARGET_ESTALE 70 +#undef TARGET_EREMOTE +#define TARGET_EREMOTE 71 +/* Unused 72-76 */ +#undef TARGET_ENOLCK +#define TARGET_ENOLCK 77 +#undef TARGET_ENOSYS +#define TARGET_ENOSYS 78 +/* Unused 79 */ +#undef TARGET_ENOMSG +#define TARGET_ENOMSG 80 +#undef TARGET_EIDRM +#define TARGET_EIDRM 81 +#undef TARGET_ENOSR +#define TARGET_ENOSR 82 +#undef TARGET_ETIME +#define TARGET_ETIME 83 +#undef TARGET_EBADMSG +#define TARGET_EBADMSG 84 +#undef TARGET_EPROTO +#define TARGET_EPROTO 85 +#undef TARGET_ENODATA +#define TARGET_ENODATA 86 +#undef TARGET_ENOSTR +#define TARGET_ENOSTR 87 +#undef TARGET_ECHRNG +#define TARGET_ECHRNG 88 +#undef TARGET_EL2NSYNC +#define TARGET_EL2NSYNC 89 +#undef TARGET_EL3HLT +#define TARGET_EL3HLT 
90 +#undef TARGET_EL3RST +#define TARGET_EL3RST 91 +#undef TARGET_ENOPKG +#define TARGET_ENOPKG 92 +#undef TARGET_ELNRNG +#define TARGET_ELNRNG 93 +#undef TARGET_EUNATCH +#define TARGET_EUNATCH 94 +#undef TARGET_ENOCSI +#define TARGET_ENOCSI 95 +#undef TARGET_EL2HLT +#define TARGET_EL2HLT 96 +#undef TARGET_EBADE +#define TARGET_EBADE 97 +#undef TARGET_EBADR +#define TARGET_EBADR 98 +#undef TARGET_EXFULL +#define TARGET_EXFULL 99 +#undef TARGET_ENOANO +#define TARGET_ENOANO 100 +#undef TARGET_EBADRQC +#define TARGET_EBADRQC 101 +#undef TARGET_EBADSLT +#define TARGET_EBADSLT 102 +/* Unused 103 */ +#undef TARGET_EBFONT +#define TARGET_EBFONT 104 +#undef TARGET_ENONET +#define TARGET_ENONET 105 +#undef TARGET_ENOLINK +#define TARGET_ENOLINK 106 +#undef TARGET_EADV +#define TARGET_EADV 107 +#undef TARGET_ESRMNT +#define TARGET_ESRMNT 108 +#undef TARGET_ECOMM +#define TARGET_ECOMM 109 +#undef TARGET_EMULTIHOP +#define TARGET_EMULTIHOP 110 +#undef TARGET_EDOTDOT +#define TARGET_EDOTDOT 111 +#undef TARGET_EOVERFLOW +#define TARGET_EOVERFLOW 112 +#undef TARGET_ENOTUNIQ +#define TARGET_ENOTUNIQ 113 +#undef TARGET_EBADFD +#define TARGET_EBADFD 114 +#undef TARGET_EREMCHG +#define TARGET_EREMCHG 115 +#undef TARGET_EILSEQ +#define TARGET_EILSEQ 116 +/* Same as default 117-121 */ +#undef TARGET_ELIBACC +#define TARGET_ELIBACC 122 +#undef TARGET_ELIBBAD +#define TARGET_ELIBBAD 123 +#undef TARGET_ELIBSCN +#define TARGET_ELIBSCN 124 +#undef TARGET_ELIBMAX +#define TARGET_ELIBMAX 125 +#undef TARGET_ELIBEXEC +#define TARGET_ELIBEXEC 126 +#undef TARGET_ERESTART +#define TARGET_ERESTART 127 +#undef TARGET_ESTRPIPE +#define TARGET_ESTRPIPE 128 +#undef TARGET_ENOMEDIUM +#define TARGET_ENOMEDIUM 129 +#undef TARGET_EMEDIUMTYPE +#define TARGET_EMEDIUMTYPE 130 +#undef TARGET_ECANCELED +#define TARGET_ECANCELED 131 +#undef TARGET_ENOKEY +#define TARGET_ENOKEY 132 +#undef TARGET_EKEYEXPIRED +#define TARGET_EKEYEXPIRED 133 +#undef TARGET_EKEYREVOKED +#define TARGET_EKEYREVOKED 134 +#undef TARGET_EKEYREJECTED +#define TARGET_EKEYREJECTED 135 +#undef TARGET_EOWNERDEAD +#define TARGET_EOWNERDEAD 136 +#undef TARGET_ENOTRECOVERABLE +#define TARGET_ENOTRECOVERABLE 137 +#undef TARGET_ERFKILL +#define TARGET_ERFKILL 138 +#undef TARGET_EHWPOISON +#define TARGET_EHWPOISON 139 + +#endif diff --git a/linux-user/alpha/target_signal.h b/linux-user/alpha/target_signal.h index 0b90d3a8970..0b6a39de657 100644 --- a/linux-user/alpha/target_signal.h +++ b/linux-user/alpha/target_signal.h @@ -92,6 +92,8 @@ typedef struct target_sigaltstack { #define TARGET_GEN_SUBRNG7 -25 #define TARGET_ARCH_HAS_SETUP_FRAME +#define TARGET_ARCH_HAS_KA_RESTORER +#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1 /* bit-flags */ #define TARGET_SS_AUTODISARM (1U << 31) /* disable sas during sighandling */ diff --git a/linux-user/alpha/target_syscall.h b/linux-user/alpha/target_syscall.h index fd389422e31..03091bf0a82 100644 --- a/linux-user/alpha/target_syscall.h +++ b/linux-user/alpha/target_syscall.h @@ -44,200 +44,6 @@ struct target_pt_regs { #define UNAME_MACHINE "alpha" #define UNAME_MINIMUM_RELEASE "2.6.32" -#undef TARGET_EDEADLK -#define TARGET_EDEADLK 11 -#undef TARGET_EAGAIN -#define TARGET_EAGAIN 35 -#undef TARGET_EINPROGRESS -#define TARGET_EINPROGRESS 36 -#undef TARGET_EALREADY -#define TARGET_EALREADY 37 -#undef TARGET_ENOTSOCK -#define TARGET_ENOTSOCK 38 -#undef TARGET_EDESTADDRREQ -#define TARGET_EDESTADDRREQ 39 -#undef TARGET_EMSGSIZE -#define TARGET_EMSGSIZE 40 -#undef TARGET_EPROTOTYPE -#define TARGET_EPROTOTYPE 41 -#undef TARGET_ENOPROTOOPT -#define 
TARGET_ENOPROTOOPT 42 -#undef TARGET_EPROTONOSUPPORT -#define TARGET_EPROTONOSUPPORT 43 -#undef TARGET_ESOCKTNOSUPPORT -#define TARGET_ESOCKTNOSUPPORT 44 -#undef TARGET_EOPNOTSUPP -#define TARGET_EOPNOTSUPP 45 -#undef TARGET_EPFNOSUPPORT -#define TARGET_EPFNOSUPPORT 46 -#undef TARGET_EAFNOSUPPORT -#define TARGET_EAFNOSUPPORT 47 -#undef TARGET_EADDRINUSE -#define TARGET_EADDRINUSE 48 -#undef TARGET_EADDRNOTAVAIL -#define TARGET_EADDRNOTAVAIL 49 -#undef TARGET_ENETDOWN -#define TARGET_ENETDOWN 50 -#undef TARGET_ENETUNREACH -#define TARGET_ENETUNREACH 51 -#undef TARGET_ENETRESET -#define TARGET_ENETRESET 52 -#undef TARGET_ECONNABORTED -#define TARGET_ECONNABORTED 53 -#undef TARGET_ECONNRESET -#define TARGET_ECONNRESET 54 -#undef TARGET_ENOBUFS -#define TARGET_ENOBUFS 55 -#undef TARGET_EISCONN -#define TARGET_EISCONN 56 -#undef TARGET_ENOTCONN -#define TARGET_ENOTCONN 57 -#undef TARGET_ESHUTDOWN -#define TARGET_ESHUTDOWN 58 -#undef TARGET_ETOOMANYREFS -#define TARGET_ETOOMANYREFS 59 -#undef TARGET_ETIMEDOUT -#define TARGET_ETIMEDOUT 60 -#undef TARGET_ECONNREFUSED -#define TARGET_ECONNREFUSED 61 -#undef TARGET_ELOOP -#define TARGET_ELOOP 62 -#undef TARGET_ENAMETOOLONG -#define TARGET_ENAMETOOLONG 63 -#undef TARGET_EHOSTDOWN -#define TARGET_EHOSTDOWN 64 -#undef TARGET_EHOSTUNREACH -#define TARGET_EHOSTUNREACH 65 -#undef TARGET_ENOTEMPTY -#define TARGET_ENOTEMPTY 66 -// Unused 67 -#undef TARGET_EUSERS -#define TARGET_EUSERS 68 -#undef TARGET_EDQUOT -#define TARGET_EDQUOT 69 -#undef TARGET_ESTALE -#define TARGET_ESTALE 70 -#undef TARGET_EREMOTE -#define TARGET_EREMOTE 71 -// Unused 72-76 -#undef TARGET_ENOLCK -#define TARGET_ENOLCK 77 -#undef TARGET_ENOSYS -#define TARGET_ENOSYS 78 -// Unused 79 -#undef TARGET_ENOMSG -#define TARGET_ENOMSG 80 -#undef TARGET_EIDRM -#define TARGET_EIDRM 81 -#undef TARGET_ENOSR -#define TARGET_ENOSR 82 -#undef TARGET_ETIME -#define TARGET_ETIME 83 -#undef TARGET_EBADMSG -#define TARGET_EBADMSG 84 -#undef TARGET_EPROTO -#define TARGET_EPROTO 85 -#undef TARGET_ENODATA -#define TARGET_ENODATA 86 -#undef TARGET_ENOSTR -#define TARGET_ENOSTR 87 -#undef TARGET_ECHRNG -#define TARGET_ECHRNG 88 -#undef TARGET_EL2NSYNC -#define TARGET_EL2NSYNC 89 -#undef TARGET_EL3HLT -#define TARGET_EL3HLT 90 -#undef TARGET_EL3RST -#define TARGET_EL3RST 91 -#undef TARGET_ENOPKG -#define TARGET_ENOPKG 92 -#undef TARGET_ELNRNG -#define TARGET_ELNRNG 93 -#undef TARGET_EUNATCH -#define TARGET_EUNATCH 94 -#undef TARGET_ENOCSI -#define TARGET_ENOCSI 95 -#undef TARGET_EL2HLT -#define TARGET_EL2HLT 96 -#undef TARGET_EBADE -#define TARGET_EBADE 97 -#undef TARGET_EBADR -#define TARGET_EBADR 98 -#undef TARGET_EXFULL -#define TARGET_EXFULL 99 -#undef TARGET_ENOANO -#define TARGET_ENOANO 100 -#undef TARGET_EBADRQC -#define TARGET_EBADRQC 101 -#undef TARGET_EBADSLT -#define TARGET_EBADSLT 102 -// Unused 103 -#undef TARGET_EBFONT -#define TARGET_EBFONT 104 -#undef TARGET_ENONET -#define TARGET_ENONET 105 -#undef TARGET_ENOLINK -#define TARGET_ENOLINK 106 -#undef TARGET_EADV -#define TARGET_EADV 107 -#undef TARGET_ESRMNT -#define TARGET_ESRMNT 108 -#undef TARGET_ECOMM -#define TARGET_ECOMM 109 -#undef TARGET_EMULTIHOP -#define TARGET_EMULTIHOP 110 -#undef TARGET_EDOTDOT -#define TARGET_EDOTDOT 111 -#undef TARGET_EOVERFLOW -#define TARGET_EOVERFLOW 112 -#undef TARGET_ENOTUNIQ -#define TARGET_ENOTUNIQ 113 -#undef TARGET_EBADFD -#define TARGET_EBADFD 114 -#undef TARGET_EREMCHG -#define TARGET_EREMCHG 115 -#undef TARGET_EILSEQ -#define TARGET_EILSEQ 116 - -// Same as default 117-121 - -#undef TARGET_ELIBACC 
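
These per-target errno definitions exist because linux-user must translate the host's errno numbering into the guest's before returning from an emulated syscall; on Alpha most networking and SysV error numbers differ from the generic table. A minimal illustration of the idea (not QEMU's actual conversion code), using values from the new header above, is sketched here.

    /*
     * Minimal illustration only: map a few host errno values to Alpha's
     * numbering. A syscall wrapper would then return -host_to_alpha_errno(errno)
     * to the guest. Values are taken from the new target_errno_defs.h above.
     */
    #include <errno.h>

    static int host_to_alpha_errno(int host_errno)
    {
        switch (host_errno) {
        case EAGAIN:      return 35;   /* TARGET_EAGAIN on alpha */
        case EINPROGRESS: return 36;   /* TARGET_EINPROGRESS */
        case EDQUOT:      return 69;   /* TARGET_EDQUOT */
        case ENOSYS:      return 78;   /* TARGET_ENOSYS */
        default:          return host_errno;  /* many values happen to coincide */
        }
    }
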
-#define TARGET_ELIBACC 122 -#undef TARGET_ELIBBAD -#define TARGET_ELIBBAD 123 -#undef TARGET_ELIBSCN -#define TARGET_ELIBSCN 124 -#undef TARGET_ELIBMAX -#define TARGET_ELIBMAX 125 -#undef TARGET_ELIBEXEC -#define TARGET_ELIBEXEC 126 -#undef TARGET_ERESTART -#define TARGET_ERESTART 127 -#undef TARGET_ESTRPIPE -#define TARGET_ESTRPIPE 128 -#undef TARGET_ENOMEDIUM -#define TARGET_ENOMEDIUM 129 -#undef TARGET_EMEDIUMTYPE -#define TARGET_EMEDIUMTYPE 130 -#undef TARGET_ECANCELED -#define TARGET_ECANCELED 131 -#undef TARGET_ENOKEY -#define TARGET_ENOKEY 132 -#undef TARGET_EKEYEXPIRED -#define TARGET_EKEYEXPIRED 133 -#undef TARGET_EKEYREVOKED -#define TARGET_EKEYREVOKED 134 -#undef TARGET_EKEYREJECTED -#define TARGET_EKEYREJECTED 135 -#undef TARGET_EOWNERDEAD -#define TARGET_EOWNERDEAD 136 -#undef TARGET_ENOTRECOVERABLE -#define TARGET_ENOTRECOVERABLE 137 -#undef TARGET_ERFKILL -#define TARGET_ERFKILL 138 -#undef TARGET_EHWPOISON -#define TARGET_EHWPOISON 139 - // For sys_osf_getsysinfo #define TARGET_GSI_UACPROC 8 #define TARGET_GSI_IEEE_FP_CONTROL 45 diff --git a/linux-user/arm/cpu_loop.c b/linux-user/arm/cpu_loop.c index 989d03cd89d..01cb6eb534e 100644 --- a/linux-user/arm/cpu_loop.c +++ b/linux-user/arm/cpu_loop.c @@ -20,9 +20,12 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu.h" +#include "user-internals.h" #include "elf.h" #include "cpu_loop-common.h" +#include "signal-common.h" #include "semihosting/common-semi.h" +#include "target/arm/syndrome.h" #define get_user_code_u32(x, gaddr, env) \ ({ abi_long __r = get_user_u32((x), (gaddr)); \ @@ -92,7 +95,6 @@ static void arm_kernel_cmpxchg64_helper(CPUARMState *env) { uint64_t oldval, newval, val; uint32_t addr, cpsr; - target_siginfo_t info; /* Based on the 32 bit code in do_kernel_trap */ @@ -141,12 +143,9 @@ static void arm_kernel_cmpxchg64_helper(CPUARMState *env) end_exclusive(); /* We get the PC of the entry address - which is as good as anything, on a real kernel what you get depends on which mode it uses. */ - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; /* XXX: check env->error_code */ - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = env->exception.vaddress; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, + env->exception.vaddress); } /* Handle a jump to the kernel code page. */ @@ -224,13 +223,66 @@ static bool insn_is_linux_bkpt(uint32_t opcode, bool is_thumb) } } +static bool emulate_arm_fpa11(CPUARMState *env, uint32_t opcode) +{ + TaskState *ts = env_cpu(env)->opaque; + int rc = EmulateAll(opcode, &ts->fpa, env); + int raise, enabled; + + if (rc == 0) { + /* Illegal instruction */ + return false; + } + if (rc > 0) { + /* Everything ok. */ + env->regs[15] += 4; + return true; + } + + /* FP exception */ + rc = -rc; + raise = 0; + + /* Translate softfloat flags to FPSR flags */ + if (rc & float_flag_invalid) { + raise |= BIT_IOC; + } + if (rc & float_flag_divbyzero) { + raise |= BIT_DZC; + } + if (rc & float_flag_overflow) { + raise |= BIT_OFC; + } + if (rc & float_flag_underflow) { + raise |= BIT_UFC; + } + if (rc & float_flag_inexact) { + raise |= BIT_IXC; + } + + /* Accumulate unenabled exceptions */ + enabled = ts->fpa.fpsr >> 16; + ts->fpa.fpsr |= raise & ~enabled; + + if (raise & enabled) { + /* + * The kernel's nwfpe emulator does not pass a real si_code. 
+ * It merely uses send_sig(SIGFPE, current, 1), which results in + * __send_signal() filling out SI_KERNEL with pid and uid 0 (under + * the "SEND_SIG_PRIV" case). That's what our force_sig() does. + */ + force_sig(TARGET_SIGFPE); + } else { + env->regs[15] += 4; + } + return true; +} + void cpu_loop(CPUARMState *env) { CPUState *cs = env_cpu(env); - int trapnr; + int trapnr, si_signo, si_code; unsigned int n, insn; - target_siginfo_t info; - uint32_t addr; abi_ulong ret; for(;;) { @@ -244,9 +296,7 @@ void cpu_loop(CPUARMState *env) case EXCP_NOCP: case EXCP_INVSTATE: { - TaskState *ts = cs->opaque; uint32_t opcode; - int rc; /* we handle the FPU emulation here, as Linux */ /* we get the opcode */ @@ -263,64 +313,12 @@ void cpu_loop(CPUARMState *env) goto excp_debug; } - rc = EmulateAll(opcode, &ts->fpa, env); - if (rc == 0) { /* illegal instruction */ - info.si_signo = TARGET_SIGILL; - info.si_errno = 0; - info.si_code = TARGET_ILL_ILLOPN; - info._sifields._sigfault._addr = env->regs[15]; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - } else if (rc < 0) { /* FP exception */ - int arm_fpe=0; - - /* translate softfloat flags to FPSR flags */ - if (-rc & float_flag_invalid) - arm_fpe |= BIT_IOC; - if (-rc & float_flag_divbyzero) - arm_fpe |= BIT_DZC; - if (-rc & float_flag_overflow) - arm_fpe |= BIT_OFC; - if (-rc & float_flag_underflow) - arm_fpe |= BIT_UFC; - if (-rc & float_flag_inexact) - arm_fpe |= BIT_IXC; - - FPSR fpsr = ts->fpa.fpsr; - //printf("fpsr 0x%x, arm_fpe 0x%x\n",fpsr,arm_fpe); - - if (fpsr & (arm_fpe << 16)) { /* exception enabled? */ - info.si_signo = TARGET_SIGFPE; - info.si_errno = 0; - - /* ordered by priority, least first */ - if (arm_fpe & BIT_IXC) info.si_code = TARGET_FPE_FLTRES; - if (arm_fpe & BIT_UFC) info.si_code = TARGET_FPE_FLTUND; - if (arm_fpe & BIT_OFC) info.si_code = TARGET_FPE_FLTOVF; - if (arm_fpe & BIT_DZC) info.si_code = TARGET_FPE_FLTDIV; - if (arm_fpe & BIT_IOC) info.si_code = TARGET_FPE_FLTINV; - - info._sifields._sigfault._addr = env->regs[15]; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - } else { - env->regs[15] += 4; - } - - /* accumulate unenabled exceptions */ - if ((!(fpsr & BIT_IXE)) && (arm_fpe & BIT_IXC)) - fpsr |= BIT_IXC; - if ((!(fpsr & BIT_UFE)) && (arm_fpe & BIT_UFC)) - fpsr |= BIT_UFC; - if ((!(fpsr & BIT_OFE)) && (arm_fpe & BIT_OFC)) - fpsr |= BIT_OFC; - if ((!(fpsr & BIT_DZE)) && (arm_fpe & BIT_DZC)) - fpsr |= BIT_DZC; - if ((!(fpsr & BIT_IOE)) && (arm_fpe & BIT_IOC)) - fpsr |= BIT_IOC; - ts->fpa.fpsr=fpsr; - } else { /* everything OK */ - /* increment PC */ - env->regs[15] += 4; + if (!env->thumb && emulate_arm_fpa11(env, opcode)) { + break; } + + force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, + env->regs[15]); } break; case EXCP_SWI: @@ -388,18 +386,14 @@ void cpu_loop(CPUARMState *env) * Otherwise SIGILL. This includes any SWI with * immediate not originally 0x9fxxxx, because * of the earlier XOR. + * Like the real kernel, we report the addr of the + * SWI in the siginfo si_addr but leave the PC + * pointing at the insn after the SWI. */ - info.si_signo = TARGET_SIGILL; - info.si_errno = 0; - info.si_code = TARGET_ILL_ILLTRP; - info._sifields._sigfault._addr = env->regs[15]; - if (env->thumb) { - info._sifields._sigfault._addr -= 2; - } else { - info._sifields._sigfault._addr -= 4; - } - queue_signal(env, info.si_signo, - QEMU_SI_FAULT, &info); + abi_ulong faultaddr = env->regs[15]; + faultaddr -= env->thumb ? 
2 : 4;
+ force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLTRP,
+ faultaddr);
}
break;
}
@@ -430,23 +424,35 @@ void cpu_loop(CPUARMState *env)
break;
case EXCP_PREFETCH_ABORT:
case EXCP_DATA_ABORT:
- addr = env->exception.vaddress;
- {
- info.si_signo = TARGET_SIGSEGV;
- info.si_errno = 0;
- /* XXX: check env->error_code */
- info.si_code = TARGET_SEGV_MAPERR;
- info._sifields._sigfault._addr = addr;
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+ /* For user-only we don't set TTBCR_EAE, so look at the FSR. */
+ switch (env->exception.fsr & 0x1f) {
+ case 0x1: /* Alignment */
+ si_signo = TARGET_SIGBUS;
+ si_code = TARGET_BUS_ADRALN;
+ break;
+ case 0x3: /* Access flag fault, level 1 */
+ case 0x6: /* Access flag fault, level 2 */
+ case 0x9: /* Domain fault, level 1 */
+ case 0xb: /* Domain fault, level 2 */
+ case 0xd: /* Permission fault, level 1 */
+ case 0xf: /* Permission fault, level 2 */
+ si_signo = TARGET_SIGSEGV;
+ si_code = TARGET_SEGV_ACCERR;
+ break;
+ case 0x5: /* Translation fault, level 1 */
+ case 0x7: /* Translation fault, level 2 */
+ si_signo = TARGET_SIGSEGV;
+ si_code = TARGET_SEGV_MAPERR;
+ break;
+ default:
+ g_assert_not_reached();
}
+ force_sig_fault(si_signo, si_code, env->exception.vaddress);
break;
case EXCP_DEBUG:
case EXCP_BKPT:
excp_debug:
- info.si_signo = TARGET_SIGTRAP;
- info.si_errno = 0;
- info.si_code = TARGET_TRAP_BRKPT;
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+ force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->regs[15]);
break;
case EXCP_KERNEL_TRAP:
if (do_kernel_trap(env))
diff --git a/linux-user/arm/nwfpe/fpa11.c b/linux-user/arm/nwfpe/fpa11.c
index f6f8163eab1..9a93610d245 100644
--- a/linux-user/arm/nwfpe/fpa11.c
+++ b/linux-user/arm/nwfpe/fpa11.c
@@ -97,37 +97,38 @@ void SetRoundingMode(const unsigned int opcode)
void SetRoundingPrecision(const unsigned int opcode)
{
- int rounding_precision;
- FPA11 *fpa11 = GET_FPA11();
+ FloatX80RoundPrec rounding_precision;
+ FPA11 *fpa11 = GET_FPA11();
#ifdef MAINTAIN_FPCR
- fpa11->fpcr &= ~MASK_ROUNDING_PRECISION;
+ fpa11->fpcr &= ~MASK_ROUNDING_PRECISION;
#endif
- switch (opcode & MASK_ROUNDING_PRECISION)
- {
- case ROUND_SINGLE:
- rounding_precision = 32;
+ switch (opcode & MASK_ROUNDING_PRECISION) {
+ case ROUND_SINGLE:
+ rounding_precision = floatx80_precision_s;
#ifdef MAINTAIN_FPCR
- fpa11->fpcr |= ROUND_SINGLE;
+ fpa11->fpcr |= ROUND_SINGLE;
#endif
- break;
+ break;
- case ROUND_DOUBLE:
- rounding_precision = 64;
+ case ROUND_DOUBLE:
+ rounding_precision = floatx80_precision_d;
#ifdef MAINTAIN_FPCR
- fpa11->fpcr |= ROUND_DOUBLE;
+ fpa11->fpcr |= ROUND_DOUBLE;
#endif
- break;
+ break;
- case ROUND_EXTENDED:
- rounding_precision = 80;
+ case ROUND_EXTENDED:
+ rounding_precision = floatx80_precision_x;
#ifdef MAINTAIN_FPCR
- fpa11->fpcr |= ROUND_EXTENDED;
+ fpa11->fpcr |= ROUND_EXTENDED;
#endif
- break;
+ break;
- default: rounding_precision = 80;
- }
- set_floatx80_rounding_precision(rounding_precision, &fpa11->fp_status);
+ default:
+ rounding_precision = floatx80_precision_x;
+ break;
+ }
+ set_floatx80_rounding_precision(rounding_precision, &fpa11->fp_status);
}
/* Emulate the instruction in the opcode.
*/ diff --git a/linux-user/arm/signal.c b/linux-user/arm/signal.c index f21d1535e4d..df9f8e8eb20 100644 --- a/linux-user/arm/signal.c +++ b/linux-user/arm/signal.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" #include "qemu.h" +#include "user-internals.h" #include "signal-common.h" #include "linux-user/trace.h" @@ -45,15 +46,7 @@ struct target_sigcontext { abi_ulong fault_address; }; -struct target_ucontext_v1 { - abi_ulong tuc_flags; - abi_ulong tuc_link; - target_stack_t tuc_stack; - struct target_sigcontext tuc_mcontext; - target_sigset_t tuc_sigmask; /* mask last for extensibility */ -}; - -struct target_ucontext_v2 { +struct target_ucontext { abi_ulong tuc_flags; abi_ulong tuc_link; target_stack_t tuc_stack; @@ -97,68 +90,30 @@ struct target_iwmmxt_sigframe { #define TARGET_VFP_MAGIC 0x56465001 #define TARGET_IWMMXT_MAGIC 0x12ef842a -struct sigframe_v1 -{ - struct target_sigcontext sc; - abi_ulong extramask[TARGET_NSIG_WORDS-1]; - abi_ulong retcode[4]; -}; - -struct sigframe_v2 -{ - struct target_ucontext_v2 uc; - abi_ulong retcode[4]; -}; - -struct rt_sigframe_v1 +struct sigframe { - abi_ulong pinfo; - abi_ulong puc; - struct target_siginfo info; - struct target_ucontext_v1 uc; + struct target_ucontext uc; abi_ulong retcode[4]; }; -struct rt_sigframe_v2 +struct rt_sigframe { struct target_siginfo info; - struct target_ucontext_v2 uc; - abi_ulong retcode[4]; + struct sigframe sig; }; -/* - * For ARM syscalls, we encode the syscall number into the instruction. - */ -#define SWI_SYS_SIGRETURN (0xef000000|(TARGET_NR_sigreturn + ARM_SYSCALL_BASE)) -#define SWI_SYS_RT_SIGRETURN (0xef000000|(TARGET_NR_rt_sigreturn + ARM_SYSCALL_BASE)) +static abi_ptr sigreturn_fdpic_tramp; /* - * For Thumb syscalls, we pass the syscall number via r7. We therefore - * need two 16-bit instructions. - */ -#define SWI_THUMB_SIGRETURN (0xdf00 << 16 | 0x2700 | (TARGET_NR_sigreturn)) -#define SWI_THUMB_RT_SIGRETURN (0xdf00 << 16 | 0x2700 | (TARGET_NR_rt_sigreturn)) - -static const abi_ulong retcodes[4] = { - SWI_SYS_SIGRETURN, SWI_THUMB_SIGRETURN, - SWI_SYS_RT_SIGRETURN, SWI_THUMB_RT_SIGRETURN -}; - -/* - * Stub needed to make sure the FD register (r9) contains the right - * value. + * Up to 3 words of 'retcode' in the sigframe are code, + * with retcode[3] being used by fdpic for the function descriptor. + * This code is not actually executed, but is retained for ABI compat. + * + * We will create a table of 8 retcode variants in the sigtramp page. + * Let each table entry use 3 words. 
*/ -static const unsigned long sigreturn_fdpic_codes[3] = { - 0xe59fc004, /* ldr r12, [pc, #4] to read function descriptor */ - 0xe59c9004, /* ldr r9, [r12, #4] to setup GOT */ - 0xe59cf000 /* ldr pc, [r12] to jump into restorer */ -}; - -static const unsigned long sigreturn_fdpic_thumb_codes[3] = { - 0xc008f8df, /* ldr r12, [pc, #8] to read function descriptor */ - 0x9004f8dc, /* ldr r9, [r12, #4] to setup GOT */ - 0xf000f8dc /* ldr pc, [r12] to jump into restorer */ -}; +#define RETCODE_WORDS 3 +#define RETCODE_BYTES (RETCODE_WORDS * 4) static inline int valid_user_regs(CPUARMState *regs) { @@ -206,15 +161,15 @@ get_sigframe(struct target_sigaction *ka, CPUARMState *regs, int framesize) } static int -setup_return(CPUARMState *env, struct target_sigaction *ka, - abi_ulong *rc, abi_ulong frame_addr, int usig, abi_ulong rc_addr) +setup_return(CPUARMState *env, struct target_sigaction *ka, int usig, + struct sigframe *frame, abi_ulong sp_addr) { abi_ulong handler = 0; abi_ulong handler_fdpic_GOT = 0; abi_ulong retcode; - - int thumb; + int thumb, retcode_idx; int is_fdpic = info_is_fdpic(((TaskState *)thread_cpu->opaque)->info); + bool copy_retcode; if (is_fdpic) { /* In FDPIC mode, ka->_sa_handler points to a function @@ -231,6 +186,7 @@ setup_return(CPUARMState *env, struct target_sigaction *ka, } thumb = handler & 1; + retcode_idx = thumb + (ka->sa_flags & TARGET_SA_SIGINFO ? 2 : 0); uint32_t cpsr = cpsr_read(env); @@ -248,53 +204,37 @@ setup_return(CPUARMState *env, struct target_sigaction *ka, if (ka->sa_flags & TARGET_SA_RESTORER) { if (is_fdpic) { - /* For FDPIC we ensure that the restorer is called with a - * correct r9 value. For that we need to write code on - * the stack that sets r9 and jumps back to restorer - * value. - */ - if (thumb) { - __put_user(sigreturn_fdpic_thumb_codes[0], rc); - __put_user(sigreturn_fdpic_thumb_codes[1], rc + 1); - __put_user(sigreturn_fdpic_thumb_codes[2], rc + 2); - __put_user((abi_ulong)ka->sa_restorer, rc + 3); - } else { - __put_user(sigreturn_fdpic_codes[0], rc); - __put_user(sigreturn_fdpic_codes[1], rc + 1); - __put_user(sigreturn_fdpic_codes[2], rc + 2); - __put_user((abi_ulong)ka->sa_restorer, rc + 3); - } - - retcode = rc_addr + thumb; + __put_user((abi_ulong)ka->sa_restorer, &frame->retcode[3]); + retcode = (sigreturn_fdpic_tramp + + retcode_idx * RETCODE_BYTES + thumb); + copy_retcode = true; } else { retcode = ka->sa_restorer; + copy_retcode = false; } } else { - unsigned int idx = thumb; - - if (ka->sa_flags & TARGET_SA_SIGINFO) { - idx += 2; - } - - __put_user(retcodes[idx], rc); + retcode = default_sigreturn + retcode_idx * RETCODE_BYTES + thumb; + copy_retcode = true; + } - retcode = rc_addr + thumb; + /* Copy the code to the stack slot for ABI compatibility. */ + if (copy_retcode) { + memcpy(frame->retcode, g2h_untagged(retcode & ~1), RETCODE_BYTES); } env->regs[0] = usig; if (is_fdpic) { env->regs[9] = handler_fdpic_GOT; } - env->regs[13] = frame_addr; + env->regs[13] = sp_addr; env->regs[14] = retcode; env->regs[15] = handler & (thumb ? 
~1 : ~3); cpsr_write(env, cpsr, CPSR_IT | CPSR_T | CPSR_E, CPSRWriteByInstr); - arm_rebuild_hflags(env); return 0; } -static abi_ulong *setup_sigframe_v2_vfp(abi_ulong *regspace, CPUARMState *env) +static abi_ulong *setup_sigframe_vfp(abi_ulong *regspace, CPUARMState *env) { int i; struct target_vfp_sigframe *vfpframe; @@ -311,8 +251,7 @@ static abi_ulong *setup_sigframe_v2_vfp(abi_ulong *regspace, CPUARMState *env) return (abi_ulong*)(vfpframe+1); } -static abi_ulong *setup_sigframe_v2_iwmmxt(abi_ulong *regspace, - CPUARMState *env) +static abi_ulong *setup_sigframe_iwmmxt(abi_ulong *regspace, CPUARMState *env) { int i; struct target_iwmmxt_sigframe *iwmmxtframe; @@ -331,15 +270,15 @@ static abi_ulong *setup_sigframe_v2_iwmmxt(abi_ulong *regspace, return (abi_ulong*)(iwmmxtframe+1); } -static void setup_sigframe_v2(struct target_ucontext_v2 *uc, - target_sigset_t *set, CPUARMState *env) +static void setup_sigframe(struct target_ucontext *uc, + target_sigset_t *set, CPUARMState *env) { struct target_sigaltstack stack; int i; abi_ulong *regspace; /* Clear all the bits of the ucontext we don't use. */ - memset(uc, 0, offsetof(struct target_ucontext_v2, tuc_mcontext)); + memset(uc, 0, offsetof(struct target_ucontext, tuc_mcontext)); memset(&stack, 0, sizeof(stack)); target_save_altstack(&stack, env); @@ -349,10 +288,10 @@ static void setup_sigframe_v2(struct target_ucontext_v2 *uc, /* Save coprocessor signal frame. */ regspace = uc->tuc_regspace; if (cpu_isar_feature(aa32_vfp_simd, env_archcpu(env))) { - regspace = setup_sigframe_v2_vfp(regspace, env); + regspace = setup_sigframe_vfp(regspace, env); } if (arm_feature(env, ARM_FEATURE_IWMMXT)) { - regspace = setup_sigframe_v2_iwmmxt(regspace, env); + regspace = setup_sigframe_iwmmxt(regspace, env); } /* Write terminating magic word */ @@ -363,114 +302,23 @@ static void setup_sigframe_v2(struct target_ucontext_v2 *uc, } } -/* compare linux/arch/arm/kernel/signal.c:setup_frame() */ -static void setup_frame_v1(int usig, struct target_sigaction *ka, - target_sigset_t *set, CPUARMState *regs) -{ - struct sigframe_v1 *frame; - abi_ulong frame_addr = get_sigframe(ka, regs, sizeof(*frame)); - int i; - - trace_user_setup_frame(regs, frame_addr); - if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) { - goto sigsegv; - } - - setup_sigcontext(&frame->sc, regs, set->sig[0]); - - for(i = 1; i < TARGET_NSIG_WORDS; i++) { - __put_user(set->sig[i], &frame->extramask[i - 1]); - } - - if (setup_return(regs, ka, frame->retcode, frame_addr, usig, - frame_addr + offsetof(struct sigframe_v1, retcode))) { - goto sigsegv; - } - - unlock_user_struct(frame, frame_addr, 1); - return; -sigsegv: - unlock_user_struct(frame, frame_addr, 1); - force_sigsegv(usig); -} - -static void setup_frame_v2(int usig, struct target_sigaction *ka, - target_sigset_t *set, CPUARMState *regs) -{ - struct sigframe_v2 *frame; - abi_ulong frame_addr = get_sigframe(ka, regs, sizeof(*frame)); - - trace_user_setup_frame(regs, frame_addr); - if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) { - goto sigsegv; - } - - setup_sigframe_v2(&frame->uc, set, regs); - - if (setup_return(regs, ka, frame->retcode, frame_addr, usig, - frame_addr + offsetof(struct sigframe_v2, retcode))) { - goto sigsegv; - } - - unlock_user_struct(frame, frame_addr, 1); - return; -sigsegv: - unlock_user_struct(frame, frame_addr, 1); - force_sigsegv(usig); -} - void setup_frame(int usig, struct target_sigaction *ka, target_sigset_t *set, CPUARMState *regs) { - if (get_osversion() >= 0x020612) { - 
setup_frame_v2(usig, ka, set, regs); - } else { - setup_frame_v1(usig, ka, set, regs); - } -} - -/* compare linux/arch/arm/kernel/signal.c:setup_rt_frame() */ -static void setup_rt_frame_v1(int usig, struct target_sigaction *ka, - target_siginfo_t *info, - target_sigset_t *set, CPUARMState *env) -{ - struct rt_sigframe_v1 *frame; - abi_ulong frame_addr = get_sigframe(ka, env, sizeof(*frame)); - struct target_sigaltstack stack; - int i; - abi_ulong info_addr, uc_addr; + struct sigframe *frame; + abi_ulong frame_addr = get_sigframe(ka, regs, sizeof(*frame)); - trace_user_setup_rt_frame(env, frame_addr); + trace_user_setup_frame(regs, frame_addr); if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) { goto sigsegv; } - info_addr = frame_addr + offsetof(struct rt_sigframe_v1, info); - __put_user(info_addr, &frame->pinfo); - uc_addr = frame_addr + offsetof(struct rt_sigframe_v1, uc); - __put_user(uc_addr, &frame->puc); - tswap_siginfo(&frame->info, info); - - /* Clear all the bits of the ucontext we don't use. */ - memset(&frame->uc, 0, offsetof(struct target_ucontext_v1, tuc_mcontext)); - - memset(&stack, 0, sizeof(stack)); - target_save_altstack(&stack, env); - memcpy(&frame->uc.tuc_stack, &stack, sizeof(stack)); - - setup_sigcontext(&frame->uc.tuc_mcontext, env, set->sig[0]); - for(i = 0; i < TARGET_NSIG_WORDS; i++) { - __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]); - } + setup_sigframe(&frame->uc, set, regs); - if (setup_return(env, ka, frame->retcode, frame_addr, usig, - frame_addr + offsetof(struct rt_sigframe_v1, retcode))) { + if (setup_return(regs, ka, usig, frame, frame_addr)) { goto sigsegv; } - env->regs[1] = info_addr; - env->regs[2] = uc_addr; - unlock_user_struct(frame, frame_addr, 1); return; sigsegv: @@ -478,11 +326,11 @@ static void setup_rt_frame_v1(int usig, struct target_sigaction *ka, force_sigsegv(usig); } -static void setup_rt_frame_v2(int usig, struct target_sigaction *ka, - target_siginfo_t *info, - target_sigset_t *set, CPUARMState *env) +void setup_rt_frame(int usig, struct target_sigaction *ka, + target_siginfo_t *info, + target_sigset_t *set, CPUARMState *env) { - struct rt_sigframe_v2 *frame; + struct rt_sigframe *frame; abi_ulong frame_addr = get_sigframe(ka, env, sizeof(*frame)); abi_ulong info_addr, uc_addr; @@ -491,14 +339,13 @@ static void setup_rt_frame_v2(int usig, struct target_sigaction *ka, goto sigsegv; } - info_addr = frame_addr + offsetof(struct rt_sigframe_v2, info); - uc_addr = frame_addr + offsetof(struct rt_sigframe_v2, uc); + info_addr = frame_addr + offsetof(struct rt_sigframe, info); + uc_addr = frame_addr + offsetof(struct rt_sigframe, sig.uc); tswap_siginfo(&frame->info, info); - setup_sigframe_v2(&frame->uc, set, env); + setup_sigframe(&frame->sig.uc, set, env); - if (setup_return(env, ka, frame->retcode, frame_addr, usig, - frame_addr + offsetof(struct rt_sigframe_v2, retcode))) { + if (setup_return(env, ka, usig, &frame->sig, frame_addr)) { goto sigsegv; } @@ -512,17 +359,6 @@ static void setup_rt_frame_v2(int usig, struct target_sigaction *ka, force_sigsegv(usig); } -void setup_rt_frame(int usig, struct target_sigaction *ka, - target_siginfo_t *info, - target_sigset_t *set, CPUARMState *env) -{ - if (get_osversion() >= 0x020612) { - setup_rt_frame_v2(usig, ka, info, set, env); - } else { - setup_rt_frame_v1(usig, ka, info, set, env); - } -} - static int restore_sigcontext(CPUARMState *env, struct target_sigcontext *sc) { @@ -547,62 +383,13 @@ restore_sigcontext(CPUARMState *env, struct target_sigcontext *sc) 
__get_user(env->regs[15], &sc->arm_pc); __get_user(cpsr, &sc->arm_cpsr); cpsr_write(env, cpsr, CPSR_USER | CPSR_EXEC, CPSRWriteByInstr); - arm_rebuild_hflags(env); err |= !valid_user_regs(env); return err; } -static long do_sigreturn_v1(CPUARMState *env) -{ - abi_ulong frame_addr; - struct sigframe_v1 *frame = NULL; - target_sigset_t set; - sigset_t host_set; - int i; - - /* - * Since we stacked the signal on a 64-bit boundary, - * then 'sp' should be word aligned here. If it's - * not, then the user is trying to mess with us. - */ - frame_addr = env->regs[13]; - trace_user_do_sigreturn(env, frame_addr); - if (frame_addr & 7) { - goto badframe; - } - - if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) { - goto badframe; - } - - __get_user(set.sig[0], &frame->sc.oldmask); - for(i = 1; i < TARGET_NSIG_WORDS; i++) { - __get_user(set.sig[i], &frame->extramask[i - 1]); - } - - target_to_host_sigset_internal(&host_set, &set); - set_sigmask(&host_set); - - if (restore_sigcontext(env, &frame->sc)) { - goto badframe; - } - -#if 0 - /* Send SIGTRAP if we're single-stepping */ - if (ptrace_cancel_bpt(current)) - send_sig(SIGTRAP, current, 1); -#endif - unlock_user_struct(frame, frame_addr, 0); - return -TARGET_QEMU_ESIGRETURN; - -badframe: - force_sig(TARGET_SIGSEGV); - return -TARGET_QEMU_ESIGRETURN; -} - -static abi_ulong *restore_sigframe_v2_vfp(CPUARMState *env, abi_ulong *regspace) +static abi_ulong *restore_sigframe_vfp(CPUARMState *env, abi_ulong *regspace) { int i; abi_ulong magic, sz; @@ -632,8 +419,8 @@ static abi_ulong *restore_sigframe_v2_vfp(CPUARMState *env, abi_ulong *regspace) return (abi_ulong*)(vfpframe + 1); } -static abi_ulong *restore_sigframe_v2_iwmmxt(CPUARMState *env, - abi_ulong *regspace) +static abi_ulong *restore_sigframe_iwmmxt(CPUARMState *env, + abi_ulong *regspace) { int i; abi_ulong magic, sz; @@ -657,9 +444,9 @@ static abi_ulong *restore_sigframe_v2_iwmmxt(CPUARMState *env, return (abi_ulong*)(iwmmxtframe + 1); } -static int do_sigframe_return_v2(CPUARMState *env, - target_ulong context_addr, - struct target_ucontext_v2 *uc) +static int do_sigframe_return(CPUARMState *env, + target_ulong context_addr, + struct target_ucontext *uc) { sigset_t host_set; abi_ulong *regspace; @@ -667,29 +454,26 @@ static int do_sigframe_return_v2(CPUARMState *env, target_to_host_sigset(&host_set, &uc->tuc_sigmask); set_sigmask(&host_set); - if (restore_sigcontext(env, &uc->tuc_mcontext)) + if (restore_sigcontext(env, &uc->tuc_mcontext)) { return 1; + } /* Restore coprocessor signal frame */ regspace = uc->tuc_regspace; if (cpu_isar_feature(aa32_vfp_simd, env_archcpu(env))) { - regspace = restore_sigframe_v2_vfp(env, regspace); + regspace = restore_sigframe_vfp(env, regspace); if (!regspace) { return 1; } } if (arm_feature(env, ARM_FEATURE_IWMMXT)) { - regspace = restore_sigframe_v2_iwmmxt(env, regspace); + regspace = restore_sigframe_iwmmxt(env, regspace); if (!regspace) { return 1; } } - if (do_sigaltstack(context_addr - + offsetof(struct target_ucontext_v2, tuc_stack), - 0, get_sp_from_cpustate(env)) == -EFAULT) { - return 1; - } + target_restore_altstack(&uc->tuc_stack, env); #if 0 /* Send SIGTRAP if we're single-stepping */ @@ -700,10 +484,10 @@ static int do_sigframe_return_v2(CPUARMState *env, return 0; } -static long do_sigreturn_v2(CPUARMState *env) +long do_sigreturn(CPUARMState *env) { abi_ulong frame_addr; - struct sigframe_v2 *frame = NULL; + struct sigframe *frame = NULL; /* * Since we stacked the signal on a 64-bit boundary, @@ -720,10 +504,9 @@ static long 
do_sigreturn_v2(CPUARMState *env) goto badframe; } - if (do_sigframe_return_v2(env, - frame_addr - + offsetof(struct sigframe_v2, uc), - &frame->uc)) { + if (do_sigframe_return(env, + frame_addr + offsetof(struct sigframe, uc), + &frame->uc)) { goto badframe; } @@ -736,20 +519,10 @@ static long do_sigreturn_v2(CPUARMState *env) return -TARGET_QEMU_ESIGRETURN; } -long do_sigreturn(CPUARMState *env) -{ - if (get_osversion() >= 0x020612) { - return do_sigreturn_v2(env); - } else { - return do_sigreturn_v1(env); - } -} - -static long do_rt_sigreturn_v1(CPUARMState *env) +long do_rt_sigreturn(CPUARMState *env) { abi_ulong frame_addr; - struct rt_sigframe_v1 *frame = NULL; - sigset_t host_set; + struct rt_sigframe *frame = NULL; /* * Since we stacked the signal on a 64-bit boundary, @@ -766,21 +539,12 @@ static long do_rt_sigreturn_v1(CPUARMState *env) goto badframe; } - target_to_host_sigset(&host_set, &frame->uc.tuc_sigmask); - set_sigmask(&host_set); - - if (restore_sigcontext(env, &frame->uc.tuc_mcontext)) { + if (do_sigframe_return(env, + frame_addr + offsetof(struct rt_sigframe, sig.uc), + &frame->sig.uc)) { goto badframe; } - if (do_sigaltstack(frame_addr + offsetof(struct rt_sigframe_v1, uc.tuc_stack), 0, get_sp_from_cpustate(env)) == -EFAULT) - goto badframe; - -#if 0 - /* Send SIGTRAP if we're single-stepping */ - if (ptrace_cancel_bpt(current)) - send_sig(SIGTRAP, current, 1); -#endif unlock_user_struct(frame, frame_addr, 0); return -TARGET_QEMU_ESIGRETURN; @@ -790,47 +554,77 @@ static long do_rt_sigreturn_v1(CPUARMState *env) return -TARGET_QEMU_ESIGRETURN; } -static long do_rt_sigreturn_v2(CPUARMState *env) -{ - abi_ulong frame_addr; - struct rt_sigframe_v2 *frame = NULL; +/* + * EABI syscalls pass the number via r7. + * Note that the kernel still adds the OABI syscall number to the trap, + * presumably for backward ABI compatibility with unwinders. + */ +#define ARM_MOV_R7_IMM(X) (0xe3a07000 | (X)) +#define ARM_SWI_SYS(X) (0xef000000 | (X) | ARM_SYSCALL_BASE) - /* - * Since we stacked the signal on a 64-bit boundary, - * then 'sp' should be word aligned here. If it's - * not, then the user is trying to mess with us. - */ - frame_addr = env->regs[13]; - trace_user_do_rt_sigreturn(env, frame_addr); - if (frame_addr & 7) { - goto badframe; - } +#define THUMB_MOVS_R7_IMM(X) (0x2700 | (X)) +#define THUMB_SWI_SYS 0xdf00 - if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) { - goto badframe; - } +static void write_arm_sigreturn(uint32_t *rc, int syscall) +{ + __put_user(ARM_MOV_R7_IMM(syscall), rc); + __put_user(ARM_SWI_SYS(syscall), rc + 1); + /* Wrote 8 of 12 bytes */ +} - if (do_sigframe_return_v2(env, - frame_addr - + offsetof(struct rt_sigframe_v2, uc), - &frame->uc)) { - goto badframe; - } +static void write_thm_sigreturn(uint32_t *rc, int syscall) +{ + __put_user(THUMB_SWI_SYS << 16 | THUMB_MOVS_R7_IMM(syscall), rc); + /* Wrote 4 of 12 bytes */ +} - unlock_user_struct(frame, frame_addr, 0); - return -TARGET_QEMU_ESIGRETURN; +/* + * Stub needed to make sure the FD register (r9) contains the right value. + * Use the same instruction sequence as the kernel. 
+ */ +static void write_arm_fdpic_sigreturn(uint32_t *rc, int ofs) +{ + assert(ofs <= 0xfff); + __put_user(0xe59d3000 | ofs, rc + 0); /* ldr r3, [sp, #ofs] */ + __put_user(0xe8930908, rc + 1); /* ldm r3, { r3, r9 } */ + __put_user(0xe12fff13, rc + 2); /* bx r3 */ + /* Wrote 12 of 12 bytes */ +} -badframe: - unlock_user_struct(frame, frame_addr, 0); - force_sig(TARGET_SIGSEGV); - return -TARGET_QEMU_ESIGRETURN; +static void write_thm_fdpic_sigreturn(void *vrc, int ofs) +{ + uint16_t *rc = vrc; + + assert((ofs & ~0x3fc) == 0); + __put_user(0x9b00 | (ofs >> 2), rc + 0); /* ldr r3, [sp, #ofs] */ + __put_user(0xcb0c, rc + 1); /* ldm r3, { r2, r3 } */ + __put_user(0x4699, rc + 2); /* mov r9, r3 */ + __put_user(0x4710, rc + 3); /* bx r2 */ + /* Wrote 8 of 12 bytes */ } -long do_rt_sigreturn(CPUARMState *env) +void setup_sigtramp(abi_ulong sigtramp_page) { - if (get_osversion() >= 0x020612) { - return do_rt_sigreturn_v2(env); - } else { - return do_rt_sigreturn_v1(env); - } + uint32_t total_size = 8 * RETCODE_BYTES; + uint32_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, total_size, 0); + + assert(tramp != NULL); + + default_sigreturn = sigtramp_page; + write_arm_sigreturn(&tramp[0 * RETCODE_WORDS], TARGET_NR_sigreturn); + write_thm_sigreturn(&tramp[1 * RETCODE_WORDS], TARGET_NR_sigreturn); + write_arm_sigreturn(&tramp[2 * RETCODE_WORDS], TARGET_NR_rt_sigreturn); + write_thm_sigreturn(&tramp[3 * RETCODE_WORDS], TARGET_NR_rt_sigreturn); + + sigreturn_fdpic_tramp = sigtramp_page + 4 * RETCODE_BYTES; + write_arm_fdpic_sigreturn(tramp + 4 * RETCODE_WORDS, + offsetof(struct sigframe, retcode[3])); + write_thm_fdpic_sigreturn(tramp + 5 * RETCODE_WORDS, + offsetof(struct sigframe, retcode[3])); + write_arm_fdpic_sigreturn(tramp + 6 * RETCODE_WORDS, + offsetof(struct rt_sigframe, sig.retcode[3])); + write_thm_fdpic_sigreturn(tramp + 7 * RETCODE_WORDS, + offsetof(struct rt_sigframe, sig.retcode[3])); + + unlock_user(tramp, sigtramp_page, total_size); } diff --git a/linux-user/arm/syscall.tbl b/linux-user/arm/syscall.tbl index 171077cbf41..28e03b5fec0 100644 --- a/linux-user/arm/syscall.tbl +++ b/linux-user/arm/syscall.tbl @@ -453,3 +453,10 @@ 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr +# 443 reserved for quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/linux-user/arm/target_errno_defs.h b/linux-user/arm/target_errno_defs.h new file mode 100644 index 00000000000..fd843732384 --- /dev/null +++ b/linux-user/arm/target_errno_defs.h @@ -0,0 +1,7 @@ +#ifndef ARM_TARGET_ERRNO_DEFS_H +#define ARM_TARGET_ERRNO_DEFS_H + +/* Target uses generic errno */ +#include "../generic/target_errno_defs.h" + +#endif diff --git a/linux-user/arm/target_signal.h b/linux-user/arm/target_signal.h index 0998dd6dfa7..1e7fb0cecbd 100644 --- a/linux-user/arm/target_signal.h +++ b/linux-user/arm/target_signal.h @@ -22,4 +22,6 @@ typedef struct target_sigaltstack { #include "../generic/signal.h" #define TARGET_ARCH_HAS_SETUP_FRAME +#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1 + #endif /* ARM_TARGET_SIGNAL_H */ diff --git a/linux-user/cris/cpu_loop.c b/linux-user/cris/cpu_loop.c index 334edddd1e2..0d5d268609a 100644 --- a/linux-user/cris/cpu_loop.c +++ b/linux-user/cris/cpu_loop.c @@ -20,7 
+20,9 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu.h" +#include "user-internals.h" #include "cpu_loop-common.h" +#include "signal-common.h" void cpu_loop(CPUCRISState *env) { @@ -35,16 +37,6 @@ void cpu_loop(CPUCRISState *env) process_queued_cpu_work(cs); switch (trapnr) { - case 0xaa: - { - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - /* XXX: check env->error_code */ - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = env->pregs[PR_EDA]; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - } - break; case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ break; diff --git a/linux-user/cris/signal.c b/linux-user/cris/signal.c index 1e02194377b..7f6aca934e1 100644 --- a/linux-user/cris/signal.c +++ b/linux-user/cris/signal.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" #include "qemu.h" +#include "user-internals.h" #include "signal-common.h" #include "linux-user/trace.h" @@ -96,6 +97,14 @@ static abi_ulong get_sigframe(CPUCRISState *env, int framesize) return sp - framesize; } +static void setup_sigreturn(uint16_t *retcode) +{ + /* This is movu.w __NR_sigreturn, r9; break 13; */ + __put_user(0x9c5f, retcode + 0); + __put_user(TARGET_NR_sigreturn, retcode + 1); + __put_user(0xe93d, retcode + 2); +} + void setup_frame(int sig, struct target_sigaction *ka, target_sigset_t *set, CPUCRISState *env) { @@ -111,14 +120,8 @@ void setup_frame(int sig, struct target_sigaction *ka, /* * The CRIS signal return trampoline. A real linux/CRIS kernel doesn't * use this trampoline anymore but it sets it up for GDB. - * In QEMU, using the trampoline simplifies things a bit so we use it. - * - * This is movu.w __NR_sigreturn, r9; break 13; */ - __put_user(0x9c5f, frame->retcode+0); - __put_user(TARGET_NR_sigreturn, - frame->retcode + 1); - __put_user(0xe93d, frame->retcode + 2); + setup_sigreturn(frame->retcode); /* Save the mask. */ __put_user(set->sig[0], &frame->sc.oldmask); @@ -134,7 +137,7 @@ void setup_frame(int sig, struct target_sigaction *ka, env->regs[10] = sig; env->pc = (unsigned long) ka->_sa_handler; /* Link SRP so the guest returns through the trampoline. 
*/ - env->pregs[PR_SRP] = frame_addr + offsetof(typeof(*frame), retcode); + env->pregs[PR_SRP] = default_sigreturn; unlock_user_struct(frame, frame_addr, 1); return; @@ -186,3 +189,14 @@ long do_rt_sigreturn(CPUCRISState *env) qemu_log_mask(LOG_UNIMP, "do_rt_sigreturn: not implemented\n"); return -TARGET_ENOSYS; } + +void setup_sigtramp(abi_ulong sigtramp_page) +{ + uint16_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 6, 0); + assert(tramp != NULL); + + default_sigreturn = sigtramp_page; + setup_sigreturn(tramp); + + unlock_user(tramp, sigtramp_page, 6); +} diff --git a/linux-user/cris/target_errno_defs.h b/linux-user/cris/target_errno_defs.h new file mode 100644 index 00000000000..1cf43b17a50 --- /dev/null +++ b/linux-user/cris/target_errno_defs.h @@ -0,0 +1,7 @@ +#ifndef CRIS_TARGET_ERRNO_DEFS_H +#define CRIS_TARGET_ERRNO_DEFS_H + +/* Target uses generic errno */ +#include "../generic/target_errno_defs.h" + +#endif diff --git a/linux-user/cris/target_signal.h b/linux-user/cris/target_signal.h index 495a1428968..83a51555074 100644 --- a/linux-user/cris/target_signal.h +++ b/linux-user/cris/target_signal.h @@ -22,4 +22,6 @@ typedef struct target_sigaltstack { #include "../generic/signal.h" #define TARGET_ARCH_HAS_SETUP_FRAME +#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1 + #endif /* CRIS_TARGET_SIGNAL_H */ diff --git a/linux-user/elfload.c b/linux-user/elfload.c index c325230ee90..ed9eb54bfd1 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -6,6 +6,10 @@ #include #include "qemu.h" +#include "user-internals.h" +#include "signal-common.h" +#include "loader.h" +#include "user-mmap.h" #include "disas/disas.h" #include "qemu/bitops.h" #include "qemu/path.h" @@ -14,6 +18,7 @@ #include "qemu/units.h" #include "qemu/selfmap.h" #include "qapi/error.h" +#include "target_signal.h" #ifdef _ARCH_PPC64 #undef ARCH_DLINFO @@ -172,33 +177,33 @@ typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; */ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *env) { - (*regs)[0] = env->regs[15]; - (*regs)[1] = env->regs[14]; - (*regs)[2] = env->regs[13]; - (*regs)[3] = env->regs[12]; - (*regs)[4] = env->regs[R_EBP]; - (*regs)[5] = env->regs[R_EBX]; - (*regs)[6] = env->regs[11]; - (*regs)[7] = env->regs[10]; - (*regs)[8] = env->regs[9]; - (*regs)[9] = env->regs[8]; - (*regs)[10] = env->regs[R_EAX]; - (*regs)[11] = env->regs[R_ECX]; - (*regs)[12] = env->regs[R_EDX]; - (*regs)[13] = env->regs[R_ESI]; - (*regs)[14] = env->regs[R_EDI]; - (*regs)[15] = env->regs[R_EAX]; /* XXX */ - (*regs)[16] = env->eip; - (*regs)[17] = env->segs[R_CS].selector & 0xffff; - (*regs)[18] = env->eflags; - (*regs)[19] = env->regs[R_ESP]; - (*regs)[20] = env->segs[R_SS].selector & 0xffff; - (*regs)[21] = env->segs[R_FS].selector & 0xffff; - (*regs)[22] = env->segs[R_GS].selector & 0xffff; - (*regs)[23] = env->segs[R_DS].selector & 0xffff; - (*regs)[24] = env->segs[R_ES].selector & 0xffff; - (*regs)[25] = env->segs[R_FS].selector & 0xffff; - (*regs)[26] = env->segs[R_GS].selector & 0xffff; + (*regs)[0] = tswapreg(env->regs[15]); + (*regs)[1] = tswapreg(env->regs[14]); + (*regs)[2] = tswapreg(env->regs[13]); + (*regs)[3] = tswapreg(env->regs[12]); + (*regs)[4] = tswapreg(env->regs[R_EBP]); + (*regs)[5] = tswapreg(env->regs[R_EBX]); + (*regs)[6] = tswapreg(env->regs[11]); + (*regs)[7] = tswapreg(env->regs[10]); + (*regs)[8] = tswapreg(env->regs[9]); + (*regs)[9] = tswapreg(env->regs[8]); + (*regs)[10] = tswapreg(env->regs[R_EAX]); + (*regs)[11] = tswapreg(env->regs[R_ECX]); + (*regs)[12] = 
tswapreg(env->regs[R_EDX]); + (*regs)[13] = tswapreg(env->regs[R_ESI]); + (*regs)[14] = tswapreg(env->regs[R_EDI]); + (*regs)[15] = tswapreg(env->regs[R_EAX]); /* XXX */ + (*regs)[16] = tswapreg(env->eip); + (*regs)[17] = tswapreg(env->segs[R_CS].selector & 0xffff); + (*regs)[18] = tswapreg(env->eflags); + (*regs)[19] = tswapreg(env->regs[R_ESP]); + (*regs)[20] = tswapreg(env->segs[R_SS].selector & 0xffff); + (*regs)[21] = tswapreg(env->segs[R_FS].selector & 0xffff); + (*regs)[22] = tswapreg(env->segs[R_GS].selector & 0xffff); + (*regs)[23] = tswapreg(env->segs[R_DS].selector & 0xffff); + (*regs)[24] = tswapreg(env->segs[R_ES].selector & 0xffff); + (*regs)[25] = tswapreg(env->segs[R_FS].selector & 0xffff); + (*regs)[26] = tswapreg(env->segs[R_GS].selector & 0xffff); } #else @@ -244,23 +249,23 @@ typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; */ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *env) { - (*regs)[0] = env->regs[R_EBX]; - (*regs)[1] = env->regs[R_ECX]; - (*regs)[2] = env->regs[R_EDX]; - (*regs)[3] = env->regs[R_ESI]; - (*regs)[4] = env->regs[R_EDI]; - (*regs)[5] = env->regs[R_EBP]; - (*regs)[6] = env->regs[R_EAX]; - (*regs)[7] = env->segs[R_DS].selector & 0xffff; - (*regs)[8] = env->segs[R_ES].selector & 0xffff; - (*regs)[9] = env->segs[R_FS].selector & 0xffff; - (*regs)[10] = env->segs[R_GS].selector & 0xffff; - (*regs)[11] = env->regs[R_EAX]; /* XXX */ - (*regs)[12] = env->eip; - (*regs)[13] = env->segs[R_CS].selector & 0xffff; - (*regs)[14] = env->eflags; - (*regs)[15] = env->regs[R_ESP]; - (*regs)[16] = env->segs[R_SS].selector & 0xffff; + (*regs)[0] = tswapreg(env->regs[R_EBX]); + (*regs)[1] = tswapreg(env->regs[R_ECX]); + (*regs)[2] = tswapreg(env->regs[R_EDX]); + (*regs)[3] = tswapreg(env->regs[R_ESI]); + (*regs)[4] = tswapreg(env->regs[R_EDI]); + (*regs)[5] = tswapreg(env->regs[R_EBP]); + (*regs)[6] = tswapreg(env->regs[R_EAX]); + (*regs)[7] = tswapreg(env->segs[R_DS].selector & 0xffff); + (*regs)[8] = tswapreg(env->segs[R_ES].selector & 0xffff); + (*regs)[9] = tswapreg(env->segs[R_FS].selector & 0xffff); + (*regs)[10] = tswapreg(env->segs[R_GS].selector & 0xffff); + (*regs)[11] = tswapreg(env->regs[R_EAX]); /* XXX */ + (*regs)[12] = tswapreg(env->eip); + (*regs)[13] = tswapreg(env->segs[R_CS].selector & 0xffff); + (*regs)[14] = tswapreg(env->eflags); + (*regs)[15] = tswapreg(env->regs[R_ESP]); + (*regs)[16] = tswapreg(env->segs[R_SS].selector & 0xffff); } #endif @@ -586,6 +591,16 @@ enum { ARM_HWCAP2_A64_SVESM4 = 1 << 6, ARM_HWCAP2_A64_FLAGM2 = 1 << 7, ARM_HWCAP2_A64_FRINT = 1 << 8, + ARM_HWCAP2_A64_SVEI8MM = 1 << 9, + ARM_HWCAP2_A64_SVEF32MM = 1 << 10, + ARM_HWCAP2_A64_SVEF64MM = 1 << 11, + ARM_HWCAP2_A64_SVEBF16 = 1 << 12, + ARM_HWCAP2_A64_I8MM = 1 << 13, + ARM_HWCAP2_A64_BF16 = 1 << 14, + ARM_HWCAP2_A64_DGH = 1 << 15, + ARM_HWCAP2_A64_RNG = 1 << 16, + ARM_HWCAP2_A64_BTI = 1 << 17, + ARM_HWCAP2_A64_MTE = 1 << 18, }; #define ELF_HWCAP get_elf_hwcap() @@ -638,8 +653,23 @@ static uint32_t get_elf_hwcap2(void) uint32_t hwcaps = 0; GET_FEATURE_ID(aa64_dcpodp, ARM_HWCAP2_A64_DCPODP); + GET_FEATURE_ID(aa64_sve2, ARM_HWCAP2_A64_SVE2); + GET_FEATURE_ID(aa64_sve2_aes, ARM_HWCAP2_A64_SVEAES); + GET_FEATURE_ID(aa64_sve2_pmull128, ARM_HWCAP2_A64_SVEPMULL); + GET_FEATURE_ID(aa64_sve2_bitperm, ARM_HWCAP2_A64_SVEBITPERM); + GET_FEATURE_ID(aa64_sve2_sha3, ARM_HWCAP2_A64_SVESHA3); + GET_FEATURE_ID(aa64_sve2_sm4, ARM_HWCAP2_A64_SVESM4); GET_FEATURE_ID(aa64_condm_5, ARM_HWCAP2_A64_FLAGM2); GET_FEATURE_ID(aa64_frint, ARM_HWCAP2_A64_FRINT); + 
GET_FEATURE_ID(aa64_sve_i8mm, ARM_HWCAP2_A64_SVEI8MM); + GET_FEATURE_ID(aa64_sve_f32mm, ARM_HWCAP2_A64_SVEF32MM); + GET_FEATURE_ID(aa64_sve_f64mm, ARM_HWCAP2_A64_SVEF64MM); + GET_FEATURE_ID(aa64_sve_bf16, ARM_HWCAP2_A64_SVEBF16); + GET_FEATURE_ID(aa64_i8mm, ARM_HWCAP2_A64_I8MM); + GET_FEATURE_ID(aa64_bf16, ARM_HWCAP2_A64_BF16); + GET_FEATURE_ID(aa64_rndr, ARM_HWCAP2_A64_RNG); + GET_FEATURE_ID(aa64_bti, ARM_HWCAP2_A64_BTI); + GET_FEATURE_ID(aa64_mte, ARM_HWCAP2_A64_MTE); return hwcaps; } @@ -663,48 +693,25 @@ static uint32_t get_elf_hwcap2(void) #define ELF_CLASS ELFCLASS64 #define ELF_ARCH EM_SPARCV9 - -#define STACK_BIAS 2047 - -static inline void init_thread(struct target_pt_regs *regs, - struct image_info *infop) -{ -#ifndef TARGET_ABI32 - regs->tstate = 0; -#endif - regs->pc = infop->entry; - regs->npc = regs->pc + 4; - regs->y = 0; -#ifdef TARGET_ABI32 - regs->u_regs[14] = infop->start_stack - 16 * 4; -#else - if (personality(infop->personality) == PER_LINUX32) - regs->u_regs[14] = infop->start_stack - 16 * 4; - else - regs->u_regs[14] = infop->start_stack - 16 * 8 - STACK_BIAS; -#endif -} - #else #define ELF_START_MMAP 0x80000000 #define ELF_HWCAP (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | HWCAP_SPARC_SWAP \ | HWCAP_SPARC_MULDIV) - #define ELF_CLASS ELFCLASS32 #define ELF_ARCH EM_SPARC +#endif /* TARGET_SPARC64 */ static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) { - regs->psr = 0; + /* Note that target_cpu_copy_regs does not read psr/tstate. */ regs->pc = infop->entry; regs->npc = regs->pc + 4; regs->y = 0; - regs->u_regs[14] = infop->start_stack - 16 * 4; + regs->u_regs[14] = (infop->start_stack - 16 * sizeof(abi_ulong) + - TARGET_STACK_BIAS); } - -#endif -#endif +#endif /* TARGET_SPARC */ #ifdef TARGET_PPC @@ -828,7 +835,7 @@ static uint32_t get_elf_hwcap2(void) PPC2_ISA207S), QEMU_PPC_FEATURE2_ARCH_2_07 | QEMU_PPC_FEATURE2_VEC_CRYPTO); GET_FEATURE2(PPC2_ISA300, QEMU_PPC_FEATURE2_ARCH_3_00 | - QEMU_PPC_FEATURE2_DARN); + QEMU_PPC_FEATURE2_DARN | QEMU_PPC_FEATURE2_HAS_IEEE128); #undef GET_FEATURE #undef GET_FEATURE2 @@ -894,7 +901,7 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *en (*regs)[33] = tswapreg(env->msr); (*regs)[35] = tswapreg(env->ctr); (*regs)[36] = tswapreg(env->lr); - (*regs)[37] = tswapreg(env->xer); + (*regs)[37] = tswapreg(cpu_read_xer(env)); for (i = 0; i < ARRAY_SIZE(env->crf); i++) { ccr |= env->crf[i] << (32 - ((i + 1) * 4)); @@ -918,8 +925,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *en #endif #define ELF_ARCH EM_MIPS -#define elf_check_arch(x) ((x) == EM_MIPS || (x) == EM_NANOMIPS) - #ifdef TARGET_ABI_MIPSN32 #define elf_check_abi(x) ((x) & EF_MIPS_ABI2) #else @@ -1374,6 +1379,7 @@ static uint32_t get_elf_hwcap(void) hwcap |= HWCAP_S390_ETF3EH; } GET_FEATURE(S390_FEAT_VECTOR, HWCAP_S390_VXRS); + GET_FEATURE(S390_FEAT_VECTOR_ENH, HWCAP_S390_VXRS_EXT); return hwcap; } @@ -1385,6 +1391,39 @@ static inline void init_thread(struct target_pt_regs *regs, struct image_info *i regs->gprs[15] = infop->start_stack; } +/* See linux kernel: arch/s390/include/uapi/asm/ptrace.h (s390_regs). 
*/ +#define ELF_NREG 27 +typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; + +enum { + TARGET_REG_PSWM = 0, + TARGET_REG_PSWA = 1, + TARGET_REG_GPRS = 2, + TARGET_REG_ARS = 18, + TARGET_REG_ORIG_R2 = 26, +}; + +static void elf_core_copy_regs(target_elf_gregset_t *regs, + const CPUS390XState *env) +{ + int i; + uint32_t *aregs; + + (*regs)[TARGET_REG_PSWM] = tswapreg(env->psw.mask); + (*regs)[TARGET_REG_PSWA] = tswapreg(env->psw.addr); + for (i = 0; i < 16; i++) { + (*regs)[TARGET_REG_GPRS + i] = tswapreg(env->regs[i]); + } + aregs = (uint32_t *)&((*regs)[TARGET_REG_ARS]); + for (i = 0; i < 16; i++) { + aregs[i] = tswap32(env->aregs[i]); + } + (*regs)[TARGET_REG_ORIG_R2] = 0; +} + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + #endif /* TARGET_S390X */ #ifdef TARGET_RISCV @@ -1398,6 +1437,19 @@ static inline void init_thread(struct target_pt_regs *regs, struct image_info *i #define ELF_CLASS ELFCLASS64 #endif +#define ELF_HWCAP get_elf_hwcap() + +static uint32_t get_elf_hwcap(void) +{ +#define MISA_BIT(EXT) (1 << (EXT - 'A')) + RISCVCPU *cpu = RISCV_CPU(thread_cpu); + uint32_t mask = MISA_BIT('I') | MISA_BIT('M') | MISA_BIT('A') + | MISA_BIT('F') | MISA_BIT('D') | MISA_BIT('C'); + + return cpu->env.misa_ext & mask; +#undef MISA_BIT +} + static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) { @@ -3197,6 +3249,22 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) #endif } + /* + * TODO: load a vdso, which would also contain the signal trampolines. + * Otherwise, allocate a private page to hold them. + */ + if (TARGET_ARCH_HAS_SIGTRAMP_PAGE) { + abi_long tramp_page = target_mmap(0, TARGET_PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (tramp_page == -1) { + return -errno; + } + + setup_sigtramp(tramp_page); + target_mprotect(tramp_page, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC); + } + bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &elf_ex, info, (elf_interpreter ? &interp_info : NULL)); info->start_stack = bprm->p; @@ -3386,7 +3454,6 @@ static size_t note_size(const struct memelfnote *); static void free_note_info(struct elf_note_info *); static int fill_note_info(struct elf_note_info *, long, const CPUArchState *); static void fill_thread_info(struct elf_note_info *, const CPUArchState *); -static int core_dump_filename(const TaskState *, char *, size_t); static int dump_write(int, const void *, size_t); static int write_note(struct memelfnote *, int); @@ -3629,11 +3696,12 @@ static int fill_psinfo(struct target_elf_prpsinfo *psinfo, const TaskState *ts) (void) memset(psinfo, 0, sizeof (*psinfo)); - len = ts->info->arg_end - ts->info->arg_start; + len = ts->info->env_strings - ts->info->arg_strings; if (len >= ELF_PRARGSZ) len = ELF_PRARGSZ - 1; - if (copy_from_user(&psinfo->pr_psargs, ts->info->arg_start, len)) + if (copy_from_user(&psinfo->pr_psargs, ts->info->arg_strings, len)) { return -EFAULT; + } for (i = 0; i < len; i++) if (psinfo->pr_psargs[i] == 0) psinfo->pr_psargs[i] = ' '; @@ -3685,32 +3753,16 @@ static void fill_auxv_note(struct memelfnote *note, const TaskState *ts) * for the name: * qemu__-